diff --git a/src/ontogpt/evaluation/evaluation_engine.py b/src/ontogpt/evaluation/evaluation_engine.py index 993af4f92..7dafddc2b 100644 --- a/src/ontogpt/evaluation/evaluation_engine.py +++ b/src/ontogpt/evaluation/evaluation_engine.py @@ -48,7 +48,10 @@ def from_set( def label(x): for labeler in labelers: - lbl = labeler.label(x) + if type(labeler) == list: + lbl = labeler[0].label(x) + else: + lbl = labeler.label(x) if lbl: return f"{x} {lbl}" return x diff --git a/src/ontogpt/evaluation/maxo/__init__.py b/src/ontogpt/evaluation/maxo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/ontogpt/evaluation/maxo/eval_maxo.py b/src/ontogpt/evaluation/maxo/eval_maxo.py new file mode 100644 index 000000000..b4709141a --- /dev/null +++ b/src/ontogpt/evaluation/maxo/eval_maxo.py @@ -0,0 +1,319 @@ +""" +MAXO annotation evaluation. + +Annotations in the Medical Action Ontology (MAXO) +may be between a MAXO term and a phenotype, +denoted with a Human Phenotype Ontology (HP) term, +or between a MAXO term and a disease, +denoted with a Mondo Disease Ontology (MONDO) term. + +See: +https://github.com/monarch-initiative/maxo-annotations/ + +This evaluation uses the maxo template to extract +annotations from the text provided in each test case +(see the test_cases directory) and compares them to +the annotations accompanying the case. The existing +annotations are from the set of manual annotations +in the above repository +(see +https://github.com/monarch-initiative/maxo-annotations/ +blob/master/annotations/maxo-annotations.tsv) +though the annotations are not considered disease-specific +for the purposes of this evaluation. + +Note that this evaluation does not consider predicates, +only extraction of any relation involving a grounded MAXO +action and a grounded HP phenotype. 
+ +""" + +import logging +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from random import shuffle +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple + +import yaml +from oaklib import BasicOntologyInterface, get_adapter +from pydantic import BaseModel + +from ontogpt.engines.knowledge_engine import chunk_text +from ontogpt.engines.spires_engine import SPIRESEngine +from ontogpt.evaluation.evaluation_engine import SimilarityScore, SPIRESEvaluationEngine +from ontogpt.templates.maxo import MaxoAnnotations, ActionToSymptomRelationship, Publication + +THIS_DIR = Path(__file__).parent +DATABASE_DIR = Path(__file__).parent / "test_cases" + +# This does not do much now but can be expanded to +# additional predicate types +RMAP = {"TREATS": "TREATS"} + +logger = logging.getLogger(__name__) + + +class PredictionRE(BaseModel): + """A prediction for a relationship extraction task.""" + + test_object: Optional[MaxoAnnotations] = None + """Source of truth to evaluate against.""" + + true_positives: Optional[List[Tuple]] = None + num_true_positives: Optional[int] = None + false_positives: Optional[List[Tuple]] = None + num_false_positives: Optional[int] = None + false_negatives: Optional[List[Tuple]] = None + num_false_negatives: Optional[int] = None + scores: Optional[Dict[str, SimilarityScore]] = None + predicted_object: Optional[MaxoAnnotations] = None + named_entities: Optional[List[Any]] = None + + def calculate_scores(self, labelers: Optional[List[BasicOntologyInterface]] = None): + self.scores = {} + + def label(x): + if labelers: + for labeler in labelers: + if type(labeler) == list: + lbl = labeler[0].label(x) + else: + lbl = labeler.label(x) + if lbl: + return f"{x} {lbl}" + return x + + def all_objects(dm: Optional[MaxoAnnotations]): + if dm is not None: + # set conversion requires lists to be flattened + # and made generic, or they become invalid + dm_flat_triples = [] + for triple in dm.triples: + 
if triple.subject is not None and triple.object is not None: + if type(triple.object) == list: + flat_object = triple.object[0] + else: + flat_object = triple.object + triple_flat = { + "subject": triple.subject, + # "predicate": triple.predicate, + "object": flat_object, + } + + dm_flat_triples.append(triple_flat) + return list( + set(link["subject"] for link in dm_flat_triples) + | set(link["object"] for link in dm_flat_triples) + ) + else: + return list() + + def pairs(dm: MaxoAnnotations) -> Set: + if dm.triples is not None: + return set( + (label(link.subject), label(link.object[0])) + for link in dm.triples + if link.object is not None + ) + else: + return set() + + self.scores["similarity"] = SimilarityScore.from_set( + all_objects(self.test_object), + all_objects(self.predicted_object), + labelers=labelers, + ) + if self.predicted_object is not None: + pred_pairs = pairs(self.predicted_object) + else: + pred_pairs = set() + if self.test_object is not None: + test_pairs = pairs(self.test_object) + else: + test_pairs = set() + self.true_positives = list(pred_pairs.intersection(test_pairs)) + self.false_positives = list(pred_pairs.difference(test_pairs)) + self.false_negatives = list(test_pairs.difference(pred_pairs)) + self.num_false_negatives = len(self.false_negatives) + self.num_false_positives = len(self.false_positives) + self.num_true_positives = len(self.true_positives) + + +class EvaluationObjectSetRE(BaseModel): + """A result of predicting relation extractions.""" + + precision: float = 0 + recall: float = 0 + f1: float = 0 + + training: Optional[List[MaxoAnnotations]] = None + predictions: Optional[List[PredictionRE]] = None + test: Optional[List[MaxoAnnotations]] = None + + +@dataclass +class EvalMAXO(SPIRESEvaluationEngine): + subject_prefix = "MAXO" + object_prefix = "HP" + + def __post_init__(self): + self.extractor = SPIRESEngine(template="maxo", model=self.model) + + def load_test_cases(self) -> Iterable[MaxoAnnotations]: + return 
self.load_cases(DATABASE_DIR) + + # Load cases from YAML + # One-to-many relationships are parsed as one-to-one, as we + # may only match part of the set. + # They still need to be list members to validate, though. + def load_cases(self, path: Path) -> Iterable[MaxoAnnotations]: + logger.info(f"Loading {path}") + + triples_by_text = defaultdict(list) + + for casefile in path.glob("*.yaml"): + logger.info(f"Loading case {casefile}") + with open(casefile, "r") as file: + doc = yaml.safe_load(file) + input_text = doc["input_text"] + logger.debug(f"Text: {input_text}") + try: + for r in doc["extracted_object"]["action_to_symptom"]: + for object in r["object"]: + t = ActionToSymptomRelationship.model_validate( + { + "subject": f"{r['subject']}", + "predicate": RMAP[r["predicate"]], + "object": [object], + } + ) + triples_by_text[input_text].append(t) + except KeyError: # some of the test cases may only have other relations + logger.info(f"Ignored {casefile} - no Action to Symptom relations") + continue + i = 0 + for input_text, triples in triples_by_text.items(): + i = i + 1 + title = input_text[:40] + pub = Publication.model_validate( + { + "id": str(i), + "title": title, + "abstract": input_text, + } + ) + logger.debug(f"Triples: {len(triples)} for Title: {title}") + yield MaxoAnnotations.model_validate({"publication": pub, "triples": triples}) + + def eval(self) -> EvaluationObjectSetRE: + """Evaluate the ability to extract relations.""" + maxo_labeler = get_adapter("sqlite:obo:maxo") + hp_labeler = get_adapter("sqlite:obo:hp") + + if self.num_tests and isinstance(self.num_tests, int): + num_test = self.num_tests + else: + num_test = 1 + ke = self.extractor + docs = list(self.load_test_cases()) + shuffle(docs) + eos = EvaluationObjectSetRE( + test=docs[:num_test], + training=[], + predictions=[], + ) + n = 1 + for doc in eos.test: + logger.info(f"Iteration {n} of {num_test}") + n += 1 + logger.info(doc) + text = doc.publication.abstract + predicted_obj = None + 
named_entities: List[str] = [] # This stores the NEs for the whole document + ke.named_entities = [] # This stores the NEs the extractor knows about + + if self.chunking: + text_list = chunk_text(text) + else: + text_list = iter([text]) + + for chunked_text in text_list: + extraction = ke.extract_from_text(chunked_text) + + if extraction.extracted_object is not None: + # Process all multi-object triples to 1 to 1 triples + # so they may be more directly compared + for extracted_triple in extraction.extracted_object.action_to_symptom: + new_triple = extracted_triple + for object in extracted_triple.object: + new_triple.object = [object] + extraction.extracted_object.triples.append(new_triple) + + logger.info( + f"{len(extraction.extracted_object.triples)} triples from: {chunked_text}" + ) + if not predicted_obj and extraction.extracted_object is not None: + predicted_obj = extraction.extracted_object + else: + if predicted_obj is not None and extraction.extracted_object is not None: + predicted_obj.triples.extend(extraction.extracted_object.triples) + logger.info( + f"{len(predicted_obj.triples)} total triples, after concatenation" + ) + logger.debug(f"concatenated triples: {predicted_obj.triples}") + if extraction.named_entities is not None: + for entity in extraction.named_entities: + if entity not in named_entities: + named_entities.append(entity) + + def included(t: ActionToSymptomRelationship): + if not [var for var in (t.subject, t.object, t.predicate) if var is None]: + return ( + t + and t.subject + and t.object + and t.subject.startswith("MAXO:") + and t.object[0].startswith("HP:") + # and t.predicate.lower() == "treats" + ) + else: + return False + + if predicted_obj is not None: + predicted_obj.triples = [t for t in predicted_obj.triples if included(t)] + duplicate_triples = [] + unique_predicted_triples = [ + t + for t in predicted_obj.triples + if t not in duplicate_triples + and not duplicate_triples.append(t) # type: ignore + ] + predicted_obj.triples 
= unique_predicted_triples + logger.info(f"{len(predicted_obj.triples)} filtered triples") + pred = PredictionRE( + predicted_object=predicted_obj, test_object=doc, named_entities=named_entities + ) + named_entities.clear() + logger.info("PRED") + logger.info(yaml.dump(data=pred.model_dump())) + logger.info("Calc scores") + pred.calculate_scores(labelers=[maxo_labeler, hp_labeler]) + logger.info(yaml.dump(data=pred.model_dump())) + eos.predictions.append(pred) + self.calc_stats(eos) + return eos + + def calc_stats(self, eos: EvaluationObjectSetRE): + num_true_positives = sum(p.num_true_positives for p in eos.predictions) + num_false_positives = sum(p.num_false_positives for p in eos.predictions) + num_false_negatives = sum(p.num_false_negatives for p in eos.predictions) + if num_true_positives + num_false_positives == 0: + logger.warning("No true positives or false positives") + return + eos.precision = num_true_positives / (num_true_positives + num_false_positives) + eos.recall = num_true_positives / (num_true_positives + num_false_negatives) + if eos.precision + eos.recall == 0: + logger.warning("No precision or recall") + return + eos.f1 = 2 * (eos.precision * eos.recall) / (eos.precision + eos.recall) diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_12958596.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_12958596.yaml new file mode 100644 index 000000000..d1596ff3a --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_12958596.yaml @@ -0,0 +1,25 @@ +--- +input_text: >- + Activation-induced cytidine deaminase (AID) is a 'master molecule' in immunoglobulin (Ig) + class-switch recombination (CSR) and somatic hypermutation (SHM) generation, AID deficiencies are + associated with hyper-IgM phenotypes in humans and mice. 
We show here that recessive mutations of + the gene encoding uracil-DNA glycosylase (UNG) are associated with profound impairment in CSR at a + DNA precleavage step and with a partial disturbance of the SHM pattern in three patients with hyper-IgM + syndrome. Together with the finding that nuclear UNG expression was induced in activated B cells, + these data support a model of CSR and SHM in which AID deaminates cytosine into uracil in targeted DNA + (immunoglobulin switch or variable regions), followed by uracil removal by UNG. + +named_entities: + - id: MONDO:0011971 + label: Immunodeficiency With Hyper-igm, Type 5 + - id: MAXO:0001480 + label: immunoglobulin infusion therapy + - id: HP:0004315 + label: Decreased circulating IgG level + +extracted_object: + action_to_disease: + - subject: MAXO:0001480 + predicate: TREATS + object: + - MONDO:0011971 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301368.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301368.yaml new file mode 100644 index 000000000..3e22aba8a --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301368.yaml @@ -0,0 +1,25 @@ +--- +input_text: >- + "Saethre-Chotzen Syndrome + Management: Treatment of manifestations: Ongoing management by an established + craniofacial team which may include cranioplasty in the first year of life + and midface surgery in childhood as needed for dental malocclusion, + swallowing difficulties, and respiratory problems. If a cleft palate is + present, surgical repair usually follows cranioplasty. As needed: orthodontic + treatment and/or orthognathic surgery at the completion of facial growth; + developmental intervention; routine treatment of hearing loss; ophthalmologic + evaluation and, if ptosis is present, intervention to prevent amblyopia, with + surgical repair during early childhood as needed." 
+named_entities: + - id: MONDO:0007042 + label: Saethre-chotzen Syndrome + - id: MAXO:0000004 + label: surgical procedure + - id: HP:0000175 + label: cleft palate +extracted_object: + action_to_symptom: + - subject: MAXO:0000004 + predicate: TREATS + object: + - HP:0000175 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301450.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301450.yaml new file mode 100644 index 000000000..4f2517499 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301450.yaml @@ -0,0 +1,36 @@ +--- +input_text: >- + " + Alagille Syndrome + Table 5. + Treatment of Liver Manifestations in Individuals with Alagille Syndrome + Manifestation/Concern Treatment Considerations/Other + Pruritus & xanthomas Choloretic agents (ursodeoxycholic acid) & other + medications (cholestyramine, rifampin, naltrexone) Combination of + therapies often required; biliary diversion may be performed for severe + pruritus refractory to medical therapy. + End-stage liver disease Liver transplantation + Poor growth Optimized nutrition; replacement of fat-soluble vitamins + as needed Nasogastric feeds or gastrostomy tube may be required to + maintain caloric intake." 
+named_entities: + - id: MONDO:0016862 + label: Alagille Syndrome + - id: MAXO:0001135 + label: replacement of fat-soluble vitamins + - id: MAXO:0001175 + label: Liver transplantation + - id: HP:0001508 + label: Poor growth + - id: HP:0001399 + label: End-stage liver disease +extracted_object: + action_to_symptom: + - subject: MAXO:0001135 + predicate: TREATS + object: + - HP:0001508 + - subject: MAXO:0001175 + predicate: TREATS + object: + - HP:0001399 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301519.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301519.yaml new file mode 100644 index 000000000..5639bc4e6 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301519.yaml @@ -0,0 +1,49 @@ +--- +input_text: >- + + APC-associated polyposis conditions include (classic or attenuated) familial adenomatous polyposis (FAP) and gastric adenocarcinoma + and proximal polyposis of the stomach (GAPPS). + Resection of all colonic polyps larger than 5 mm found on colonic surveillance. There is an absolute indication for colectomy when + CRC is diagnosed or suspected, or when there are significant symptoms (e.g., bleeding, obstruction). Relative indications for colectomy + include presence of multiple adenomas larger than 10 mm that cannot be reasonably removed endoscopically, a significant increase in adenoma + number between surveillance exams, presence of adenomas with high-grade dysplasia, or inability to adequately survey the colon (e.g., due to + innumerable diminutive adenomas or limited access to or compliance with colonoscopy). Endoscopic or surgical removal of duodenal adenomas is + considered if polyps exhibit villous change or severe dysplasia, exceed 1 cm in diameter, or exhibit advanced stage using Spigelman scoring system. + Gastrectomy is considered if advanced gastric neoplasia is found on upper endoscopy. Osteomas may be removed for cosmetic reasons. 
+ Desmoid tumors may be surgically excised or treated with nonsteroidal anti-inflammatory drugs (NSAIDs), anti-estrogens, + cytotoxic chemotherapy, and/or radiation if at advanced stage. Standard treatment when needed for adrenal masses and thyroid carcinoma. + Several studies have shown that NSAIDs and erlotinib have caused regression of adenomas and decreased the polyp burden in individuals with FAP, + though there are currently no FDA-approved chemopreventive agents for FAP, given an unclear effect on subsequent cancer risk. + +named_entities: + - id: MONDO:0021056 + label: Adenomatous Polyposis Coli + - id: MAXO:0000014 + label: radiation therapy + - id: HP:0100245 + label: Desmoid tumors + - id: MAXO:0000646 + label: cancer chemotherapy + - id: MAXO:0000635 + label: anti-estrogen agent therapy + - id: MAXO:0000221 + label: nonsteroidal anti-inflammatory agent therapy + +extracted_object: + action_to_symptom: + - subject: MAXO:0000014 + predicate: TREATS + object: + - HP:0100245 + - subject: MAXO:0000646 + predicate: TREATS + object: + - HP:0100245 + - subject: MAXO:0000635 + predicate: TREATS + object: + - HP:0100245 + - subject: MAXO:0000221 + predicate: TREATS + object: + - HP:0100245 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301527.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301527.yaml new file mode 100644 index 000000000..857e34af6 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301527.yaml @@ -0,0 +1,35 @@ +--- +input_text: >- + " + Adult Refsum disease (ARD is associated with elevated plasma phytanic acid levels, late childhood-onset (or later) retinitis pigmentosa, + and variable combinations of anosmia, polyneuropathy, deafness, ataxia, and ichthyosis. + Plasmapheresis or lipid apheresis to decrease phytanic acid levels is used only for acute arrhythmias or extreme weakness. + Dietary restriction of phytanic acid intake helps resolve ichthyosis, sensory neuropathy, and ataxia. 
A high-calorie diet + and avoidance of fasting prevent mobilization of phytanic acid stored in adipose tissue into the plasma. Hypercaloric parenteral + infusions are required during periods of severe illness or postoperatively. Supportive treatment includes hydrating creams for + ichthyosis and drugs for cardiac arrhythmias and cardiomyopathy + " + +named_entities: + - id: MAXO:0010119 + label: dietary phytanic acid intake avoidance + - id: HP:0010571 + label: Elevated circulating phytanic acid concentration + - id: HP:0008064 + label: Ichthyosis + - id: MONDO:0009958 + label: Refsum Disease + +extracted_object: + action_to_disease: + - subject: MAXO:0010119 + predicate: TREATS + object: + - MONDO:0009958 + + action_to_symptom: + - subject: MAXO:0010119 + predicate: TREATS + object: + - HP:0008064 + - HP:0010571 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301572.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301572.yaml new file mode 100644 index 000000000..752634695 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301572.yaml @@ -0,0 +1,27 @@ +--- +input_text: >- + "Bloom syndrome (BSyn) is characterized by severe pre- and postnatal growth deficiency, immune abnormalities, sensitivity to sunlight, + insulin resistance, and a high risk for many cancers that occur at an early age. + Agents/circumstances to avoid: Sun exposure to the face and other exposed skin, particularly in infancy and early childhood, + should be avoided. Exposure to ionizing radiation should be minimized. Dose reductions and shortened courses of chemotherapy + when needed to avoid significant side effects and toxicity (including secondary malignancies). Alkylating agents and radiation + therapy are considered high risk and are avoided when possible in those with BSyn." 
+ +named_entities: + - id: MONDO:0008876 + label: Bloom Syndrome + - id: MAXO:0000054 + label: radiation exposure avoidance + - id: MAXO:0000055 + label: sunlight avoidance + +extracted_object: + action_to_disease: + - subject: MAXO:0000054 + predicate: PREVENTS + object: + - MONDO:0008876 + - subject: MAXO:0000055 + predicate: PREVENTS + object: + - MONDO:0008876 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301644.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301644.yaml new file mode 100644 index 000000000..4943c2513 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301644.yaml @@ -0,0 +1,49 @@ +--- +input_text: >- + "Spinocerebellar Ataxia Type 20 + Evaluations Following Initial Diagnosis + To establish the extent of disease in an individual diagnosed with + spinocerebellar ataxia type 20 (SCA20), the evaluations summarized in this + section (if not performed as part of the evaluation that led to the diagnosis) + are recommended: + Careful clinical and neurologic evaluation + Speech assessment + Consultation with a clinical geneticist and/or genetic counselor + Treatment of Manifestations + Affected persons should be followed by a neurologist with consultation from + physiatrists and physical and occupational therapists. + Although neither exercise nor physical therapy has been shown to stem the + progression of incoordination or muscle weakness, individuals should maintain + activity. Canes and walkers help prevent falls. Modification of the home with + such conveniences as grab bars, raised toilet seats, and ramps to accommodate + motorized chairs may be necessary. Speech therapy and communication devices + such as writing pads and computer-based devices may benefit those with + dysarthria or dysphonia. Weighted eating utensils and dressing hooks help + maintain a sense of independence. Weight control is important because obesity + can exacerbate difficulties with ambulation and mobility. 
When dysphagia + becomes troublesome, videofluoroscopic swallow evaluation can identify the + consistency of food least likely to trigger aspiration." +named_entities: + - id: MONDO:0012098 + label: spinocerebellar ataxia type 20 + - id: MAXO:0000930 + label: Speech therapy + - id: MAXO:0000011 + label: physical therapy + - id: HP:0001260 + label: dysarthria + - id: HP:0001618 + label: dysphonia + - id: HP:0002066 + label: difficulties with ambulation +extracted_object: + action_to_symptom: + - subject: MAXO:0000930 + predicate: TREATS + object: + - HP:0001260 + - HP:0001618 + - subject: MAXO:0000011 + predicate: TREATS + object: + - HP:0002066 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301675.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301675.yaml new file mode 100644 index 000000000..f68a9779c --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301675.yaml @@ -0,0 +1,40 @@ +--- +input_text: >- + "McKusick-Kaufman Syndrome + Clinical characteristics: McKusick-Kaufman syndrome (MKS) is characterized by + the combination of postaxial polydactyly (PAP), congenital heart disease + (CHD), and hydrometrocolpos (HMC) in females and genital malformations in + males (most commonly hypospadias, cryptorchidism, and chordee). HMC in infants + usually presents as a large cystic abdominal mass arising out of the pelvis, + caused by dilatation of the vagina and uterus as a result of the accumulation + of cervical secretions from maternal estrogen stimulation. HMC can be caused + by failure of the distal third of the vagina to develop (vaginal agenesis), + a transverse vaginal membrane, or an imperforate hymen. PAP is the presence of + additional digits on the ulnar side of the hand and the fibular side of the + foot. A variety of congenital heart defects have been reported including + atrioventricular canal, atrial septal defect, ventricular septal defect, + or a complex congenital heart malformation. 
Diagnosis/testing: The clinical + diagnosis of MKS can be established in a proband based on clinical diagnostic + criteria of HMC and PAP in the absence of clinical or molecular genetic + findings suggestive of an alternative diagnosis. The molecular diagnosis can + be established in proband with suggestive findings and biallelic pathogenic + variants in MKKS identified by molecular genetic testing. However, care must + be taken to ensure that the proband does not have Bardet-Biedl syndrome, an + allelic condition with considerable clinical overlap and age-dependent + features including retinal dystrophy, obesity, and intellectual disability. + Management: Treatment of manifestations: Surgical repair of the obstruction + causing HMC and drainage of the accumulated fluid. Treatment for polydactyly + and congenital heart defects and any other anomalies is standard." +named_entities: + - id: MONDO:0009367 + label: McKusick-Kaufman syndrome + - id: MAXO:0000004 + label: surgical repair + - id: HP:0030010 + label: hydrometrocolpos +extracted_object: + action_to_symptom: + - subject: MAXO:0000004 + predicate: TREATS + object: + - HP:0030010 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301765.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301765.yaml new file mode 100644 index 000000000..e70652d1e --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_20301765.yaml @@ -0,0 +1,25 @@ +--- +input_text: >- + Von Willebrand disease (VWD), a congenital bleeding disorder caused by deficient or defective plasma von Willebrand factor (VWF), + may only become apparent on hemostatic challenge, and bleeding history may become more apparent with increasing age. + Affected individuals benefit from care in a comprehensive bleeding disorders program. 
+ The two main treatments are desmopressin (1-deamino-8-D-arginine vasopressin [DDAVP]) + and clotting factor concentrates (recombinant and plasma-derived) containing both VWF + and FVIII (VWF/FVIII concentrate). Indirect hemostatic treatments that can reduce symptoms + include fibrinolytic inhibitors; hormones for menorrhagia are also beneficial. + Individuals with VWD should receive prompt treatment for severe bleeding episodes. + Pregnant women with VWD are at increased risk for bleeding complications at or following childbirth. +named_entities: + - id: MAXO:0000446 + label: desmopressin agent therapy + - id: HP:0012147 + label: Reduced quantity of Von Willebrand factor + - id: MONDO:0008668 + label: Von Willebrand Disease, Type 1 + +extracted_object: + action_to_disease: + - subject: MAXO:0000446 + predicate: TREATS + object: + - MONDO:0008668 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_2063868.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_2063868.yaml new file mode 100644 index 000000000..9f40d4f38 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_2063868.yaml @@ -0,0 +1,24 @@ +--- +input_text: >- + delta-Aminolevulinate dehydratase deficient porphyria, a recently recognized inborn error of heme biosynthesis, + results from the markedly deficient activity of the heme biosynthetic enzyme, delta-aminolevulinate dehydratase (ALA-D). + The four homozygotes described to date with this disorder have remarkably distinct phenotypes, ranging from a severely + affected infant with failure to thrive to an essentially asymptomatic 68-year-old male. To investigate the molecular nature + of the lesions causing the severe infantile-onset form, total RNA was isolated from cultured lymphoblasts of the affected homozygote, + RNA was reverse-transcribed to cDNA, and the 990-bp ALA-D-coding region was amplified by the PCR. 
Heterozygosity for an RsaI RFLP within + the ALA-dehydratase-coding region permitted identification of the paternal and maternal mutant alleles prior to sequencing. + The maternal mutation (designated G133R), a G-to-A transition of nucleotide 397, predicted a glycine-to-arginine substitution at residue + 133 at the carboxyl end of the highly conserved zinc-binding site in the enzyme subunit. + +named_entities: + - id: MAXO:0001175 + label: liver transplantation + - id: MONDO:0013000 + label: Porphyria, Acute Hepatic + +extracted_object: + action_to_disease: + - subject: MAXO:0001175 + predicate: TREATS + object: + - MONDO:0013000 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_26110198.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_26110198.yaml new file mode 100644 index 000000000..bf4255ad4 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_26110198.yaml @@ -0,0 +1,41 @@ +--- +input_text: >- + "Prolidase Deficiency + Treatment of manifestations: Skin ulcers may require treatment by a wound + care specialist; topical proline (often 5%) or topical 5% proline-5% glycine + ointment applied with dressing changes has been successful in some affected + individuals. Standard treatment for developmental delay / intellectual + disability, seizures, infections, reactive airways disease / pulmonary + hypertension, SLE-like features, hemophagocytic lymphohistiocytosis, mast + cell activation, osteopenia, dental anomalies, and refractive errors. Anemia + and thrombocytopenia rarely require treatment, but packed red blood cell or + platelet transfusions may be considered in those with severe anemia or + thrombocytopenia, respectively." 
+named_entities: + - id: MONDO:0008221 + label: Prolidase Deficiency + - id: MAXO:0000061 + label: antibacterial agent therapy + - id: HP:0200042 + label: Skin ulcer + - id: MAXO:0000756 + label: blood transfusion + - id: HP:0001903 + label: anemia + - id: MAXO:0010020 + label: amino acid supplementation +extracted_object: + action_to_symptom: + - subject: MAXO:0000061 + predicate: TREATS + object: + - HP:0200042 + - subject: MAXO:0000756 + predicate: TREATS + object: + - HP:0001903 + action_to_disease: + - subject: MAXO:0010020 + predicate: TREATS + object: + - MONDO:0008221 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_27077170.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_27077170.yaml new file mode 100644 index 000000000..1b1687f67 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_27077170.yaml @@ -0,0 +1,26 @@ +--- +input_text: >- + "Adams-Oliver Syndrome – RETIRED CHAPTER, FOR HISTORICAL + REFERENCE ONLY + Adams-Oliver syndrome (AOS) is characterized by aplasia cutis + congenita (ACC) of the scalp and terminal transverse limb + defects (TTLD). + Management. Treatment of manifestations: ACC. Care by a + pediatric dermatologist and/or plastic surgeon depending on + severity. Goals of non-operative therapy are to prevent infection + and promote healing. Large and/or deep lesions with calvarial + involvement require acute care and may eventually also require + reconstruction by a neurosurgeon." 
+named_entities: + - id: MONDO:0013895 + label: Adams-oliver Syndrome 3 + - id: MAXO:0000946 + label: neurosurgical procedure + - id: HP:0001057 + label: aplasia cutis congenita +extracted_object: + action_to_symptom: + - subject: MAXO:0000946 + predicate: TREATS + object: + - HP:0001057 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_28406602.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_28406602.yaml new file mode 100644 index 000000000..dae100b0b --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_28406602.yaml @@ -0,0 +1,72 @@ +--- +input_text: >- + "Myhre Syndrome + The diagnosis of Myhre syndrome is established in a proband + with characteristic clinical findings and a heterozygous + pathogenic (or likely pathogenic) variant in SMAD4 detected + by molecular genetic testing. Management. Treatment of + manifestations: Feeding therapy with a low threshold for a + clinical and/or radiographic swallowing study; referral to + nutrition for poor weight gain or obesity; medical therapy + for systemic and/or pulmonary hypertension; long-term + tracheostomy may be required for those with complete or + recurrent tracheal stenosis; aggressive medical management + for constipation; physical therapy to keep joints mobile; + hearing aids may be helpful for those with hearing loss; + some keloids can be treated with intralesional steroids + with minimal invasiveness for lesion removal; and standard + treatment for orofacial clefting / velopharyngeal + insufficiency, congenital heart defects / pericardial disease, + restrictive lung disease, gastrointestinal stenosis, + developmental delay / intellectual disability, refractive + errors / strabismus / cataracts, persistent middle ear + effusions, immunodeficiency, diabetes mellitus, and + pubertal/menstrual irregularities. 
+ Orofacial clefting /Velopharyngeal insufficiency + Standard treatment, ideally by craniofacial team + Multidisciplinary teams may incl surgical team (craniofacial + surgeon), clinical geneticist, otolaryngologist, pediatrician, + radiologist, psychologist, multiple dental specialists, + audiologist, speech therapist, & social worker. + Restrictive lung disease Symptomatic & standard treatment + per pulmonologist Oxygen supplementation as necessary" +named_entities: + - id: MONDO:0007688 + label: Myhre Syndrome + - id: MAXO:0000011 + label: physical therapy + - id: HP:0001376 + label: Limitation of joint mobility + - id: HP:0001387 + label: Joint stiffness + - id: MAXO:0000066 + label: Oxygen supplementation + - id: HP:0002093 + label: restrictive lung disease + - id: MAXO:0000504 + label: tracheostomy + - id: HP:0004894 + label: tracheal stenosis + - id: MAXO:0000930 + label: speech therapist + - id: HP:0001608 + label: Abnormality of the voice +extracted_object: + action_to_symptom: + - subject: MAXO:0000011 + predicate: TREATS + object: + - HP:0001376 + - HP:0001387 + - subject: MAXO:0000066 + predicate: TREATS + object: + - HP:0002093 + - subject: MAXO:0000504 + predicate: TREATS + object: + - HP:0004894 + - subject: MAXO:0000930 + predicate: TREATS + object: + - HP:0001608 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_29478819.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_29478819.yaml new file mode 100644 index 000000000..a297935dd --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_29478819.yaml @@ -0,0 +1,31 @@ +--- +input_text: >- + To overcome these challenges, a novel Blind Start design was utilized in a study of vestronidase alfa + in mucopolysaccharidosis VII (Sly syndrome), an ultra-rare lysosomal disease, that demonstrates the strengths of + this approach in a challenging drug-development setting. 
Twelve subjects were randomized to 1 of 4 blinded groups, + each crossing over to active treatment in a blinded fashion at different timepoints with efficacy analysis comparing + the last assessment before cross over to after 24 weeks of treatment. Study assessments included Percentage change from baseline + in urinary GAG (uGAG); a Multi-Domain Responder Index (MDRI) using prespecified minimal important differences (6-Minute Walk Test, + Forced Vital Capacity, shoulder flexion, visual acuity, and Bruininks-Oseretsky Test of Motor Proficiency); fatigue as assessed by + the Pediatric Quality of Life Inventory™ Multidimensional Fatigue Scale; and safety. + We report a novel approach used in development of vestronidase alfa (recombinant human β-glucuronidase or rhGUS), + an enzyme replacement therapy (ERT) for MPS VII, that included a unique Phase 3 Blind Start study design with a + variable placebo run-in period masking the start of vestronidase alfa, and allowing all subjects to be assessed for efficacy. + Another unique feature was the use of a Multi-Domain Responder Index (MDRI) that allowed translation of multiple clinical measures + into a combination responder endpoint assessed using minimally important difference (MID) thresholds without penalizing for + non-assessable endpoints in a heterogeneous patient population. 
+ +named_entities: + - id: MONDO:0009662 + label: Mucopolysaccharidosis Type Vii + - id: MAXO:0000933 + label: enzyme replacement or supplementation therapy + - id: HP:0003541 + label: Urinary glycosaminoglycan excretion + +extracted_object: + action_to_symptom: + - subject: MAXO:0000933 + predicate: TREATS + object: + - HP:0003541 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_30488337.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_30488337.yaml new file mode 100644 index 000000000..af613db6b --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_30488337.yaml @@ -0,0 +1,26 @@ +--- +input_text: >- + "Canaloplasty in the Treatment of Open-Angle Glaucoma: A Review of Patient Selection and Outcomes + Canaloplasty is a relatively new non-penetrating surgery for the reduction of intraocular pressure + in patients affected by glaucoma. The technique uses a microcatheter to perform a 360 º cannulation of + Schlemm's canal and leaves in place a tension suture providing an inward distension. It aims to restore + the physiological outflow pathways of the aqueous humour and is independent of external wound healing + Several studies have shown that canaloplasty is effective in reducing intraocular pressure and has a low rate + of complications, especially compared with trabeculectomy, the gold standard for glaucoma surgery. Currently, + canaloplasty is indicated in patients with open-angle glaucoma, having a mild to moderate disease, and the combination + with cataract phacoemulsification may provide further intraocular pressure reduction." 
+ +named_entities: + - id: MAXO:0000959 + label: canaloplasty + - id: HP:0012108 + label: Open angle glaucoma + - id: MONDO:0007665 + label: Glaucoma, Primary Open Angle + +extracted_object: + action_to_disease: + - subject: MAXO:0000959 + predicate: TREATS + object: + - MONDO:0007665 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_36977302.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_36977302.yaml new file mode 100644 index 000000000..582a17d19 --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_36977302.yaml @@ -0,0 +1,25 @@ +--- +input_text: >- + Corneal dystrophies are a group of non-inflammatory inherited disorders of the cornea. This review considers treatment options for epithelial-stromal + and stromal corneal dystrophies namely Reis-Bücklers, Thiel-Behnke, lattice, Avellino, granular, macular and Schnyder corneal dystrophies. + Where there is visual reduction, treatment options may include either phototherapeutic keratectomy (PTK) or corneal transplantation. + Due to the anterior location of the deposits in Reis-Bücklers and Thiel-Behnke dystrophies, PTK is considered the treatment of choice. + For lattice, Avellino, granular and macular corneal dystrophies, PTK provides temporary visual improvement; however, with recurrences, + repeat PTK or a corneal transplant would be needed. For Schnyder dystrophy, should treatment be required, PTK may be the preferred option + due to the potential for recurrence of the disease in corneal transplantation. This review discusses the literature and evidence base for + the treatment of corneal dystrophies in terms of visual outcomes and recurrence rate. 
+ +named_entities: + - id: MONDO:0012043 + label: Corneal Dystrophy, Reis-bucklers Type + - id: MAXO:0010034 + label: corneal transplantation + - id: HP:0001131 + label: Corneal dystrophy + +extracted_object: + action_to_symptom: + - subject: MAXO:0010034 + predicate: TREATS + object: + - HP:0001131 diff --git a/src/ontogpt/evaluation/maxo/test_cases/maxo_case_9543069.yaml b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_9543069.yaml new file mode 100644 index 000000000..3ceb7ec3d --- /dev/null +++ b/src/ontogpt/evaluation/maxo/test_cases/maxo_case_9543069.yaml @@ -0,0 +1,27 @@ +--- +input_text: >- + A 12-year-old girl with Sly disease (mucopolysaccharidosis VII; beta-glucuronidase deficiency), who is homozygous for the A619V mutation, + had a successful allogeneic BMT, donored by an HLA-identical unrelated female to replace the deficient enzyme. Within 5 months after BMT, + the enzyme activity of the recipient's lymphocytes increased to normal range. No signs of acute or chronic GVHD were observed. + For the successive 31 months post-BMT, beta-glucuronidase activity in her lymphocytes was maintained at almost normal levels and excretion + of glycosaminoglycans in the urine was greatly diminished. Ultrastructural findings demonstrated no abnormal vacuoles and inclusion bodies + in the cytoplasm of her rectal mucosal cells. Coincident with the restoration of the enzyme activity, clinical improvement was dramatic. + Especially notable were improvements in motor function. The patient was able to walk alone for a long time without aid, + and she even became able to ride a bicycle and take a bath. In addition, recurrent infections of the upper respiratory tract and + the middle ears decreased in frequency and severity, and dyspnea on exertion, severe snoring and vertigo have substantially improved. + Thus, allogeneic BMT in this patient produced a better quality of life and provided a more promising outlook. 
+ +named_entities: + - id: MONDO:0009662 + label: Mucopolysaccharidosis Type Vii + - id: MAXO:0001479 + label: allogeneic hematopoietic stem cell transplantation + - id: HP:0003541 + label: Urinary glycosaminoglycan excretion + +extracted_object: + action_to_symptom: + - subject: MAXO:0001479 + predicate: TREATS + object: + - HP:0003541 diff --git a/src/ontogpt/evaluation/resolver.py b/src/ontogpt/evaluation/resolver.py index 38a5b714e..023035394 100644 --- a/src/ontogpt/evaluation/resolver.py +++ b/src/ontogpt/evaluation/resolver.py @@ -5,9 +5,10 @@ from ontogpt.evaluation.ctd.eval_ctd import EvalCTD from ontogpt.evaluation.ctd.eval_ctd_ner import EvalCTDNER +from ontogpt.evaluation.maxo.eval_maxo import EvalMAXO from ontogpt.evaluation.evaluation_engine import SPIRESEvaluationEngine -resolver = ClassResolver([EvalCTD, EvalCTDNER], base=SPIRESEvaluationEngine) +resolver = ClassResolver([EvalCTD, EvalCTDNER, EvalMAXO], base=SPIRESEvaluationEngine) def create_evaluator( diff --git a/src/ontogpt/templates/maxo.py b/src/ontogpt/templates/maxo.py index c33692516..e79d54fd7 100644 --- a/src/ontogpt/templates/maxo.py +++ b/src/ontogpt/templates/maxo.py @@ -33,15 +33,6 @@ class NullDataOptions(str, Enum): -class MaxoAnnotations(ConfiguredBaseModel): - - action: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of medical actions.""") - disease: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of diseases.""") - symptom: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of symptoms.""") - action_to_disease: Optional[List[ActionToDiseaseRelationship]] = Field(default_factory=list) - action_to_symptom: Optional[List[ActionToSymptomRelationship]] = Field(default_factory=list) - - class ExtractionResult(ConfiguredBaseModel): """ A result of extracting knowledge on text @@ -127,11 +118,32 @@ class ActionToSymptomRelationship(Triple): class 
TextWithTriples(ConfiguredBaseModel): + """ + A text containing one or more relations of the Triple type. + """ + publication: Optional[Publication] = Field(None) + triples: Optional[List[Triple]] = Field(default_factory=list) + + +class MaxoAnnotations(TextWithTriples): + action: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of medical actions.""") + disease: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of diseases.""") + symptom: Optional[List[str]] = Field(default_factory=list, description="""Semicolon-separated list of symptoms.""") + action_to_disease: Optional[List[ActionToDiseaseRelationship]] = Field(default_factory=list) + action_to_symptom: Optional[List[ActionToSymptomRelationship]] = Field(default_factory=list) publication: Optional[Publication] = Field(None) triples: Optional[List[Triple]] = Field(default_factory=list) +class TextWithEntity(ConfiguredBaseModel): + """ + A text containing one or more instances of a single type of entity. 
+ """ + publication: Optional[Publication] = Field(None) + entities: Optional[List[str]] = Field(default_factory=list) + + class RelationshipType(NamedEntity): id: str = Field(..., description="""A unique identifier for the named entity""") @@ -157,7 +169,6 @@ class AnnotatorResult(ConfiguredBaseModel): # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model -MaxoAnnotations.model_rebuild() ExtractionResult.model_rebuild() NamedEntity.model_rebuild() Action.model_rebuild() @@ -168,6 +179,8 @@ class AnnotatorResult(ConfiguredBaseModel): ActionToDiseaseRelationship.model_rebuild() ActionToSymptomRelationship.model_rebuild() TextWithTriples.model_rebuild() +MaxoAnnotations.model_rebuild() +TextWithEntity.model_rebuild() RelationshipType.model_rebuild() Publication.model_rebuild() AnnotatorResult.model_rebuild() diff --git a/src/ontogpt/templates/maxo.yaml b/src/ontogpt/templates/maxo.yaml index 282ff6fe5..944547db9 100644 --- a/src/ontogpt/templates/maxo.yaml +++ b/src/ontogpt/templates/maxo.yaml @@ -27,6 +27,7 @@ classes: MaxoAnnotations: tree_root: true + is_a: TextWithTriples attributes: action: annotations: @@ -56,7 +57,7 @@ classes: action_to_symptom: annotations: prompt: >- - A triple representing a relationship between a medical action + A triple representing a relationship between a medical action (A clinically prescribed procedure, therapy, intervention, or recommendation) and a symptom, for example, a chest X-ray IS USED TO DIAGNOSE pleural effusion.