Skip to content

Commit

Permalink
Revise MIC template to extract nutrient to disease rels
Browse files Browse the repository at this point in the history
  • Loading branch information
caufieldjh committed Dec 18, 2024
1 parent af485d7 commit 880f6f7
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 83 deletions.
107 changes: 53 additions & 54 deletions src/ontogpt/templates/mic.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,18 @@ def __contains__(self, key:str) -> bool:
return key in self.root


linkml_meta = LinkMLMeta({'default_prefix': 'micronutrient',
linkml_meta = LinkMLMeta({'default_prefix': 'mic',
'default_range': 'string',
'description': 'A template for micronutrient information from text, including '
'its participation in biochemical pathways and relationships '
'to genes and diseases.',
'id': 'http://w3id.org/ontogpt/micronutrient',
'to genes and diseases. Intended for use with the '
'Micronutrient Information Center, a resource curated and '
'managed by the Linus Pauling Institute at Oregon State '
'University.',
'id': 'http://w3id.org/ontogpt/mic',
'imports': ['linkml:types', 'core'],
'license': 'https://creativecommons.org/publicdomain/zero/1.0/',
'name': 'micronutrient',
'name': 'mic',
'prefixes': {'GO': {'prefix_prefix': 'GO',
'prefix_reference': 'http://purl.obolibrary.org/obo/GO_'},
'chebi': {'prefix_prefix': 'chebi',
Expand All @@ -80,12 +83,12 @@ def __contains__(self, key:str) -> bool:
'prefix_reference': 'http://purl.obolibrary.org/obo/foodon_'},
'linkml': {'prefix_prefix': 'linkml',
'prefix_reference': 'https://w3id.org/linkml/'},
'micronutrient': {'prefix_prefix': 'micronutrient',
'prefix_reference': 'http://w3id.org/ontogpt/micronutrient'},
'mic': {'prefix_prefix': 'mic',
'prefix_reference': 'http://w3id.org/ontogpt/mic'},
'rdf': {'prefix_prefix': 'rdf',
'prefix_reference': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}},
'source_file': 'src/ontogpt/templates/mic.yaml',
'title': 'Food Extraction Template'} )
'title': 'Micronutrient Information Extraction Template'} )

class NullDataOptions(str, Enum):
UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION"
Expand Down Expand Up @@ -237,10 +240,31 @@ class AnnotatorResult(ConfiguredBaseModel):


class Document(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/micronutrient', 'tree_root': True})

nutrientTerms: Optional[List[str]] = Field(None, description="""A semicolon-separated list of any names of nutrients or micronutrients, e.g., riboflavin, chromium, fiber""", json_schema_extra = { "linkml_meta": {'alias': 'nutrientTerms', 'domain_of': ['Document']} })
nutrientToPathwayRelationships: Optional[List[str]] = Field(None, description="""A semicolon-separated list of relationships between nutrients and biochemical pathways, e.g., riboflavin IS INVOLVED IN citric acid cycle""", json_schema_extra = { "linkml_meta": {'alias': 'nutrientToPathwayRelationships', 'domain_of': ['Document']} })
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/mic', 'tree_root': True})

nutrientTerms: Optional[List[str]] = Field(None, description="""A list of any names of nutrients or micronutrients.""", json_schema_extra = { "linkml_meta": {'alias': 'nutrientTerms',
'annotations': {'prompt': {'tag': 'prompt',
'value': 'A semicolon-separated list of names of '
'chemicals, nutrients, or micronutrients '
'mentioned in the input document.'},
'prompt.examples': {'tag': 'prompt.examples',
'value': 'biotin; cobalamin; iodine; '
'zinc; coenzyme Q10'}},
'domain_of': ['Document']} })
nutrientToDiseaseRelationships: Optional[List[NutrientToDiseaseRelationship]] = Field(None, description="""A list of relationships between nutrients and biochemical diseases.""", json_schema_extra = { "linkml_meta": {'alias': 'nutrientToDiseaseRelationships',
'annotations': {'prompt': {'tag': 'prompt',
'value': 'A semicolon-separated list of '
'relationships between a single nutrient '
'(including vitamins, minerals, and '
'micronutrients) and a single disease, '
'with a type of relationship connecting '
'them both. Represent the relationship as '
'triples, e.g., "Nutrient HAS '
'RELATIONSHIP WITH Disease". '
'Relationships may include TREATS, '
'PREVENTS, INCREASES RISK OF, DECREASES '
'RISK OF, or others.'}},
'domain_of': ['Document']} })
id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
'comments': ['this is populated during the grounding and normalization step'],
Expand Down Expand Up @@ -272,11 +296,12 @@ def pattern_original_spans(cls, v):

class NutrientTerm(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'annotations': {'annotators': {'tag': 'annotators',
'value': 'sqlite:obo:foodon, sqlite:obo:chebi'},
'value': 'sqlite:obo:chebi'},
'prompt': {'tag': 'prompt',
'value': 'The name of a nutrient.'}},
'from_schema': 'http://w3id.org/ontogpt/micronutrient',
'id_prefixes': ['FOODON', 'CHEBI']})
'value': 'The name of a nutrient, including '
'vitamins and minerals.'}},
'from_schema': 'http://w3id.org/ontogpt/mic',
'id_prefixes': ['CHEBI']})

id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
Expand Down Expand Up @@ -307,12 +332,13 @@ def pattern_original_spans(cls, v):
return v


class Pathway(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'annotations': {'annotators': {'tag': 'annotators', 'value': 'sqlite:obo:go'},
class Disease(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'annotations': {'annotators': {'tag': 'annotators',
'value': 'sqlite:obo:mondo'},
'prompt': {'tag': 'prompt',
'value': 'The name of a biochemical pathway.'}},
'from_schema': 'http://w3id.org/ontogpt/micronutrient',
'id_prefixes': ['GO']})
'value': 'The name of a disease.'}},
'from_schema': 'http://w3id.org/ontogpt/mic',
'id_prefixes': ['MONDO']})

id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
Expand Down Expand Up @@ -343,39 +369,12 @@ def pattern_original_spans(cls, v):
return v


class NutrientToPathwayRelationship(NamedEntity):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/micronutrient'})
class NutrientToDiseaseRelationship(CompoundExpression):
linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'http://w3id.org/ontogpt/mic'})

nutrient: Optional[str] = Field(None, description="""The name of a nutrient.""", json_schema_extra = { "linkml_meta": {'alias': 'nutrient', 'domain_of': ['NutrientToPathwayRelationship']} })
pathway: Optional[str] = Field(None, description="""The name of a biochemical pathway.""", json_schema_extra = { "linkml_meta": {'alias': 'pathway', 'domain_of': ['NutrientToPathwayRelationship']} })
relationship: Optional[str] = Field(None, description="""The relationship between the nutrient and the pathway, for example \"IS INVOLVED IN\"""", json_schema_extra = { "linkml_meta": {'alias': 'relationship', 'domain_of': ['NutrientToPathwayRelationship']} })
id: str = Field(..., description="""A unique identifier for the named entity""", json_schema_extra = { "linkml_meta": {'alias': 'id',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
'comments': ['this is populated during the grounding and normalization step'],
'domain_of': ['NamedEntity', 'Publication']} })
label: Optional[str] = Field(None, description="""The label (name) of the named thing""", json_schema_extra = { "linkml_meta": {'alias': 'label',
'aliases': ['name'],
'annotations': {'owl': {'tag': 'owl',
'value': 'AnnotationProperty, AnnotationAssertion'}},
'domain_of': ['NamedEntity'],
'slot_uri': 'rdfs:label'} })
original_spans: Optional[List[str]] = Field(None, description="""The coordinates of the original text span from which the named entity was extracted, inclusive. For example, \"10:25\" means the span starting from the 10th character and ending with the 25th character. The first character in the text has index 0. Newlines are treated as single characters. Multivalued as there may be multiple spans for a single text.""", json_schema_extra = { "linkml_meta": {'alias': 'original_spans',
'annotations': {'prompt.skip': {'tag': 'prompt.skip', 'value': 'true'}},
'comments': ['This is determined during grounding and normalization',
'But is based on the full input text'],
'domain_of': ['NamedEntity']} })

@field_validator('original_spans')
def pattern_original_spans(cls, v):
pattern=re.compile(r"^\d+:\d+$")
if isinstance(v,list):
for element in v:
if isinstance(v, str) and not pattern.match(element):
raise ValueError(f"Invalid original_spans format: {element}")
elif isinstance(v,str):
if not pattern.match(v):
raise ValueError(f"Invalid original_spans format: {v}")
return v
nutrient: Optional[str] = Field(None, description="""The name of the nutrient defined in the triple, including vitamins and minerals.""", json_schema_extra = { "linkml_meta": {'alias': 'nutrient', 'domain_of': ['NutrientToDiseaseRelationship']} })
relationship: Optional[str] = Field(None, description="""The name of a type of relationship between the nutrient and the disease.""", json_schema_extra = { "linkml_meta": {'alias': 'relationship', 'domain_of': ['NutrientToDiseaseRelationship']} })
disease: Optional[str] = Field(None, description="""The name of the disease defined in the triple.""", json_schema_extra = { "linkml_meta": {'alias': 'disease', 'domain_of': ['NutrientToDiseaseRelationship']} })


# Model rebuild
Expand All @@ -391,6 +390,6 @@ def pattern_original_spans(cls, v):
AnnotatorResult.model_rebuild()
Document.model_rebuild()
NutrientTerm.model_rebuild()
Pathway.model_rebuild()
NutrientToPathwayRelationship.model_rebuild()
Disease.model_rebuild()
NutrientToDiseaseRelationship.model_rebuild()

78 changes: 49 additions & 29 deletions src/ontogpt/templates/mic.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
id: http://w3id.org/ontogpt/micronutrient
name: micronutrient
title: Food Extraction Template
id: http://w3id.org/ontogpt/mic
name: mic
title: Micronutrient Information Extraction Template
description: >-
A template for micronutrient information from text,
including its participation in biochemical pathways
and relationships to genes and diseases.
Intended for use with the Micronutrient Information
Center, a resource curated and managed by the Linus
Pauling Institute at Oregon State University.
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
foodon: http://purl.obolibrary.org/obo/foodon_
chebi: http://purl.obolibrary.org/obo/CHEBI_
GO: http://purl.obolibrary.org/obo/GO_
micronutrient: http://w3id.org/ontogpt/micronutrient
mic: http://w3id.org/ontogpt/mic
linkml: https://w3id.org/linkml/

default_prefix: micronutrient
default_prefix: mic
default_range: string

imports:
Expand All @@ -30,48 +33,65 @@ classes:
range: NutrientTerm
multivalued: true
description: >-
A semicolon-separated list of any names of nutrients
or micronutrients, e.g., riboflavin, chromium, fiber
nutrientToPathwayRelationships:
range: NutrientToPathwayRelationship
A list of any names of nutrients or micronutrients.
annotations:
prompt: >-
A semicolon-separated list of names of chemicals,
nutrients, or micronutrients mentioned in the
input document.
prompt.examples: >-
biotin; cobalamin; iodine; zinc; coenzyme Q10
nutrientToDiseaseRelationships:
range: NutrientToDiseaseRelationship
multivalued: true
description: >-
A semicolon-separated list of relationships between
nutrients and biochemical pathways, e.g., riboflavin
IS INVOLVED IN citric acid cycle
A list of relationships between nutrients and
biochemical diseases.
annotations:
prompt: >-
A semicolon-separated list of relationships
between a single nutrient (including vitamins,
minerals, and micronutrients) and a single
disease, with a type of relationship connecting
them both. Represent the relationship as
triples, e.g., "Nutrient HAS RELATIONSHIP WITH
Disease". Relationships may include TREATS,
PREVENTS, INCREASES RISK OF, DECREASES RISK OF,
or others.
NutrientTerm:
is_a: NamedEntity
id_prefixes:
- FOODON
- CHEBI
annotations:
annotators: sqlite:obo:foodon, sqlite:obo:chebi
annotators: sqlite:obo:chebi
prompt: >-
The name of a nutrient.
The name of a nutrient, including vitamins and
minerals.
Pathway:
Disease:
is_a: NamedEntity
id_prefixes:
- GO
- MONDO
annotations:
annotators: sqlite:obo:go
annotators: sqlite:obo:mondo
prompt: >-
The name of a biochemical pathway.
The name of a disease.
NutrientToPathwayRelationship:
is_a: NamedEntity
NutrientToDiseaseRelationship:
is_a: CompoundExpression
attributes:
nutrient:
range: NutrientTerm
description: >-
The name of a nutrient.
pathway:
range: Pathway
description: >-
The name of a biochemical pathway.
The name of the nutrient defined in the triple,
including vitamins and minerals.
relationship:
range: string
range: RelationshipType
description: >-
The name of a type of relationship between the
nutrient and the disease.
disease:
range: Disease
description: >-
The relationship between the nutrient and the pathway,
for example "IS INVOLVED IN"
The name of the disease defined in the triple.

0 comments on commit 880f6f7

Please sign in to comment.