Skip to content

Commit

Permalink
Populate has_direct_input and has_direct_output in `minerva_object_to_model`
Browse files Browse the repository at this point in the history
  • Loading branch information
pkalita-lbl committed Aug 30, 2024
1 parent 88975ae commit a636e05
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 23 deletions.
74 changes: 51 additions & 23 deletions src/gocam/translation/minerva_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from typing import DefaultDict, Dict, Iterator, List, Optional, Set
from typing import DefaultDict, Dict, Iterator, List, Optional, Set, Tuple

import requests
import yaml
Expand All @@ -11,12 +11,13 @@
BiologicalProcessAssociation,
CausalAssociation,
CellularAnatomicalEntityAssociation,
EvidenceItem,
EnabledByAssociation,
EnabledByProteinComplexAssociation,
EnabledByGeneProductAssociation,
EnabledByProteinComplexAssociation,
EvidenceItem,
Model,
MolecularFunctionAssociation,
MoleculeAssociation,
Object,
ProvenanceInfo,
)
Expand All @@ -25,6 +26,8 @@
PART_OF = "BFO:0000050"
HAS_PART = "BFO:0000051"
OCCURS_IN = "BFO:0000066"
HAS_INPUT = "RO:0002233"
HAS_OUTPUT = "RO:0002234"

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -170,7 +173,7 @@ def minerva_object_to_model(obj: Dict) -> Model:
id2obj: Dict[str, Dict] = {}
activities: List[Activity] = []
activities_by_mf_id: DefaultDict[str, List[Activity]] = defaultdict(list)
facts_by_property = defaultdict(list)
facts_by_property: DefaultDict[str, List[Dict]] = defaultdict(list)

def _cls(obj: Dict) -> Optional[str]:
if obj.get("type", None) == "complement":
Expand Down Expand Up @@ -209,6 +212,19 @@ def _evidence_from_fact(fact: Dict) -> List[EvidenceItem]:
evs.append(ev)
return evs

def _iter_activities_by_fact_subject(
    *,
    fact_property: str,
) -> Iterator[Tuple[Activity, str, List[EvidenceItem]]]:
    """Yield ``(activity, term, evidence)`` triples for one fact property.

    Walks every fact recorded in ``facts_by_property`` under
    ``fact_property``. A fact whose object is not a key of
    ``individual_to_term`` is logged as a warning and skipped. Otherwise,
    one triple is produced for each activity registered in
    ``activities_by_mf_id`` under the fact's subject; the term is the
    ``individual_to_term`` entry for the fact's object, and the evidence
    list is built by ``_evidence_from_fact`` separately for each yielded
    triple.
    """
    for fact in facts_by_property.get(fact_property, []):
        s = fact["subject"]
        o = fact["object"]
        if o in individual_to_term:
            for matched_activity in activities_by_mf_id.get(s, []):
                # Evidence is rebuilt per activity, so no two yielded
                # triples share the same list object.
                evidence_items = _evidence_from_fact(fact)
                yield matched_activity, individual_to_term[o], evidence_items
        else:
            logger.warning(f"Missing {o} in {individual_to_term}")

for individual in obj["individuals"]:
typs = [x["label"] for x in individual.get("root-type", []) if x]
typ: Optional[str] = None
Expand Down Expand Up @@ -281,27 +297,39 @@ def _evidence_from_fact(fact: Dict) -> List[EvidenceItem]:
activities.append(activity)
activities_by_mf_id[s].append(activity)

for fact in facts_by_property.get(PART_OF, []):
s, o = fact["subject"], fact["object"]
if o not in individual_to_term:
logger.warning(f"Missing {o} in {individual_to_term}")
continue
for a in activities_by_mf_id.get(s, []):
evs = _evidence_from_fact(fact)
a.part_of = BiologicalProcessAssociation(
term=individual_to_term[o], evidence=evs
)
for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=PART_OF
):
if activity.part_of is not None:
logger.warning(f"Overwriting part_of for Activity: {activity.id}")
activity.part_of = BiologicalProcessAssociation(term=term, evidence=evs)

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=OCCURS_IN
):
if activity.occurs_in is not None:
logger.warning(f"Overwriting occurs_in for Activity: {activity.id}")
activity.occurs_in = CellularAnatomicalEntityAssociation(
term=term, evidence=evs
)

for fact in facts_by_property.get(OCCURS_IN, []):
s, o = fact["subject"], fact["object"]
if o not in individual_to_term:
logger.warning(f"Missing {o} in {individual_to_term}")
continue
for a in activities_by_mf_id.get(s, []):
evs = _evidence_from_fact(fact)
a.occurs_in = CellularAnatomicalEntityAssociation(
term=individual_to_term[o], evidence=evs
for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_INPUT
):
if activity.has_direct_input is not None:
logger.warning(
f"Overwriting has_direct_input for Activity: {activity.id}"
)
activity.has_direct_input = MoleculeAssociation(term=term, evidence=evs)

for activity, term, evs in _iter_activities_by_fact_subject(
fact_property=HAS_OUTPUT
):
if activity.has_direct_output is not None:
logger.warning(
f"Overwriting has_direct_output for Activity: {activity.id}"
)
activity.has_direct_output = MoleculeAssociation(term=term, evidence=evs)

for fact_property, facts in facts_by_property.items():
for fact in facts:
Expand Down
1 change: 1 addition & 0 deletions tests/input/minerva-665912ed00002626.json

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions tests/test_translation/test_minerva_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,39 @@ def test_protein_complex():
"MGI:MGI:1929608",
"MGI:MGI:103038",
]


def test_has_direct_input_and_has_direct_output():
    """Test that direct input/output molecule associations are added to activities.

    Loads the ``minerva-665912ed00002626.json`` fixture, converts it with
    ``MinervaWrapper.minerva_object_to_model`` and checks both the number of
    activities carrying ``has_direct_input`` / ``has_direct_output`` and the
    number of activities that reference two specific terms.
    """
    mw = MinervaWrapper()
    # Decode the JSON fixture explicitly as UTF-8 instead of relying on the
    # platform's locale encoding.
    with open(
        INPUT_DIR / "minerva-665912ed00002626.json", "r", encoding="utf-8"
    ) as f:
        minerva_object = json.load(f)
    model = mw.minerva_object_to_model(minerva_object)

    activities_with_direct_input = [
        activity for activity in model.activities if activity.has_direct_input
    ]
    activities_with_direct_output = [
        activity for activity in model.activities if activity.has_direct_output
    ]

    # Basic sanity check on the number of activities with direct input/output
    assert len(activities_with_direct_input) == 3
    assert len(activities_with_direct_output) == 7

    # Verify that one activity has uric acid as a direct input
    uric_acid_input_activities = [
        a
        for a in activities_with_direct_input
        if a.has_direct_input.term == "CHEBI:27226"
    ]
    assert len(uric_acid_input_activities) == 1

    # Verify that three activities have urea as a direct output
    urea_output_activities = [
        a
        for a in activities_with_direct_output
        if a.has_direct_output.term == "CHEBI:16199"
    ]
    assert len(urea_output_activities) == 3

0 comments on commit a636e05

Please sign in to comment.