Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add medical action association table to UI (plus fixture & enum regen) #937

Merged
merged 14 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/test-backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
pull_request:
workflow_dispatch:

env:
PYTHONPATH: ${{ github.workspace }}/backend

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix for #935

# defaults:
# run:
# working-directory: ./backend
Expand Down Expand Up @@ -40,14 +43,16 @@ jobs:
# run pytest
#----------------------------------------------
- name: Run tests
run: poetry -C backend run pytest backend/tests
run: |
poetry -C backend run pytest tests
shell: bash

#----------------------------------------------
# coverage report
#----------------------------------------------
- name: Generate coverage results
run: |
poetry -C backend run coverage run -m pytest backend/tests
poetry -C backend run coverage run -m pytest tests
poetry -C backend run coverage xml
poetry -C backend run coverage report -m
shell: bash
Expand Down
116 changes: 32 additions & 84 deletions backend/src/monarch_py/datamodels/category_enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,119 +4,60 @@
class EntityCategory(Enum):
"""Entity categories"""

SEQUENCE_VARIANT = "biolink:SequenceVariant"
GENE = "biolink:Gene"
GENOTYPE = "biolink:Genotype"
PHENOTYPIC_FEATURE = "biolink:PhenotypicFeature"
BIOLOGICAL_PROCESS_OR_ACTIVITY = "biolink:BiologicalProcessOrActivity"
DISEASE = "biolink:Disease"
GROSS_ANATOMICAL_STRUCTURE = "biolink:GrossAnatomicalStructure"
CELL = "biolink:Cell"
PATHWAY = "biolink:Pathway"
NAMED_THING = "biolink:NamedThing"
GENOTYPE = "biolink:Genotype"
ANATOMICAL_ENTITY = "biolink:AnatomicalEntity"
CELLULAR_COMPONENT = "biolink:CellularComponent"
MOLECULAR_ENTITY = "biolink:MolecularEntity"
BIOLOGICAL_PROCESS = "biolink:BiologicalProcess"
MACROMOLECULAR_COMPLEX = "biolink:MacromolecularComplex"
MOLECULAR_ACTIVITY = "biolink:MolecularActivity"
DISEASE = "biolink:Disease"
NAMED_THING = "biolink:NamedThing"
PATHWAY = "biolink:Pathway"
PROTEIN = "biolink:Protein"
CELLULAR_ORGANISM = "biolink:CellularOrganism"
VERTEBRATE = "biolink:Vertebrate"
VIRUS = "biolink:Virus"
BEHAVIORAL_FEATURE = "biolink:BehavioralFeature"
CHEMICAL_ENTITY = "biolink:ChemicalEntity"
LIFE_STAGE = "biolink:LifeStage"
PATHOLOGICAL_PROCESS = "biolink:PathologicalProcess"
DRUG = "biolink:Drug"
SMALL_MOLECULE = "biolink:SmallMolecule"
SEQUENCE_VARIANT = "biolink:SequenceVariant"
MOLECULAR_ACTIVITY = "biolink:MolecularActivity"
CELLULAR_COMPONENT = "biolink:CellularComponent"
CELL = "biolink:Cell"
ORGANISM_TAXON = "biolink:OrganismTaxon"
INFORMATION_CONTENT_ENTITY = "biolink:InformationContentEntity"
NUCLEIC_ACID_ENTITY = "biolink:NucleicAcidEntity"
EVIDENCE_TYPE = "biolink:EvidenceType"
RNAPRODUCT = "biolink:RNAProduct"
TRANSCRIPT = "biolink:Transcript"
FUNGUS = "biolink:Fungus"
PLANT = "biolink:Plant"
PROCESSED_MATERIAL = "biolink:ProcessedMaterial"
ACTIVITY = "biolink:Activity"
AGENT = "biolink:Agent"
CONFIDENCE_LEVEL = "biolink:ConfidenceLevel"
DATASET = "biolink:Dataset"
ENVIRONMENTAL_FEATURE = "biolink:EnvironmentalFeature"
GENETIC_INHERITANCE = "biolink:GeneticInheritance"
HAPLOTYPE = "biolink:Haplotype"
INVERTEBRATE = "biolink:Invertebrate"
MAMMAL = "biolink:Mammal"
POPULATION_OF_INDIVIDUAL_ORGANISMS = "biolink:PopulationOfIndividualOrganisms"
PROTEIN_FAMILY = "biolink:ProteinFamily"
PUBLICATION = "biolink:Publication"
ACCESSIBLE_DNA_REGION = "biolink:AccessibleDnaRegion"
BACTERIUM = "biolink:Bacterium"
BIOLOGICAL_SEX = "biolink:BiologicalSex"
CELL_LINE = "biolink:CellLine"
CHEMICAL_EXPOSURE = "biolink:ChemicalExposure"
CHEMICAL_MIXTURE = "biolink:ChemicalMixture"
DATASET_DISTRIBUTION = "biolink:DatasetDistribution"
DIAGNOSTIC_AID = "biolink:DiagnosticAid"
DRUG_EXPOSURE = "biolink:DrugExposure"
ENVIRONMENTAL_PROCESS = "biolink:EnvironmentalProcess"
EVENT = "biolink:Event"
EXON = "biolink:Exon"
GENOME = "biolink:Genome"
GENOTYPIC_SEX = "biolink:GenotypicSex"
HUMAN = "biolink:Human"
INDIVIDUAL_ORGANISM = "biolink:IndividualOrganism"
MATERIAL_SAMPLE = "biolink:MaterialSample"
MICRO_RNA = "biolink:MicroRNA"
ORGANISMAL_ENTITY = "biolink:OrganismalEntity"
PATENT = "biolink:Patent"
PHENOTYPIC_SEX = "biolink:PhenotypicSex"
POLYPEPTIDE = "biolink:Polypeptide"
PROTEIN_DOMAIN = "biolink:ProteinDomain"
REAGENT_TARGETED_GENE = "biolink:ReagentTargetedGene"
REGULATORY_REGION = "biolink:RegulatoryRegion"
SI_RNA = "biolink:SiRNA"
SNV = "biolink:Snv"
STUDY = "biolink:Study"
STUDY_VARIABLE = "biolink:StudyVariable"
TRANSCRIPTION_FACTOR_BINDING_SITE = "biolink:TranscriptionFactorBindingSite"
TREATMENT = "biolink:Treatment"
WEB_PAGE = "biolink:WebPage"
ZYGOSITY = "biolink:Zygosity"
MOLECULAR_ENTITY = "biolink:MolecularEntity"
LIFE_STAGE = "biolink:LifeStage"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to presume that all of these deletions and additions make sense, although I admit I didn't verify that none of the removed categories are still needed.



class AssociationCategory(Enum):
"""Association categories"""

PAIRWISE_GENE_TO_GENE_INTERACTION = "biolink:PairwiseGeneToGeneInteraction"
GENE_TO_EXPRESSION_SITE_ASSOCIATION = "biolink:GeneToExpressionSiteAssociation"
VARIANT_TO_GENE_ASSOCIATION = "biolink:VariantToGeneAssociation"
MACROMOLECULAR_MACHINE_TO_BIOLOGICAL_PROCESS_ASSOCIATION = (
"biolink:MacromolecularMachineToBiologicalProcessAssociation"
)
GENE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:GeneToPhenotypicFeatureAssociation"
ASSOCIATION = "biolink:Association"
MACROMOLECULAR_MACHINE_TO_MOLECULAR_ACTIVITY_ASSOCIATION = (
"biolink:MacromolecularMachineToMolecularActivityAssociation"
)
MACROMOLECULAR_MACHINE_TO_CELLULAR_COMPONENT_ASSOCIATION = (
"biolink:MacromolecularMachineToCellularComponentAssociation"
)
ASSOCIATION = "biolink:Association"
GENE_TO_GENE_HOMOLOGY_ASSOCIATION = "biolink:GeneToGeneHomologyAssociation"
GENOTYPE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:GenotypeToPhenotypicFeatureAssociation"
DISEASE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:DiseaseToPhenotypicFeatureAssociation"
GENE_TO_PATHWAY_ASSOCIATION = "biolink:GeneToPathwayAssociation"
DISEASE_OR_PHENOTYPIC_FEATURE_TO_LOCATION_ASSOCIATION = "biolink:DiseaseOrPhenotypicFeatureToLocationAssociation"
CHEMICAL_TO_PATHWAY_ASSOCIATION = "biolink:ChemicalToPathwayAssociation"
VARIANT_TO_GENE_ASSOCIATION = "biolink:VariantToGeneAssociation"
VARIANT_TO_DISEASE_ASSOCIATION = "biolink:VariantToDiseaseAssociation"
GENOTYPE_TO_DISEASE_ASSOCIATION = "biolink:GenotypeToDiseaseAssociation"
CORRELATED_GENE_TO_DISEASE_ASSOCIATION = "biolink:CorrelatedGeneToDiseaseAssociation"
CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation"
DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE_ASSOCIATION = (
"biolink:DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation"
)
VARIANT_TO_DISEASE_ASSOCIATION = "biolink:VariantToDiseaseAssociation"
CAUSAL_GENE_TO_DISEASE_ASSOCIATION = "biolink:CausalGeneToDiseaseAssociation"
CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation"
DISEASE_OR_PHENOTYPIC_FEATURE_TO_LOCATION_ASSOCIATION = "biolink:DiseaseOrPhenotypicFeatureToLocationAssociation"
CHEMICAL_OR_DRUG_OR_TREATMENT_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = (
"biolink:ChemicalOrDrugOrTreatmentToDiseaseOrPhenotypicFeatureAssociation"
)
VARIANT_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:VariantToPhenotypicFeatureAssociation"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, I'll presume your reorganization is for improving readability or logical consistency.



class AssociationPredicate(Enum):
Expand All @@ -125,33 +66,40 @@ class AssociationPredicate(Enum):
INTERACTS_WITH = "biolink:interacts_with"
EXPRESSED_IN = "biolink:expressed_in"
HAS_PHENOTYPE = "biolink:has_phenotype"
IS_SEQUENCE_VARIANT_OF = "biolink:is_sequence_variant_of"
ENABLES = "biolink:enables"
ACTIVELY_INVOLVED_IN = "biolink:actively_involved_in"
ORTHOLOGOUS_TO = "biolink:orthologous_to"
LOCATED_IN = "biolink:located_in"
SUBCLASS_OF = "biolink:subclass_of"
LOCATED_IN = "biolink:located_in"
RELATED_TO = "biolink:related_to"
PARTICIPATES_IN = "biolink:participates_in"
ACTS_UPSTREAM_OF_OR_WITHIN = "biolink:acts_upstream_of_or_within"
ACTIVE_IN = "biolink:active_in"
IS_ACTIVE_IN = "biolink:is_active_in"
PART_OF = "biolink:part_of"
MODEL_OF = "biolink:model_of"
CAUSES = "biolink:causes"
IS_SEQUENCE_VARIANT_OF = "biolink:is_sequence_variant_of"
MODEL_OF = "biolink:model_of"
ACTS_UPSTREAM_OF = "biolink:acts_upstream_of"
TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT = "biolink:treats_or_applied_or_studied_to_treat"
HAS_MODE_OF_INHERITANCE = "biolink:has_mode_of_inheritance"
CONTRIBUTES_TO = "biolink:contributes_to"
GENE_ASSOCIATED_WITH_CONDITION = "biolink:gene_associated_with_condition"
TREATS_OR_APPLIED_OR_STUDIED_TO_TREAT = "biolink:treats_or_applied_or_studied_to_treat"
CONTRIBUTES_TO = "biolink:contributes_to"
ASSOCIATED_WITH_INCREASED_LIKELIHOOD_OF = "biolink:associated_with_increased_likelihood_of"
COLOCALIZES_WITH = "biolink:colocalizes_with"
GENETICALLY_ASSOCIATED_WITH = "biolink:genetically_associated_with"
DISEASE_HAS_LOCATION = "biolink:disease_has_location"
ACTS_UPSTREAM_OF_POSITIVE_EFFECT = "biolink:acts_upstream_of_positive_effect"
ACTS_UPSTREAM_OF_OR_WITHIN_POSITIVE_EFFECT = "biolink:acts_upstream_of_or_within_positive_effect"
AMELIORATES_CONDITION = "biolink:ameliorates_condition"
ACTS_UPSTREAM_OF_NEGATIVE_EFFECT = "biolink:acts_upstream_of_negative_effect"
ACTS_UPSTREAM_OF_OR_WITHIN_NEGATIVE_EFFECT = "biolink:acts_upstream_of_or_within_negative_effect"
PREVENTATIVE_FOR_CONDITION = "biolink:preventative_for_condition"
CONTRAINDICATED_IN = "biolink:contraindicated_in"


class MappingPredicate(Enum):
"""Mapping predicates"""

EXACT_MATCH = "skos:exactMatch"
CLOSE_MATCH = "skos:closeMatch"
BROAD_MATCH = "skos:broadMatch"
9 changes: 8 additions & 1 deletion backend/src/monarch_py/implementations/solr/solr_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@

def parse_association_counts(query_result: SolrQueryResult, entity: str) -> AssociationCountList:
subject_query = f'AND (subject:"{entity}" OR subject_closure:"{entity}")'
object_query = f'AND (object:"{entity}" OR object_closure:"{entity}")'
object_query = f'AND (object:"{entity}" OR object_closure:"{entity}" OR disease_context_qualifier:"{entity}" OR disease_context_qualifier_closure:"{entity}")'
association_count_dict: Dict[str, AssociationCount] = {}
for k, v in query_result.facet_counts.facet_queries.items():
if v > 0:
Expand Down Expand Up @@ -258,6 +258,13 @@
document.get("object_closure") and any(e in document.get("object_closure") for e in entity)
):
direction = AssociationDirectionEnum.incoming
elif document.get("disease_context_qualifier") in entity or (

Check warning on line 261 in backend/src/monarch_py/implementations/solr/solr_parsers.py

View check run for this annotation

Codecov / codecov/patch

backend/src/monarch_py/implementations/solr/solr_parsers.py#L261

Added line #L261 was not covered by tests
document.get("disease_context_qualifier_closure")
and any(e in document.get("disease_context_qualifier_closure") for e in entity)
):
# This is a special case for disease_context_qualifier, if an association between two other entities
# only occurs within the context of a disease, we can treat it like an incoming association
direction = AssociationDirectionEnum.incoming

Check warning on line 267 in backend/src/monarch_py/implementations/solr/solr_parsers.py

View check run for this annotation

Codecov / codecov/patch

backend/src/monarch_py/implementations/solr/solr_parsers.py#L267

Added line #L267 was not covered by tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Codecov is complaining that lines 261 and 267 aren't covered by tests. It seems reasonable to ignore this, but if it's easy to write a test here, it would be nice to cover these edge cases.

else:
raise ValueError(f"Entity {entity} not found in association {document}")
return direction
11 changes: 6 additions & 5 deletions backend/src/monarch_py/implementations/solr/solr_query_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def build_association_query(
offset: int = 0,
limit: int = 20,
) -> SolrQuery:

entity_fields = ["subject", "object", "disease_context_qualifier"]
"""Populate a SolrQuery object with association filters"""
query = SolrQuery(start=offset, rows=limit)
query.add_field_filter_query("category", None if not category else [c for c in category])
Expand All @@ -54,12 +56,10 @@ def build_association_query(
query.add_filter_query(" OR ".join([f'object:"{o}" OR object_closure:"{o}"' for o in object]))
if entity:
if direct:
query.add_filter_query(" OR ".join([f'subject:"{e}" OR object:"{e}"' for e in entity]))
query.add_filter_query(" OR ".join([f'{field}:"{e}"' for e in entity for field in entity_fields]))
else:
query.add_filter_query(
" OR ".join(
[f'subject:"{e}" OR subject_closure:"{e}" OR object:"{e}" OR object_closure:"{e}"' for e in entity]
)
" OR ".join([f'{field}:"{e}" OR {field}_closure:"{e}"' for e in entity for field in entity_fields])
)
if q:
# We don't yet have tokenization strategies for the association index, initially we'll limit searching to
Expand Down Expand Up @@ -118,7 +118,8 @@ def build_association_table_query(

def build_association_counts_query(entity: str) -> SolrQuery:
subject_query = f'AND (subject:"{entity}" OR subject_closure:"{entity}")'
object_query = f'AND (object:"{entity}" OR object_closure:"{entity}")'
object_query = f'AND (object:"{entity}" OR object_closure:"{entity}" OR disease_context_qualifier:"{entity}" OR disease_context_qualifier_closure:"{entity}")'

# Run the same facet_queries constrained to matches against either the subject or object
# to know which kind of label will be needed in the UI to refer to the opposite side of the association
facet_queries = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pystow
import tarfile


@dataclass
class SpacyImplementation(TextAnnotatorInterface):
"""Implementation of Monarch Interfaces for SPACY"""
Expand All @@ -32,22 +33,31 @@ def init_spacy(self, grounding_implementation: GroundingInterface):
with tarfile.open(model_archive, "r:gz") as tar:
tar.extractall(path=model_dir.parent)

model_subdir = next((d for d in model_archive.parent.iterdir() if d.is_dir() and d.name.startswith("en_core_sci")), None)
model_subdir = next(
(d for d in model_archive.parent.iterdir() if d.is_dir() and d.name.startswith("en_core_sci")), None
)

if model_subdir:
inner_model_dir = next((d for d in model_subdir.iterdir() if d.is_dir() and d.name.startswith("en_core_sci") and "egg-info" not in d.name), None)
inner_model_dir = next(
(
d
for d in model_subdir.iterdir()
if d.is_dir() and d.name.startswith("en_core_sci") and "egg-info" not in d.name
),
None,
)
if inner_model_dir:
# Load the model
self.nlp = spacy.load(str(str(model_archive.parent / model_subdir.name / inner_model_dir.name / model_subdir.name)))
self.nlp = spacy.load(
str(str(model_archive.parent / model_subdir.name / inner_model_dir.name / model_subdir.name))
)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of the changes in this file are just formatting changes that got picked up.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check, I'm ignoring it then.

# Assign the grounding implementation
self.grounding_implementation = grounding_implementation

# Test the model with a sample sentence
self.nlp("Nystagmus, strabismus, fundus, ocular albinism, lewis.")



def get_annotated_entities(self, text) -> List[TextAnnotationResult]:
"""Annotate text using SPACY"""
results: List[TextAnnotationResult] = []
Expand Down
5 changes: 4 additions & 1 deletion backend/src/monarch_py/utils/association_type_mappings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,7 @@
category: "biolink:GenotypeToPhenotypicFeatureAssociation"
- subject_label: Disease Model
object_label: Disease Model
category: "biolink:GenotypeToDiseaseAssociation"
category: "biolink:GenotypeToDiseaseAssociation"
- subject_label: Medical Action
object_label: Medical Action
category: "biolink:ChemicalOrDrugOrTreatmentToDiseaseOrPhenotypicFeatureAssociation"
8 changes: 7 additions & 1 deletion backend/tests/fixtures/association_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@ def association_counts():
"items": [
{
"label": "Disease to Phenotype",
"count": 4082,
"count": 4077,
"category": "biolink:DiseaseToPhenotypicFeatureAssociation",
},
{"label": "Gene to Phenotype", "count": 6350, "category": "biolink:GeneToPhenotypicFeatureAssociation"},
{"label": "Causal Gene", "count": 125, "category": "biolink:CausalGeneToDiseaseAssociation"},
{"label": "Correlated Gene", "count": 150, "category": "biolink:CorrelatedGeneToDiseaseAssociation"},
{"label": "Variant to Disease", "count": 1, "category": "biolink:VariantToDiseaseAssociation"},
{"label": "Disease Model", "count": 238, "category": "biolink:GenotypeToDiseaseAssociation"},
{
"label": "Medical Action",
"count": 4,
"category": "biolink:ChemicalOrDrugOrTreatmentToDiseaseOrPhenotypicFeatureAssociation",
},
]
}
Loading
Loading