diff --git a/backend/src/monarch_py/api/association.py b/backend/src/monarch_py/api/association.py index d61cc910c..64dbd7646 100644 --- a/backend/src/monarch_py/api/association.py +++ b/backend/src/monarch_py/api/association.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Union +from typing import List, Union from fastapi import APIRouter, Depends, Query from monarch_py.api.additional_models import PaginationParams diff --git a/backend/src/monarch_py/api/semsim.py b/backend/src/monarch_py/api/semsim.py index 2bfc0ad5d..ae4157879 100644 --- a/backend/src/monarch_py/api/semsim.py +++ b/backend/src/monarch_py/api/semsim.py @@ -48,10 +48,10 @@ def _post_compare(request: SemsimCompareRequest): } """ - return semsimian().compare(request.subjects, request.objects) + return semsimian().compare(subjects=request.subjects, objects=request.objects) -@router.get("/search/{termset}/{category}") +@router.get("/search/{termset}/{group}") def _search( termset: str = Path(..., title="Termset to search"), group: SemsimSearchGroup = Path(..., title="Group of entities to search within (e.g. Human Genes)"), @@ -75,8 +75,8 @@ def _search( group: {group} """ ) - - results = semsimian().search(termset=termset.split(","), prefix=parse_similarity_prefix(group), limit=limit) + terms = [term.strip() for term in termset.split(",")] + results = semsimian().search(termset=terms, prefix=parse_similarity_prefix(group), limit=limit) return results @@ -94,4 +94,6 @@ def _post_search(request: SemsimSearchRequest): } """ - return semsimian().search(request.termset, parse_similarity_prefix(request.group.value), request.limit) + return semsimian().search( + termset=request.termset, prefix=parse_similarity_prefix(request.group.value), limit=request.limit + ) diff --git a/backend/src/monarch_py/datamodels/category_enums.py b/backend/src/monarch_py/datamodels/category_enums.py index 4b265afab..9608f1137 100644 --- a/backend/src/monarch_py/datamodels/category_enums.py +++ b/backend/src/monarch_py/datamodels/category_enums.py @@ -3,139 +3,149 @@ class EntityCategory(Enum): """Entity categories""" - GENE = 'biolink:Gene' - PHENOTYPIC_FEATURE = 'biolink:PhenotypicFeature' - BIOLOGICAL_PROCESS_OR_ACTIVITY = 'biolink:BiologicalProcessOrActivity' - GROSS_ANATOMICAL_STRUCTURE = 'biolink:GrossAnatomicalStructure' - DISEASE = 'biolink:Disease' - PATHWAY = 'biolink:Pathway' - CELL = 'biolink:Cell' - NAMED_THING = 'biolink:NamedThing' - ANATOMICAL_ENTITY = 'biolink:AnatomicalEntity' - CELLULAR_COMPONENT = 'biolink:CellularComponent' - MOLECULAR_ENTITY = 'biolink:MolecularEntity' - BIOLOGICAL_PROCESS = 'biolink:BiologicalProcess' - MACROMOLECULAR_COMPLEX = 'biolink:MacromolecularComplex' - MOLECULAR_ACTIVITY = 'biolink:MolecularActivity' - PROTEIN = 'biolink:Protein' - CELLULAR_ORGANISM = 'biolink:CellularOrganism' - PHENOTYPIC_QUALITY = 'biolink:PhenotypicQuality' - VERTEBRATE = 'biolink:Vertebrate' - VIRUS = 'biolink:Virus' - BEHAVIORAL_FEATURE = 'biolink:BehavioralFeature' - LIFE_STAGE = 'biolink:LifeStage' - PATHOLOGICAL_PROCESS = 'biolink:PathologicalProcess' - CHEMICAL_ENTITY = 'biolink:ChemicalEntity' - DRUG = 'biolink:Drug' - INFORMATION_CONTENT_ENTITY = 'biolink:InformationContentEntity' - SEQUENCE_VARIANT = 'biolink:SequenceVariant' - SMALL_MOLECULE = 'biolink:SmallMolecule' - ORGANISM_TAXON = 'biolink:OrganismTaxon' - NUCLEIC_ACID_ENTITY = 'biolink:NucleicAcidEntity' - EVIDENCE_TYPE = 'biolink:EvidenceType' - GEOGRAPHIC_EXPOSURE = 'biolink:GeographicExposure' - RNAPRODUCT = 'biolink:RNAProduct' - TRANSCRIPT = 'biolink:Transcript' - FUNGUS = 'biolink:Fungus' - PLANT = 'biolink:Plant' - POPULATION_OF_INDIVIDUAL_ORGANISMS = 'biolink:PopulationOfIndividualOrganisms' - INVERTEBRATE = 'biolink:Invertebrate' - PROTEIN_DOMAIN = 'biolink:ProteinDomain' - PROTEIN_FAMILY = 'biolink:ProteinFamily' - ACTIVITY = 'biolink:Activity' - AGENT = 'biolink:Agent' - CHEMICAL_EXPOSURE = 'biolink:ChemicalExposure' - CONFIDENCE_LEVEL = 'biolink:ConfidenceLevel' - DATASET = 'biolink:Dataset' - ENVIRONMENTAL_FEATURE = 'biolink:EnvironmentalFeature' - EXON = 'biolink:Exon' - GENETIC_INHERITANCE = 'biolink:GeneticInheritance' - GENOME = 'biolink:Genome' - GENOTYPE = 'biolink:Genotype' - HAPLOTYPE = 'biolink:Haplotype' - HUMAN = 'biolink:Human' - INDIVIDUAL_ORGANISM = 'biolink:IndividualOrganism' - MAMMAL = 'biolink:Mammal' - MATERIAL_SAMPLE = 'biolink:MaterialSample' - MICRO_RNA = 'biolink:MicroRNA' - PATENT = 'biolink:Patent' - PUBLICATION = 'biolink:Publication' - REGULATORY_REGION = 'biolink:RegulatoryRegion' - STUDY = 'biolink:Study' - TREATMENT = 'biolink:Treatment' - WEB_PAGE = 'biolink:WebPage' - ACCESSIBLE_DNA_REGION = 'biolink:AccessibleDnaRegion' - ARTICLE = 'biolink:Article' - ATTRIBUTE = 'biolink:Attribute' - BACTERIUM = 'biolink:Bacterium' - BIOLOGICAL_ENTITY = 'biolink:BiologicalEntity' - BIOLOGICAL_SEX = 'biolink:BiologicalSex' - CELL_LINE = 'biolink:CellLine' - CHEMICAL_MIXTURE = 'biolink:ChemicalMixture' - CODING_SEQUENCE = 'biolink:CodingSequence' - DATASET_DISTRIBUTION = 'biolink:DatasetDistribution' - DIAGNOSTIC_AID = 'biolink:DiagnosticAid' - DRUG_EXPOSURE = 'biolink:DrugExposure' - ENVIRONMENTAL_PROCESS = 'biolink:EnvironmentalProcess' - EVENT = 'biolink:Event' - GENOTYPIC_SEX = 'biolink:GenotypicSex' - NONCODING_RNAPRODUCT = 'biolink:NoncodingRNAProduct' - ORGANISMAL_ENTITY = 'biolink:OrganismalEntity' - PHENOTYPIC_SEX = 'biolink:PhenotypicSex' - POLYPEPTIDE = 'biolink:Polypeptide' - PROCEDURE = 'biolink:Procedure' - PROCESSED_MATERIAL = 'biolink:ProcessedMaterial' - REAGENT_TARGETED_GENE = 'biolink:ReagentTargetedGene' - SI_RNA = 'biolink:SiRNA' - SNV = 'biolink:Snv' - STUDY_VARIABLE = 'biolink:StudyVariable' - TRANSCRIPTION_FACTOR_BINDING_SITE = 'biolink:TranscriptionFactorBindingSite' - ZYGOSITY = 'biolink:Zygosity' - + + GENE = "biolink:Gene" + PHENOTYPIC_FEATURE = "biolink:PhenotypicFeature" + BIOLOGICAL_PROCESS_OR_ACTIVITY = "biolink:BiologicalProcessOrActivity" + GROSS_ANATOMICAL_STRUCTURE = "biolink:GrossAnatomicalStructure" + DISEASE = "biolink:Disease" + PATHWAY = "biolink:Pathway" + CELL = "biolink:Cell" + NAMED_THING = "biolink:NamedThing" + ANATOMICAL_ENTITY = "biolink:AnatomicalEntity" + CELLULAR_COMPONENT = "biolink:CellularComponent" + MOLECULAR_ENTITY = "biolink:MolecularEntity" + BIOLOGICAL_PROCESS = "biolink:BiologicalProcess" + MACROMOLECULAR_COMPLEX = "biolink:MacromolecularComplex" + MOLECULAR_ACTIVITY = "biolink:MolecularActivity" + PROTEIN = "biolink:Protein" + CELLULAR_ORGANISM = "biolink:CellularOrganism" + PHENOTYPIC_QUALITY = "biolink:PhenotypicQuality" + VERTEBRATE = "biolink:Vertebrate" + VIRUS = "biolink:Virus" + BEHAVIORAL_FEATURE = "biolink:BehavioralFeature" + LIFE_STAGE = "biolink:LifeStage" + PATHOLOGICAL_PROCESS = "biolink:PathologicalProcess" + CHEMICAL_ENTITY = "biolink:ChemicalEntity" + DRUG = "biolink:Drug" + INFORMATION_CONTENT_ENTITY = "biolink:InformationContentEntity" + SEQUENCE_VARIANT = "biolink:SequenceVariant" + SMALL_MOLECULE = "biolink:SmallMolecule" + ORGANISM_TAXON = "biolink:OrganismTaxon" + NUCLEIC_ACID_ENTITY = "biolink:NucleicAcidEntity" + EVIDENCE_TYPE = "biolink:EvidenceType" + GEOGRAPHIC_EXPOSURE = "biolink:GeographicExposure" + RNAPRODUCT = "biolink:RNAProduct" + TRANSCRIPT = "biolink:Transcript" + FUNGUS = "biolink:Fungus" + PLANT = "biolink:Plant" + POPULATION_OF_INDIVIDUAL_ORGANISMS = "biolink:PopulationOfIndividualOrganisms" + INVERTEBRATE = "biolink:Invertebrate" + PROTEIN_DOMAIN = "biolink:ProteinDomain" + PROTEIN_FAMILY = "biolink:ProteinFamily" + ACTIVITY = "biolink:Activity" + AGENT = "biolink:Agent" + CHEMICAL_EXPOSURE = "biolink:ChemicalExposure" + CONFIDENCE_LEVEL = "biolink:ConfidenceLevel" + DATASET = "biolink:Dataset" + ENVIRONMENTAL_FEATURE = "biolink:EnvironmentalFeature" + EXON = "biolink:Exon" + GENETIC_INHERITANCE = "biolink:GeneticInheritance" + GENOME = "biolink:Genome" + GENOTYPE = "biolink:Genotype" + HAPLOTYPE = "biolink:Haplotype" + HUMAN = "biolink:Human" + INDIVIDUAL_ORGANISM = "biolink:IndividualOrganism" + MAMMAL = "biolink:Mammal" + MATERIAL_SAMPLE = "biolink:MaterialSample" + MICRO_RNA = "biolink:MicroRNA" + PATENT = "biolink:Patent" + PUBLICATION = "biolink:Publication" + REGULATORY_REGION = "biolink:RegulatoryRegion" + STUDY = "biolink:Study" + TREATMENT = "biolink:Treatment" + WEB_PAGE = "biolink:WebPage" + ACCESSIBLE_DNA_REGION = "biolink:AccessibleDnaRegion" + ARTICLE = "biolink:Article" + ATTRIBUTE = "biolink:Attribute" + BACTERIUM = "biolink:Bacterium" + BIOLOGICAL_ENTITY = "biolink:BiologicalEntity" + BIOLOGICAL_SEX = "biolink:BiologicalSex" + CELL_LINE = "biolink:CellLine" + CHEMICAL_MIXTURE = "biolink:ChemicalMixture" + CODING_SEQUENCE = "biolink:CodingSequence" + DATASET_DISTRIBUTION = "biolink:DatasetDistribution" + DIAGNOSTIC_AID = "biolink:DiagnosticAid" + DRUG_EXPOSURE = "biolink:DrugExposure" + ENVIRONMENTAL_PROCESS = "biolink:EnvironmentalProcess" + EVENT = "biolink:Event" + GENOTYPIC_SEX = "biolink:GenotypicSex" + NONCODING_RNAPRODUCT = "biolink:NoncodingRNAProduct" + ORGANISMAL_ENTITY = "biolink:OrganismalEntity" + PHENOTYPIC_SEX = "biolink:PhenotypicSex" + POLYPEPTIDE = "biolink:Polypeptide" + PROCEDURE = "biolink:Procedure" + PROCESSED_MATERIAL = "biolink:ProcessedMaterial" + REAGENT_TARGETED_GENE = "biolink:ReagentTargetedGene" + SI_RNA = "biolink:SiRNA" + SNV = "biolink:Snv" + STUDY_VARIABLE = "biolink:StudyVariable" + TRANSCRIPTION_FACTOR_BINDING_SITE = "biolink:TranscriptionFactorBindingSite" + ZYGOSITY = "biolink:Zygosity" + class AssociationCategory(Enum): """Association categories""" - PAIRWISE_GENE_TO_GENE_INTERACTION = 'biolink:PairwiseGeneToGeneInteraction' - GENE_TO_EXPRESSION_SITE_ASSOCIATION = 'biolink:GeneToExpressionSiteAssociation' - MACROMOLECULAR_MACHINE_TO_BIOLOGICAL_PROCESS_ASSOCIATION = 'biolink:MacromolecularMachineToBiologicalProcessAssociation' - GENE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = 'biolink:GeneToPhenotypicFeatureAssociation' - MACROMOLECULAR_MACHINE_TO_MOLECULAR_ACTIVITY_ASSOCIATION = 'biolink:MacromolecularMachineToMolecularActivityAssociation' - MACROMOLECULAR_MACHINE_TO_CELLULAR_COMPONENT_ASSOCIATION = 'biolink:MacromolecularMachineToCellularComponentAssociation' - ASSOCIATION = 'biolink:Association' - GENE_TO_GENE_HOMOLOGY_ASSOCIATION = 'biolink:GeneToGeneHomologyAssociation' - DISEASE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = 'biolink:DiseaseToPhenotypicFeatureAssociation' - GENE_TO_PATHWAY_ASSOCIATION = 'biolink:GeneToPathwayAssociation' - CHEMICAL_TO_PATHWAY_ASSOCIATION = 'biolink:ChemicalToPathwayAssociation' - CORRELATED_GENE_TO_DISEASE_ASSOCIATION = 'biolink:CorrelatedGeneToDiseaseAssociation' - DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE_ASSOCIATION = 'biolink:DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation' - CAUSAL_GENE_TO_DISEASE_ASSOCIATION = 'biolink:CausalGeneToDiseaseAssociation' - CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = 'biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation' - + + PAIRWISE_GENE_TO_GENE_INTERACTION = "biolink:PairwiseGeneToGeneInteraction" + GENE_TO_EXPRESSION_SITE_ASSOCIATION = "biolink:GeneToExpressionSiteAssociation" + MACROMOLECULAR_MACHINE_TO_BIOLOGICAL_PROCESS_ASSOCIATION = ( + "biolink:MacromolecularMachineToBiologicalProcessAssociation" + ) + GENE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:GeneToPhenotypicFeatureAssociation" + MACROMOLECULAR_MACHINE_TO_MOLECULAR_ACTIVITY_ASSOCIATION = ( + "biolink:MacromolecularMachineToMolecularActivityAssociation" + ) + MACROMOLECULAR_MACHINE_TO_CELLULAR_COMPONENT_ASSOCIATION = ( + "biolink:MacromolecularMachineToCellularComponentAssociation" + ) + ASSOCIATION = "biolink:Association" + GENE_TO_GENE_HOMOLOGY_ASSOCIATION = "biolink:GeneToGeneHomologyAssociation" + DISEASE_TO_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:DiseaseToPhenotypicFeatureAssociation" + GENE_TO_PATHWAY_ASSOCIATION = "biolink:GeneToPathwayAssociation" + CHEMICAL_TO_PATHWAY_ASSOCIATION = "biolink:ChemicalToPathwayAssociation" + CORRELATED_GENE_TO_DISEASE_ASSOCIATION = "biolink:CorrelatedGeneToDiseaseAssociation" + DISEASE_OR_PHENOTYPIC_FEATURE_TO_GENETIC_INHERITANCE_ASSOCIATION = ( + "biolink:DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation" + ) + CAUSAL_GENE_TO_DISEASE_ASSOCIATION = "biolink:CausalGeneToDiseaseAssociation" + CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE_ASSOCIATION = "biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation" + class AssociationPredicate(Enum): """Association predicates""" - INTERACTS_WITH = 'biolink:interacts_with' - EXPRESSED_IN = 'biolink:expressed_in' - HAS_PHENOTYPE = 'biolink:has_phenotype' - ENABLES = 'biolink:enables' - ACTIVELY_INVOLVED_IN = 'biolink:actively_involved_in' - ORTHOLOGOUS_TO = 'biolink:orthologous_to' - LOCATED_IN = 'biolink:located_in' - SUBCLASS_OF = 'biolink:subclass_of' - PARTICIPATES_IN = 'biolink:participates_in' - RELATED_TO = 'biolink:related_to' - ACTS_UPSTREAM_OF_OR_WITHIN = 'biolink:acts_upstream_of_or_within' - ACTIVE_IN = 'biolink:active_in' - PART_OF = 'biolink:part_of' - ACTS_UPSTREAM_OF = 'biolink:acts_upstream_of' - HAS_MODE_OF_INHERITANCE = 'biolink:has_mode_of_inheritance' - GENE_ASSOCIATED_WITH_CONDITION = 'biolink:gene_associated_with_condition' - CONTRIBUTES_TO = 'biolink:contributes_to' - CAUSES = 'biolink:causes' - COLOCALIZES_WITH = 'biolink:colocalizes_with' - ACTS_UPSTREAM_OF_OR_WITHIN_POSITIVE_EFFECT = 'biolink:acts_upstream_of_or_within_positive_effect' - ACTS_UPSTREAM_OF_POSITIVE_EFFECT = 'biolink:acts_upstream_of_positive_effect' - AFFECTS = 'biolink:affects' - ACTS_UPSTREAM_OF_OR_WITHIN_NEGATIVE_EFFECT = 'biolink:acts_upstream_of_or_within_negative_effect' - ACTS_UPSTREAM_OF_NEGATIVE_EFFECT = 'biolink:acts_upstream_of_negative_effect' - + + INTERACTS_WITH = "biolink:interacts_with" + EXPRESSED_IN = "biolink:expressed_in" + HAS_PHENOTYPE = "biolink:has_phenotype" + ENABLES = "biolink:enables" + ACTIVELY_INVOLVED_IN = "biolink:actively_involved_in" + ORTHOLOGOUS_TO = "biolink:orthologous_to" + LOCATED_IN = "biolink:located_in" + SUBCLASS_OF = "biolink:subclass_of" + PARTICIPATES_IN = "biolink:participates_in" + RELATED_TO = "biolink:related_to" + ACTS_UPSTREAM_OF_OR_WITHIN = "biolink:acts_upstream_of_or_within" + ACTIVE_IN = "biolink:active_in" + PART_OF = "biolink:part_of" + ACTS_UPSTREAM_OF = "biolink:acts_upstream_of" + HAS_MODE_OF_INHERITANCE = "biolink:has_mode_of_inheritance" + GENE_ASSOCIATED_WITH_CONDITION = "biolink:gene_associated_with_condition" + CONTRIBUTES_TO = "biolink:contributes_to" + CAUSES = "biolink:causes" + COLOCALIZES_WITH = "biolink:colocalizes_with" + ACTS_UPSTREAM_OF_OR_WITHIN_POSITIVE_EFFECT = "biolink:acts_upstream_of_or_within_positive_effect" + ACTS_UPSTREAM_OF_POSITIVE_EFFECT = "biolink:acts_upstream_of_positive_effect" + AFFECTS = "biolink:affects" + ACTS_UPSTREAM_OF_OR_WITHIN_NEGATIVE_EFFECT = "biolink:acts_upstream_of_or_within_negative_effect" + ACTS_UPSTREAM_OF_NEGATIVE_EFFECT = "biolink:acts_upstream_of_negative_effect" diff --git a/backend/tests/api/test_semsim_router.py b/backend/tests/api/test_semsim_router.py index 3c5e13b59..17413231b 100644 --- a/backend/tests/api/test_semsim_router.py +++ b/backend/tests/api/test_semsim_router.py @@ -1,10 +1,63 @@ import pytest from fastapi.testclient import TestClient +from fastapi import status +from unittest.mock import MagicMock, patch + +from monarch_py.api.additional_models import SemsimSearchGroup from monarch_py.api.semsim import router client = TestClient(router) -@pytest.mark.skip(reason="Not implemented") -def test_semsim(semsim): - ... +@patch("monarch_py.api.config.SemsimianHTTPRequester.compare") +def test_get_compare(mock_compare): + mock_compare.return_value = MagicMock() + + subjects = "HP:123,HP:456" + objects = "HP:789,HP:101112" + + response = client.get(f"/compare/{subjects}/{objects}") + + assert response.status_code == status.HTTP_200_OK + mock_compare.assert_called_once_with(subjects=["HP:123", "HP:456"], objects=["HP:789", "HP:101112"]) + + +@patch("monarch_py.api.config.SemsimianHTTPRequester.compare") +def test_post_compare(mock_compare): + mock_compare.return_value = MagicMock() + + subjects = ["HP:123", "HP:456"] + objects = ["HP:789", "HP:101112"] + + response = client.post(f"/compare/", json={"subjects": subjects, "objects": objects}) + + assert response.status_code == status.HTTP_200_OK + mock_compare.assert_called_once_with(subjects=subjects, objects=objects) + + +@patch("monarch_py.api.config.SemsimianHTTPRequester.search") +@pytest.mark.parametrize("termset", ["HP:123,HP:456", "HP:123, HP:456", " HP:123, HP:456 "]) +def test_get_search(mock_search, termset: str): + mock_search.return_value = MagicMock() + + group = SemsimSearchGroup.HGNC + limit = 5 + + response = client.get(f"/search/{termset}/{group.value}?limit={limit}") + + assert response.status_code == status.HTTP_200_OK + mock_search.assert_called_once_with(termset=["HP:123", "HP:456"], prefix=group.name, limit=limit) + + +@patch("monarch_py.api.config.SemsimianHTTPRequester.search") +def test_post_search(mock_search): + mock_search.return_value = MagicMock() + + termset = ["HP:123", "HP:456"] + group = SemsimSearchGroup.HGNC + limit = 5 + + response = client.post(f"/search/", json={"termset": termset, "group": group.value, "limit": limit}) + + assert response.status_code == status.HTTP_200_OK + mock_search.assert_called_once_with(termset=["HP:123", "HP:456"], prefix=group.name, limit=limit)