Skip to content

Commit

Permalink
Compact associations (#602)
Browse files Browse the repository at this point in the history
Closes #595

- Adds `compact` optional arg for `get_associations` in Solr and SQL
implementations
- add compact assoc. fixture
- add test for solr assoc parser
  • Loading branch information
glass-ships authored Feb 27, 2024
1 parent 5996d28 commit 50f5fa6
Show file tree
Hide file tree
Showing 20 changed files with 657 additions and 127 deletions.
6 changes: 6 additions & 0 deletions backend/src/monarch_py/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ def associations(
"-d",
help="Whether to exclude associations with subject/object as ancestors",
),
compact: bool = typer.Option(
False,
"--compact",
"-C",
help="Whether to return a compact representation of the associations",
),
limit: int = typer.Option(20, "--limit", "-l", help="The number of associations to return"),
offset: int = typer.Option(0, "--offset", help="The offset of the first association to be retrieved"),
fmt: str = typer.Option(
Expand Down
23 changes: 23 additions & 0 deletions backend/src/monarch_py/datamodels/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,17 @@ class Association(ConfiguredBaseModel):
)


class CompactAssociation(ConfiguredBaseModel):
    # Reduced association record: just the category, the
    # subject / predicate / object triple, display labels for the two
    # entities, and the negation flag.
    # NOTE(review): this appears to mirror the CompactAssociation entry in
    # model.yaml -- confirm whether this module is generated before
    # hand-editing it.

    # Field order is kept as declared; it determines serialization order.
    category: Optional[str] = Field(default=None)
    # Required: declared with `...`, so there is no default value.
    subject: str = Field(...)
    subject_label: Optional[str] = Field(
        default=None,
        description="The name of the subject entity",
    )
    # Required.
    predicate: str = Field(...)
    # Required.
    object: str = Field(...)
    object_label: Optional[str] = Field(
        default=None,
        description="The name of the object entity",
    )
    negated: Optional[bool] = Field(default=None)


class AssociationCountList(ConfiguredBaseModel):
"""
Container class for a list of association counts
Expand Down Expand Up @@ -538,6 +549,16 @@ class AssociationResults(Results):
total: int = Field(..., description="""total number of items matching a query""")


class CompactAssociationResults(Results):
    # Paginated result container whose `items` are CompactAssociation
    # records; `limit`, `offset` and `total` are all required.

    items: List[CompactAssociation] = Field(
        default_factory=list,
        description="A collection of items, with the type to be overriden by slot_usage",
    )
    limit: int = Field(
        ...,
        description="number of items to return in a response",
    )
    offset: int = Field(
        ...,
        description="offset into the total number of items",
    )
    total: int = Field(
        ...,
        description="total number of items matching a query",
    )


class AssociationTableResults(Results):

items: List[DirectionalAssociation] = Field(
Expand Down Expand Up @@ -745,6 +766,7 @@ class SemsimSearchResult(ConfiguredBaseModel):
# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
Association.model_rebuild()
CompactAssociation.model_rebuild()
AssociationCountList.model_rebuild()
AssociationTypeMapping.model_rebuild()
DirectionalAssociation.model_rebuild()
Expand All @@ -760,6 +782,7 @@ class SemsimSearchResult(ConfiguredBaseModel):
NodeHierarchy.model_rebuild()
Results.model_rebuild()
AssociationResults.model_rebuild()
CompactAssociationResults.model_rebuild()
AssociationTableResults.model_rebuild()
CategoryGroupedAssociationResults.model_rebuild()
EntityResults.model_rebuild()
Expand Down
17 changes: 16 additions & 1 deletion backend/src/monarch_py/datamodels/model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ classes:
slot_usage:
items:
range: Association
# Reduced association record: carries only the slots listed below
# (category, the subject/predicate/object triple, the two labels, negated).
CompactAssociation:
slots:
- category
- subject
- subject_label
- predicate
- object
- object_label
- negated
# Results container specialised for compact records: inherits from Results
# and narrows the range of `items` to CompactAssociation via slot_usage.
CompactAssociationResults:
is_a: Results
slots:
- items
slot_usage:
items:
range: CompactAssociation
AssociationTableResults:
is_a: Results
slots:
Expand Down Expand Up @@ -281,7 +297,6 @@ classes:
slot_usage:
items:
range: SearchResult

TextAnnotationResult:
slots:
- text
Expand Down
16 changes: 10 additions & 6 deletions backend/src/monarch_py/implementations/solr/solr_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import requests
from monarch_py.datamodels.model import (
Association,
CompactAssociation,
AssociationCountList,
AssociationResults,
AssociationTableResults,
Expand Down Expand Up @@ -240,9 +241,10 @@ def get_associations(
entity: List[str] = None,
direct: bool = False,
q: str = None,
compact: bool = False,
offset: int = 0,
limit: int = 20,
) -> AssociationResults:
) -> Union[AssociationResults, CompactAssociation]:
"""Retrieve paginated association records, with filter options
Args:
Expand All @@ -254,6 +256,7 @@ def get_associations(
object_closure: Filter to only associations with the specified term ID as an ancestor of the object. Defaults to None
entity: Filter to only associations where the specified entities are the subject or the object. Defaults to None.
q: Query string to search within matches. Defaults to None.
compact: Return compact results with fewer fields. Defaults to False.
offset: Result offset, for pagination. Defaults to 0.
limit: Limit results to specified number. Defaults to 20.
Expand All @@ -262,17 +265,17 @@ def get_associations(
"""
solr = SolrService(base_url=self.base_url, core=core.ASSOCIATION)
query = build_association_query(
category=[c.value for c in category] if category else None,
predicate=[p.value for p in predicate] if predicate else None,
category=[c.value for c in category] if category else [],
predicate=[p.value for p in predicate] if predicate else [],
subject=[subject] if isinstance(subject, str) else subject,
object=[object] if isinstance(object, str) else object,
entity=[entity] if isinstance(entity, str) else entity,
subject_closure=subject_closure,
object_closure=object_closure,
subject_category=[c.value for c in subject_category] if subject_category else None,
subject_category=[c.value for c in subject_category] if subject_category else [],
subject_namespace=[subject_namespace] if isinstance(subject_namespace, str) else subject_namespace,
subject_taxon=[subject_taxon] if isinstance(subject_taxon, str) else subject_taxon,
object_category=[c.value for c in object_category] if object_category else None,
object_category=[c.value for c in object_category] if object_category else [],
object_taxon=[object_taxon] if isinstance(object_taxon, str) else object_taxon,
object_namespace=[object_namespace] if isinstance(object_namespace, str) else object_namespace,
direct=direct,
Expand All @@ -281,7 +284,8 @@ def get_associations(
limit=limit,
)
query_result = solr.query(query)
associations = parse_associations(query_result, offset, limit)

associations = parse_associations(query_result, compact, offset, limit)
return associations

def get_histopheno(self, subject_closure: str = None) -> HistoPheno:
Expand Down
51 changes: 35 additions & 16 deletions backend/src/monarch_py/implementations/solr/solr_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

from monarch_py.datamodels.model import (
Association,
CompactAssociation,
CompactAssociationResults,
AssociationCount,
AssociationCountList,
AssociationDirectionEnum,
Expand Down Expand Up @@ -33,26 +35,44 @@

def parse_associations(
    query_result: SolrQueryResult,
    compact: bool = False,
    offset: int = 0,
    limit: int = 20,
    # Quoted so the annotation is never evaluated at definition time and
    # therefore needs no extra typing import in this module.
) -> "AssociationResults | CompactAssociationResults":
    """Convert a Solr association query result into a results model.

    Args:
        query_result: Solr response. ``response.docs`` supplies the documents
            and ``response.num_found`` the total number of matches.
        compact: When True, build ``CompactAssociation`` items from a fixed
            subset of each document's fields. When False, build full
            ``Association`` items and populate their link fields.
            Defaults to False.
        offset: Offset to record on the returned results. Defaults to 0.
        limit: Limit to record on the returned results. Defaults to 20.

    Returns:
        ``CompactAssociationResults`` when ``compact`` is True, otherwise
        ``AssociationResults``.

    Raises:
        ValidationError: If a document fails model validation. The offending
            document is logged and the original error is re-raised.
    """
    docs = query_result.response.docs
    total = query_result.response.num_found

    if compact:
        # Only these keys are copied; a key missing from the document
        # becomes None via ``dict.get``.
        compact_fields = (
            "category",
            "subject",
            "subject_label",
            "predicate",
            "object",
            "object_label",
            "negated",
        )
        compact_items = []
        for doc in docs:
            try:
                compact_items.append(CompactAssociation(**{name: doc.get(name) for name in compact_fields}))
            except ValidationError:
                logger.error(f"Validation error for {doc}")
                raise
        return CompactAssociationResults(items=compact_items, limit=limit, offset=offset, total=total)

    associations = []
    for doc in docs:
        try:
            association = Association(**doc)
        except ValidationError:
            logger.error(f"Validation error for {doc}")
            # Bare ``raise`` re-raises the caught error with its details;
            # ``raise ValidationError`` would try to instantiate the class
            # without arguments and fail with a different exception.
            raise
        # Link fields default to an empty list when the source field is
        # missing or empty.
        association.provided_by_link = (
            get_provided_by_link(association.provided_by) if association.provided_by else []
        )
        association.has_evidence_links = (
            get_links_for_field(association.has_evidence) if association.has_evidence else []
        )
        association.publications_links = (
            get_links_for_field(association.publications) if association.publications else []
        )
        associations.append(association)
    return AssociationResults(items=associations, limit=limit, offset=offset, total=total)


def parse_association_counts(query_result: SolrQueryResult, entity: str) -> AssociationCountList:
Expand Down Expand Up @@ -88,7 +108,6 @@ def parse_association_counts(query_result: SolrQueryResult, entity: str) -> Asso
def parse_entity(solr_document: Dict) -> Entity:
try:
entity = Entity(**solr_document)

entity.uri = converter.expand(entity.id)
except ValidationError:
logger.error(f"Validation error for {solr_document}")
Expand Down
101 changes: 66 additions & 35 deletions backend/src/monarch_py/implementations/sql/sql_implementation.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
from dataclasses import dataclass
from typing import List
from typing import List, Union

import pystow
from loguru import logger
from pydantic import ValidationError

from monarch_py.datamodels.model import Association, AssociationResults, Entity, Node, NodeHierarchy
from monarch_py.datamodels.model import (
Association,
CompactAssociation,
CompactAssociationResults,
AssociationResults,
Entity,
Node,
NodeHierarchy,
)
from monarch_py.interfaces.association_interface import AssociationInterface
from monarch_py.interfaces.entity_interface import EntityInterface
from monarch_py.service.curie_service import converter
Expand Down Expand Up @@ -168,9 +176,10 @@ def get_associations(
object_closure: str = None,
entity: List[str] = None,
direct: bool = None,
compact: bool = False,
offset: int = 0,
limit: int = 20,
) -> AssociationResults:
) -> Union[AssociationResults, CompactAssociationResults]:
"""Retrieve paginated association records, with filter options
Args:
Expand All @@ -182,6 +191,7 @@ def get_associations(
object_closure (str, optional): Filter to only associations the specified term ID as an ancestor of the object. Defaults to None.
entity (str, optional): Filter to only associations where the specified entity is the subject or the object. Defaults to None.
association_type (str, optional): Filter to only associations matching the specified association label. Defaults to None.
compact (bool, optional): Whether to return compact or full association records. Defaults to False.
offset (int, optional): Result offset, for pagination. Defaults to 0.
limit (int, optional): Limit results to specified number. Defaults to 20.
Expand Down Expand Up @@ -239,35 +249,56 @@ def get_associations(
total = count[f"COUNT(*)"]

associations = []
for row in results:
result = {
"id": row["id"],
"original_subject": row["original_subject"],
"predicate": row["predicate"],
"original_object": row["original_object"],
"category": row["category"],
"aggregator_knowledge_source": row["aggregator_knowledge_source"].split("|"),
"primary_knowledge_source": row["primary_knowledge_source"],
"publications": row["publications"].split("|"),
"qualifiers": row["qualifiers"].split("|"),
"provided_by": row["provided_by"],
"has_evidence": row["has_evidence"].split("|"),
"stage_qualifier": row["stage_qualifier"],
"negated": False if not row["negated"] else True,
"frequency_qualifier": row["frequency_qualifier"],
"onset_qualifier": row["onset_qualifier"],
"sex_qualifier": row["sex_qualifier"],
"subject": row["subject"],
"object": row["object"],
}
# Convert empty strings to null value
for key in result:
result[key] = None if not result[key] else result[key]
try:
associations.append(Association(**result))
except ValidationError:
logger.error(f"Validation error for {row}")
raise

results = AssociationResults(items=associations, limit=limit, offset=offset, total=total)
return results
if compact:
for row in results:
result = {
"category": row["category"],
"subject": row["subject"],
"subject_label": row["subject_label"],
"predicate": row["predicate"],
"object": row["object"],
"object_label": row["object_label"],
"negated": False if not row["negated"] else True,
}
# Convert empty strings to null value
for key in result:
result[key] = None if not result[key] else result[key]
try:
associations.append(CompactAssociation(**result))
except ValidationError:
logger.error(f"Validation error for {row}")
raise
return CompactAssociationResults(items=associations, limit=limit, offset=offset, total=total)
else:
for row in results:
result = {
"id": row["id"],
"original_subject": row["original_subject"],
"predicate": row["predicate"],
"original_object": row["original_object"],
"category": row["category"],
"aggregator_knowledge_source": row["aggregator_knowledge_source"].split("|"),
"primary_knowledge_source": row["primary_knowledge_source"],
"publications": row["publications"].split("|"),
"qualifiers": row["qualifiers"].split("|"),
"provided_by": row["provided_by"],
"has_evidence": row["has_evidence"].split("|"),
"stage_qualifier": row["stage_qualifier"],
"negated": False if not row["negated"] else True,
"frequency_qualifier": row["frequency_qualifier"],
"onset_qualifier": row["onset_qualifier"],
"sex_qualifier": row["sex_qualifier"],
"subject": row["subject"],
"object": row["object"],
}
# Convert empty strings to null value
for key in result:
result[key] = None if not result[key] else result[key]
if isinstance(result[key], list) and len(result[key]) == 1 and not result[key][0]:
result[key] = []
try:
associations.append(Association(**result))
except ValidationError:
logger.error(f"Validation error for {row}")
raise
return AssociationResults(items=associations, limit=limit, offset=offset, total=total)
6 changes: 6 additions & 0 deletions backend/src/monarch_py/solr_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ def associations(
"-d",
help="Whether to exclude associations with subject/object as ancestors",
),
compact: bool = typer.Option(
False,
"--compact",
"-C",
help="Whether to return a compact representation of the associations",
),
limit: int = typer.Option(20, "--limit", "-l", help="The number of associations to return"),
offset: int = typer.Option(0, "--offset", help="The offset of the first association to be retrieved"),
fmt: str = typer.Option(
Expand Down
8 changes: 8 additions & 0 deletions backend/src/monarch_py/sql_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ def associations(
"-d",
help="Whether to exclude associations with subject/object as ancestors",
),
compact: bool = typer.Option(
False,
"--compact",
"-C",
help="Whether to return a compact representation of the associations",
),
limit: int = typer.Option(20, "--limit", "-l", help="The number of associations to return"),
offset: int = typer.Option(0, "--offset", help="The offset of the first association to be retrieved"),
fmt: str = typer.Option(
Expand All @@ -107,6 +113,8 @@ def associations(
predicate: A comma-separated list of predicates
object: A comma-separated list of objects
entity: A comma-separated list of entities
direct: Whether to exclude associations with subject/object as ancestors
compact: Whether to return a compact representation of the associations
limit: The number of associations to return
offset: The offset of the first association to be retrieved
fmt: The format of the output (json, yaml, tsv, table)
Expand Down
Loading

0 comments on commit 50f5fa6

Please sign in to comment.