Skip to content

Commit

Permalink
wip: test cases for annotated variants
Browse files Browse the repository at this point in the history
  • Loading branch information
bencap committed Jan 27, 2025
1 parent 1dbe123 commit 07de0cd
Show file tree
Hide file tree
Showing 12 changed files with 517 additions and 159 deletions.
46 changes: 34 additions & 12 deletions src/mavedb/lib/annotation/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,67 @@
from ga4gh.core.entity_models import Agent, Extension, AgentSubtype

from mavedb import __version__
from mavedb.models.user import User

logger = logging.getLogger(__name__)


def mavedb_api_agent() -> Agent:
    """
    Create a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
    object for the current MaveDB API version.

    Returns:
        An ``Agent`` with the software subtype whose single extension records the
        package ``__version__`` under the name ``mavedbApiVersion``.
    """
    # Every keyword is passed exactly once: repeating `name=` (or `label=` /
    # `description=` below) in a single call is a SyntaxError.
    version_at_time_of_generation = Extension(
        name="mavedbApiVersion",
        value=__version__,
        # TODO: stitched from constant base URL?
        description="The MaveDB API version used to generate this record. See: https://github.com/VariantEffect/mavedb-api/releases",
    )

    return Agent(
        subtype=AgentSubtype.SOFTWARE,
        label="MaveDB API",
        description=f"MaveDB API agent, version {__version__}",
        extensions=[version_at_time_of_generation],
    )


def mavedb_vrs_agent(version: str) -> Agent:
    """
    Create a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
    object for the passed MaveDB VRS mapping version.

    Args:
        version: The VRS mapping version string to record on the agent.

    Returns:
        An ``Agent`` with the software subtype whose single extension records
        ``version`` under the name ``mavedbVrsVersion``.
    """
    # Every keyword is passed exactly once: repeating `name=` (or `label=` /
    # `description=` below) in a single call is a SyntaxError.
    version_at_time_of_variant_generation = Extension(
        name="mavedbVrsVersion",
        value=version,
        # TODO: stitched from constant base URL?
        description="The VRS mapping version used to generate this record. See: https://github.com/VariantEffect/dcd_mapping2/releases",
    )

    return Agent(
        subtype=AgentSubtype.SOFTWARE,
        label="MaveDB VRS mapper",
        description=f"MaveDB VRS mapping agent, version {version_at_time_of_variant_generation.value}",
        extensions=[version_at_time_of_variant_generation],
    )


# TODO: Versioned software.
def mavedb_user_agent(user: User) -> Agent:
    """
    Build a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
    object describing the passed MaveDB user.

    Args:
        user: The MaveDB user; its ``username`` becomes the agent id.

    Returns:
        An ``Agent`` with the person subtype and a fixed descriptive label.
    """
    person_agent = Agent(
        label="MaveDB ORCid authenticated user",
        subtype=AgentSubtype.PERSON,
        id=user.username,
    )
    return person_agent


# TODO: Ideally, this becomes versioned software.
def pillar_project_calibration_agent() -> Agent:
    """
    Create a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
    object for the pillar project calibration software.

    Returns:
        An ``Agent`` with the software subtype that points at the calibration
        software's repository via ``reportedIn``.
    """
    return Agent(
        subtype=AgentSubtype.SOFTWARE,
        # `label` is passed once; a repeated keyword in a call is a SyntaxError.
        label="Pillar project variant calibrator",
        reportedIn="https://github.com/Dzeiberg/mave_calibration",
    )
4 changes: 2 additions & 2 deletions src/mavedb/lib/annotation/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def functional_classification_of_variant(mapped_variant: MappedVariant) -> Optio
score_ranges = ScoreRanges(**jsonable_encoder(mapped_variant.variant.score_set.score_ranges))

# This property of this column is guaranteed to be defined.
functional_score: float = mapped_variant.variant.data["score"] # type: ignore
functional_score: float = mapped_variant.variant.data["score_data"]["score"] # type: ignore
for range in score_ranges.ranges:
lower_bound, upper_bound = inf_or_float(range.range[0], lower=True), inf_or_float(range.range[1], lower=False)
if functional_score > lower_bound and functional_score <= upper_bound:
Expand All @@ -72,7 +72,7 @@ def pillar_project_clinical_classification_of_variant(
# NOTE: It is presumed these thresholds are ordered.

# This property of this column is guaranteed to be defined.
functional_score: float = mapped_variant.variant.data["score"] # type: ignore
functional_score: float = mapped_variant.variant.data["score_data"]["score"] # type: ignore

most_extreme_evidence_strength = 0
for idx, threshold in enumerate(score_calibration.thresholds):
Expand Down
115 changes: 109 additions & 6 deletions src/mavedb/lib/annotation/contribution.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,132 @@
import logging
from datetime import datetime
from typing import Union

from ga4gh.core.entity_models import Contribution
from ga4gh.core.entity_models import Contribution, Coding, Extension

from mavedb.models.experiment_set import ExperimentSet
from mavedb.models.experiment import Experiment
from mavedb.models.score_set import ScoreSet
from mavedb.models.mapped_variant import MappedVariant
from mavedb.lib.annotation.agent import mavedb_api_agent, mavedb_vrs_agent, pillar_project_calibration_agent
from mavedb.models.variant import Variant
from mavedb.models.user import User
from mavedb.lib.annotation.agent import (
mavedb_api_agent,
mavedb_vrs_agent,
mavedb_user_agent,
pillar_project_calibration_agent,
)
from mavedb.lib.annotation.method import mavedb_api_as_method, mavedb_vrs_as_method, pillar_project_calibration_method


logger = logging.getLogger(__name__)

# Non-exhaustive
ResourceWithCreationModificationDates = Union[ExperimentSet, Experiment, ScoreSet, MappedVariant, Variant]


def mavedb_api_contribution() -> Contribution:
    """
    Create a [VA Contribution](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/activities/contribution.html#contribution)
    object for an arbitrary contribution from the MaveDB API/software distribution.

    Returns:
        A ``Contribution`` attributed to the MaveDB API agent, dated with today's
        date formatted as ``YYYY-MM-DD`` and specified by the MaveDB API method.
    """
    # There is a single return: an earlier one-line `return Contribution(...)`
    # placed ahead of the docstring would short-circuit this body and drop the
    # formatted date and the activity type.
    return Contribution(
        contributor=[mavedb_api_agent()],
        date=datetime.today().strftime("%Y-%m-%d"),
        specifiedBy=[mavedb_api_as_method()],
        activityType=Coding(
            label="application programming interface",
            system="http://purl.obolibrary.org/obo/swo.owl",
            systemVersion="2023-03-05",
            code="SWO_9000054",
        ),
    )


def mavedb_vrs_contribution(mapped_variant: MappedVariant) -> Contribution:
    """
    Create a [VA Contribution](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/activities/contribution.html#contribution)
    object from the provided mapped variant.

    Args:
        mapped_variant: The mapped variant whose ``mapping_api_version`` and
            ``mapped_date`` describe the mapping run.

    Returns:
        A ``Contribution`` attributed to the MaveDB VRS mapping agent, dated with
        the mapping date formatted as ``YYYY-MM-DD``.
    """
    return Contribution(
        contributor=[mavedb_vrs_agent(mapped_variant.mapping_api_version)],
        # `date` is passed once, already formatted. The instance method is used
        # because the unbound `datetime.strftime(obj, fmt)` raises TypeError when
        # `obj` is a plain `date` rather than a `datetime`.
        date=mapped_variant.mapped_date.strftime("%Y-%m-%d"),
        specifiedBy=[mavedb_vrs_as_method()],
        activityType=Coding(
            label="planned process",
            system="http://purl.obolibrary.org/obo/swo.owl",
            systemVersion="2023-03-05",
            code="OBI_0000011",
        ),
    )


def pillar_project_calibration_contribution() -> Contribution:
    """
    Build a [VA Contribution](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/activities/contribution.html#contribution)
    object for a software agent which performs calibrations on an arbitrary data set.

    Returns:
        A ``Contribution`` attributed to the pillar project calibration agent and
        specified by the pillar project calibration method. No date is set.
    """
    calibration_activity = Coding(
        label="planned process",
        system="http://purl.obolibrary.org/obo/swo.owl",
        systemVersion="2023-03-05",
        code="OBI_0000011",
    )
    calibration_contribution = Contribution(
        contributor=[pillar_project_calibration_agent()],
        specifiedBy=[pillar_project_calibration_method()],
        activityType=calibration_activity,
    )
    return calibration_contribution


def mavedb_creator_contribution(created_resource: ResourceWithCreationModificationDates, creator: User) -> Contribution:
    """
    Create a [VA Contribution](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/activities/contribution.html#contribution)
    object from the provided createable resource (a resource with both a creation date and creator).

    Args:
        created_resource: The resource whose ``creation_date`` dates the contribution
            and whose class name is recorded in a ``resourceType`` extension.
        creator: The user credited as the contributor.

    Returns:
        A ``Contribution`` labelled "Resource First Submitted".
    """
    return Contribution(
        # The only contributor is the creating user; the calibration agent and
        # method belong to `pillar_project_calibration_contribution`, and a second
        # `contributor=` keyword here would be a SyntaxError.
        contributor=[mavedb_user_agent(creator)],
        # Instance method: the unbound `datetime.strftime(obj, fmt)` raises
        # TypeError when `obj` is a plain `date` rather than a `datetime`.
        date=created_resource.creation_date.strftime("%Y-%m-%d"),
        label="Resource First Submitted",
        activityType=Coding(
            label="submitter role",
            system="http://purl.obolibrary.org/obo/cro.owl",
            code="CRO_0000105",
            systemVersion="v2019-08-16",
        ),
        extensions=[Extension(name="resourceType", value=created_resource.__class__.__name__)],
    )


def mavedb_modifier_contribution(
    modified_resource: ResourceWithCreationModificationDates, modifier: User
) -> Contribution:
    """
    Create a [VA Contribution](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/activities/contribution.html#contribution)
    object from the provided modifiable resource (a resource with both a modification date and modifier).

    Args:
        modified_resource: The resource whose ``modification_date`` dates the
            contribution and whose class name is recorded in a ``resourceType`` extension.
        modifier: The user credited as the contributor.

    Returns:
        A ``Contribution`` labelled "Resource Last Updated".
    """
    return Contribution(
        contributor=[mavedb_user_agent(modifier)],
        # Instance method: the unbound `datetime.strftime(obj, fmt)` raises
        # TypeError when `obj` is a plain `date` rather than a `datetime`.
        date=modified_resource.modification_date.strftime("%Y-%m-%d"),
        label="Resource Last Updated",
        activityType=Coding(
            label="modifier role",
            system="http://purl.obolibrary.org/obo/cro.owl",
            code="CRO_0000103",
            systemVersion="v2019-08-16",
        ),
        extensions=[Extension(name="resourceType", value=modified_resource.__class__.__name__)],
    )


# TODO: Although we would ideally provide a contribution object for the publisher of the data set, we don't
# save which user actually published it. We could proxy this by just using the creator, but this is
# not always strictly accurate.
#
# ResourceWithPublicationDate = Union[ExperimentSet, Experiment, ScoreSet]
# def mavedb_publisher_contribution(published_resource: ResourceWithPublicationDate, publisher: User) -> Contribution:
# return Contribution(
# contributor=[mavedb_user_agent(publisher)],
# date=datetime.strftime(published_resource.publication_date, "%Y-%m-%d"),
# label="Resource First Published",
# activityType=Coding(label="author role", system="http://purl.obolibrary.org/obo/cro.owl", code="CRO_0000001", systemVersion="v2019-08-16"),
# extensions=[Extension(name="resourceType", value=published_resource.__class__)],
# )
17 changes: 11 additions & 6 deletions src/mavedb/lib/annotation/dataset.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
import logging
from datetime import datetime

from ga4gh.core.entity_models import DataSet

from mavedb.models.score_set import ScoreSet

from mavedb.lib.annotation.contribution import mavedb_creator_contribution, mavedb_modifier_contribution
from mavedb.lib.annotation.document import score_set_to_document

logger = logging.getLogger(__name__)


# TODO: How deep should these objects get? See: https://github.com/ga4gh/va-spec/blob/1.x/examples/mavedb/pten-variant-example.json
def score_set_to_data_set(score_set: ScoreSet) -> DataSet:
    """
    Create a [VA Data Set](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/information-entities/dataset.html#data-set)
    object from the provided MaveDB score set.

    Args:
        score_set: The score set to transform. Its URN, license short name, creator,
            modifier and published date are read.

    Returns:
        A ``DataSet`` identified by the score set URN, reported in the score set's
        document, with one creator and one modifier contribution.
    """
    return DataSet(
        id=score_set.urn,
        label="Variant effect data set",
        license=score_set.license.short_name,
        # TODO: Better to use this IRI reference, or the actual `Document` GA4GH object?
        # `reportedIn` is passed once, as a list; a repeated keyword is a SyntaxError.
        reportedIn=[score_set_to_document(score_set)],
        contributions=[
            mavedb_creator_contribution(score_set, score_set.created_by),
            mavedb_modifier_contribution(score_set, score_set.modified_by),
        ],
        # Instance method: the unbound `datetime.strftime(obj, fmt)` raises
        # TypeError when `obj` is a plain `date` rather than a `datetime`.
        # NOTE(review): this assumes `published_date` is set — confirm whether
        # unpublished score sets can reach this function.
        releaseDate=score_set.published_date.strftime("%Y-%m-%d"),
    )
59 changes: 45 additions & 14 deletions src/mavedb/lib/annotation/document.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import urllib.parse

from ga4gh.core.entity_models import Document
from ga4gh.core.entity_models import Document, IRI

from mavedb.models.experiment import Experiment
from mavedb.models.score_set import ScoreSet
Expand All @@ -13,34 +13,65 @@
BASE_URL = "https://mavedb.org"


def experiment_as_iri(experiment: Experiment) -> IRI:
    """
    Build an IRI, as described in <https://datatracker.ietf.org/doc/html/rfc3986#section-4.1>, for the provided
    MaveDB experiment. Within the context of VA-Spec, these can be used interchangeably with an equivalent
    document object for brevity.

    The result is the base URL followed by ``/experiments/`` and the experiment URN.
    """
    experiment_path = f"/experiments/{experiment.urn}"
    return f"{BASE_URL}{experiment_path}"


def experiment_to_document(experiment: Experiment) -> Document:
    """
    Create a [VA Document](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/information-entities/document.html#document)
    object from the provided MaveDB experiment.

    Args:
        experiment: The experiment whose URN and title populate the document.

    Returns:
        A ``Document`` identified by the experiment URN whose ``urls`` list holds
        the experiment's IRI.
    """
    return Document(
        id=experiment.urn,
        label="MaveDB experiment",
        title=experiment.title,
        # `urls` is passed once, as a list built by the shared IRI helper;
        # a repeated keyword in a call is a SyntaxError.
        urls=[experiment_as_iri(experiment)],
    )


def score_set_as_iri(score_set: ScoreSet) -> IRI:
    """
    Build an IRI, as described in <https://datatracker.ietf.org/doc/html/rfc3986#section-4.1>, for the provided
    MaveDB score set. Within the context of VA-Spec, these can be used interchangeably with an equivalent
    document object for brevity.

    The result is the base URL followed by ``/score-sets/`` and the score set URN.
    """
    score_set_path = f"/score-sets/{score_set.urn}"
    return f"{BASE_URL}{score_set_path}"


def score_set_to_document(score_set: ScoreSet) -> Document:
    """
    Create a [VA Document](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/information-entities/document.html#document)
    object from the provided MaveDB score set.

    Args:
        score_set: The score set whose URN and title populate the document.

    Returns:
        A ``Document`` identified by the score set URN whose ``urls`` list holds
        the score set's IRI.
    """
    return Document(
        id=score_set.urn,
        label="MaveDB score set",
        title=score_set.title,
        # `urls` is passed once, as a list built by the shared IRI helper;
        # a repeated keyword in a call is a SyntaxError.
        urls=[score_set_as_iri(score_set)],
    )


def variant_as_iri(variant: Variant) -> IRI:
    """
    Create an IRI as described in <https://datatracker.ietf.org/doc/html/rfc3986#section-4.1> for the provided MaveDB variant. Within
    the context of VA-Spec, these can be used interchangeably with an equivalent document object for brevity.

    The IRI points at the variant's score set page, with the URL-quoted variant URN
    passed as the ``variant`` query parameter.
    """
    # TODO: We should decide if this should instead link to the variant measurement page.
    # TODO(#372): nullable URN.
    # Built from BASE_URL, like the experiment and score set IRIs, rather than
    # repeating the host name inline.
    quoted_variant_urn = urllib.parse.quote_plus(variant.urn)  # type: ignore
    return f"{BASE_URL}/score-sets/{variant.score_set.urn}?variant={quoted_variant_urn}"


def variant_to_document(variant: Variant) -> Document:
    """
    Create a [VA Document](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/information-entities/document.html#document)
    object from the provided MaveDB variant.

    Args:
        variant: The variant whose URN identifies the document.

    Returns:
        A ``Document`` identified by the variant URN whose ``urls`` list holds the
        variant's IRI.
    """
    return Document(
        id=variant.urn,
        label="MaveDB variant",
        # `urls` is passed once, as a list built by the shared IRI helper;
        # a repeated keyword in a call is a SyntaxError.
        urls=[variant_as_iri(variant)],
    )
33 changes: 0 additions & 33 deletions src/mavedb/lib/annotation/evidence.py

This file was deleted.

Loading

0 comments on commit 07de0cd

Please sign in to comment.