Skip to content

Commit

Permalink
G3-421: Add mapping fix for symbol differences between AON and GW
Browse files Browse the repository at this point in the history
  • Loading branch information
bergsalex committed Sep 3, 2024
1 parent 2919859 commit 3b863ac
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "geneweaver-client"
version = "0.10.0"
version = "0.10.1a0"
description = "A Python Client for the Geneweaver API"
authors = ["Jax Computational Sciences <[email protected]>"]
readme = "README.md"
Expand Down
62 changes: 59 additions & 3 deletions src/geneweaver/client/api/mapping.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Cross-API Geneset Symbol Mapping."""

import re
from typing import List, Optional

from geneweaver.client.api import aon, genes, genesets
from geneweaver.client.utils.aon import map_symbols
from geneweaver.core.enum import GeneIdentifier, Species
from geneweaver.core.mapping import AON_ID_TYPE_FOR_SPECIES


def ensembl_mouse_mapping(
Expand All @@ -27,11 +29,10 @@ def ensembl_mouse_mapping(

gene_id_type = GeneIdentifier.ENSEMBLE_GENE

if species == Species.HOMO_SAPIENS:
gene_id_type = GeneIdentifier.HGNC
if species != Species.MUS_MUSCULUS:
gene_id_type = AON_ID_TYPE_FOR_SPECIES[species]

response = genesets.get_values(access_token, geneset_id, gene_id_type, in_threshold)

if species == Species.MUS_MUSCULUS:
result = [
{"gene_id": item["symbol"], "score": item["value"]}
Expand All @@ -44,6 +45,7 @@ def ensembl_mouse_mapping(
else:
algorithm_id = None

response = clean_identifiers_for_aon(response, species)
aon_response = aon.ortholog_mapping(
[g["symbol"] for g in response["data"]],
Species.MUS_MUSCULUS,
Expand Down Expand Up @@ -73,3 +75,57 @@ def ensembl_mouse_mapping(
result = [{"gene_id": k, "score": v} for k, v in ensembl_result.items()]

return result


# Per-species prefix that ``clean_identifiers_for_aon`` prepends (followed by a
# colon) to each symbol before the symbols are sent to AON ortholog mapping.
# Rat is deliberately absent: its symbols are handled separately by
# ``insert_colon_delimiter`` rather than by prefixing.
IDENTIFIER_PREFIX_MAP = {
    Species.DANIO_RERIO: "ZFIN",
    Species.DROSOPHILA_MELANOGASTER: "FB",
    Species.CAENORHABDITIS_ELEGANS: "WB",
    Species.SACCHAROMYCES_CEREVISIAE: "SGD",
}


def _with_prefix(symbol: str, prefix: str) -> str:
    """Return ``symbol`` carrying ``prefix:`` exactly once."""
    # ``str()`` keeps the tolerance of plain f-string formatting for any
    # non-string symbol that might come back from the API.
    symbol = str(symbol)
    marker = f"{prefix}:"
    return symbol if symbol.startswith(marker) else f"{marker}{symbol}"


def clean_identifiers_for_aon(data: dict, species: Species) -> dict:
    """Clean up identifiers for AON mapping.

    Rewrites every item's ``symbol`` into the colon-delimited ``PREFIX:ID``
    form:

    - Species listed in ``IDENTIFIER_PREFIX_MAP`` get that species' prefix
      prepended. Symbols that already start with ``<prefix>:`` are left as-is
      so that cleaning is idempotent and never yields a doubled prefix.
    - Rat symbols get a colon inserted between their alphabetic prefix and
      numeric ID via ``insert_colon_delimiter``.
    - Any other species needs no cleanup.

    :param data: Mapping with a ``"data"`` list whose items each hold a
        ``"symbol"`` and a ``"value"``.
    :param species: Species the symbols belong to.
    :return: ``data`` itself when the species needs no cleanup; otherwise a
        new dict holding only a ``"data"`` list whose items contain only
        ``"symbol"`` and ``"value"``.
    :raises ValueError: Propagated from ``insert_colon_delimiter`` when a rat
        symbol does not have the expected format.
    """
    # The prefix map is the single source of truth for which species need a
    # prefix, so adding a species there is enough to have it cleaned here.
    prefix = IDENTIFIER_PREFIX_MAP.get(species)
    if prefix is not None:
        return {
            "data": [
                {
                    "symbol": _with_prefix(item["symbol"], prefix),
                    "value": item["value"],
                }
                for item in data["data"]
            ]
        }

    if species == Species.RATTUS_NORVEGICUS:
        return {
            "data": [
                {
                    "symbol": insert_colon_delimiter(item["symbol"]),
                    "value": item["value"],
                }
                for item in data["data"]
            ]
        }

    return data


def insert_colon_delimiter(identifier: str) -> str:
    """Separate an identifier's prefix from its ID with a colon.

    An identifier that already contains a colon is returned unchanged.
    Otherwise the whole identifier must consist of ASCII letters followed by
    digits (e.g. ``"RGD123"``), and is returned as ``"RGD:123"``.

    :param identifier: Identifier to normalise.
    :return: The identifier in ``PREFIX:ID`` form.
    :raises ValueError: If the identifier has no colon and is not exactly
        letters followed by digits.
    """
    if ":" in identifier:
        return identifier

    # ``fullmatch`` rather than ``match``: an unanchored match would accept
    # "RGD123abc" and silently drop the trailing "abc" from the result.
    match = re.fullmatch(r"([A-Za-z]+)(\d+)", identifier)
    if match is None:
        raise ValueError(f"Identifier format is invalid: {identifier!r}")

    prefix, suffix = match.groups()
    return f"{prefix}:{suffix}"

0 comments on commit 3b863ac

Please sign in to comment.