diff --git a/pyproject.toml b/pyproject.toml
index df5ebc5..d42cfa2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "geneweaver-client"
-version = "0.10.0"
+version = "0.10.1a0"
 description = "A Python Client for the Geneweaver API"
 authors = ["Jax Computational Sciences "]
 readme = "README.md"
diff --git a/src/geneweaver/client/api/mapping.py b/src/geneweaver/client/api/mapping.py
index 4ff7188..8d3542d 100644
--- a/src/geneweaver/client/api/mapping.py
+++ b/src/geneweaver/client/api/mapping.py
@@ -1,10 +1,12 @@
 """Cross-API Geneset Symbol Mapping."""
 
+import re
 from typing import List, Optional
 
 from geneweaver.client.api import aon, genes, genesets
 from geneweaver.client.utils.aon import map_symbols
 from geneweaver.core.enum import GeneIdentifier, Species
+from geneweaver.core.mapping import AON_ID_TYPE_FOR_SPECIES
 
 
 def ensembl_mouse_mapping(
@@ -27,11 +29,10 @@ def ensembl_mouse_mapping(
 
     gene_id_type = GeneIdentifier.ENSEMBLE_GENE
 
-    if species == Species.HOMO_SAPIENS:
-        gene_id_type = GeneIdentifier.HGNC
+    if species != Species.MUS_MUSCULUS:
+        gene_id_type = AON_ID_TYPE_FOR_SPECIES[species]
 
     response = genesets.get_values(access_token, geneset_id, gene_id_type, in_threshold)
-
     if species == Species.MUS_MUSCULUS:
         result = [
             {"gene_id": item["symbol"], "score": item["value"]}
@@ -44,6 +45,7 @@ def ensembl_mouse_mapping(
         else:
             algorithm_id = None
 
+        response = clean_identifiers_for_aon(response, species)
         aon_response = aon.ortholog_mapping(
             [g["symbol"] for g in response["data"]],
             Species.MUS_MUSCULUS,
@@ -73,3 +75,72 @@ def ensembl_mouse_mapping(
         result = [{"gene_id": k, "score": v} for k, v in ensembl_result.items()]
 
     return result
+
+
+# Prefix placed (as "PREFIX:") in front of a species' symbols before AON mapping.
+IDENTIFIER_PREFIX_MAP = {
+    Species.DANIO_RERIO: "ZFIN",
+    Species.DROSOPHILA_MELANOGASTER: "FB",
+    Species.CAENORHABDITIS_ELEGANS: "WB",
+    Species.SACCHAROMYCES_CEREVISIAE: "SGD",
+}
+
+# Letters followed by digits, e.g. "ABC123"; applied to the whole identifier.
+_PREFIXED_ID_PATTERN = re.compile(r"([A-Za-z]+)(\d+)")
+
+
+def clean_identifiers_for_aon(data: dict, species: Species) -> dict:
+    """Rewrite geneset symbols into the ``PREFIX:ID`` form used for AON mapping.
+
+    Species listed in ``IDENTIFIER_PREFIX_MAP`` get their prefix and a colon put in
+    front of every symbol; Rattus norvegicus symbols are split with
+    ``insert_colon_delimiter``. Any other species is returned untouched.
+
+    :param data: Mapping whose ``"data"`` entry is a list of items carrying
+        ``"symbol"`` and ``"value"`` keys.
+    :param species: Species the symbols belong to.
+    :return: ``data`` itself when no rewrite applies, otherwise a new dict holding
+        only the rewritten ``"data"`` list (``"symbol"`` and ``"value"`` per item).
+    :raises ValueError: If a Rattus norvegicus symbol cannot be split.
+    """
+    if species in IDENTIFIER_PREFIX_MAP:
+        prefix = f"{IDENTIFIER_PREFIX_MAP[species]}:"
+        # Leave symbols that already carry the prefix alone, so cleaning twice
+        # does not produce "PREFIX:PREFIX:ID".
+        symbols = [
+            symbol if symbol.startswith(prefix) else f"{prefix}{symbol}"
+            for symbol in (item["symbol"] for item in data["data"])
+        ]
+    elif species == Species.RATTUS_NORVEGICUS:
+        symbols = [insert_colon_delimiter(item["symbol"]) for item in data["data"]]
+    else:
+        return data
+
+    return {
+        "data": [
+            {"symbol": symbol, "value": item["value"]}
+            for symbol, item in zip(symbols, data["data"])
+        ]
+    }
+
+
+def insert_colon_delimiter(identifier: str) -> str:
+    """Insert a colon between an identifier's letter prefix and its numeric ID.
+
+    :param identifier: Identifier such as ``ABC123`` or ``ABC:123``.
+    :return: The identifier unchanged if it already contains a colon, otherwise
+        ``<letters>:<digits>``.
+    :raises ValueError: If the identifier has no colon and is not made up of
+        letters followed by digits and nothing else.
+    """
+    if ":" in identifier:
+        return identifier
+
+    # fullmatch, not match: matching only at the start would silently drop any
+    # trailing characters ("ABC123x" -> "ABC:123") instead of rejecting the input.
+    match = _PREFIXED_ID_PATTERN.fullmatch(identifier)
+    if match is None:
+        raise ValueError(f"Identifier format is invalid: {identifier!r}")
+
+    prefix, suffix = match.groups()
+    return f"{prefix}:{suffix}"