diff --git a/README.md b/README.md index c5a2bcb5..87d5ba6a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -[![Build Status](https://travis-ci.org/alliance-genome/agr_loader.svg?branch=develop)](https://travis-ci.org/alliance-genome/agr_loader) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/5259a0e847c04c72a4a9c4f34fabfed5)](https://www.codacy.com/project/christabone/agr_loader/dashboard?utm_source=github.com&utm_medium=referral&utm_content=alliance-genome/agr_loader&utm_campaign=Badge_Grade_Dashboard) # Alliance of Genome Resources Loader @@ -74,3 +73,4 @@ _Reminder_: authentification needs to be renewed every time you get an error lik ``` Error response from daemon: pull access denied for 100225593120.dkr.ecr.us-east-1.amazonaws.com/agr_neo4j_env, repository does not exist or may require 'docker login': denied: Your authorization token has expired. Reauthenticate and try again. ``` + diff --git a/src/etl/disease_etl.py b/src/etl/disease_etl.py index 38633f73..6f08c086 100644 --- a/src/etl/disease_etl.py +++ b/src/etl/disease_etl.py @@ -315,7 +315,7 @@ def process_pages(self, dp, xrefs, pages): for page in pages: if (self.data_provider == 'RGD' or self.data_provider == 'HUMAN') and prefix == 'DOID': display_name = 'RGD' - elif (self.data_provider == 'RGD' or self.data_provider == 'HUMAN') and prefix == 'OMIM': + elif (self.data_provider == 'RGD' or self.data_provider == 'HUMAN') and (prefix == 'OMIM' or prefix == 'MIM'): display_name = 'OMIM' else: display_name = cross_ref_id.split(":")[0] diff --git a/src/etl/helpers/etl_helper.py b/src/etl/helpers/etl_helper.py index 2e3bf442..3a055c96 100644 --- a/src/etl/helpers/etl_helper.py +++ b/src/etl/helpers/etl_helper.py @@ -154,7 +154,7 @@ def get_xref_dict(local_id, prefix, cross_ref_type, page, def get_complete_url_ont(self, local_id, global_id, key=None): """Get Complete 'ont'.""" page = None - if 'OMIM:PS' in global_id: + if 'OMIM:PS' in global_id or 'MIM:PS' in global_id: page = 'ont' if not key: # split not done before hand diff --git a/src/etl/helpers/resource_descriptor_helper_2.py b/src/etl/helpers/resource_descriptor_helper_2.py index 7c9844e4..9e8ce5aa 100644 --- a/src/etl/helpers/resource_descriptor_helper_2.py +++ b/src/etl/helpers/resource_descriptor_helper_2.py @@ -302,7 +302,13 @@ def return_url(self, identifier, page): def return_url_from_identifier(self, identifier, page=None): """Return URL for an identifier.""" db_prefix, identifier_stripped, separator = self.split_identifier(identifier) - + + # Normalize the identifier + # Special case for EFO. + if db_prefix and db_prefix.upper() == "EFO": + if not identifier_stripped.startswith("EFO_"): + identifier_stripped = "EFO_" + identifier_stripped + key = self.get_key(db_prefix, identifier) if not key: return None diff --git a/src/test/specific_tests.py b/src/test/specific_tests.py index 07b75707..4cf27ff0 100644 --- a/src/test/specific_tests.py +++ b/src/test/specific_tests.py @@ -1168,7 +1168,7 @@ def test_human_dej_has_omim_full_url_cross_reference(): """Test Human DEJ has OMIM Full URL Cross Reference""" query = """MATCH (g:Gene)--(dej:DiseaseEntityJoin)--(cr:CrossReference) - WHERE cr.crossRefCompleteUrl = 'https://www.omim.org/entry/605242' + WHERE cr.crossRefCompleteUrl = 'https://www.omim.org/MIM:605242' RETURN count(cr) AS counter""" with Neo4jHelper.run_single_query(query) as result: for record in result: diff --git a/src/test/unit_tests.py b/src/test/unit_tests.py index 40dc0038..4f000660 100644 --- a/src/test/unit_tests.py +++ b/src/test/unit_tests.py @@ -73,7 +73,7 @@ def test_url_lookup(self): 'result': 'https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&code=C5604'}, {'local_id': 'badregexdoesnotmatch', 'global_id': 'MESH:badregexdoesnotmatch', 'result': 'https://www.ncbi.nlm.nih.gov/mesh/badregexdoesnotmatch'}, - {'local_id': 'Cdiff', 'global_id': 'MIM:1111', 'result': 'https://www.omim.org/entry/1111'}] + {'local_id': 'Cdiff', 'global_id': 'MIM:1111', 'result': 'https://www.omim.org/MIM:1111'}] for item in lookups: url = self.etlh.get_complete_url_ont(item['local_id'], item['global_id'])