Skip to content

Commit

Permalink
Merge pull request #48 from cthoyt/add-bioregistry-prefixes
Browse files Browse the repository at this point in the history
Add prefix synonyms from the Bioregistry
  • Loading branch information
sierra-moxon authored Nov 6, 2023
2 parents fc03487 + d6c5624 commit 71d3db0
Show file tree
Hide file tree
Showing 8 changed files with 1,683 additions and 42 deletions.
413 changes: 413 additions & 0 deletions src/prefixmaps/data/bioregistry.csv

Large diffs are not rendered by default.

413 changes: 413 additions & 0 deletions src/prefixmaps/data/bioregistry.upper.csv

Large diffs are not rendered by default.

434 changes: 415 additions & 19 deletions src/prefixmaps/data/merged.csv

Large diffs are not rendered by default.

434 changes: 415 additions & 19 deletions src/prefixmaps/data/merged.oak.csv

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions src/prefixmaps/data/prefixcc.csv
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ prefixcc,environ,http://eulersharp.sourceforge.net/2003/03swap/environment#,cano
prefixcc,eol,http://purl.org/biodiversity/eol/,canonical
prefixcc,ep,http://eprints.org/ontology/,namespace_alias
prefixcc,epcis,https://ns.gs1.org/epcis/,canonical
prefixcc,epo,http://data.europa.eu/a4g/ontology#,canonical
prefixcc,eppl,https://w3id.org/ep-plan#,canonical
prefixcc,epplan,https://w3id.org/ep-plan#,namespace_alias
prefixcc,eppo,https://gd.eppo.int/taxon/,canonical
Expand Down Expand Up @@ -929,7 +930,7 @@ prefixcc,gfo,http://www.onto-med.de/ontologies/gfo.owl#,canonical
prefixcc,gg,http://www.gemeentegeschiedenis.nl/gg-schema#,canonical
prefixcc,ggbn,http://data.ggbn.org/schemas/ggbn/terms/,canonical
prefixcc,ghga,http://w3id.org/ghga/,canonical
prefixcc,gist,http://ontologies.semanticarts.com/gist#,canonical
prefixcc,gist,http://ontologies.semanticarts.com/gist/,canonical
prefixcc,giving,http://ontologi.es/giving#,canonical
prefixcc,gl,http://schema.geolink.org/,canonical
prefixcc,gldp,http://www.w3.org/ns/people#,canonical
Expand Down Expand Up @@ -1209,7 +1210,7 @@ prefixcc,lda,http://purl.org/linked-data/api/vocab#,namespace_alias
prefixcc,ldap,http://purl.org/net/ldap/,canonical
prefixcc,ldc,https://tac.nist.gov/tracks/SM-KBP/2018/ontologies/SeedlingOntology#,canonical
prefixcc,lden,http://www.linklion.org/lden/,canonical
prefixcc,ldes,http://w3id.org/ldes#,canonical
prefixcc,ldes,https://w3id.org/ldes#,canonical
prefixcc,ldl,https://w3id.org/ldpdl/ns#,canonical
prefixcc,ldn,https://www.w3.org/TR/ldn/#,canonical
prefixcc,ldp,http://www.w3.org/ns/ldp#,canonical
Expand Down
6 changes: 5 additions & 1 deletion src/prefixmaps/data/w3id.csv
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ w3id,drone,https://w3id.org/drone/,canonical
w3id,dsa,https://w3id.org/dsa/,canonical
w3id,dsd,https://w3id.org/dsd/,canonical
w3id,dso,https://w3id.org/dso/,canonical
w3id,dspace,https://w3id.org/dspace/,canonical
w3id,dstv,https://w3id.org/dstv/,canonical
w3id,dsv-ontology,https://w3id.org/dsv-ontology/,canonical
w3id,dts,https://w3id.org/dts/,canonical
Expand Down Expand Up @@ -351,6 +352,7 @@ w3id,euro-nmd-kpis,https://w3id.org/euro-nmd-kpis/,canonical
w3id,EUTaxO,https://w3id.org/EUTaxO/,canonical
w3id,evdoxus,https://w3id.org/evdoxus/,canonical
w3id,EVENTSKG-Dataset,https://w3id.org/EVENTSKG-Dataset/,canonical
w3id,everycred,https://w3id.org/everycred/,canonical
w3id,EVI,https://w3id.org/EVI/,canonical
w3id,EXACT,https://w3id.org/EXACT/,canonical
w3id,examode,https://w3id.org/examode/,canonical
Expand Down Expand Up @@ -402,6 +404,7 @@ w3id,fog,https://w3id.org/fog/,canonical
w3id,food,https://w3id.org/food/,canonical
w3id,foodallergen,https://w3id.org/foodallergen/,canonical
w3id,foodie,https://w3id.org/foodie/,canonical
w3id,foodpairing_inspire_kg,https://w3id.org/foodpairing_inspire_kg/,canonical
w3id,foops,https://w3id.org/foops/,canonical
w3id,forecasting4eepsa,https://w3id.org/forecasting4eepsa/,canonical
w3id,fox,https://w3id.org/fox/,canonical
Expand Down Expand Up @@ -619,6 +622,7 @@ w3id,media,https://w3id.org/media/,canonical
w3id,mediatype,https://w3id.org/mediatype/,canonical
w3id,medred,https://w3id.org/medred/,canonical
w3id,medtag,https://w3id.org/medtag/,canonical
w3id,meta,https://w3id.org/meta/,canonical
w3id,meta-share,https://w3id.org/meta-share/,canonical
w3id,metadocencia,https://w3id.org/metadocencia/,canonical
w3id,metamodeling,https://w3id.org/metamodeling/,canonical
Expand Down Expand Up @@ -700,7 +704,6 @@ w3id,oac,https://w3id.org/oac/,canonical
w3id,oak,https://w3id.org/oak/,canonical
w3id,obda,https://w3id.org/obda/,canonical
w3id,obelisk,https://w3id.org/obelisk/,canonical
w3id,obio,https://w3id.org/obio/,canonical
w3id,obo,https://w3id.org/obo/,canonical
w3id,obpa,https://w3id.org/obpa/,canonical
w3id,obpd,https://w3id.org/obpd/,canonical
Expand All @@ -716,6 +719,7 @@ w3id,oebDatasets,https://w3id.org/oebDatasets/,canonical
w3id,oerbase,https://w3id.org/oerbase/,canonical
w3id,ofo,https://w3id.org/ofo/,canonical
w3id,oho,https://w3id.org/oho/,canonical
w3id,okg,https://w3id.org/okg/,canonical
w3id,okn,https://w3id.org/okn/,canonical
w3id,olu,https://w3id.org/olu/,canonical
w3id,omg,https://w3id.org/omg/,canonical
Expand Down
10 changes: 9 additions & 1 deletion src/prefixmaps/ingest/ingest_bioregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
# Problematic records, look into later
SKIP = {"gro"}

#: Records whose CURIE prefix synonyms are not ingested because they cause https://github.com/linkml/prefixmaps/issues/50
PROBLEMATIC_CURIE_PREFIX_SYNONYMS_RECORDS = {"wikidata"}


def from_bioregistry_upper(**kwargs) -> Context:
"""
Expand Down Expand Up @@ -76,5 +79,10 @@ def from_bioregistry(upper=False, canonical_idorg=True, filter_dubious=True) ->
continue
preferred = record.prefix == bioregistry.get_preferred_prefix(record.prefix)
context.add_prefix(record.prefix, record.uri_prefix, preferred=preferred)
# TODO add synonyms, do in later PR since it will increase diff and complexity of review
if record.prefix not in PROBLEMATIC_CURIE_PREFIX_SYNONYMS_RECORDS:
for s in record.prefix_synonyms:
context.add_prefix(s, record.uri_prefix, preferred=preferred)
# TODO future, add URI prefix synonyms
# for s in record.uri_prefix_synonyms:
# context.add_prefix(record.prefix, s, status=StatusType.namespace_alias, preferred=preferred)
return context
10 changes: 10 additions & 0 deletions tests/test_core/test_prefixmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import unittest

import prefixmaps
from prefixmaps.datamodel.context import StatusType
from prefixmaps.io.parser import context_from_file, load_context, load_multi_context
from prefixmaps.io.writer import context_to_file
Expand Down Expand Up @@ -183,3 +184,12 @@ def test_from_upstream(self):
# def test_meta(self):
# ctxts = load_contexts_meta()
# print(ctxts)

def test_synonyms(self):
    """Check that synonymous CURIE prefixes standardize to the canonical one.

    Loads the "merged" converter and asserts that each synonym spelling of
    the same identifier (and the canonical spelling itself) is rewritten
    to the canonical CURIE.
    """
    expected = "PUBMED:1234"
    merged_converter = prefixmaps.load_converter("merged")
    # TODO also cover "pmid:1234", "pubmed:1234"
    # The canonical CURIE is included so that it is checked to map to itself.
    for candidate in ("PMID:1234", "MEDLINE:1234", expected):
        with self.subTest(curie=candidate):
            standardized = merged_converter.standardize_curie(candidate)
            self.assertEqual(expected, standardized)

0 comments on commit 71d3db0

Please sign in to comment.