Skip to content

Commit

Permalink
Merge pull request #18 from phyloref/cleaned-up-taxonomic-units
Browse files Browse the repository at this point in the history
This PR cleans up taxonomic units in several ways:
- Taxonomic units have clear types (stored in the `@type` property), indicating whether they are taxon concepts (with or without a citation) or specimens, allowing us to more easily add taxonomic units for apomorphies and phylorefs later on (closes #20).
- RDF properties used by taxon concepts and names have been rewritten to bring them more into line with the TDWG ontologies TaxonConcept and TaxonName, including the use of `TaxonName:nameComplete` instead of `dwc:scientificName` (closes #8).
- External references are stored in the `@id` element rather than in a separate property.
- Added support for nomenclatural codes in scientific names (as per phyloref/klados#143). Note that this PR includes an earlier version of the nomenclatural code system; this is updated and improved in #26.

This PR also introduces a new `TaxonConceptWrapper` as an analogue to `SpecimenWrapper` and renames the `ScientificNameWrapper` to the shorter `TaxonNameWrapper`.
  • Loading branch information
gaurav authored Jul 11, 2019
2 parents 9f01b8d + edfecfc commit dbedb49
Show file tree
Hide file tree
Showing 16 changed files with 951 additions and 439 deletions.
10 changes: 7 additions & 3 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
/** The Phyx Cache Manager -- used to manage caches across this library. */
const { PhyxCacheManager } = require('./utils/PhyxCacheManager');

/** A ScientificNameWrapper for wrapping scientific names. */
const { ScientificNameWrapper } = require('./wrappers/ScientificNameWrapper');
/** A TaxonConceptWrapper for wrapping taxonomic concepts. */
const { TaxonConceptWrapper } = require('./wrappers/TaxonConceptWrapper');

/** A TaxonNameWrapper for wrapping taxonomic names. */
const { TaxonNameWrapper } = require('./wrappers/TaxonNameWrapper');

/** A SpecimenWrapper for wrapping specimens. */
const { SpecimenWrapper } = require('./wrappers/SpecimenWrapper');
Expand All @@ -46,7 +49,8 @@ const { PhyxWrapper } = require('./wrappers/PhyxWrapper');

/* Exports */
module.exports = {
ScientificNameWrapper,
TaxonConceptWrapper,
TaxonNameWrapper,
SpecimenWrapper,
TaxonomicUnitWrapper,
TaxonomicUnitMatcher,
Expand Down
144 changes: 74 additions & 70 deletions src/matchers/TaxonomicUnitMatcher.js
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
const { has } = require('lodash');
const { ScientificNameWrapper } = require('../wrappers/ScientificNameWrapper');
const { TaxonomicUnitWrapper } = require('../wrappers/TaxonomicUnitWrapper');
const { TaxonConceptWrapper } = require('../wrappers/TaxonConceptWrapper');
const { TaxonNameWrapper } = require('../wrappers/TaxonNameWrapper');
const { SpecimenWrapper } = require('../wrappers/SpecimenWrapper');

/* Taxonomic unit matching */

/**
* The TaxonomicUnitMatcher matches pairs of taxonomic units and provides
* a consistent report on:
* - Which taxonomic units have matched, and
* - Why the match occurred.
*
* In Model 2.0, we start by using direct matching in OWL, so this should no longer
* be needed. However, I'll leave this around to provide matching in the
* Curation Tool UI and in case it's needed again later.
*/
class TaxonomicUnitMatcher {
// A taxonomic unit matcher tests for taxonomic matches between pairs of
// taxonomic units.

/**
* Create a Taxonomic Unit Matcher to match two taxonomic units. Matching
* will occur immediately, so when this method returns, you can check
* tuMatch.matched and tuMatch.matchReason to determine if the two TUs matched
* and why.
*/
constructor(tunit1, tunit2) {
// Construct a Taxonomic Unit Matcher to compare the two provided
// taxonomic units.
this.tunit1 = tunit1;
this.tunit2 = tunit2;

Expand All @@ -22,8 +33,8 @@ class TaxonomicUnitMatcher {
this.match();
}

/** Return this TUMatch as a JSON object for insertion into the PHYX file. */
asJSONLD(idURI) {
// Return this TUMatch as a JSON object for insertion into the PHYX file.
if (!this.matched) return undefined;

return {
Expand All @@ -37,12 +48,12 @@ class TaxonomicUnitMatcher {
};
}

/** Try to match the two taxonomic units using a number of matching methods. */
match() {
// Try to match the two taxonomic units using a number of matching methods.
if (
this.matchByBinomialName()
this.matchByNameComplete()
|| this.matchByExternalReferences()
|| this.matchBySpecimenIdentifier()
|| this.matchByOccurrenceID()
) {
this.matched = true;
} else {
Expand All @@ -51,78 +62,71 @@ class TaxonomicUnitMatcher {
}
}

matchByBinomialName() {
// Try to match by binomial name, and return true if it could be matched.

// Do both TUnits have scientificNames?
if (!has(this.tunit1, 'scientificNames') || !has(this.tunit2, 'scientificNames')) return false;

return this.tunit1.scientificNames.some((scname1) => {
const scname1wrapped = new ScientificNameWrapper(scname1);
return this.tunit2.scientificNames.some((scname2) => {
const scname2wrapped = new ScientificNameWrapper(scname2);

const result = scname1wrapped.binomialName !== undefined
&& scname2wrapped.binomialName !== undefined
&& scname1wrapped.binomialName.trim().length > 0
&& scname1wrapped.binomialName.trim() === scname2wrapped.binomialName.trim();
/** Try to match by nameComplete, and return true if it could be matched. */
matchByNameComplete() {
// Note that this doesn't apply just to taxon concepts -- we try to match
// any taxonomic units that have nameComplete, which might be taxon concepts
// OR specimens with taxonomic units.
const wrappedTName1 = new TaxonConceptWrapper(this.tunit1);
const wrappedTName2 = new TaxonConceptWrapper(this.tunit2);

if (result) {
this.matchReason = `Scientific name '${scname1wrapped.scientificName}' and scientific name '${scname2wrapped.scientificName}' share the same binomial name`;
}
if (
wrappedTName1.nameComplete && wrappedTName2.nameComplete
&& wrappedTName1.nameComplete === wrappedTName2.nameComplete
) {
this.matchReason = `Taxon name '${wrappedTName1.label}' and taxon name '${wrappedTName2.label}' share the same complete name`;
return true;
}

return result;
});
});
return false;
}

/** Match by external references. */
matchByExternalReferences() {
// Try to match by external references.

if (has(this.tunit1, 'externalReferences') && has(this.tunit2, 'externalReferences')) {
// Each external reference is a URL as a string. We will lowercase it,
// but do no other transformation.
return this.tunit1.externalReferences.some(
extref1 => this.tunit2.externalReferences.some((extref2) => {
const result = (
// Make sure that the external reference isn't blank
extref1.trim() !== ''

// And that it is identical after trimming
&& extref1.toLowerCase().trim() === extref2.toLowerCase().trim()
);

if (result) {
const wrappedTUnit1 = new TaxonomicUnitWrapper(this.tunit1);
const wrappedTUnit2 = new TaxonomicUnitWrapper(this.tunit2);

const externalRefs1 = wrappedTUnit1.externalReferences;
const externalRefs2 = wrappedTUnit2.externalReferences;

return externalRefs1.some(
extref1 => externalRefs2.some(
(extref2) => {
if (
extref1
&& extref2
&& (extref1.toLowerCase() === extref2.toLowerCase())
) {
this.matchReason = `External reference '${extref1}' is shared by taxonomic unit ${this.tunit1} and ${this.tunit2}`;
return true;
}

return result;
})
);
}

return false;
return false;
}
)
);
}

matchBySpecimenIdentifier() {
// Try to match by specimen identifier (i.e. occurrence ID).
/** Match by occurrence ID */
matchByOccurrenceID() {
// Are both TUs specimens?
const wrappedTUnit1 = new TaxonomicUnitWrapper(this.tunit1);
const wrappedTUnit2 = new TaxonomicUnitWrapper(this.tunit2);

if (has(this.tunit1, 'includesSpecimens') && has(this.tunit2, 'includesSpecimens')) {
// Convert specimen identifiers (if present) into a standard format and compare those.
return this.tunit1.includesSpecimens.some((specimen1) => {
const specimenURN1 = new SpecimenWrapper(specimen1).occurrenceID;
return this.tunit2.includesSpecimens.some((specimen2) => {
const specimenURN2 = new SpecimenWrapper(specimen2).occurrenceID;
if (!wrappedTUnit1.types.includes(TaxonomicUnitWrapper.TYPE_SPECIMEN)) return false;
if (!wrappedTUnit2.types.includes(TaxonomicUnitWrapper.TYPE_SPECIMEN)) return false;

const result = (specimenURN1 === specimenURN2);
// Occurrence IDs from both taxonomic units.
const wrappedSpecimen1 = new SpecimenWrapper(this.tunit1);
const wrappedSpecimen2 = new SpecimenWrapper(this.tunit2);

if (result) {
this.matchReason = `Specimen identifier '${specimenURN1}' is shared by taxonomic units`;
}
if (
wrappedSpecimen1.occurrenceID && wrappedSpecimen2.occurrenceID
&& wrappedSpecimen1.occurrenceID === wrappedSpecimen2.occurrenceID
) {
this.matchReason = `Specimen identifier '${wrappedSpecimen1.occurrenceID}' is shared by taxonomic units`;

return result;
});
});
return true;
}

return false;
Expand Down
21 changes: 21 additions & 0 deletions src/utils/owlterms.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,25 @@ module.exports = {
PHYLOREFERENCE_TEST_CASE: 'testcase:PhyloreferenceTestCase',
PHYLOREFERENCE_PHYLOGENY: 'testcase:PhyloreferenceTestPhylogeny',
TESTCASE_SPECIFIER: 'testcase:Specifier',
TU_HAS_NAME_PROP: 'http://rs.tdwg.org/ontology/voc/TaxonConcept#hasName',
TU_SPECIMEN_PROP: 'dwc:organismID',

// Terms from CDAO (http://www.obofoundry.org/ontology/cdao.html).
CDAO_TU: 'http://purl.obolibrary.org/obo/CDAO_0000138',

// Terms from the TaxonName ontology
// (https://github.com/tdwg/ontology/blob/master/ontology/voc/TaxonName.rdf).
TDWG_VOC_TAXON_NAME: 'http://rs.tdwg.org/ontology/voc/TaxonName#TaxonName',
TDWG_VOC_TAXON_CONCEPT: 'http://rs.tdwg.org/ontology/voc/TaxonConcept#TaxonConcept',
TDWG_VOC_NAME_COMPLETE: 'http://rs.tdwg.org/ontology/voc/TaxonName#nameComplete',

// Terms from Darwin Core.
DWC_OCCURRENCE: 'http://rs.tdwg.org/dwc/terms/Occurrence',

// Nomenclatural codes from Nomen.
NAME_IN_UNKNOWN_CODE: 'http://purl.obolibrary.org/obo/NOMEN_0000036', // NOMEN:scientific name
ICZN_NAME: 'http://purl.obolibrary.org/obo/NOMEN_0000107',
ICN_NAME: 'http://purl.obolibrary.org/obo/NOMEN_0000109',
ICNP_NAME: 'http://purl.obolibrary.org/obo/NOMEN_0000110',
ICTV_NAME: 'http://purl.obolibrary.org/obo/NOMEN_0000111',
};
5 changes: 4 additions & 1 deletion src/wrappers/PhylogenyWrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ class PhylogenyWrapper {
// If that doesn't work, we can try to extract scientific names from
// the node label. Note that taxonomic units will NOT be extracted from
// the label if there is a taxonomic unit present!
return TaxonomicUnitWrapper.getTaxonomicUnitsFromNodeLabel(nodeLabel.trim());
//
// Note that old-style taxonomic units were lists while new-style taxonomic
// units are single objects. So we turn it into a single entry list here.
return [TaxonomicUnitWrapper.fromLabel(nodeLabel.trim())];
}

getNodeLabelsMatchedBySpecifier(specifier) {
Expand Down
2 changes: 1 addition & 1 deletion src/wrappers/PhyxWrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class PhyxWrapper {

// If the '@context' is missing, add it here.
if (!has(jsonld, '@context')) {
jsonld['@context'] = 'http://www.phyloref.org/phyx.js/v0.1.0/phyx.json';
jsonld['@context'] = 'http://www.phyloref.org/phyx.js/context/v0.1.0/phyx.json';
}

return jsonld;
Expand Down
97 changes: 0 additions & 97 deletions src/wrappers/ScientificNameWrapper.js

This file was deleted.

Loading

0 comments on commit dbedb49

Please sign in to comment.