From e2b9c64913fd7f836ccc5b45d9c46bd8ce3f95bf Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Wed, 17 Jan 2024 01:25:14 -0500 Subject: [PATCH] First stab at normalize() static methods for all wrappers. --- src/wrappers/CitationWrapper.js | 21 ++++++++++++++++++++- src/wrappers/PhylogenyWrapper.js | 20 +++++++++++++++++++- src/wrappers/PhylorefWrapper.js | 14 ++++++++++++++ src/wrappers/PhyxWrapper.js | 24 ++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 2 deletions(-) diff --git a/src/wrappers/CitationWrapper.js b/src/wrappers/CitationWrapper.js index fff84959..5461ad27 100644 --- a/src/wrappers/CitationWrapper.js +++ b/src/wrappers/CitationWrapper.js @@ -3,7 +3,7 @@ * Based on BibJSON (http://okfnlabs.org/bibjson/). */ -const { has, isEmpty } = require('lodash'); +const { has, isEmpty, cloneDeep } = require('lodash'); class CitationWrapper { /** @@ -13,6 +13,25 @@ class CitationWrapper { this.citation = citation; } + /** + * Return a normalized form of a citation as a new object; the citation passed in is only read, never modified. + * + * I'm not really sure how to normalize a citation, but the main thing we can do is drop any key + * whose value is falsy (such as ''). We could interconvert between `name` and `firstname/lastname/middlename`, + * but that's not really equivalent, is it? + */ + static normalize(citation) { + const normalizedCitation = {}; + Object.keys(citation).forEach((key) => { + // Copy only truthy values into the normalized citation; falsy ones ('', 0, false, null, undefined, NaN) are left out. + if (citation[key]) { + normalizedCitation[key] = citation[key]; + } + }); + return normalizedCitation; + } + + /** * Helper method to return a single name for a given agent entry. 
* The algorithm we use is: diff --git a/src/wrappers/PhylogenyWrapper.js b/src/wrappers/PhylogenyWrapper.js index fc9cd221..5925eba6 100644 --- a/src/wrappers/PhylogenyWrapper.js +++ b/src/wrappers/PhylogenyWrapper.js @@ -2,7 +2,9 @@ * PhylogenyWrapper */ -const { has } = require('lodash'); +const { has, + cloneDeep +} = require('lodash'); /** Used to parse Newick strings. */ const newickJs = require('newick-js'); @@ -34,6 +36,22 @@ class PhylogenyWrapper { this.defaultNomenCode = defaultNomenCode; } + /** + * Return a normalized form of the phylogeny. + */ + static normalize(phylogeny) { + const normalizedPhylogeny = cloneDeep(phylogeny); + + // We could normalize the Newick string, but that doesn't seem very nice. + + // Normalize the source if there is one. + if ('source' in phylogeny) { + normalizedPhylogeny.source = CitationWrapper.normalize(phylogeny.source || {}); + } + + return normalizedPhylogeny; + } + static getErrorsInNewickString(newick) { // Given a Newick string, return a list of errors found in parsing this // string. The errors are returned as a list of objects, each of which diff --git a/src/wrappers/PhylorefWrapper.js b/src/wrappers/PhylorefWrapper.js index f6e6d180..9afb4f22 100644 --- a/src/wrappers/PhylorefWrapper.js +++ b/src/wrappers/PhylorefWrapper.js @@ -33,6 +33,20 @@ class PhylorefWrapper { return this.phyloref.internalSpecifiers; } + /** + * Return a normalized copy of a phyloreference: a cloneDeep() of the input whose internal and external specifiers are each passed through TaxonomicUnitWrapper.normalize (a missing specifier list becomes []). + * + * @param phyloref The phyloreference (plain JSON object) to normalize. + */ + static normalize(phyloref) { + const normalizedPhyloref = cloneDeep(phyloref); + + normalizedPhyloref.internalSpecifiers = (phyloref.internalSpecifiers || []).map(TaxonomicUnitWrapper.normalize); + normalizedPhyloref.externalSpecifiers = (phyloref.externalSpecifiers || []).map(TaxonomicUnitWrapper.normalize); + + return normalizedPhyloref; + } + /** Return the external specifiers of this phyloref (if any). 
*/ get externalSpecifiers() { if (!has(this.phyloref, 'externalSpecifiers')) { diff --git a/src/wrappers/PhyxWrapper.js b/src/wrappers/PhyxWrapper.js index 93097682..4a0b9694 100644 --- a/src/wrappers/PhyxWrapper.js +++ b/src/wrappers/PhyxWrapper.js @@ -51,6 +51,30 @@ class PhyxWrapper { return owlterms.UNKNOWN_CODE; } + /** + * Return a provided Phyx document as a normalized JSON document. We copy most keys unchanged -- including + * keys we don't know -- but any key that can be handled by one of the other Wrappers in this package + * (phylorefs, phylogenies, source) will be normalized by that Wrapper before being returned. + * + * Normalization is mostly needed for TaxonomicUnitWrappers and its subclasses (TaxonConceptWrapper, + * TaxonNameWrapper), since these can be represented in several essentially identical ways. But if we + * implement it at every level, we can implement comparison code in Klados easily. + * + * Two Phyx documents should -- upon being normalized -- be comparable with each other with + * lodash's isEqual(). + */ + static normalize(phyxDocument) { + const normalizedDocument = cloneDeep(phyxDocument); + + normalizedDocument.phylorefs = (phyxDocument.phylorefs || []).map(PhylorefWrapper.normalize); + normalizedDocument.phylogenies = (phyxDocument.phylogenies || []).map(PhylogenyWrapper.normalize); + if ('source' in phyxDocument) { + normalizedDocument.source = CitationWrapper.normalize(phyxDocument.source || {}); + } + + return normalizedDocument; + } + /** * Generate an executable ontology from this Phyx document. The document is mostly in JSON-LD * already, except for three important things: