Skip to content

Commit

Permalink
feat: changed metadata RDF-XML export to maintain order
Browse files Browse the repository at this point in the history
  • Loading branch information
jonrkarr committed Oct 22, 2021
1 parent d35b4eb commit 819344b
Showing 1 changed file with 83 additions and 20 deletions.
103 changes: 83 additions & 20 deletions biosimulators_utils/omex_meta/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
from ..config import get_config, Config # noqa: F401
from .data_model import (Triple, OmexMetadataOutputFormat, OmexMetadataSchema,
BIOSIMULATIONS_ROOT_URI_PATTERN,
BIOSIMULATIONS_PREDICATE_TYPES)
BIOSIMULATIONS_PREDICATE_TYPES,
BIOSIMULATIONS_NAMESPACE_PREFIX_MAP)
from .utils import get_local_combine_archive_content_uri, get_global_combine_archive_content_uri
from .validation import validate_biosimulations_metadata
from lxml import etree
import abc
import collections
import json
import os
import pyomexmeta
Expand Down Expand Up @@ -363,28 +366,88 @@ def run(self, triples, filename, namespaces=None, config=None):
if config is None:
config = get_config()

graph = rdflib.Graph()
for prefix, namespace in (namespaces or {}).items():
graph.namespace_manager.bind(prefix, namespace)
# graph.namespace_manager.bind('omexLibrary', rdflib.Namespace('http://omex-library.org/'))
# graph.namespace_manager.bind('identifiers', rdflib.Namespace('http://identifiers.org/'))
if config.OMEX_METADATA_OUTPUT_FORMAT == OmexMetadataOutputFormat.turtle:
graph = rdflib.Graph()
for prefix, namespace in (namespaces or {}).items():
graph.namespace_manager.bind(prefix, namespace)
# graph.namespace_manager.bind('omexLibrary', rdflib.Namespace('http://omex-library.org/'))
# graph.namespace_manager.bind('identifiers', rdflib.Namespace('http://identifiers.org/'))

for triple in triples:
graph.add((triple.subject, triple.predicate, triple.object))

if config.OMEX_METADATA_OUTPUT_FORMAT == OmexMetadataOutputFormat.rdfxml:
graph.serialize(filename, format="xml")
for triple in triples:
graph.add((triple.subject, triple.predicate, triple.object))

elif config.OMEX_METADATA_OUTPUT_FORMAT == OmexMetadataOutputFormat.turtle:
graph.serialize(filename, format="turtle")

else:
graph.serialize(filename, format="xml", version="1.0")
def get_uri_namespace_id(uri):
    """ Split a URI into its namespace and the identifier within that namespace

    The split happens at the last ``#`` when the URI contains one; otherwise
    at the last ``/``. The separator is kept as the final character of the
    namespace.

    Args:
        uri (:obj:`str`): URI

    Returns:
        :obj:`tuple`:

            * :obj:`str`: namespace, including the trailing ``#`` or ``/``
            * :obj:`str`: identifier (the text after the separator; may be empty)

    Raises:
        :obj:`ValueError`: if the URI contains neither ``#`` nor ``/``
    """
    # `#` takes precedence over `/`, so a URI containing a `#` is always split
    # at the last `#` even if a `/` occurs after it.
    for separator in ('#', '/'):
        if separator in uri:
            namespace, _, local_id = uri.rpartition(separator)
            return (namespace + separator, local_id)
    raise ValueError('URI `{}` does not belong to a namespace'.format(uri))

namespace_prefix_map = dict(BIOSIMULATIONS_NAMESPACE_PREFIX_MAP)
namespaces = namespaces or {}
namespaces['rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

bnode_ids = collections.OrderedDict()

for triple in triples:
namespace, _ = get_uri_namespace_id(str(triple.predicate))
if namespace in namespace_prefix_map:
prefix = namespace_prefix_map[namespace]
else:
prefix = 'ns{}'.format(len(namespaces))
namespace_prefix_map[namespace] = prefix
namespaces[prefix] = namespace

if isinstance(triple.subject, rdflib.term.BNode) and triple.subject not in bnode_ids:
bnode_ids[triple.subject] = '{:07d}'.format(len(bnode_ids))

if isinstance(triple.object, rdflib.term.BNode) and triple.object not in bnode_ids:
bnode_ids[triple.object] = '{:07d}'.format(len(bnode_ids))

root = etree.Element("{{{}}}RDF".format(namespaces['rdf']), nsmap=namespaces)

for id, node in bnode_ids.items():
node = etree.Element("{{{}}}Description".format(namespaces['rdf']), nsmap=namespaces)
root.append(node)

for triple in triples:
subject = etree.Element("{{{}}}Description".format(namespaces['rdf']), nsmap=namespaces)
if isinstance(triple.subject, rdflib.term.URIRef):
subject.attrib["{{{}}}about".format(namespaces['rdf'])] = str(triple.subject)
else:
subject.attrib["{{{}}}nodeID".format(namespaces['rdf'])] = bnode_ids[triple.subject]
root.append(subject)

namespace, id = get_uri_namespace_id(str(triple.predicate))
predicate = etree.Element("{{{}}}{}".format(namespace, id), nsmap=namespaces)

if isinstance(triple.object, rdflib.term.URIRef):
predicate.attrib["{{{}}}resource".format(namespaces['rdf'])] = str(triple.object)
if isinstance(triple.object, rdflib.term.BNode):
predicate.attrib["{{{}}}nodeID".format(namespaces['rdf'])] = bnode_ids[triple.object]
else:
predicate.text = str(triple.object)

subject.append(predicate)

etree.ElementTree(root).write(filename,
xml_declaration=True,
encoding="utf-8",
standalone=False,
pretty_print=True)

rdf = pyomexmeta.RDF.from_file(filename, 'rdfxml')
if rdf.to_file(filename, config.OMEX_METADATA_OUTPUT_FORMAT.value) != 0:
raise RuntimeError('Metadata could not be saved to `{}` in `{}` format.'.format(
filename, config.OMEX_METADATA_OUTPUT_FORMAT.value))
if config.OMEX_METADATA_OUTPUT_FORMAT != OmexMetadataOutputFormat.rdfxml:
rdf = pyomexmeta.RDF.from_file(filename, 'rdfxml')
if rdf.to_file(filename, config.OMEX_METADATA_OUTPUT_FORMAT.value) != 0:
raise RuntimeError('Metadata could not be saved to `{}` in `{}` format.'.format(
filename, config.OMEX_METADATA_OUTPUT_FORMAT.value))


class BiosimulationsOmexMetaReader(OmexMetaReader):
Expand Down Expand Up @@ -484,11 +547,11 @@ def get_combine_archive_uri(cls, triples):

if len(archive_uris) == 0:
msg = 'File does not contain metadata about an OMEX archive.'
return(None, [[msg]], [])
return (None, [[msg]], [])

elif len(archive_uris) > 1:
msg = 'File contains metadata about multiple OMEX archives. File must contains data about 1 archive.'
return(None, [[msg]], [])
msg = 'File contains metadata about multiple OMEX archives. File must contain data about 1 archive.'
return (None, [[msg, [[uri] for uri in archive_uris]]], [])

else:
return (list(archive_uris)[0], [], [])
Expand Down

0 comments on commit 819344b

Please sign in to comment.