Skip to content

Commit

Permalink
improves sample and rdfdumper
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Jan 28, 2024
1 parent 46a9963 commit 0456de1
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 25 deletions.
1 change: 0 additions & 1 deletion lodstorage/linkml.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ class Class:
"""
Represents a class in the LinkML schema.
"""

description: str
slots: List[Slot]

Expand Down
72 changes: 49 additions & 23 deletions lodstorage/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
@author: wf, using ChatGPT-4 prompting
"""
from dataclasses import fields
from collections.abc import Iterable, Mapping

from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF

from lodstorage.linkml_gen import PythonTypes, Schema

from typing import Any

class RDFDumper:
"""
Expand Down Expand Up @@ -51,57 +52,82 @@ def serialize(self, rdf_format: str = "turtle") -> str:
str: The serialized RDF graph.
"""
return self.graph.serialize(format=rdf_format)


def process_class(self, class_name: str, instance_data: object):
class_obj = self.schema.classes[class_name]
class_uri = URIRef(self.namespaces[self.schema.default_prefix][class_name])
def value_iterator(self, value: Any):
    """
    Iterate over the values of a mapping, an iterable or a single value.

    Args:
        value: The value to iterate over. It can be a mapping, an iterable
            or a single value.

    Yields:
        Tuples of (key, value). For a mapping these are its items; for any
        other iterable, and for single values, the key is None.
    """
    if isinstance(value, Mapping):
        yield from value.items()
    elif isinstance(value, Iterable) and not isinstance(
        value, (str, bytes, bytearray)
    ):
        yield from ((None, item) for item in value)
    else:
        # Strings and byte sequences are iterable, but each one represents
        # a single atomic value, so it is yielded whole instead of being
        # split into characters or integers.
        yield (None, value)

def process_class(self, class_name: str, instance_data: object):
# Get the base namespace URI
self.base_uri = self.namespaces[self.schema.default_prefix]
# get the class object
# class_obj = self.schema.classes[class_name]
# Construct class_uri using the namespace and class_name with a separator
class_uri = URIRef(f"{self.base_uri}:{class_name}")

# Create a unique URI or a Blank Node for the instance
instance_uri = self.get_instance_uri(class_obj, instance_data)
instance_uri = self.get_instance_uri(instance_data)

# Type the instance with its class
self.graph.add((instance_uri, RDF.type, class_uri))

# loop over all fields
for field_info in fields(instance_data):
slot_name = field_info.name
# assure we only work on fields defined
# in our schema
slot_obj = self.schema.slots.get(slot_name)
if not slot_obj:
continue

field_uri = URIRef(self.namespaces[self.schema.default_prefix][slot_name])
# Combine the namespace with the slot name to form the field URI
field_uri = URIRef(f"{self.base_uri}:{slot_name}")
field_value = getattr(instance_data, slot_name, None)

if field_value is not None:
if isinstance(field_value, list):
# Handle multivalued fields
for item in field_value:
# Use value_iterator to handle different types of values
for key, item in self.value_iterator(field_value):
if key is not None:
# Handle as a mapping
key_uri = URIRef(self.namespaces[self.schema.default_prefix][key])
self.graph.add((instance_uri, field_uri, key_uri))
self.graph.add((key_uri, RDF.value, self.convert_to_literal(item, slot_obj)))
else:
# Handle as a single value or an item from an iterable
# Check if item has an 'identifier' property
if hasattr(item, 'identifier') and getattr(item, 'identifier'):
item_uri = self.get_instance_uri(item)
self.graph.add((instance_uri, field_uri, item_uri))
self.process_class(item.__class__.__name__, item)
else:
self.graph.add(
(
instance_uri,
field_uri,
self.convert_to_literal(item, slot_obj),
)
)
else:
# Handle single valued fields
self.graph.add(
(
instance_uri,
field_uri,
self.convert_to_literal(field_value, slot_obj),
)
)

def get_instance_uri(self, class_obj, instance_data):
def get_instance_uri(self, instance_data):
"""
Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI.
Otherwise, it generates or retrieves a unique URI.
"""
base_uri = self.namespaces[self.schema.default_prefix]
if hasattr(instance_data, 'identifier') and getattr(instance_data, 'identifier'):
identifier = getattr(instance_data, 'identifier')
return URIRef(f"{base_uri}{identifier}")
return URIRef(f"{self.base_uri}:{identifier}")
else:
# Fallback to a blank node if no identifier is found
return BNode()
Expand Down
2 changes: 1 addition & 1 deletion lodstorage/sample2.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def identifier(self)->str:
Generates a unique identifier for the Royal instance.
The identifier is a combination of a slugified name and the Wikidata ID (if available).
"""
slugified_name = slugify(self.name, lowercase=False, regex_pattern=r'[^\w\s\-]')
slugified_name = slugify(self.name, lowercase=False, regex_pattern=r'[^\w\-]')
if self.wikidata_id:
return f"{slugified_name}-{self.wikidata_id}"
return slugified_name
Expand Down

0 comments on commit 0456de1

Please sign in to comment.