forked from nlasolle/omekas2rdf
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtriplesCreation.py
146 lines (117 loc) · 4.89 KB
/
triplesCreation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import requests, json, rdflib, sys, logging
from rdflib import Graph, RDF, RDFS, URIRef, Literal, Namespace
from rdflib.namespace import XSD
from constants import *
#RDFLib library is used to create the RDF document (in Turtle syntax)
#Go here to find more details: https://rdflib.readthedocs.io/en/stable/gettingstarted.html
def initializeRDFdatabase():
    """Create a new RDF graph and bind every entry of `namespaces` to it.

    Returns:
        The freshly created rdflib Graph, with one prefix binding per
        (prefix, namespace) pair found in `namespaces`.
    """
    logging.info("Adding namespaces to the RDF graph")

    # Start from an empty graph
    rdf_graph = Graph()

    # Register each declared prefix so serializations can use short names
    for prefix in namespaces:
        rdf_graph.bind(prefix, namespaces[prefix])

    return rdf_graph
#Save an RDF graph to a file using the requested serialization format (RDF/XML, Turtle, etc.)
def saveGraphToFile(graph, category, format):
    """Serialize an RDF graph to the file associated with a resource category.

    Parameters:
        graph: the graph to save; its serialize(destination=..., format=...)
            method is called.
        category: ITEMS or MEDIAS select the matching file; any other value
            selects the collections file.
        format: serialization format, passed through to graph.serialize().

    Serialization errors are logged (with traceback) and not re-raised.
    """
    # Get the name of the file to write to
    if category == ITEMS:
        file_name = ITEMS_FILE
    elif category == MEDIAS:
        file_name = MEDIAS_FILE
    else:
        file_name = COLLECTIONS_FILE
    file = FILES_REPOSITORY + file_name

    # Report the format that is actually used for this call (the argument),
    # not a module-level default.
    logging.info("Saving graph to file " + file + " using " + str(format) + " serialization.")

    try:
        graph.serialize(destination=file, format=format)
    except Exception:
        # logging.exception already appends the traceback, so one call is enough.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        logging.exception("An error occured during the creation of the RDF file: " + file)
#Add the given items to the RDF database by creating appropriate triples
def _expandPrefixedName(name, item):
    """Turn a "prefix:local" name into a full URIRef, or None if the prefix is unknown.

    `item` is only used to give context in the log message.
    """
    prefix, _, local_name = name.partition(":")
    if not prefix or prefix not in namespaces:
        # A non declared namespace, often an Omeka module property
        # (ex: "o-module-mapping:" for the map module): nothing to export.
        logging.info(f"prefix {prefix} not found for item {item}")
        return None
    # Omeka returns names under the form "prefix:value" (ex. dcterms:subject)
    # but RDFLib needs the full URI (ex. http://purl.org/dc/terms/subject)
    return URIRef(namespaces[prefix] + local_name)


def _addPropertyValues(graph, uri, predicate, values):
    """Add one triple (uri, predicate, value) per "@value" / "@id" entry of `values`."""
    # Property values are expected to be a list of dicts; anything else
    # carries no "@value"/"@id" entry and is ignored.
    if not isinstance(values, (list, tuple)):
        return
    for element in values:
        if not isinstance(element, dict):
            continue
        if "@value" in element:
            graph.add((uri, predicate, Literal(element["@value"])))
        if "@id" in element:
            target = element["@id"].strip()
            # Only http(s) identifiers become resources; others are kept as text
            if target.startswith("http"):
                graph.add((uri, predicate, URIRef(target)))
            else:
                graph.add((uri, predicate, Literal(target)))


def createItemsTriples(items, graph):
    """Add the given items to the RDF graph by creating appropriate triples.

    For each item the following triples are added, in this order:
      - an rdfs:label built from "o:title";
      - one O.item_set triple per entry of "o:item_set";
      - one triple per value of every "prefix:name" property whose prefix is
        declared in `namespaces` (keys starting with "o:" or "o-module" are
        Omeka S internals and are not exported);
      - one rdf:type triple per non "o:" entry of "@type".

    Parameters:
        items: iterable of dicts (presumably decoded Omeka S JSON resources).
        graph: the graph receiving the triples through graph.add().

    An item that raises is logged with its traceback and skipped; triples
    already added for that item stay in the graph.
    """
    for item in items:
        try:
            # The uri
            uri = URIRef(item["@id"])

            # The label
            graph.add((uri, RDFS.label, Literal(item["o:title"])))

            # A resource may be part of several item sets
            for item_set in item.get("o:item_set") or ():
                graph.add((uri, O.item_set, URIRef(item_set["@id"].strip())))

            # All properties
            for key, values in item.items():
                # Keep only keys of the form "prefix:value" that are not
                # Omeka S related content (ex. "o:resource_class"
                # or "o-module-mapping:marker")
                if ":" not in key or key.startswith(("o:", "o-module")):
                    continue
                predicate = _expandPrefixedName(key, item)
                if predicate is not None:
                    _addPropertyValues(graph, uri, predicate, values)

            # We want to save the type associated with items,
            # but not saving that every item is an Omeka item (o:item)
            types = item.get("@type") or ()
            if isinstance(types, str):
                # A single type given as a plain string: treat it as one
                # name instead of iterating over its characters.
                types = [types]
            for type_name in types:
                if ":" not in type_name or type_name.startswith("o:"):
                    continue
                type_uri = _expandPrefixedName(type_name, item)
                if type_uri is not None:
                    graph.add((uri, RDF.type, type_uri))
        except Exception:
            # Never index the item here: "@id" may be the very thing missing,
            # and an error raised inside the handler would stop the whole loop.
            item_id = item.get("@id") if isinstance(item, dict) else None
            logging.exception("An error occured for item with id: " + str(item_id))
def createMediasTriples(medias, graph):
    """Add the given medias to the RDF graph by creating appropriate triples.

    For each media: an rdf:type (built from the "o-cnt" type when "@type"
    contains "o-cnt", O.Media otherwise), an rdfs:label from "o:title", and,
    when present, O.source (as a literal) and O.item (as a resource).

    Parameters:
        medias: iterable of dicts (presumably decoded Omeka S JSON resources).
        graph: the graph receiving the triples through graph.add().

    A media that raises is logged with its traceback and skipped; triples
    already added for that media stay in the graph.
    """
    for media in medias:
        try:
            # The uri
            uri = URIRef(media["@id"])

            # The type
            # NOTE(review): the "o-cnt:chars" check is assumed to be nested
            # under the "o-cnt" branch -- confirm against the original layout.
            if "o-cnt" in media["@type"]:
                # [6:] drops the first 6 characters, i.e. the "o-cnt:" prefix
                # when "@type" is a string starting with it.
                graph.add((uri, RDF.type, URIRef(O_CNT + media["@type"][6:])))
                if "o-cnt:chars" in media:
                    graph.add((uri, O_CNT.chars, Literal(media["o-cnt:chars"])))
            else:
                graph.add((uri, RDF.type, O.Media))

            # The label
            graph.add((uri, RDFS.label, Literal(media["o:title"])))

            # Source (link)
            if "o:source" in media:
                graph.add((uri, O.source, Literal(media["o:source"])))

            # The related item
            if "o:item" in media:
                graph.add((uri, O.item, URIRef(media["o:item"]["@id"])))
        except Exception:
            # Never index the media here: "@id" may be the very thing missing,
            # and an error raised inside the handler would stop the whole loop.
            media_id = media.get("@id") if isinstance(media, dict) else None
            logging.exception("An error occured for media with id: " + str(media_id))
def createCollectionsTriples(collections, graph):
    """Add the given collections (item sets) to the RDF graph.

    For each collection a single rdfs:label triple is added, built from its
    "@id" and "o:title" entries.

    Parameters:
        collections: iterable of dicts (presumably decoded Omeka S JSON resources).
        graph: the graph receiving the triples through graph.add().

    A collection that raises is logged with its traceback and skipped.
    """
    for collection in collections:
        try:
            # The uri
            uri = URIRef(collection["@id"])

            # The label
            graph.add((uri, RDFS.label, Literal(collection["o:title"])))
        except Exception:
            # Never index the collection here: "@id" may be the very thing
            # missing, and an error raised inside the handler would stop the loop.
            collection_id = collection.get("@id") if isinstance(collection, dict) else None
            logging.exception("An error occured for set with id: " + str(collection_id))