-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdfhandling.py
388 lines (309 loc) · 12.4 KB
/
rdfhandling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
#!/usr/bin/env python
# coding: utf-8
# # Graph functions
# In[1]:
import re #regular expressions
import requests
import xml
from xml import etree
from xml.etree import ElementTree
from io import StringIO
import unicodedata as ucd
# debugging
import pdb
# In[2]:
#Import local modules
import os
import sys
repopath=os.path.abspath('../Gitrepos/tkpy')
if repopath not in sys.path:
sys.path.append(repopath)
import konverter_v6
# In[1]:
# import rdflib
import rdflib
from rdflib import Graph, ConjunctiveGraph
from rdflib import URIRef, BNode, Literal, Namespace
from rdflib.namespace import XSD, RDF, RDFS, SKOS, OWL, DC, DCTERMS, FOAF
from pprint import pprint as pp
from rdflib.plugins.serializers import n3, rdfxml, turtle
#import surf (surf no good for Python 3.x)
from IPython.display import display, display_pretty, display_html, HTML
#from graphviz import Digraph
from skosify import skosify
import json
# ### Functions related to creating the graphs from SPARQL query
# In[4]:
def createTriple(gr, s, o, valueDict, propMap):
    # Add one triple (s, property, value) to the graph gr.
    #   gr        - graph receiving the triple (only its add() method is used)
    #   s         - subject of the triple
    #   o         - key naming the property, e.g. the SPARQL variable 'prefTitle'
    #   valueDict - one binding from a SPARQL JSON result, for example
    #               {'datatype': 'http://www.w3.org/2001/XMLSchema#string',
    #                'type': 'literal',
    #                'value': 'kat3'}
    #   propMap   - dict mapping the key o to the property used as predicate
    # Nothing is added when o has no entry in propMap (or maps to None).
    # A literal carries either a datatype or a language tag, never both, so
    # 'datatype' is checked first and 'xml:lang' second.
    # NOTE(review): a binding with neither 'datatype' nor 'xml:lang' is only
    # added when its 'type' is 'uri'; untyped plain literals and blank nodes
    # are silently skipped - confirm that this is intended.
    predicate = propMap.get(o)
    if predicate is None:
        return
    if 'datatype' in valueDict:
        # typed literal (e.g. a date)
        node = Literal(valueDict['value'], datatype=valueDict['datatype'])
    elif 'xml:lang' in valueDict:
        # string with a language tag
        node = Literal(valueDict['value'], lang=valueDict['xml:lang'])
    elif valueDict['type'] == 'uri':
        node = URIRef(valueDict['value'])
    else:
        return
    gr.add((s, predicate, node))
def countTriples(gr, subj=None, pred=None, obj=None):
    # Return the number of triples in gr that match the pattern
    # (subj, pred, obj). The pattern is handed unchanged to gr.triples();
    # rdflib graphs treat a None component as a wildcard.
    # The count starts at zero, so an empty match yields 0.
    return sum(1 for _ in gr.triples((subj, pred, obj)))
def getDistinctSubjects(gr, pred=None, obj=None):
    # Return a list with every distinct subject that gr.subjects() yields
    # for the given predicate and object. Duplicates are dropped through a
    # set, so the order of the returned list is unspecified.
    return list(set(gr.subjects(predicate=pred, object=obj)))
def getDistinctObjects(gr, subj=None, pred=None):
    # Return a list with every distinct object that gr.objects() yields
    # for the given subject and predicate. Duplicates are dropped through a
    # set, so the order of the returned list is unspecified.
    return list(set(gr.objects(subject=subj, predicate=pred)))
def getDistinctPredicates(gr, subj=None, obj=None):
    # Return a list with every distinct predicate that gr.predicates()
    # yields between subj and obj. Duplicates are dropped through a set, so
    # the order of the returned list is unspecified.
    return list(set(gr.predicates(subject=subj, object=obj)))
def getWorksByCreator(gr, autURI):
    # Return the distinct subjects of gr that point to autURI through the
    # property rdawo.P10065.
    # NOTE(review): rdawo is not defined in this part of the file; it is
    # presumably a namespace object set up elsewhere - confirm.
    creatorProperty = rdawo.P10065
    return getDistinctSubjects(gr, pred=creatorProperty, obj=autURI)
def creatorGraph(gr, autURI):
    # Build and return a new Graph holding every triple of gr whose subject
    # is one of the works returned by getWorksByCreator(gr, autURI).
    # bindWRNamespaces() is applied to the new graph before it is filled.
    autGraph = Graph()
    bindWRNamespaces(autGraph)
    workTriples = (
        triple
        for work in getWorksByCreator(gr, autURI)
        for triple in gr.triples((work, None, None))
    )
    for triple in workTriples:
        autGraph.add(triple)
    return autGraph
def getBiblByCreator(gr, autURI):
    # Return a list of the distinct objects reached through rdau.P60313 from
    # any work returned by getWorksByCreator(gr, autURI); according to the
    # original note these are the IDs of the bibliographic records attached
    # to the works. The order of the list is unspecified.
    # NOTE(review): rdau is not defined in this part of the file - confirm
    # it is set up elsewhere.
    recordIds = set()
    for work in getWorksByCreator(gr, autURI):
        recordIds.update(getDistinctObjects(gr, subj=work, pred=rdau.P60313))
    return list(recordIds)
# # Graph functions
# In[5]:
def contextGraph(cGraph, context):
    # Copy all triples that cGraph (a ConjunctiveGraph) reports for the given
    # context into a fresh Graph and return that graph.
    subgraph = Graph()
    everything = (None, None, None)
    for triple in cGraph.triples(everything, context=context):
        subgraph.add(triple)
    return subgraph
def term(URIstr):
    # Return the trailing part of URIstr: the text after the last '#' when
    # the string contains one, otherwise the text after the last '/'.
    # A string containing neither separator is returned unchanged.
    separator = '#' if '#' in URIstr else '/'
    return URIstr.rpartition(separator)[2]
def entityTypes(graph, entity, uri=False):
    # Return a list of the distinct RDF.type values of entity (an URIRef).
    # With uri=False the values are shortened to labels via term();
    # otherwise the type nodes themselves are returned.
    distinctTypes = set(graph.objects(entity, RDF.type))
    # '==' is kept on purpose so that non-bool arguments behave as before
    if uri == False:
        return [term(str(tp)) for tp in distinctTypes]
    return list(distinctTypes)
def propertyTypes(graph, entity, uri=False):
    # Return a list of the distinct predicates used with entity (an URIRef)
    # as subject. With uri=False the predicates are shortened to labels via
    # term(); otherwise the predicate nodes themselves are returned.
    distinctProps = set(graph.predicates(entity, None))
    # '==' is kept on purpose so that non-bool arguments behave as before
    if uri == False:
        return [term(str(prop)) for prop in distinctProps]
    return list(distinctProps)
def propertyTypesForEntityType(graph, entityType, uri=False):
    # Return a list of the distinct property types used by any instance of
    # entityType (an URIRef), i.e. by every subject that has entityType as
    # its RDF.type.
    # With uri=False labels are returned, otherwise the predicate nodes.
    # propertyTypes() already delivers labels or nodes according to uri, so
    # its results are only merged and de-duplicated here. Shortening the
    # labels a second time with term() would cut labels that contain '/'
    # and could reintroduce duplicates.
    collected = set()
    for ent in graph.subjects(RDF.type, entityType):
        collected.update(propertyTypes(graph, ent, uri=uri))
    return list(collected)
def alltypes(graph, uri=False):
    # Return a list of all distinct types in the graph by calling
    # entityTypes() with None in place of a specific entity.
    # uri has the same meaning as in entityTypes().
    anyEntity = None
    return entityTypes(graph, anyEntity, uri=uri)
def allpropertyTypes(graph, uri=False):
    # Return a list of all distinct properties used in the graph by calling
    # propertyTypes() with None in place of a specific entity.
    # uri has the same meaning as in propertyTypes().
    anyEntity = None
    return propertyTypes(graph, anyEntity, uri=uri)
def relatedInfo(graph, entities):
    # Build a table (list of tuples) describing what each entity in entities
    # is related to. The first row is the header; every further row holds
    # the entity, its title from bestTitleLiteral(), one related entity and
    # the first relation that relation() reports between the two.
    header = ('Opus', 'Title', 'Related entity', 'Relation')
    rows = [
        (ent, bestTitleLiteral(graph, ent), other, relation(graph, ent, other)[0])
        for ent in entities
        for other in allrelated(graph, ent)
    ]
    return [header] + rows
def contributionInfo(graph, entity):
    # Build a table (list of tuples) about the contributors of entity
    # (typically an Opus, Expression or Instance). The first row is the
    # header. Each further row describes one bibframe contribution that has
    # at least one agent: the contribution, its first agent, and the lists
    # of that agent's RDFS labels, that agent's RDF types and the
    # contribution's roles. Contributions without an agent are left out.
    contributionProp = URIRef('http://id.loc.gov/ontologies/bibframe/contribution')
    agentProp = URIRef('http://id.loc.gov/ontologies/bibframe/agent')
    roleProp = URIRef('http://id.loc.gov/ontologies/bibframe/role')
    rows = [('Contribution','Agent','Name', 'AgentType', 'Role')]
    for contribution in related(graph, entity, contributionProp):
        agents = related(graph, contribution, agentProp)
        if not agents:
            continue
        firstAgent = agents[0]
        names = related(graph, firstAgent, RDFS.label)
        agentTypes = related(graph, firstAgent, RDF.type)
        roles = related(graph, contribution, roleProp)
        rows.append((contribution, firstAgent, names, agentTypes, roles))
    return rows
def languageInfo(graph, entity):
    # Describe every distinct bibframe language node attached to entity
    # (most often an expression (svde:Work) or a bf:Work).
    # Returns one list per language node. Each of these lists holds
    # (caption, values) tuples for the properties value, label, code and
    # part - in that order - where values is the non-empty list of objects
    # found for the property. Properties without objects are left out.
    languageProp = URIRef('http://id.loc.gov/ontologies/bibframe/language')
    detailProps = (
        ('value: ', URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#value')),
        ('label: ', URIRef('http://www.w3.org/2000/01/rdf-schema#label')),
        ('code: ', URIRef('http://id.loc.gov/ontologies/bibframe/code')),
        ('part: ', URIRef('http://id.loc.gov/ontologies/bibframe/part')),
    )
    described = []
    for languageNode in set(graph.objects(entity, languageProp)):
        details = []
        for caption, prop in detailProps:
            found = list(graph.objects(languageNode, prop))
            if found:
                details.append((caption, found))
        described.append(details)
    return described
def entities(graph, entityType):
    # Return a list of the distinct subjects in graph whose RDF.type is
    # entityType. The order of the list is unspecified.
    return list(set(graph.subjects(RDF.type, entityType)))
## Get entities related to a given entity by any relation
def allrelated(graph, entity):
    # Return a list of the distinct objects of all triples in graph that
    # have entity as subject, whatever the predicate. The order of the list
    # is unspecified.
    return list(set(graph.objects(entity, None)))
## Get entities related to a given entity
def related(graph, entity, relation):
    # Return a list of the distinct objects of the triples in graph that
    # have entity as subject and relation as predicate. The order of the
    # list is unspecified.
    return list(set(graph.objects(entity, relation)))
def related2entityType(graph, entity, entityType):
    # Return a list of the distinct entities related to entity (by any
    # relation, see allrelated()) that have entityType among their RDF.type
    # values. The order of the list is unspecified.
    matching = {
        candidate
        for candidate in allrelated(graph, entity)
        if entityType in graph.objects(candidate, RDF.type)
    }
    return list(matching)
def relatedByRelation(graph, relation):
    # Return a list of (subject, object) tuples, one for each triple in
    # graph whose predicate is relation, in the order graph.triples()
    # yields them.
    pattern = (None, relation, None)
    return [(triple[0], triple[2]) for triple in graph.triples(pattern)]
def relation(graph, entity1, entity2):
    # Return a list of the distinct predicates of the triples in graph that
    # have entity1 as subject and entity2 as object. The order of the list
    # is unspecified.
    return list(set(graph.predicates(entity1, entity2)))
def typeOf(graph, entity):
    # Return the RDF.type values of entity as a list of nodes, in the order
    # the graph yields them and without removing duplicates.
    # Use entityTypes() when the type labels (strings) are wanted.
    typeNodes = graph.objects(entity, RDF.type)
    return list(typeNodes)
# In[6]:
#Remove the concepts "Film" and "TV-programmer" together with all their subtrees
#Precondition: the vocabulary has been processed by konverter.py and thereby
#by skosify, so that the hierarchical relations are inferred (every
#hierarchical relation exists both as narrower and as broader.)
def removeTree(graph, topEntity):
    # Remove from graph the subtree that starts at topEntity and return the
    # same (modified) graph object.
    # Method: 1) collect topEntity plus every node reached by following
    #            SKOS.narrower from it
    #         2) remove all triples in which one of these nodes is subject
    #            or object
    subtree = [topEntity] + traverseRelated(graph, topEntity, SKOS.narrower)
    removeConcepts(graph, subtree)
    return graph
def traverseRelated(graph, entity, relation):
    # Return a list of all distinct entities reached by following relation
    # from entity, over any number of steps, until dead ends.
    # Used to collect all nodes in a subtree headed by entity.
    # entity itself is only part of the result if a cycle leads back to it.
    # The order of the list is unspecified.
    #
    # The traversal is iterative and remembers the nodes it has already
    # seen. Every node is therefore expanded exactly once, shared
    # sub-branches are not walked repeatedly, and cycles in the data
    # terminate instead of recursing without end.
    reached = set()
    pending = [entity]
    while pending:
        current = pending.pop()
        for child in graph.objects(current, relation):
            if child not in reached:
                reached.add(child)
                pending.append(child)
    return list(reached)
def removeConceptsWithIDs(g, uribase, ids):
    # Remove from g every triple that involves one of the concepts named by
    # ids. Each concept is the URIRef built from uribase followed by
    # str(id). For every concept, first the triples with the concept as
    # subject and then those with the concept as object are removed.
    for conceptId in ids:
        concept = URIRef(uribase + str(conceptId))
        for pattern in ((concept, None, None), (None, None, concept)):
            g.remove(pattern)
def removeConcepts(g, concepts):
    # Remove from g every triple that involves one of the given concepts.
    # For every concept, first the triples with the concept as subject and
    # then those with the concept as object are removed.
    for concept in concepts:
        for pattern in ((concept, None, None), (None, None, concept)):
            g.remove(pattern)
# In[1]:
def idNum(entity):
    # Return the numeric ID of entity (the part after '?tema=') as a string.
    # Specific to TemaTres URIs. Technically everything after the first '='
    # of str(entity) is returned; without any '=' the result is ''.
    return str(entity).partition('=')[2]
# idNum(URIRef('http://tematres.nb.no/vocab/?tema=213')) = '213'
def prefLabel(graph, entity, lang):
    # Return the SKOS.prefLabel of entity whose language equals lang, or
    # None when there is none. Only one prefLabel per language is expected;
    # should there be several, the last one the graph yields is returned.
    candidates = [
        lbl for lbl in graph.objects(entity, SKOS.prefLabel)
        if lbl.language == lang
    ]
    return candidates[-1] if candidates else None
def altLabel(graph, entity, lang):
    # Return the last SKOS.altLabel of entity (in the order the graph yields
    # them) whose language equals lang, or None when no altLabel in that
    # language exists.
    candidates = [
        lbl for lbl in graph.objects(entity, SKOS.altLabel)
        if lbl.language == lang
    ]
    return candidates[-1] if candidates else None
def label(graph, entity, lang):
    # Return the last RDFS.label of entity (in the order the graph yields
    # them) whose language equals lang, or None when no label in that
    # language exists.
    candidates = [
        lbl for lbl in graph.objects(entity, RDFS.label)
        if lbl.language == lang
    ]
    return candidates[-1] if candidates else None
def exactMatches(graph, lang):
    # Return a list of the concepts that are the target (object) of a
    # SKOS.exactMatch relation and have a prefLabel in the language lang.
    # One entry is produced per matching exactMatch triple, so a concept
    # may appear more than once.
    # Intended use per the original note: keep only the members of the
    # target vocabulary in language lang (e.g. the English LCGFT subset).
    targets = (pair[1] for pair in relatedByRelation(graph, SKOS.exactMatch))
    return [
        concept for concept in targets
        if prefLabel(graph, concept, lang) is not None
    ]
# In[ ]: