-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathaddAnnos.py
executable file
·86 lines (66 loc) · 2.89 KB
/
addAnnos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#! /usr/bin/env python3
import json
from urllib import request
import os
import base64
def loadJson(url):
    """Open *url*, read the whole response body and return it parsed as JSON."""
    response = request.urlopen(url)
    # The context manager closes the response once the body has been read.
    with response:
        payload = response.read()
    return json.loads(payload)
# just a check to make sure that annoPages only link to one canvas
def checkOnlyOneCanvasInAnnoPage(annoPage):
    """Return True when the annotations in *annoPage* target at most one canvas.

    Each entry of ``annoPage['items']`` is read as
    ``anno['target']['source']['id']`` to obtain the canvas id it points at.

    A page with no ``items`` key (or an empty/null ``items``) targets zero
    canvases and therefore returns True instead of raising ``KeyError``, so
    callers can report it as an empty page.
    """
    # A set de-duplicates the canvas ids without repeated list scans.
    canvases = {
        anno['target']['source']['id']
        for anno in (annoPage.get('items') or [])
    }
    return len(canvases) < 2
def addManifest(manifests, manifestURL):
    """Download the manifest at *manifestURL* and store it in *manifests* keyed by that URL."""
    manifests[manifestURL] = loadJson(manifestURL)
def addAnnosToCanvas(manifest, canvasID, annoPageURL):
    """Link the annotation page *annoPageURL* from the canvas *canvasID*.

    The canvases are read from ``manifest['sequences'][0]['canvases']`` and
    matched on their ``'@id'``. On the first match an
    ``{"id": annoPageURL, "type": "AnnotationPage"}`` reference is appended to
    the canvas's ``'annotations'`` list; the manifest is modified in place.

    Existing entries in ``'annotations'`` are kept (earlier links are no
    longer overwritten) and a page that is already linked is not added twice.
    If no canvas matches, the manifest is left unchanged and a message is
    printed.
    """
    for canvasNo, canvas in enumerate(manifest['sequences'][0]['canvases']):
        if canvas['@id'] != canvasID:
            continue
        # Only report the canvas that actually receives the link.
        print('Adding annotations to canvas no: {}'.format(canvasNo))
        annotations = canvas.setdefault('annotations', [])
        alreadyLinked = any(
            isinstance(page, dict) and page.get('id') == annoPageURL
            for page in annotations
        )
        if not alreadyLinked:
            annotations.append({
                "id": annoPageURL,  # add the URL to the annotation page to the manifest
                "type": "AnnotationPage"
            })
        return
    print('Canvas {} not found in manifest'.format(canvasID))
def processAnnoPage(manifests, annoPageURL):
    """Process the annotation page at *annoPageURL* and every page chained after it.

    For each page:
      * the page is downloaded with ``loadJson``;
      * a page with no (or empty) ``items`` is reported and skipped;
      * a page whose annotations target more than one canvas is reported and
        skipped;
      * otherwise the canvas id (``target['source']['id']``) and manifest URL
        (``target['source']['partOf']['id']``) are taken from the first
        annotation, the manifest is downloaded into *manifests* if it is not
        there yet, and the page is linked from that canvas.

    Pages are followed through their ``next`` entry, which may be either an
    object with an ``id`` or a plain URL string. The chain is walked with a
    loop rather than recursion so long collections cannot hit Python's
    recursion limit.

    *manifests* is a dict keyed by manifest URL and is updated in place.
    """
    while True:
        print('Proccessing {}'.format(annoPageURL))
        annoPage = loadJson(annoPageURL)

        # Check for emptiness first so a page without 'items' is reported
        # rather than failing inside the single-canvas check.
        items = annoPage.get('items') or []
        if not items:
            print('Annotation page is empty')
        elif not checkOnlyOneCanvasInAnnoPage(annoPage):
            print('Anno page {} contains more than one canvas'.format(annoPageURL))
        else:
            source = items[0]['target']['source']
            manifestURL = source['partOf']['id']

            if manifestURL not in manifests:
                addManifest(manifests, manifestURL)

            # Add anno page to canvas
            addAnnosToCanvas(manifests[manifestURL], source['id'], annoPageURL)

        nextPage = annoPage.get('next')
        if not nextPage:
            return
        # 'next' is an object in IIIF-style pages and a bare IRI in W3C-style pages.
        annoPageURL = nextPage['id'] if isinstance(nextPage, dict) else nextPage
if __name__ == "__main__":
    base = "https://glenrobson.github.io/iiif_stuff/zooniverse/Mar_2022"

    # Downloaded manifests that will be updated, keyed by manifest URL
    manifests = dict()

    # Start from the first page of the titles annotation collection and
    # let processAnnoPage follow the chain of pages from there.
    tAnnoCollection = loadJson('https://zooniverse.github.io/iiif-annotations/annotations/titles.json')
    firstPageURL = tAnnoCollection['first']['id']
    processAnnoPage(manifests, firstPageURL)

    # Write each updated manifest (now carrying annotation links) to its own file
    for manifestURL, manifest in manifests.items():
        # This will only work with BL manifest URLs which have the id before the manifest e.g:
        # https://api.bl.uk/metadata/iiif/ark:/81055/vdc_100022589176.0x000002/manifest.json
        urlParts = manifestURL.split("/")
        idPosition = len(urlParts) - 2
        filename = urlParts[idPosition] + ".json"
        print('Writing out {}'.format(filename))
        with open(filename, 'w') as outfile:
            json.dump(manifest, outfile)