-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate-schema.py
94 lines (88 loc) · 3.04 KB
/
generate-schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import csv
import yaml
import io
import requests
# Source spreadsheet:
# https://docs.google.com/spreadsheets/d/1PZ11esT6COK7Y0mMZEHifY9hQeREbNSlRbC6dkYtjNg/edit?usp=sharing
doc = "1PZ11esT6COK7Y0mMZEHifY9hQeREbNSlRbC6dkYtjNg"

# Names of the spreadsheet tabs that are exported.
terms_tab = "Terms"
types_tab = "Types"
licences_tab = "Licences"

# CSV export URL template; placeholders are (document id, tab name).
_csv_export_url = 'https://docs.google.com/spreadsheets/d/%s/gviz/tq?tqx=out:csv&sheet=%s'
terms_csv = _csv_export_url % (doc, terms_tab)
types_csv = _csv_export_url % (doc, types_tab)
licences_csv = _csv_export_url % (doc, licences_tab)

# Destination files for the generated YAML.
output_yml = "./src/reeco/schema/schema.yml"
licences_yml = "./src/reeco/schema/licences.yml"
component_yml = "./src/reeco/schema/component.yml"
container_yml = "./src/reeco/schema/container.yml"

# Delimiter used by dictify() to split multi-valued cells, keyed by the
# normalised (lower-case, hyphenated) column name.
split_by = {
    'enum': ',',
    'xsd-datatype': ',',
    'domain': '|',
    'example-values': '[OR]',
}
def dictify(input_csv):
    """Download a CSV sheet and index its rows by a path-like identifier.

    The identifier of a row is built by concatenating, in this order:

    * ``Scope`` followed by ``/`` (whenever the column exists),
    * ``Super term`` followed by ``/`` (when non-empty),
    * ``Supertype Id`` followed by ``/`` (when non-empty),
    * the value of the first column present among ``Term``, ``Type``
      and ``Code``.

    Each entry holds ``_position`` (1-based row number) plus every
    non-empty cell of the row, keyed by the column name lower-cased with
    spaces replaced by hyphens. Columns listed in the module-level
    ``split_by`` mapping are split into lists on their delimiter.
    Rows sharing an identifier overwrite earlier ones.

    Args:
        input_csv: URL of the CSV document to download.

    Returns:
        dict mapping row identifier to the entry dict described above.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if a row has none of the ``Term``, ``Type`` or
            ``Code`` columns.
    """
    dictionary = {}
    # A timeout keeps a stalled connection from hanging the script, and the
    # status check stops an HTTP error page from being parsed as CSV.
    r = requests.get(input_csv, timeout=60)
    r.raise_for_status()
    cr = csv.DictReader(io.StringIO(r.text))
    for position, row in enumerate(cr, start=1):
        ID = ''
        if 'Scope' in row:
            ID = row['Scope'] + '/'
        if row.get('Super term'):
            ID = ID + row['Super term'] + '/'
        if row.get('Supertype Id'):
            ID = ID + row['Supertype Id'] + '/'
        if 'Term' in row:  # Terms
            ID = ID + row['Term']
        elif 'Type' in row:  # Types
            ID = ID + row['Type']
        elif 'Code' in row:  # Licences
            ID = ID + row['Code']
        else:
            # repr() is required: concatenating the dict itself would raise
            # TypeError instead of the intended ValueError.
            raise ValueError('Bad row: ' + repr(row))
        entry = {'_position': position}
        for key, value in row.items():
            # Empty cells are dropped; testing before splitting also avoids
            # calling .split() on a missing (None) cell.
            if not value:
                continue
            k = key.lower().replace(' ', '-')
            if k in split_by:
                value = value.split(split_by[k])
            entry[k] = value
        dictionary[ID] = entry
    return dictionary
# Fetch the three sheets and write the combined schema.
types = dictify(types_csv)
terms = dictify(terms_csv)
licences = dictify(licences_csv)

schema = {}
schema['types'] = types
schema['terms'] = terms
schema['licences'] = licences
# The combined schema is written before the term entries are given their
# nested 'terms' lists below, so schema.yml holds the flat entries only.
with open(output_yml, 'w') as schema_file:
    yaml.dump(schema, schema_file)

# Index child terms by (scope, parent term name) in a single pass, so each
# parent can look up its children without rescanning every term.
children_by_parent = {}
for candidate in terms.values():
    if 'super-term' in candidate:
        parent_key = (candidate['scope'], candidate['super-term'])
        children_by_parent.setdefault(parent_key, []).append(candidate)

component = []
container = []
for term in terms.values():
    # Copy the list so that two parents sharing a key do not end up
    # holding the very same list object.
    subterms = list(
        children_by_parent.get((term['scope'], term.get('term')), ())
    )
    if subterms:
        term['terms'] = subterms
    if term['scope'] == 'Component':
        component.append(term)
    if term['scope'] == 'Container':
        container.append(term)

with open(component_yml, 'w') as component_file:
    yaml.dump(component, component_file)
with open(container_yml, 'w') as container_file:
    yaml.dump(container, container_file)

# Licences are written as a plain list, in sheet order.
licences2 = list(licences.values())
with open(licences_yml, 'w') as licences_file:
    yaml.dump(licences2, licences_file)