-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtree-diff.py
executable file
·119 lines (101 loc) · 3.22 KB
/
tree-diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
import argparse
import json
import sys
import utils
# Command line: a book name and a verse reference.  --show/-s allows the
# generated tree to be rendered on its own when no reference tree exists.
parser = argparse.ArgumentParser()
parser.add_argument('--show', '-s', action='store_true')
parser.add_argument('book', action='store')
parser.add_argument('verse', action='store')
args = parser.parse_args()

fgen = f'temp/macula-merged/{args.book}.conllu'
fref = f'data/checked/{args.book}.conllu'
# Both results are indexed by sentence id; element 0 of each value is compared
# against the verse and element 1 is the sentence's CoNLL-U text.
# NOTE(review): the meaning of the second argument to load_conllu is not
# visible from this file -- confirm against utils.
gen = utils.load_conllu(fgen)
ref = utils.load_conllu(fref, True)

# Locate the generated sentence whose first field matches the requested verse.
for k in gen:
    if str(gen[k][0]) == args.verse:
        sid = k
        break
else:
    print(f'Could not find {args.book} {args.verse}.')
    sys.exit(1)

# Collect the words (one tuple per token: id, form, lemma, upos, gloss) and,
# per token id, the generated (head, deprel) pair.
text = ''
words = [('0', 'ROOT', 'ROOT', 'ROOT', 'ROOT')]
arcs = {}
for line in gen[sid][1].splitlines():
    if line.startswith('# text ='):
        # Split on the first '=' only, so text that itself contains '=' is
        # kept whole instead of being cut at the second '='.
        text = line.split('=', 1)[1].strip()
        continue
    cols = line.strip().split('\t')
    # Only plain integer-id token lines with all ten CoNLL-U columns count.
    if len(cols) != 10 or not cols[0].isdigit():
        continue
    gls = ''
    # Test for 'Gloss=' (not just 'Gloss') so the split below always yields a
    # second element.
    if 'Gloss=' in cols[9]:
        gls = cols[9].split('Gloss=')[1].split('|')[0]
    arcs[cols[0]] = [(cols[6], cols[7])]
    words.append((cols[0], cols[1], cols[2], cols[3], gls))

# Columns are emitted in reverse token order, with ROOT ending up last; the
# index arithmetic further down relies on this ordering.
words.reverse()

# zip(*words) transposes the per-token tuples into one table row per field.
rows = ''.join(
    f'<tr id="words{i}"><td>' + '</td><td>'.join(r) + '</td></tr>'
    for i, r in enumerate(zip(*words))
)
rows += f'<tr><td colspan="{len(words)}">{text}</td></tr>'

# Pick what to compare against: the reference tree if there is one, otherwise
# (only with --show) the generated tree itself, which yields no differences.
if sid in ref:
    block = ref[sid][1]
elif args.show:
    block = gen[sid][1]
else:
    print(f'Reference for {args.book} {args.verse} not found.')
    sys.exit(1)

# Append the comparison (head, deprel) pair after the generated one.
for line in block.splitlines():
    cols = line.strip().split('\t')
    if len(cols) == 10 and cols[0] in arcs:
        arcs[cols[0]].append((cols[6], cols[7]))

# Every generated token needs a counterpart; without this check a missing
# token would surface as a bare IndexError below.
missing = [tok for tok, pairs in arcs.items() if len(pairs) < 2]
if missing:
    print(f'Reference for {args.book} {args.verse} lacks token(s): '
          + ', '.join(missing))
    sys.exit(1)

# Build the arcs to draw.  Positions are column indices in the reversed word
# list.  A head of '_' is treated as 0, i.e. the ROOT column.
# Agreeing arcs are black; otherwise the generated arc is green and the
# comparison arc is red.
als = []
for k, v in arcs.items():
    gen_arc, ref_arc = v[0], v[1]
    dep = len(words) - int(k) - 1
    h0 = len(words) - int(gen_arc[0].replace('_', '0')) - 1
    h1 = len(words) - int(ref_arc[0].replace('_', '0')) - 1
    if gen_arc == ref_arc:
        als.append({'head': h0, 'dep': dep, 'label': gen_arc[1],
                    'color': 'black'})
    else:
        als.append({'head': h0, 'dep': dep, 'label': gen_arc[1],
                    'color': 'green'})
        als.append({'head': h1, 'dep': dep, 'label': ref_arc[1],
                    'color': 'red'})

# Height 0 marks an arc whose drawing height has not been assigned yet.
for arc in als:
    arc['height'] = 0
def contains(big, little):
    """Return True when arc ``big`` should be drawn outside arc ``little``.

    Both arguments are arc dicts with integer ``'head'`` and ``'dep'``
    positions; ``big`` is also read for its ``'color'``.

    * Same endpoints, opposite direction: the arc whose head position is
      smaller is the outer one.
    * Same endpoints, same direction: the green arc is the outer one.
    * Otherwise: ``big`` is outer when both ends of ``little`` fall inside
      (or on) the span covered by ``big``.
    """
    outer = (big['head'], big['dep'])
    inner = (little['head'], little['dep'])

    # The reversed-endpoints test comes first, as it also catches the
    # degenerate case where head == dep on both arcs.
    if outer == inner[::-1]:
        return outer[0] < inner[0]
    if outer == inner:
        return big['color'] == 'green'

    # Direction does not matter for plain span containment.
    low, high = min(outer), max(outer)
    return all(low <= end <= high for end in inner)
# For each arc index, the indices of the other arcs that it contains, i.e.
# the arcs that must sit below it.
relevant = {
    i: [j for j, inner in enumerate(als) if i != j and contains(outer, inner)]
    for i, outer in enumerate(als)
}

# Assign heights from the inside out.  An arc containing nothing gets height
# 1; any other arc is placed one level above its tallest contained arc, once
# all of those have a height (0 means "not assigned yet").  Arcs that cannot
# be placed in this pass are retried in the next one.
# NOTE(review): this terminates only if `contains` never forms a cycle.
todo = list(relevant)
while todo:
    nt = []
    for t in todo:
        h = [als[i]['height'] for i in relevant[t]]
        if not h:
            als[t]['height'] = 1
        elif 0 not in h:
            als[t]['height'] = max(h) + 1
        else:
            nt.append(t)
    todo = nt

# Fill the HTML template.  UTF-8 is given explicitly so non-ASCII token text
# does not depend on the platform's default encoding.
with open('tree-diff.html', encoding='utf-8') as fin, \
        open('index.html', 'w', encoding='utf-8') as fout:
    page = fin.read()
    page = page.replace('[[SENT_ID]]', sid)
    page = page.replace('[[WORDS]]', rows)
    page = page.replace('[[ARCS]]', json.dumps(als))
    # default=0 keeps a sentence with no arcs from raising ValueError.
    page = page.replace(
        '[[HEIGHT]]', str(max((a['height'] for a in als), default=0)))
    fout.write(page)