-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3_alignment.py
39 lines (33 loc) · 1.07 KB
/
3_alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from sinopy import segments
from lingpy import *
from lingrex.align import template_alignment
from sys import argv
# Choose the dataset prefix used for both the input file
# (<prefix>partial.tsv) and the output file (<prefix>aligned):
# 'A_Chen_' when the literal argument 'all' appears on the command line,
# 'D_Chen_' otherwise.
fname = 'A_Chen_' if 'all' in argv else 'D_Chen_'
def _structure_of(tokens):
    """Return the structure annotation for one `tokens` value.

    Each element yielded by `segments.get_structure(tokens)` is joined with
    single spaces; the resulting strings are joined with ' + ' and wrapped
    in `basictypes.lists`.
    """
    parts = segments.get_structure(tokens)
    return basictypes.lists(' + '.join([' '.join(part) for part in parts]))


# Load the partial-cognate wordlist for the selected prefix.
alms = Alignments(fname + 'partial.tsv', ref='cogids')

# Derive a new 'structure' column from the existing 'tokens' column.
alms.add_entries('structure', 'tokens', _structure_of)
print('[i] added segments')
# Rebuild the dataset as a plain dict, keeping only rows whose `tokens` and
# `structure` values agree in length. Key 0 holds the column names, which is
# the header slot when a wordlist is built from a dict (see lingpy docs).
D = {0: list(alms.columns)}
for idx, tokens, structure in alms.iter_rows('tokens', 'structure'):
    if len(tokens.n) != len(structure.n):
        # The `.n` views differ in length (presumably the per-morpheme
        # split -- confirm against lingpy basictypes); report and drop.
        print('[!!!]', tokens, structure)
    elif len(tokens) != len(structure):
        # Same number of `.n` parts but a different overall length;
        # report and drop.
        print('[!]', tokens, structure)
    else:
        D[idx] = alms[idx]

# Replace the working object with one that contains only consistent rows.
alms = Alignments(D, ref='cogids')
# Align the cognate sets referenced by 'cogids' using the 'tokens' and
# 'structure' columns and a template of six 'imnct' groups joined by '+'.
# NOTE(review): the meaning of the template letters is defined by
# lingrex/sinopy, not by this script -- confirm there before changing it.
template_alignment(
    alms,
    ref='cogids',
    template='imnct+imnct+imnct+imnct+imnct+imnct',
    structure='structure',
    fuzzy=True,
    segments='tokens',
)

# Write the result as TSV under the name <prefix>aligned, without
# prettified output.
alms.output('tsv', filename=fname + 'aligned', prettify=False)