Skip to content

Commit

Permalink
Add script to validate files
Browse files Browse the repository at this point in the history
  • Loading branch information
danny0838 committed May 5, 2024
1 parent 6fc94d3 commit 8c2cb12
Showing 1 changed file with 67 additions and 1 deletion.
68 changes: 67 additions & 1 deletion scripts/trad.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,74 @@ def move_non_tgh_to_extend():
save_trad_table(table)


def _iter_mapping_rows(path):
    """Yield ``(key, values)`` for each non-empty line of a tab-separated file.

    The first column is yielded unchanged. The second column is split on
    single spaces, with empty items dropped. Any further columns are ignored.

    Raises:
        ValueError: if a non-empty line has fewer than two tab-separated
            columns.
    """
    with open(path, encoding='UTF-8') as fh:
        for line in fh:
            line = line.rstrip('\n')
            if not line:
                continue
            key, values, *_ = line.split('\t')
            yield key, [v for v in values.split(' ') if v]


def validate_trad():
    """Check the scheme and dictionary files against the loaded trad table.

    Reads ``ts_multi.tsv``, ``st_multi.tsv``, ``STCharacters.txt`` and
    ``TSCharacters.txt`` under ``<root>/sts/data`` and prints one ``WARNING:``
    line for every traditional character that is not a key of the table
    returned by ``load_trad_table()``, and for every simplified character of
    the two dictionary files that is not found in the ``cn``/``cnx`` values of
    the matching table entry. The function only reports; it returns nothing.
    """
    table = load_trad_table()

    # ts_multi.tsv: only the traditional key (first column) is checked.
    src = os.path.join(root, 'sts', 'data', 'scheme', 'ts_multi.tsv')
    for trad, _simps in _iter_mapping_rows(src):
        if trad not in table:
            print(f'WARNING: *{trad}* (ts_multi.tsv) is not defined in traditional.tsv')

    # st_multi.tsv: every traditional candidate (second column) is checked.
    src = os.path.join(root, 'sts', 'data', 'scheme', 'st_multi.tsv')
    for simp, trads in _iter_mapping_rows(src):
        for trad in trads:
            if trad not in table:
                print(f'WARNING: {simp} => *{trad}* (st_multi.tsv) is not defined in traditional.tsv')

    # STCharacters.txt: each traditional candidate must be in the table, and
    # the simplified key must be listed by at least one of those entries.
    src = os.path.join(root, 'sts', 'data', 'dictionary', 'STCharacters.txt')
    src_base = os.path.basename(src)
    for simp, trads in _iter_mapping_rows(src):
        simp_found = False
        for trad in trads:
            try:
                entry = table[trad]
            except KeyError:
                print(f'WARNING: {simp} => *{trad}* ({src_base}) is not defined in traditional.tsv')
            else:
                if simp in (entry.cn + entry.cnx):
                    simp_found = True
        if not simp_found:
            print(f'WARNING: *{simp}* ({src_base}) is not defined in traditional.tsv')

    # TSCharacters.txt: the traditional key must be in the table, and each
    # simplified value must be listed by that entry.
    src = os.path.join(root, 'sts', 'data', 'dictionary', 'TSCharacters.txt')
    src_base = os.path.basename(src)
    for trad, simps in _iter_mapping_rows(src):
        try:
            entry = table[trad]
        except KeyError:
            print(f'WARNING: *{trad}* ({src_base}) is not defined in traditional.tsv')
        else:
            # The combined cn/cnx value does not depend on simp; build it once.
            known_simps = entry.cn + entry.cnx
            for simp in simps:
                if simp not in known_simps:
                    print(f'WARNING: {trad} => *{simp}* ({src_base}) is not defined in traditional.tsv')


def main():
    """Script entry point: call ``tidy_trad_table()``, then ``validate_trad()``."""
    # NOTE(review): this listing comes from a diff view (67 additions, 1 deletion),
    # so `tidy_trad_table()` may be the removed line rather than live code; confirm.
    tidy_trad_table()
    validate_trad()


if __name__ == '__main__':
Expand Down

0 comments on commit 8c2cb12

Please sign in to comment.