-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash
# Score every model's output on the CATT benchmark against the ground truth.
# compute_der.py is run once per model, first with its third argument
# (case_ending) set to "yes" and then to "no".

set -o xtrace

data_dir=benchmarking/all_models_CATT_data
ground_truth="$data_dir/CATT_data_gt.txt"

# Hypothesis files, listed in the order their results are printed.
model_outputs=(
    CATT_data_ed_mlm_ns_epoch_178.txt
    CATT_data_eo_mlm_ns_epoch_193.txt
    CATT_data_CBHG_output_200K.txt
    CATT_data_Command_R_Plus_fixed.txt
    CATT_data_GPT4_output_fixed.txt
    CATT_data_Sakhr_output.txt
    CATT_data_Farasa_output.txt
    CATT_data_deep_diacritization_d2_output.txt
    CATT_data_deep_diacritization_d3_output.txt
    CATT_data_TashkeelAlkhalil_output.txt
    CATT_data_Mishkal_output.txt
    CATT_data_Multilevel_diacritizer.txt
    CATT_data_Shakkala_output.txt
)

for with_case_ending in yes no
do
    echo "CATT Benchmark"
    echo "##############################################################################"
    for model_output in "${model_outputs[@]}"
    do
        python compute_der.py "$ground_truth" "$data_dir/$model_output" "$with_case_ending"
    done
done
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
# Score every model's output on the WikiNews benchmark against the ground truth.
# compute_der.py is run once per model, first with its third argument
# (case_ending) set to "yes" and then to "no".

set -o xtrace

data_dir=benchmarking/all_models_WikiNews_data
ground_truth="$data_dir/WikiNews_data_gt.txt"

# Hypothesis files, listed in the order their results are printed.
model_outputs=(
    WikiNews_data_ed_mlm_ns_epoch_178.txt
    WikiNews_data_eo_mlm_ns_epoch_193.txt
    WikiNews_data_CBHG_200K_output.txt
    WikiNews_data_Command_R_Plus_v2.txt
    WikiNews_data_GPT4_output.txt
    WikiNews_data_Sakhr_output.txt
    WikiNews_data_Farasa_output.txt
    WikiNews_data_no_tashkeel_deep_diacritization_d2_output.txt
    WikiNews_data_no_tashkeel_deep_diacritization_d3_output.txt
    WikiNews_data_TashkeelAlkhalil_output.txt
    WikiNews_data_Mishkal_output.txt
    WikiNews_data_Multilevel_diacritizer.txt
    WikiNews_data_Shakkala_output.txt
)

for with_case_ending in yes no
do
    echo "WikiNews Benchmark"
    echo "##############################################################################"
    for model_output in "${model_outputs[@]}"
    do
        python compute_der.py "$ground_truth" "$data_dir/$model_output" "$with_case_ending"
    done
done
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash
# Score the eo/ed model variants (plain, mlm, mlm_ns) at their listed
# checkpoints on the CATT benchmark and then on the WikiNews benchmark.
# Each benchmark is run with compute_der.py's case_ending argument set to
# "yes" and then to "no".

set -o xtrace

# Checkpoint suffixes shared by both benchmarks, in reporting order.
checkpoints=(
    eo_epoch_192
    eo_mlm_epoch_192
    eo_mlm_ns_epoch_193
    ed_epoch_175
    ed_mlm_epoch_175
    ed_mlm_ns_epoch_178
)

# run_benchmark TITLE DATA_DIR FILE_PREFIX
# Prints the banner for TITLE and scores DATA_DIR/FILE_PREFIX_<checkpoint>.txt
# against DATA_DIR/FILE_PREFIX_gt.txt for every checkpoint.
run_benchmark() {
    local title=$1
    local data_dir=$2
    local prefix=$3
    local with_case_ending checkpoint

    for with_case_ending in yes no
    do
        echo "$title Benchmark"
        echo "##############################################################################"
        for checkpoint in "${checkpoints[@]}"
        do
            python compute_der.py "$data_dir/${prefix}_gt.txt" "$data_dir/${prefix}_${checkpoint}.txt" "$with_case_ending"
        done
    done
}

run_benchmark "CATT" benchmarking/eo_ed_mlm_ns/catt_data CATT_data
run_benchmark "WikiNews" benchmarking/eo_ed_mlm_ns/wikinews_data WikiNews_data
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash
# Score the epoch-5 checkpoints of the eo/ed model variants (plain and mlm)
# on the CATT benchmark and then on the WikiNews benchmark. Each benchmark is
# run with compute_der.py's case_ending argument set to "yes" and then "no".

set -o xtrace

# Checkpoint suffixes shared by both benchmarks, in reporting order.
checkpoints=(
    eo_epoch_5
    eo_mlm_epoch_5
    ed_epoch_5
    ed_mlm_epoch_5
)

# run_benchmark TITLE DATA_DIR FILE_PREFIX
# Prints the banner for TITLE and scores DATA_DIR/FILE_PREFIX_<checkpoint>.txt
# against DATA_DIR/FILE_PREFIX_gt.txt for every checkpoint.
run_benchmark() {
    local title=$1
    local data_dir=$2
    local prefix=$3
    local with_case_ending checkpoint

    for with_case_ending in yes no
    do
        echo "$title Benchmark"
        echo "##############################################################################"
        for checkpoint in "${checkpoints[@]}"
        do
            python compute_der.py "$data_dir/${prefix}_gt.txt" "$data_dir/${prefix}_${checkpoint}.txt" "$with_case_ending"
        done
    done
}

run_benchmark "CATT" benchmarking/eo_ed_mlm_ns/catt_data CATT_data
run_benchmark "WikiNews" benchmarking/eo_ed_mlm_ns/wikinews_data WikiNews_data
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
from tashkeel_tokenizer import TashkeelTokenizer | ||
from tqdm import tqdm | ||
import sys | ||
import xer | ||
|
||
# Command-line script: compare a hypothesis file with a reference file line by
# line and print the corpus-level DER and WER as reported by TashkeelTokenizer.
#
# Usage: <script> ref_file hyp_file case_ending(yes|no)

# Tokenizer providing the cleaning, diacritic-stripping and DER/WER helpers.
tokenizer = TashkeelTokenizer()

if len(sys.argv) < 4:
    print('USAGE: {} ref_file hyp_file case_ending (yes|no)'.format(sys.argv[0]))
    sys.exit(1)

ref_file = sys.argv[1]
hyp_file = sys.argv[2]

# Anything other than "yes" (case-insensitive) is treated as "no".
case_ending = (sys.argv[3].lower() == 'yes')

# Context managers close the files promptly. The explicit encoding avoids
# depending on the platform default.
# NOTE(review): assumes the benchmark files are UTF-8 encoded - confirm.
with open(ref_file, encoding='utf-8') as ref_handle:
    ref_lines = ref_handle.readlines()
with open(hyp_file, encoding='utf-8') as hyp_handle:
    hyp_lines = hyp_handle.readlines()

# Explicit check instead of `assert`, which is stripped when Python runs with -O.
if len(ref_lines) != len(hyp_lines):
    sys.exit(f"len(ref_lines), len(hyp_lines): {len(ref_lines)}, {len(hyp_lines)}")

# Maps the alef variants to a bare alef so they do not count as text mismatches.
ALEF_NORMALIZATION = str.maketrans({'آ': 'ا', 'إ': 'ا', 'أ': 'ا'})


def _percentage(distance, ref_length):
    """Return distance / ref_length as a percentage, or NaN when ref_length is 0."""
    if not ref_length:
        return float('nan')
    return (distance / ref_length) * 100


total_der_distance = 0
total_der_ref_length = 0
total_wer_distance = 0
total_wer_ref_length = 0

mismatch_samples_count = 0

for ref_line, hyp_line in zip(ref_lines, hyp_lines):
    ref = tokenizer.clean_text(ref_line.strip())
    hyp = tokenizer.clean_text(hyp_line.strip())

    # Undiacritized, alef-normalized text is used only for the mismatch check.
    ref_text = tokenizer.remove_tashkeel(ref).translate(ALEF_NORMALIZATION).strip()
    hyp_text = tokenizer.remove_tashkeel(hyp).translate(ALEF_NORMALIZATION).strip()

    if ref_text != '':
        wer_err = xer.wer(ref_text, hyp_text)['Error Rate']
        # IMPORTANT NOTE:
        # Some models alter the original text or introduce new characters, so
        # the undiacritized reference and hypothesis may not match exactly.
        # Small differences are ignored. Large ones are counted as a mismatch,
        # but the DER is still calculated. The count is a warning for the user
        # to inspect the model's output text.
        # NOTE(review): the original comment described this threshold as
        # "WER > 5%" while the code compares against 1; confirm the unit of
        # xer's 'Error Rate' and the intended threshold.
        if wer_err > 1:
            mismatch_samples_count += 1

    # NOTE(review): the source's indentation was lost; DER/WER accumulation is
    # assumed to run for every line pair, not only non-empty references - confirm.
    der_res = tokenizer.compute_der(ref, hyp, case_ending=case_ending)
    wer_res = tokenizer.compute_wer(ref, hyp, case_ending=case_ending)
    total_der_distance += der_res['distance']
    total_der_ref_length += der_res['ref_length']
    total_wer_distance += wer_res['distance']
    total_wer_ref_length += wer_res['ref_length']


print('Total DER: %{:0.3f}'.format(_percentage(total_der_distance, total_der_ref_length)))
print('Total WER: %{:0.3f}'.format(_percentage(total_wer_distance, total_wer_ref_length)))
print('Total mismatch samples:', mismatch_samples_count)
print('-'*89)