Skip to content

Commit

Permalink
formatting (EleutherAI#2104)
Browse files Browse the repository at this point in the history
  • Loading branch information
lintangsutawika authored Jul 15, 2024
1 parent 9884ad6 commit 56a4e79
Show file tree
Hide file tree
Showing 58 changed files with 234 additions and 209 deletions.
2 changes: 1 addition & 1 deletion lm_eval/api/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,4 +565,4 @@ def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):

assert len(metrics) == len(sizes)

return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
2 changes: 1 addition & 1 deletion lm_eval/api/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -1665,4 +1665,4 @@ def count_bytes(cls, doc) -> int:
@classmethod
def count_words(cls, doc) -> int:
    """Return the number of pieces obtained by splitting ``doc`` on whitespace runs.

    Downstream tasks with custom word boundaries should override this!
    """
    # re.split keeps empty leading/trailing pieces, so they are counted too.
    pieces = re.split(r"\s+", doc)
    return len(pieces)
2 changes: 1 addition & 1 deletion lm_eval/filters/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,4 @@ def filter_ignores(st):
filtered.append(match)
filtered_resps.append(filtered)

return filtered_resps
return filtered_resps
8 changes: 4 additions & 4 deletions lm_eval/tasks/afrimgsm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368

IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).


### Citation

```
@misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024},
eprint={2406.03368},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368},
url={https://arxiv.org/abs/2406.03368},
}
```

Expand Down
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimgsm/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ lm_eval --model hf \
--device cuda:0 \
--batch_size 1 \
--verbosity DEBUG \
--limit 5
--limit 5
106 changes: 64 additions & 42 deletions lm_eval/tasks/afrimgsm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,51 +2,74 @@

import yaml

# Language codes handled by this module. LANGUAGES below is filled in this
# order, so the order here is the order in which entries are later iterated.
languages = [
    "eng",
    "amh",
    "ibo",
    "fra",
    "sna",
    "lin",
    "wol",
    "ewe",
    "lug",
    "xho",
    "kin",
    "twi",
    "zul",
    "orm",
    "yor",
    "hau",
    "sot",
    "swa",
]

# Per-language pattern for the final-answer sentence. Each pattern has a
# single capture group: an optional minus sign followed by digits, dots and
# commas.
languages_REGEX = {
    "eng": "The answer is (\\-?[0-9\\.\\,]+)",
    "amh": "መልሱ (\\-?[0-9\\.\\,]+)",
    "ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
    # The French pattern used to have no separator between "est" and the
    # number, so "La réponse est 18" could never match. The space is optional
    # so that anything the old pattern matched still matches.
    "fra": "La réponse est ?(\\-?[0-9\\.\\,]+)",
    "sna": "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
    "lin": "Eyano ezali (\\-?[0-9\\.\\,]+)",
    "wol": "Tontu li (\\-?[0-9\\.\\,]+)",
    "ewe": "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
    "lug": "Ansa eri (\\-?[0-9\\.\\,]+)",
    "xho": "Impendulo ngu (\\-?[0-9\\.\\,]+)",
    "kin": "Igisubizo ni (\\-?[0-9\\.\\,]+)",
    "twi": "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
    "zul": "Impendulo ithi (\\-?[0-9\\.\\,]+)",
    "orm": "Deebiin isaa (\\-?[0-9\\.\\,]+)",
    "yor": "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
    "hau": "Amsar ita ce (\\-?[0-9\\.\\,]+)",
    "sot": "Karabo ke (\\-?[0-9\\.\\,]+)",
    "swa": "Jibu ni (\\-?[0-9\\.\\,]+)",
}

# Prompt markers used for every language that has no entry in
# _NATIVE_PROMPTS. These are the English markers.
_DEFAULT_PROMPTS = {
    "QUESTION": "Question:",
    "ANSWER": "Step-by-Step Answer:",
    "DIRECT": "Answer:",
}

# Languages whose question / step-by-step markers are written in the language
# itself. Their "DIRECT" marker is still the English default.
_NATIVE_PROMPTS = {
    "amh": {  # Amharic
        "QUESTION": "ጥያቄ:",
        "ANSWER": "በቅደም ተከተል መልስ:",
    },
    "yor": {  # Yoruba
        "QUESTION": "Ìbéèrè:",
        "ANSWER": "Ìdáhùn lẹ́sẹsẹ:",
    },
}

# language code -> {"QUESTION", "ANSWER", "DIRECT", "REGEX"}
LANGUAGES = {}

for lang in languages:
    # Overriding existing keys keeps the QUESTION, ANSWER, DIRECT, REGEX key
    # order of the original hand-written dicts.
    LANGUAGES[lang] = {
        **_DEFAULT_PROMPTS,
        **_NATIVE_PROMPTS.get(lang, {}),
        "REGEX": languages_REGEX[lang],
    }


def add_regex_pattern(regex_pattern):
Expand Down Expand Up @@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
err = []
for lang in LANGUAGES.keys():
try:

yaml_template = "cot_yaml"
filter_list = {}
DELIMITER = None
if mode == "direct":
ANSWER = LANGUAGES['eng']["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"]
ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None
task_name = f"afrimgsm_direct_{lang}"
yaml_template = "direct_yaml"
Expand All @@ -122,16 +144,16 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
QUESTION = LANGUAGES["eng"]["QUESTION"]
task_name = f"afrimgsm_en_cot_{lang}"
elif mode == "translate-direct":
ANSWER = LANGUAGES['eng']["DIRECT"]
QUESTION = LANGUAGES['eng']["QUESTION"]
ANSWER = LANGUAGES["eng"]["DIRECT"]
QUESTION = LANGUAGES["eng"]["QUESTION"]
REGEX = None
task_name = f"afrimgsm_translate_direct_{lang}"
yaml_template = "translate_direct_yaml"

file_name = f"{task_name}.yaml"
ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
with open(
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
) as f:
f.write("# Generated by utils.py\n")
yaml.dump(
Expand All @@ -140,15 +162,15 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
"dataset_name": lang,
"task": f"{task_name}",
"doc_to_text": f"""{{% if answer is not none %}}"""
f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""",
f"""{{{{question+"\\n{ANSWER}"}}}}"""
f"""{{% else %}}"""
f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
f"""{{% endif %}}""",
"doc_to_target": f"""{{% if answer is not none %}}"""
f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""",
f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
f"""{{% else %}}"""
f"""{{{{answer_number|string}}}}"""
f"""{{% endif %}}""",
**filter_list,
"generation_kwargs": {
"until": [QUESTION, "</s>", "<|im_end|>"],
Expand Down Expand Up @@ -194,4 +216,4 @@ def main() -> None:


if __name__ == "__main__":
main()
main()
8 changes: 4 additions & 4 deletions lm_eval/tasks/afrimmlu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
https://arxiv.org/pdf/2406.03368

IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
low-resource African languages covering three tasks: natural language inference (AfriXNLI),
mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).


### Citation

```
@misc{adelani2024irokobenchnewbenchmarkafrican,
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
year={2024},
eprint={2406.03368},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2406.03368},
url={https://arxiv.org/abs/2406.03368},
}
```

Expand Down
12 changes: 6 additions & 6 deletions lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,18 @@ output_type: multiple_choice
validation_split: validation
test_split: test
fewshot_split: validation
doc_to_text: !function utils.doc_to_text
doc_to_text: !function utils.doc_to_text
doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
doc_to_choice: !function utils.doc_to_choice
should_decontaminate: true
doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
metric_list:
- metric: f1
aggregation: !function utils.weighted_f1_score
- metric: f1
aggregation: !function utils.weighted_f1_score
# aggregation: mean
average: weighted
hf_evaluate: true
higher_is_better: True
average: weighted
hf_evaluate: true
higher_is_better: True
ignore_case: true
ignore_punctuation: true
regexes_to_ignore:
Expand Down
1 change: 0 additions & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
dataset_name: eng
include: afrimmlu_common_yaml
task: afrimmlu_direct_eng

1 change: 0 additions & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
dataset_name: ewe
include: afrimmlu_common_yaml
task: afrimmlu_direct_ewe

2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: fra
include: afrimmlu_common_yaml
task: afrimmlu_direct_fra
task: afrimmlu_direct_fra
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: hau
include: afrimmlu_common_yaml
task: afrimmlu_direct_hau
task: afrimmlu_direct_hau
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: ibo
include: afrimmlu_common_yaml
task: afrimmlu_direct_ibo
task: afrimmlu_direct_ibo
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: kin
include: afrimmlu_common_yaml
task: afrimmlu_direct_kin
task: afrimmlu_direct_kin
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: lin
include: afrimmlu_common_yaml
task: afrimmlu_direct_lin
task: afrimmlu_direct_lin
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: lug
include: afrimmlu_common_yaml
task: afrimmlu_direct_lug
task: afrimmlu_direct_lug
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: orm
include: afrimmlu_common_yaml
task: afrimmlu_direct_orm
task: afrimmlu_direct_orm
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: sna
include: afrimmlu_common_yaml
task: afrimmlu_direct_sna
task: afrimmlu_direct_sna
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: sot
include: afrimmlu_common_yaml
task: afrimmlu_direct_sot
task: afrimmlu_direct_sot
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: swa
include: afrimmlu_common_yaml
task: afrimmlu_direct_swa
task: afrimmlu_direct_swa
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: twi
include: afrimmlu_common_yaml
task: afrimmlu_direct_twi
task: afrimmlu_direct_twi
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: wol
include: afrimmlu_common_yaml
task: afrimmlu_direct_wol
task: afrimmlu_direct_wol
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: xho
include: afrimmlu_common_yaml
task: afrimmlu_direct_xho
task: afrimmlu_direct_xho
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: yor
include: afrimmlu_common_yaml
task: afrimmlu_direct_yor
task: afrimmlu_direct_yor
2 changes: 1 addition & 1 deletion lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dataset_name: zul
include: afrimmlu_common_yaml
task: afrimmlu_direct_zul
task: afrimmlu_direct_zul
Loading

0 comments on commit 56a4e79

Please sign in to comment.