From 56a4e7943fca2959aff06bb96a3fe5ec63255aaa Mon Sep 17 00:00:00 2001
From: Lintang Sutawika <lintang@eleuther.ai>
Date: Mon, 15 Jul 2024 21:24:49 +0700
Subject: [PATCH] formatting (#2104)

---
 lm_eval/api/metrics.py                        |   2 +-
 lm_eval/api/task.py                           |   2 +-
 lm_eval/filters/extraction.py                 |   2 +-
 lm_eval/tasks/afrimgsm/README.md              |   8 +-
 lm_eval/tasks/afrimgsm/run.sh                 |   2 +-
 lm_eval/tasks/afrimgsm/utils.py               | 106 +++++++++++-------
 lm_eval/tasks/afrimmlu/README.md              |   8 +-
 .../afrimmlu/direct/afrimmlu_common_yaml      |  12 +-
 .../afrimmlu/direct/afrimmlu_direct_eng.yaml  |   1 -
 .../afrimmlu/direct/afrimmlu_direct_ewe.yaml  |   1 -
 .../afrimmlu/direct/afrimmlu_direct_fra.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_hau.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_ibo.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_kin.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_lin.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_lug.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_orm.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_sna.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_sot.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_swa.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_twi.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_wol.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_xho.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_yor.yaml  |   2 +-
 .../afrimmlu/direct/afrimmlu_direct_zul.yaml  |   2 +-
 lm_eval/tasks/afrimmlu/direct/utils.py        |  29 +++--
 lm_eval/tasks/afrimmlu/fewshot.sh             |   2 +-
 .../translate/afrimmlu_common_translate_yaml  |  12 +-
 .../translate/afrimmlu_translate_eng.yaml     |   1 -
 .../translate/afrimmlu_translate_ewe.yaml     |   1 -
 .../translate/afrimmlu_translate_fra.yaml     |   2 +-
 .../translate/afrimmlu_translate_hau.yaml     |   2 +-
 .../translate/afrimmlu_translate_ibo.yaml     |   2 +-
 .../translate/afrimmlu_translate_kin.yaml     |   2 +-
 .../translate/afrimmlu_translate_lin.yaml     |   2 +-
 .../translate/afrimmlu_translate_lug.yaml     |   2 +-
 .../translate/afrimmlu_translate_orm.yaml     |   2 +-
 .../translate/afrimmlu_translate_sna.yaml     |   2 +-
 .../translate/afrimmlu_translate_sot.yaml     |   2 +-
 .../translate/afrimmlu_translate_swa.yaml     |   2 +-
 .../translate/afrimmlu_translate_twi.yaml     |   2 +-
 .../translate/afrimmlu_translate_wol.yaml     |   2 +-
 .../translate/afrimmlu_translate_xho.yaml     |   2 +-
 .../translate/afrimmlu_translate_yor.yaml     |   2 +-
 .../translate/afrimmlu_translate_zul.yaml     |   2 +-
 lm_eval/tasks/afrimmlu/translate/utils.py     |  24 ++--
 lm_eval/tasks/afrimmlu/utils.py               |  24 ++--
 lm_eval/tasks/afrixnli/README.md              |  10 +-
 .../afrixnli/anli prompt/en-direct/utils.py   |   6 +-
 .../translate/afrixnli_translate_amh.yaml     |   1 -
 .../afrixnli/anli prompt/translate/utils.py   |   6 +-
 .../tasks/afrixnli/lai prompt/direct/utils.py |  15 +--
 .../afrixnli/lai prompt/translate/utils.py    |  15 +--
 lm_eval/tasks/afrixnli/utils.py               |  85 +++++++++-----
 .../med_concepts_qa/_med_concepts_qa_atc.yaml |   2 +-
 .../_med_concepts_qa_icd10proc.yaml           |   2 +-
 .../_med_concepts_qa_icd9cm.yaml              |   2 +-
 .../_med_concepts_qa_icd9proc.yaml            |   2 +-
 58 files changed, 234 insertions(+), 209 deletions(-)

diff --git a/lm_eval/api/metrics.py b/lm_eval/api/metrics.py
index 5ea6b221fd..1daf3847de 100644
--- a/lm_eval/api/metrics.py
+++ b/lm_eval/api/metrics.py
@@ -565,4 +565,4 @@ def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):
 
     assert len(metrics) == len(sizes)
 
-    return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
\ No newline at end of file
+    return sum([metric * size for metric, size in zip(metrics, sizes)]) / sum(sizes)
diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py
index ec9caccfba..030e6857f8 100644
--- a/lm_eval/api/task.py
+++ b/lm_eval/api/task.py
@@ -1665,4 +1665,4 @@ def count_bytes(cls, doc) -> int:
     @classmethod
     def count_words(cls, doc) -> int:
         """Downstream tasks with custom word boundaries should override this!"""
-        return len(re.split(r"\s+", doc))
\ No newline at end of file
+        return len(re.split(r"\s+", doc))
diff --git a/lm_eval/filters/extraction.py b/lm_eval/filters/extraction.py
index 4f087451d9..41dc6208ce 100644
--- a/lm_eval/filters/extraction.py
+++ b/lm_eval/filters/extraction.py
@@ -181,4 +181,4 @@ def filter_ignores(st):
                 filtered.append(match)
             filtered_resps.append(filtered)
 
-        return filtered_resps
\ No newline at end of file
+        return filtered_resps
diff --git a/lm_eval/tasks/afrimgsm/README.md b/lm_eval/tasks/afrimgsm/README.md
index 8f9d4619fb..cca14d968d 100644
--- a/lm_eval/tasks/afrimgsm/README.md
+++ b/lm_eval/tasks/afrimgsm/README.md
@@ -5,8 +5,8 @@
 IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
 https://arxiv.org/pdf/2406.03368
 
-IrokoBench is a human-translated benchmark dataset for 16 typologically diverse 
-low-resource African languages covering three tasks: natural language inference (AfriXNLI), 
+IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
+low-resource African languages covering three tasks: natural language inference (AfriXNLI),
 mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
 
 
@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
 
 ```
 @misc{adelani2024irokobenchnewbenchmarkafrican,
-      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, 
+      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
       author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
       year={2024},
       eprint={2406.03368},
       archivePrefix={arXiv},
       primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2406.03368}, 
+      url={https://arxiv.org/abs/2406.03368},
 }
 ```
 
diff --git a/lm_eval/tasks/afrimgsm/run.sh b/lm_eval/tasks/afrimgsm/run.sh
index 370ffd9a8a..075500be33 100644
--- a/lm_eval/tasks/afrimgsm/run.sh
+++ b/lm_eval/tasks/afrimgsm/run.sh
@@ -3,4 +3,4 @@ lm_eval --model hf   \
         --device cuda:0     \
         --batch_size 1  \
         --verbosity DEBUG \
-        --limit 5
\ No newline at end of file
+        --limit 5
diff --git a/lm_eval/tasks/afrimgsm/utils.py b/lm_eval/tasks/afrimgsm/utils.py
index 8899e8868f..0dd336f8b3 100644
--- a/lm_eval/tasks/afrimgsm/utils.py
+++ b/lm_eval/tasks/afrimgsm/utils.py
@@ -2,51 +2,74 @@
 
 import yaml
 
-languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'lin', 'wol', 'ewe', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm', 'yor',
-             'hau', 'sot', 'swa']
-
-languages_REGEX = {"eng": "The answer is (\\-?[0-9\\.\\,]+)",
-                   "amh": "መልሱ (\\-?[0-9\\.\\,]+)",
-                   "ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
-                   'fra': "La réponse est(\\-?[0-9\\.\\,]+)",
-                   'sna': "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
-                   'lin': "Eyano ezali (\\-?[0-9\\.\\,]+)",
-                   'wol': "Tontu li (\\-?[0-9\\.\\,]+)",
-                   'ewe': "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
-                   'lug': "Ansa eri (\\-?[0-9\\.\\,]+)",
-                   'xho': "Impendulo ngu (\\-?[0-9\\.\\,]+)",
-                   'kin': "Igisubizo ni (\\-?[0-9\\.\\,]+)",
-                   'twi': "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
-                   'zul': "Impendulo ithi (\\-?[0-9\\.\\,]+)",
-                   'orm': "Deebiin isaa (\\-?[0-9\\.\\,]+)",
-                   'yor': "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
-                   'hau': "Amsar ita ce (\\-?[0-9\\.\\,]+)",
-                   'sot': "Karabo ke (\\-?[0-9\\.\\,]+)",
-                   'swa': "Jibu ni (\\-?[0-9\\.\\,]+)",
-                   }
+
+languages = [
+    "eng",
+    "amh",
+    "ibo",
+    "fra",
+    "sna",
+    "lin",
+    "wol",
+    "ewe",
+    "lug",
+    "xho",
+    "kin",
+    "twi",
+    "zul",
+    "orm",
+    "yor",
+    "hau",
+    "sot",
+    "swa",
+]
+
+languages_REGEX = {
+    "eng": "The answer is (\\-?[0-9\\.\\,]+)",
+    "amh": "መልሱ (\\-?[0-9\\.\\,]+)",
+    "ibo": "Azịza ya bụ (\\-?[0-9\\.\\,]+)",
+    "fra": "La réponse est(\\-?[0-9\\.\\,]+)",
+    "sna": "Mhinduro kumubvunzo ndi (\\-?[0-9\\.\\,]+)",
+    "lin": "Eyano ezali (\\-?[0-9\\.\\,]+)",
+    "wol": "Tontu li (\\-?[0-9\\.\\,]+)",
+    "ewe": "ŋuɖoɖoae nye (\\-?[0-9\\.\\,]+)",
+    "lug": "Ansa eri (\\-?[0-9\\.\\,]+)",
+    "xho": "Impendulo ngu (\\-?[0-9\\.\\,]+)",
+    "kin": "Igisubizo ni (\\-?[0-9\\.\\,]+)",
+    "twi": "Ne nnyiano yɛ (\\-?[0-9\\.\\,]+)",
+    "zul": "Impendulo ithi (\\-?[0-9\\.\\,]+)",
+    "orm": "Deebiin isaa (\\-?[0-9\\.\\,]+)",
+    "yor": "Ìdáhùn náà ni (\\-?[0-9\\.\\,]+)",
+    "hau": "Amsar ita ce (\\-?[0-9\\.\\,]+)",
+    "sot": "Karabo ke (\\-?[0-9\\.\\,]+)",
+    "swa": "Jibu ni (\\-?[0-9\\.\\,]+)",
+}
 
 LANGUAGES = {}
 
 for lang in languages:
-    if lang == 'amh':
+    if lang == "amh":
         LANGUAGES[lang] = {  # English
             "QUESTION": "ጥያቄ:",
             "ANSWER": "በቅደም ተከተል መልስ:",
             "DIRECT": "Answer:",
-            "REGEX": languages_REGEX[lang]}
-    elif lang == 'yor':
+            "REGEX": languages_REGEX[lang],
+        }
+    elif lang == "yor":
         LANGUAGES[lang] = {  # English
             "QUESTION": "Ìbéèrè:",
             "ANSWER": "Ìdáhùn lẹ́sẹsẹ:",
             "DIRECT": "Answer:",
-            "REGEX": languages_REGEX[lang]}
+            "REGEX": languages_REGEX[lang],
+        }
 
     else:
         LANGUAGES[lang] = {  # English
             "QUESTION": "Question:",
             "ANSWER": "Step-by-Step Answer:",
             "DIRECT": "Answer:",
-            "REGEX": languages_REGEX[lang]}
+            "REGEX": languages_REGEX[lang],
+        }
 
 
 def add_regex_pattern(regex_pattern):
@@ -93,13 +116,12 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
     err = []
     for lang in LANGUAGES.keys():
         try:
-
             yaml_template = "cot_yaml"
             filter_list = {}
             DELIMITER = None
             if mode == "direct":
-                ANSWER = LANGUAGES['eng']["DIRECT"]
-                QUESTION = LANGUAGES['eng']["QUESTION"]
+                ANSWER = LANGUAGES["eng"]["DIRECT"]
+                QUESTION = LANGUAGES["eng"]["QUESTION"]
                 REGEX = None
                 task_name = f"afrimgsm_direct_{lang}"
                 yaml_template = "direct_yaml"
@@ -122,8 +144,8 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                 QUESTION = LANGUAGES["eng"]["QUESTION"]
                 task_name = f"afrimgsm_en_cot_{lang}"
             elif mode == "translate-direct":
-                ANSWER = LANGUAGES['eng']["DIRECT"]
-                QUESTION = LANGUAGES['eng']["QUESTION"]
+                ANSWER = LANGUAGES["eng"]["DIRECT"]
+                QUESTION = LANGUAGES["eng"]["QUESTION"]
                 REGEX = None
                 task_name = f"afrimgsm_translate_direct_{lang}"
                 yaml_template = "translate_direct_yaml"
@@ -131,7 +153,7 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
             file_name = f"{task_name}.yaml"
             ANSWER_TO_SKIP = len(LANGUAGES[lang]["ANSWER"]) + 1
             with open(
-                    f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
             ) as f:
                 f.write("# Generated by utils.py\n")
                 yaml.dump(
@@ -140,15 +162,15 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                         "dataset_name": lang,
                         "task": f"{task_name}",
                         "doc_to_text": f"""{{% if answer is not none %}}"""
-                                       f"""{{{{question+"\\n{ANSWER}"}}}}"""
-                                       f"""{{% else %}}"""
-                                       f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
-                                       f"""{{% endif %}}""",
+                        f"""{{{{question+"\\n{ANSWER}"}}}}"""
+                        f"""{{% else %}}"""
+                        f"""{{{{"{QUESTION} "+question+"\\n{ANSWER}"}}}}"""
+                        f"""{{% endif %}}""",
                         "doc_to_target": f"""{{% if answer is not none %}}"""
-                                         f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
-                                         f"""{{% else %}}"""
-                                         f"""{{{{answer_number|string}}}}"""
-                                         f"""{{% endif %}}""",
+                        f"""{{{{answer[{ANSWER_TO_SKIP}:]}}}}"""
+                        f"""{{% else %}}"""
+                        f"""{{{{answer_number|string}}}}"""
+                        f"""{{% endif %}}""",
                         **filter_list,
                         "generation_kwargs": {
                             "until": [QUESTION, "</s>", "<|im_end|>"],
@@ -194,4 +216,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/lm_eval/tasks/afrimmlu/README.md b/lm_eval/tasks/afrimmlu/README.md
index b512937811..f7f7ed4d82 100644
--- a/lm_eval/tasks/afrimmlu/README.md
+++ b/lm_eval/tasks/afrimmlu/README.md
@@ -5,8 +5,8 @@
 IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
 https://arxiv.org/pdf/2406.03368
 
-IrokoBench is a human-translated benchmark dataset for 16 typologically diverse 
-low-resource African languages covering three tasks: natural language inference (AfriXNLI), 
+IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
+low-resource African languages covering three tasks: natural language inference (AfriXNLI),
 mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
 
 
@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
 
 ```
 @misc{adelani2024irokobenchnewbenchmarkafrican,
-      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, 
+      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
       author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
       year={2024},
       eprint={2406.03368},
       archivePrefix={arXiv},
       primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2406.03368}, 
+      url={https://arxiv.org/abs/2406.03368},
 }
 ```
 
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
index 8c42b8f58a..47d16d95dd 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_common_yaml
@@ -9,18 +9,18 @@ output_type: multiple_choice
 validation_split: validation
 test_split: test
 fewshot_split: validation
-doc_to_text: !function utils.doc_to_text 
+doc_to_text: !function utils.doc_to_text
 doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
 doc_to_choice: !function utils.doc_to_choice
 should_decontaminate: true
 doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
 metric_list:
-  - metric: f1 
-    aggregation: !function utils.weighted_f1_score 
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
     # aggregation: mean
-    average: weighted 
-    hf_evaluate: true 
-    higher_is_better: True 
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
     ignore_case: true
     ignore_punctuation: true
     regexes_to_ignore:
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
index f2add8a5ff..a1e647cdf1 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_eng.yaml
@@ -1,4 +1,3 @@
 dataset_name: eng
 include: afrimmlu_common_yaml
 task: afrimmlu_direct_eng
-
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
index e0b00c08ab..1cc45ddc0e 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ewe.yaml
@@ -1,4 +1,3 @@
 dataset_name: ewe
 include: afrimmlu_common_yaml
 task: afrimmlu_direct_ewe
-
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
index 6e3383b387..e6adb6c8aa 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_fra.yaml
@@ -1,3 +1,3 @@
 dataset_name: fra
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_fra
\ No newline at end of file
+task: afrimmlu_direct_fra
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
index a28b083c4d..9cc9a1ae7a 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_hau.yaml
@@ -1,3 +1,3 @@
 dataset_name: hau
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_hau
\ No newline at end of file
+task: afrimmlu_direct_hau
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
index f9a863f237..6abb2c4a46 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_ibo.yaml
@@ -1,3 +1,3 @@
 dataset_name: ibo
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_ibo
\ No newline at end of file
+task: afrimmlu_direct_ibo
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
index bfd008d4a3..2f81f709c4 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_kin.yaml
@@ -1,3 +1,3 @@
 dataset_name: kin
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_kin
\ No newline at end of file
+task: afrimmlu_direct_kin
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
index a541b655fb..55363ed937 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lin.yaml
@@ -1,3 +1,3 @@
 dataset_name: lin
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_lin
\ No newline at end of file
+task: afrimmlu_direct_lin
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
index b6d0be0505..0d484427ed 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_lug.yaml
@@ -1,3 +1,3 @@
 dataset_name: lug
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_lug
\ No newline at end of file
+task: afrimmlu_direct_lug
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
index 124bbe59ba..763eb8a75f 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_orm.yaml
@@ -1,3 +1,3 @@
 dataset_name: orm
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_orm
\ No newline at end of file
+task: afrimmlu_direct_orm
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
index 9d75eb8570..ed9e69af39 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sna.yaml
@@ -1,3 +1,3 @@
 dataset_name: sna
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_sna
\ No newline at end of file
+task: afrimmlu_direct_sna
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
index fba23339e5..acdba0fdcc 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_sot.yaml
@@ -1,3 +1,3 @@
 dataset_name: sot
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_sot
\ No newline at end of file
+task: afrimmlu_direct_sot
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
index 099ecb0d7d..c1aa82b0b1 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_swa.yaml
@@ -1,3 +1,3 @@
 dataset_name: swa
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_swa
\ No newline at end of file
+task: afrimmlu_direct_swa
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
index 2da2e627c7..2695d4a156 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_twi.yaml
@@ -1,3 +1,3 @@
 dataset_name: twi
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_twi
\ No newline at end of file
+task: afrimmlu_direct_twi
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
index 938247ab22..027f837637 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_wol.yaml
@@ -1,3 +1,3 @@
 dataset_name: wol
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_wol
\ No newline at end of file
+task: afrimmlu_direct_wol
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
index bcaa7a6229..8e0c12972d 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_xho.yaml
@@ -1,3 +1,3 @@
 dataset_name: xho
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_xho
\ No newline at end of file
+task: afrimmlu_direct_xho
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
index a83c8454f2..2a9f7645c2 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_yor.yaml
@@ -1,3 +1,3 @@
 dataset_name: yor
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_yor
\ No newline at end of file
+task: afrimmlu_direct_yor
diff --git a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
index a597d3bbde..9d8d3b415b 100644
--- a/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
+++ b/lm_eval/tasks/afrimmlu/direct/afrimmlu_direct_zul.yaml
@@ -1,3 +1,3 @@
 dataset_name: zul
 include: afrimmlu_common_yaml
-task: afrimmlu_direct_zul
\ No newline at end of file
+task: afrimmlu_direct_zul
diff --git a/lm_eval/tasks/afrimmlu/direct/utils.py b/lm_eval/tasks/afrimmlu/direct/utils.py
index 2250b4b3ec..6d30579d79 100644
--- a/lm_eval/tasks/afrimmlu/direct/utils.py
+++ b/lm_eval/tasks/afrimmlu/direct/utils.py
@@ -1,9 +1,4 @@
-import re
-import sys
-import unicodedata
-
 from sklearn.metrics import f1_score
-from lm_eval.filters.extraction import RegexFilter
 
 
 def doc_to_choice(doc):
@@ -12,9 +7,9 @@ def doc_to_choice(doc):
 
 
 def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence 
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
                 model answers multiple-choice questions about {subject}
-                
+
                 Question: {question}
 
                 Choices:
@@ -22,16 +17,18 @@ def doc_to_text(doc):
                         B: {choice2}
                         C: {choice3}
                         D: {choice4}
-                       
+
                 Answer:  """
-    
+
     choices = eval(doc["choices"])
-    text = output.format(subject=doc['subject'],
-                         question=doc['question'],
-                         choice1=choices[0],
-                         choice2=choices[1],
-                         choice3=choices[2],
-                         choice4=choices[3])
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
     return text
 
 
@@ -40,4 +37,4 @@ def weighted_f1_score(items):
     golds = unzipped_list[0]
     preds = unzipped_list[1]
     fscore = f1_score(golds, preds, average="weighted")
-    return fscore
\ No newline at end of file
+    return fscore
diff --git a/lm_eval/tasks/afrimmlu/fewshot.sh b/lm_eval/tasks/afrimmlu/fewshot.sh
index 42902dddc2..c69c48d7df 100644
--- a/lm_eval/tasks/afrimmlu/fewshot.sh
+++ b/lm_eval/tasks/afrimmlu/fewshot.sh
@@ -5,4 +5,4 @@ lm_eval --model hf \
         --batch_size 1 \
         --num_fewshot 0 \
         --verbosity DEBUG \
-        --wandb_args project=afrimmlu
\ No newline at end of file
+        --wandb_args project=afrimmlu
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
index 247a677923..2bc87d5aa7 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_common_translate_yaml
@@ -6,18 +6,18 @@ dataset_path: masakhane/afrimmlu-translate-test
 dataset_name: null
 output_type: multiple_choice
 test_split: test
-doc_to_text: !function utils.doc_to_text 
+doc_to_text: !function utils.doc_to_text
 doc_to_target: "{{['A', 'B', 'C', 'D'].index(answer)}}"
 doc_to_choice: !function utils.doc_to_choice
 should_decontaminate: true
 doc_to_decontamination_query: "Question: {{question}}\nAnswer:"
 metric_list:
-  - metric: f1 
-    aggregation: !function utils.weighted_f1_score 
+  - metric: f1
+    aggregation: !function utils.weighted_f1_score
     # aggregation: mean
-    average: weighted 
-    hf_evaluate: true 
-    higher_is_better: True 
+    average: weighted
+    hf_evaluate: true
+    higher_is_better: True
     ignore_case: true
     ignore_punctuation: true
     regexes_to_ignore:
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
index 30c5007881..0be98beedd 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_eng.yaml
@@ -1,4 +1,3 @@
 dataset_name: eng
 include: afrimmlu_common_translate_yaml
 task: afrimmlu_translate_eng
-
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
index 3c764f9c98..624342b91f 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ewe.yaml
@@ -1,4 +1,3 @@
 dataset_name: ewe
 include: afrimmlu_common_translate_yaml
 task: afrimmlu_translate_ewe
-
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
index 5401255c62..c4fd7e1fc7 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_fra.yaml
@@ -1,3 +1,3 @@
 dataset_name: fra
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_fra
\ No newline at end of file
+task: afrimmlu_translate_fra
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
index 590baa6e27..aaeb415fa2 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_hau.yaml
@@ -1,3 +1,3 @@
 dataset_name: hau
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_hau
\ No newline at end of file
+task: afrimmlu_translate_hau
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
index 25c306d185..93fb24e8c3 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_ibo.yaml
@@ -1,3 +1,3 @@
 dataset_name: ibo
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_ibo
\ No newline at end of file
+task: afrimmlu_translate_ibo
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
index ef917724a3..f39f666840 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_kin.yaml
@@ -1,3 +1,3 @@
 dataset_name: kin
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_kin
\ No newline at end of file
+task: afrimmlu_translate_kin
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
index a07db09029..c935ee4738 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lin.yaml
@@ -1,3 +1,3 @@
 dataset_name: lin
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_lin
\ No newline at end of file
+task: afrimmlu_translate_lin
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
index f23a9472ec..72e4bce011 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_lug.yaml
@@ -1,3 +1,3 @@
 dataset_name: lug
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_lug
\ No newline at end of file
+task: afrimmlu_translate_lug
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
index bee6ff8435..3ff9024994 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_orm.yaml
@@ -1,3 +1,3 @@
 dataset_name: orm
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_orm
\ No newline at end of file
+task: afrimmlu_translate_orm
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
index ca99078ddd..9979740a9b 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sna.yaml
@@ -1,3 +1,3 @@
 dataset_name: sna
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_sna
\ No newline at end of file
+task: afrimmlu_translate_sna
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
index 3c617bc638..deb2b9b81d 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_sot.yaml
@@ -1,3 +1,3 @@
 dataset_name: sot
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_sot
\ No newline at end of file
+task: afrimmlu_translate_sot
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
index a4baad77aa..e58d90bc69 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_swa.yaml
@@ -1,3 +1,3 @@
 dataset_name: swa
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_swa
\ No newline at end of file
+task: afrimmlu_translate_swa
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
index 16436ff38a..51a2d26ae0 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_twi.yaml
@@ -1,3 +1,3 @@
 dataset_name: twi
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_twi
\ No newline at end of file
+task: afrimmlu_translate_twi
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
index 025139a168..006b684782 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_wol.yaml
@@ -1,3 +1,3 @@
 dataset_name: wol
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_wol
\ No newline at end of file
+task: afrimmlu_translate_wol
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
index 4404ab6152..c0bdf4471b 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_xho.yaml
@@ -1,3 +1,3 @@
 dataset_name: xho
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_xho
\ No newline at end of file
+task: afrimmlu_translate_xho
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
index 48152ab2b6..0e7ba6005b 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_yor.yaml
@@ -1,3 +1,3 @@
 dataset_name: yor
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_yor
\ No newline at end of file
+task: afrimmlu_translate_yor
diff --git a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
index b4fc8dfbec..a18d251cc8 100644
--- a/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
+++ b/lm_eval/tasks/afrimmlu/translate/afrimmlu_translate_zul.yaml
@@ -1,3 +1,3 @@
 dataset_name: zul
 include: afrimmlu_common_translate_yaml
-task: afrimmlu_translate_zul
\ No newline at end of file
+task: afrimmlu_translate_zul
diff --git a/lm_eval/tasks/afrimmlu/translate/utils.py b/lm_eval/tasks/afrimmlu/translate/utils.py
index 6054cf31d4..f53fb68f11 100644
--- a/lm_eval/tasks/afrimmlu/translate/utils.py
+++ b/lm_eval/tasks/afrimmlu/translate/utils.py
@@ -7,9 +7,9 @@ def doc_to_choice(doc):
 
 
 def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence 
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
                 model answers multiple-choice questions about '{subject}'
-                
+
                 Question: '''{question}'''
 
                 Choices:
@@ -17,16 +17,18 @@ def doc_to_text(doc):
                         B: ''{choice2}'''
                         C: ''{choice3}'''
                         D: ''{choice4}'''
-                       
+
                 Answer:  """
-    
+
     choices = eval(doc["choices"])
-    text = output.format(subject=doc['subject'],
-                         question=doc['question'],
-                         choice1=choices[0],
-                         choice2=choices[1],
-                         choice3=choices[2],
-                         choice4=choices[3])
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
     return text
 
 
@@ -35,4 +37,4 @@ def weighted_f1_score(items):
     golds = unzipped_list[0]
     preds = unzipped_list[1]
     fscore = f1_score(golds, preds, average="weighted")
-    return fscore
\ No newline at end of file
+    return fscore
diff --git a/lm_eval/tasks/afrimmlu/utils.py b/lm_eval/tasks/afrimmlu/utils.py
index 6054cf31d4..f53fb68f11 100644
--- a/lm_eval/tasks/afrimmlu/utils.py
+++ b/lm_eval/tasks/afrimmlu/utils.py
@@ -7,9 +7,9 @@ def doc_to_choice(doc):
 
 
 def doc_to_text(doc):
-    output = """You are a highly knowledgeable and intelligent artificial intelligence 
+    output = """You are a highly knowledgeable and intelligent artificial intelligence
                 model answers multiple-choice questions about '{subject}'
-                
+
                 Question: '''{question}'''
 
                 Choices:
@@ -17,16 +17,18 @@ def doc_to_text(doc):
                         B: ''{choice2}'''
                         C: ''{choice3}'''
                         D: ''{choice4}'''
-                       
+
                 Answer:  """
-    
+
     choices = eval(doc["choices"])
-    text = output.format(subject=doc['subject'],
-                         question=doc['question'],
-                         choice1=choices[0],
-                         choice2=choices[1],
-                         choice3=choices[2],
-                         choice4=choices[3])
+    text = output.format(
+        subject=doc["subject"],
+        question=doc["question"],
+        choice1=choices[0],
+        choice2=choices[1],
+        choice3=choices[2],
+        choice4=choices[3],
+    )
     return text
 
 
@@ -35,4 +37,4 @@ def weighted_f1_score(items):
     golds = unzipped_list[0]
     preds = unzipped_list[1]
     fscore = f1_score(golds, preds, average="weighted")
-    return fscore
\ No newline at end of file
+    return fscore
diff --git a/lm_eval/tasks/afrixnli/README.md b/lm_eval/tasks/afrixnli/README.md
index cf8e0ff22b..65b0272bc6 100644
--- a/lm_eval/tasks/afrixnli/README.md
+++ b/lm_eval/tasks/afrixnli/README.md
@@ -5,8 +5,8 @@
 IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
 https://arxiv.org/pdf/2406.03368
 
-IrokoBench is a human-translated benchmark dataset for 16 typologically diverse 
-low-resource African languages covering three tasks: natural language inference (AfriXNLI), 
+IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
+low-resource African languages covering three tasks: natural language inference (AfriXNLI),
 mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
 
 
@@ -14,13 +14,13 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
 
 ```
 @misc{adelani2024irokobenchnewbenchmarkafrican,
-      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models}, 
+      title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
       author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
       year={2024},
       eprint={2406.03368},
       archivePrefix={arXiv},
       primaryClass={cs.CL},
-      url={https://arxiv.org/abs/2406.03368}, 
+      url={https://arxiv.org/abs/2406.03368},
 }
 ```
 
@@ -30,7 +30,7 @@ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU
 
 * `afrixnli`: All afrixnli tasks
 * `afrixnli_en_direct`: afrixnli_en_direct evaluates models performance using the anli prompt on the curated dataset
-* `afrixnli_native_direct`: afrixnli_native_direct evaluates models performance using the anli prompt translated to the 
+* `afrixnli_native_direct`: afrixnli_native_direct evaluates models performance using the anli prompt translated to the
 respective languages on the curated dataset
 * `afrixnli_translate`: afrixnli_translate evaluates models using the anli prompt in translate-test setting
 * `afrixnli_manual_direct`: afrixnli_manual_direct evaluates models performance using Lai's prompt on the curated dataset
diff --git a/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py b/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py
index c4beaf8ec8..17df7ca963 100644
--- a/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py	
+++ b/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py	
@@ -2,11 +2,7 @@
 
 
 def doc_to_target(doc):
-    replacements = {
-        0: 'True',
-        1: 'Neither',
-        2: 'False'
-    }
+    replacements = {0: "True", 1: "Neither", 2: "False"}
     return replacements[doc["label"]]
 
 
diff --git a/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml b/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml
index 785ffe997f..94fb2bdcb6 100644
--- a/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml	
+++ b/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml	
@@ -2,4 +2,3 @@
 dataset_name: amh
 include: afrixnli_translate_yaml
 task: afrixnli_translate_amh
-
diff --git a/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py b/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py
index c4beaf8ec8..17df7ca963 100644
--- a/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py	
+++ b/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py	
@@ -2,11 +2,7 @@
 
 
 def doc_to_target(doc):
-    replacements = {
-        0: 'True',
-        1: 'Neither',
-        2: 'False'
-    }
+    replacements = {0: "True", 1: "Neither", 2: "False"}
     return replacements[doc["label"]]
 
 
diff --git a/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py b/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py
index e8e3fb5882..8f472503c6 100644
--- a/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py	
+++ b/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py	
@@ -2,25 +2,20 @@
 
 
 def doc_to_text(doc):
-    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise 
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
     and hypothesis. The answer should be exact entailment, contradiction, or neutral.
-    
+
     Premise: {premise}
     Hypothesis: {hypothesis}
-    
+
     Is it entailment, contradiction, or neutral?"""
 
-    text = output.format(premise=doc['premise'],
-                         hypothesis=doc['hypothesis'])
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
     return text
 
 
 def doc_to_target(doc):
-    replacements = {
-        0: 'entailment',
-        1: 'neutral',
-        2: 'contradiction'
-    }
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}
     return replacements[doc["label"]]
 
 
diff --git a/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py b/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py
index e8e3fb5882..8f472503c6 100644
--- a/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py	
+++ b/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py	
@@ -2,25 +2,20 @@
 
 
 def doc_to_text(doc):
-    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise 
+    output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise
     and hypothesis. The answer should be exact entailment, contradiction, or neutral.
-    
+
     Premise: {premise}
     Hypothesis: {hypothesis}
-    
+
     Is it entailment, contradiction, or neutral?"""
 
-    text = output.format(premise=doc['premise'],
-                         hypothesis=doc['hypothesis'])
+    text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"])
     return text
 
 
 def doc_to_target(doc):
-    replacements = {
-        0: 'entailment',
-        1: 'neutral',
-        2: 'contradiction'
-    }
+    replacements = {0: "entailment", 1: "neutral", 2: "contradiction"}
     return replacements[doc["label"]]
 
 
diff --git a/lm_eval/tasks/afrixnli/utils.py b/lm_eval/tasks/afrixnli/utils.py
index 088f8fe66a..905a72b001 100644
--- a/lm_eval/tasks/afrixnli/utils.py
+++ b/lm_eval/tasks/afrixnli/utils.py
@@ -1,6 +1,7 @@
-import yaml
 import argparse
 
+import yaml
+
 
 class FunctionTag:
     def __init__(self, value):
@@ -12,110 +13,110 @@ def __init__(self, value):
         "QUESTION_WORD": "ትክክል",
         "ENTAILMENT_LABEL": "አዎ",
         "NEUTRAL_LABEL": "እንዲሁም",
-        "CONTRADICTION_LABEL": "አይ"
+        "CONTRADICTION_LABEL": "አይ",
     },
     "eng": {
         "QUESTION_WORD": "Right",
         "ENTAILMENT_LABEL": "Yes",
         "NEUTRAL_LABEL": "Also",
-        "CONTRADICTION_LABEL": "No"
+        "CONTRADICTION_LABEL": "No",
     },
     "ewe": {
         "QUESTION_WORD": "Esɔ gbe",
         "ENTAILMENT_LABEL": "Ɛ̃",
         "NEUTRAL_LABEL": "Hã",
-        "CONTRADICTION_LABEL": "Ao"
+        "CONTRADICTION_LABEL": "Ao",
     },
     "fra": {
         "QUESTION_WORD": "correct",
         "ENTAILMENT_LABEL": "Oui",
         "NEUTRAL_LABEL": "Aussi",
-        "CONTRADICTION_LABEL": "Non"
+        "CONTRADICTION_LABEL": "Non",
     },
     "hau": {
         "QUESTION_WORD": "Daidai",
         "ENTAILMENT_LABEL": "Ee",
         "NEUTRAL_LABEL": "Haka kuma",
-        "CONTRADICTION_LABEL": "A'a"
+        "CONTRADICTION_LABEL": "A'a",
     },
     "ibo": {
         "QUESTION_WORD": "Ziri ezi",
         "ENTAILMENT_LABEL": "Éè",
         "NEUTRAL_LABEL": "Ọzọkwa",
-        "CONTRADICTION_LABEL": "Mba"
+        "CONTRADICTION_LABEL": "Mba",
     },
     "kin": {
         "QUESTION_WORD": "Nibyo",
         "ENTAILMENT_LABEL": "Yego",
         "NEUTRAL_LABEL": "Na none",
-        "CONTRADICTION_LABEL": "Oya"
+        "CONTRADICTION_LABEL": "Oya",
     },
     "lin": {
         "QUESTION_WORD": "Malamu",
         "ENTAILMENT_LABEL": "Iyo",
         "NEUTRAL_LABEL": "Lisusu",
-        "CONTRADICTION_LABEL": "Te"
+        "CONTRADICTION_LABEL": "Te",
     },
     "lug": {
         "QUESTION_WORD": "Kituufu",
         "ENTAILMENT_LABEL": "Yee",
         "NEUTRAL_LABEL": "N’ekirala",
-        "CONTRADICTION_LABEL": "Nedda"
+        "CONTRADICTION_LABEL": "Nedda",
     },
     "orm": {
         "QUESTION_WORD": "Sirrii",
         "ENTAILMENT_LABEL": "Eeyyee",
         "NEUTRAL_LABEL": "Akkasumas",
-        "CONTRADICTION_LABEL": "Lakki"
+        "CONTRADICTION_LABEL": "Lakki",
     },
     "sna": {
         "QUESTION_WORD": "Chokwadi",
         "ENTAILMENT_LABEL": "Hongu",
         "NEUTRAL_LABEL": "Uye",
-        "CONTRADICTION_LABEL": "Kwete"
+        "CONTRADICTION_LABEL": "Kwete",
     },
     "sot": {
         "QUESTION_WORD": "Nepile",
         "ENTAILMENT_LABEL": "E",
         "NEUTRAL_LABEL": "Hape",
-        "CONTRADICTION_LABEL": "Tjhe"
+        "CONTRADICTION_LABEL": "Tjhe",
     },
     "swa": {
         "QUESTION_WORD": "Sahihi",
         "ENTAILMENT_LABEL": "Ndiyo",
         "NEUTRAL_LABEL": "Pia",
-        "CONTRADICTION_LABEL": "Hapana"
+        "CONTRADICTION_LABEL": "Hapana",
     },
     "twi": {
         "QUESTION_WORD": "Nifa",
         "ENTAILMENT_LABEL": "Aane",
         "NEUTRAL_LABEL": "Anaasɛ",
-        "CONTRADICTION_LABEL": "Daabi"
+        "CONTRADICTION_LABEL": "Daabi",
     },
     "wol": {
         "QUESTION_WORD": "Dëgg",
         "ENTAILMENT_LABEL": "Waaw",
         "NEUTRAL_LABEL": "Itam",
-        "CONTRADICTION_LABEL": "Déet"
+        "CONTRADICTION_LABEL": "Déet",
     },
     "xho": {
         "QUESTION_WORD": "Ichanekile",
         "ENTAILMENT_LABEL": "Ewe",
         "NEUTRAL_LABEL": "Kananjalo",
-        "CONTRADICTION_LABEL": "Hayi"
+        "CONTRADICTION_LABEL": "Hayi",
     },
     "yor": {
         "QUESTION_WORD": "Òótọ́",
         "ENTAILMENT_LABEL": "Bẹ́ẹ̀ni",
         "NEUTRAL_LABEL": "Àti pé",
-        "CONTRADICTION_LABEL": "Rárá"
+        "CONTRADICTION_LABEL": "Rárá",
     },
     "zul": {
         "QUESTION_WORD": "Kulungile",
         "ENTAILMENT_LABEL": "Yebo",
         "NEUTRAL_LABEL": "Futhi",
-        "CONTRADICTION_LABEL": "Cha"
-    }
+        "CONTRADICTION_LABEL": "Cha",
+    },
 }
 
 
@@ -127,8 +128,26 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
     :param overwrite: Whether to overwrite files if they already exist.
     """
     err = []
-    languages = ['eng', 'amh', 'ibo', 'fra', 'sna', 'wol', 'ewe', 'lin', 'lug', 'xho', 'kin', 'twi', 'zul', 'orm',
-                 'yor', 'hau', 'sot', 'swa']
+    languages = [
+        "eng",
+        "amh",
+        "ibo",
+        "fra",
+        "sna",
+        "wol",
+        "ewe",
+        "lin",
+        "lug",
+        "xho",
+        "kin",
+        "twi",
+        "zul",
+        "orm",
+        "yor",
+        "hau",
+        "sot",
+        "swa",
+    ]
     for lang in languages:
         try:
             if mode == "native-direct":
@@ -141,7 +160,9 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                 task_name = f"afrixnli_native_direct_{lang}"
                 yaml_template = "afrixnli_native_direct_yaml"
                 with open(
-                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                    f"{output_dir}/{file_name}",
+                    "w" if overwrite else "x",
+                    encoding="utf8",
                 ) as f:
                     f.write("# Generated by utils.py\n")
                     yaml.dump(
@@ -150,10 +171,10 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                             "task": task_name,
                             "dataset_name": lang,
                             "doc_to_choice": f"{{{{["
-                                           f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
-                                           f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
-                                           f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
-                                           f"]}}}}",
+                            f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
+                            f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
+                            f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
+                            f"]}}}}",
                         },
                         f,
                         allow_unicode=True,
@@ -163,14 +184,16 @@ def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
                 task_name = f"afrixnli_{mode}_{lang}"
                 yaml_template = f"afrixnli_{mode}_yaml"
                 with open(
-                        f"{output_dir}/{file_name}", "w" if overwrite else "x", encoding="utf8"
+                    f"{output_dir}/{file_name}",
+                    "w" if overwrite else "x",
+                    encoding="utf8",
                 ) as f:
                     f.write("# Generated by utils.py\n")
                     yaml.dump(
                         {
                             "include": yaml_template,
                             "task": task_name,
-                            "dataset_name": lang
+                            "dataset_name": lang,
                         },
                         f,
                         allow_unicode=True,
@@ -195,7 +218,9 @@ def main() -> None:
         help="Overwrite files if they already exist",
     )
     parser.add_argument(
-        "--output-dir", default="./manual/translate", help="Directory to write yaml files to"
+        "--output-dir",
+        default="./manual/translate",
+        help="Directory to write yaml files to",
     )
     parser.add_argument(
         "--mode",
diff --git a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
index b457ae582f..2e34ad36c2 100644
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_atc.yaml
@@ -3,4 +3,4 @@ task:
   - med_concepts_qa_atc_tasks
 aggregate_metric_list:
   - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
diff --git a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
index 777a5ce6ee..407ea4088d 100644
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd10proc.yaml
@@ -3,4 +3,4 @@ task:
   - med_concepts_qa_icd10proc_tasks
 aggregate_metric_list:
   - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
diff --git a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
index d5671bf035..b12ea811ff 100644
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9cm.yaml
@@ -3,4 +3,4 @@ task:
   - med_concepts_qa_icd9cm_tasks
 aggregate_metric_list:
   - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean
diff --git a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
index 4487772fa1..94fc034eb2 100644
--- a/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
+++ b/lm_eval/tasks/med_concepts_qa/_med_concepts_qa_icd9proc.yaml
@@ -3,4 +3,4 @@ task:
   - med_concepts_qa_icd9proc_tasks
 aggregate_metric_list:
   - metric: acc
-    aggregation: mean
\ No newline at end of file
+    aggregation: mean