samples is newline delimited (EleutherAI#1930)

* `samples` is newline delimited
* updated git and pre-commit
* appease pre-commit
* nit
* Revert back for now
* Revert for now

---------

Co-authored-by: Lintang Sutawika <[email protected]>
OpenLLM-France · Jun 13, 2024 · 3850e21 · 3850e21
1 parent 793469e
commit 3850e21
Show file tree

Hide file tree

Showing 25 changed files with 39 additions and 30 deletions.
diff --git a/.github/workflows/new_tasks.yml b/.github/workflows/new_tasks.yml
@@ -20,13 +20,13 @@ jobs:
         with:
           fetch-depth: 2  # OR "2" -> To retrieve the preceding commit.
 
-      # Uses the tj-actions/changed-files@v37 action to check for changes.
+      # Uses the tj-actions/changed-files action to check for changes.
       # Outputs provided here: https://github.com/tj-actions/changed-files#outputs
       # The `files_yaml` input optionally takes a yaml string to specify filters,
       # and prepends the filter name to the standard output names.
       - name: Check task folders
         id: changed-tasks
-        uses: tj-actions/changed-files@v37.1.2
+        uses: tj-actions/changed-files@v44.5.2
         with:
           # tasks checks the tasks folder and api checks the api folder for changes
           files_yaml: |

diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
@@ -32,7 +32,7 @@ jobs:
       env:
         SKIP: "no-commit-to-branch,mypy"
 
-      uses: pre-commit/action@v3.0.0
+      uses: pre-commit/action@v3.0.1
 #       # mypy turned off for now
 #    - name: Lint with mypy
 #      run: mypy . --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,8 +29,7 @@ repos:
       - id: mixed-line-ending
         args: [--fix=lf]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.2.2
+    rev: v0.4.8
     hooks:
       # Run the linter.
       - id: ruff
@@ -39,17 +38,17 @@ repos:
         # Run the formatter.
       - id: ruff-format
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
     hooks:
       - id: codespell
         exclude: >
           (?x)^(
               .*\.json|ignore.txt|lm_eval/tasks/.*|.*yaml|.*\.ipynb
           )$
         args: [--check-filenames, --check-hidden, --ignore-words=ignore.txt]
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.5.1
-    hooks:
-    - id: mypy
-      additional_dependencies: [".[sentencepiece,multilingual,promptsource,gptq]", "types-PyYAML", "types-requests"]
-      exclude: ^tests/.*$
+#  - repo: https://github.com/pre-commit/mirrors-mypy
+#    rev: v1.5.1
+#    hooks:
+#    - id: mypy
+#      additional_dependencies: [".[sentencepiece,multilingual,promptsource,gptq]", "types-PyYAML", "types-requests"]
+#      exclude: ^tests/.*$
diff --git a/lm_eval/api/task.py b/lm_eval/api/task.py
@@ -67,9 +67,9 @@ class TaskConfig(dict):
     training_split: Optional[str] = None
     validation_split: Optional[str] = None
     test_split: Optional[str] = None
-    fewshot_split: Optional[
-        str
-    ] = None  # TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
+    fewshot_split: Optional[str] = (
+        None  # TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
+    )
     # formatting / prompting options.
     # see docs/advanced_task_guide.md for more info
     process_docs: Optional[Callable] = None
@@ -92,9 +92,9 @@ class TaskConfig(dict):
     filter_list: Optional[Union[str, list]] = None
     should_decontaminate: bool = False
     doc_to_decontamination_query: Optional[str] = None
-    metadata: Optional[
-        dict
-    ] = None  # by default, not used in the code. allows for users to pass arbitrary info to tasks
+    metadata: Optional[dict] = (
+        None  # by default, not used in the code. allows for users to pass arbitrary info to tasks
+    )
 
     def __post_init__(self) -> None:
         if self.generation_kwargs is not None:
@@ -229,9 +229,9 @@ def __init__(
         self._config: TaskConfig = TaskConfig({**config}) if config else TaskConfig()
 
         self._filters = [build_filter_ensemble("none", [["take_first", None]])]
-        self.fewshot_rnd: Optional[
-            random.Random
-        ] = None  # purposely induce errors in case of improper usage
+        self.fewshot_rnd: Optional[random.Random] = (
+            None  # purposely induce errors in case of improper usage
+        )
 
     def download(
         self,

diff --git a/lm_eval/filters/decontamination.py b/lm_eval/filters/decontamination.py
@@ -4,7 +4,6 @@
 
 @register_filter("decontaminate")
 class DecontaminationFilter(Filter):
-
     """
     A filter which evaluates
     """

diff --git a/lm_eval/loggers/evaluation_tracker.py b/lm_eval/loggers/evaluation_tracker.py
@@ -259,7 +259,7 @@ def save_results_samples(
                 path.mkdir(parents=True, exist_ok=True)
 
                 file_results_samples = path.joinpath(
-                    f"samples_{task_name}_{self.date_id}.json"
+                    f"samples_{task_name}_{self.date_id}.jsonl"
                 )
 
                 for sample in samples:

diff --git a/lm_eval/models/textsynth.py b/lm_eval/models/textsynth.py
@@ -1,4 +1,4 @@
-""" TextSynth API
+"""TextSynth API
 Implementation provided by Fabrice Bellard:
     https://github.com/EleutherAI/lm-evaluation-harness/issues/295
 
@@ -11,6 +11,7 @@
 
 Homepage: https://textsynth.com/index.html
 """
+
 import logging
 import os
 

diff --git a/lm_eval/tasks/aclue/_generate_configs.py b/lm_eval/tasks/aclue/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/lm_eval/tasks/bbh/_generate_configs.py b/lm_eval/tasks/bbh/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 import re

diff --git a/lm_eval/tasks/belebele/_generate_configs.py b/lm_eval/tasks/belebele/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/lm_eval/tasks/bigbench/push_bigbench_dataset.py b/lm_eval/tasks/bigbench/push_bigbench_dataset.py
@@ -8,6 +8,7 @@
 `pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
 and is included so that the bigbench dependency can be avoided.
 """
+
 import bigbench.api.util as bb_utils
 import datasets
 from tqdm import tqdm

diff --git a/lm_eval/tasks/ceval/_generate_configs.py b/lm_eval/tasks/ceval/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/lm_eval/tasks/cmmlu/_generate_configs.py b/lm_eval/tasks/cmmlu/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/lm_eval/tasks/csatqa/_generate_configs.py b/lm_eval/tasks/csatqa/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all other splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/lm_eval/tasks/fda/task.py b/lm_eval/tasks/fda/task.py
@@ -1,5 +1,3 @@
-"""
-"""
 import re
 from typing import List
 

diff --git a/lm_eval/tasks/ifeval/instructions.py b/lm_eval/tasks/ifeval/instructions.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Library of instructions."""
+
 import collections
 import json
 import logging

diff --git a/lm_eval/tasks/ifeval/instructions_registry.py b/lm_eval/tasks/ifeval/instructions_registry.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Registry of all instructions."""
+
 from lm_eval.tasks.ifeval import instructions
 
 

diff --git a/lm_eval/tasks/mmlu/_generate_configs.py b/lm_eval/tasks/mmlu/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all "other" splits with this YAML
 """
+
 import argparse
 import logging
 import os

diff --git a/lm_eval/tasks/squad_completion/task.py b/lm_eval/tasks/squad_completion/task.py
@@ -1,5 +1,3 @@
-"""
-"""
 import re
 from typing import List
 

diff --git a/lm_eval/tasks/squadv2/task.py b/lm_eval/tasks/squadv2/task.py
@@ -13,6 +13,7 @@
 
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
+
 from functools import partial
 from math import exp
 

diff --git a/lm_eval/tasks/tinyBenchmarks/utils_winogrande.py b/lm_eval/tasks/tinyBenchmarks/utils_winogrande.py
@@ -1,4 +1,4 @@
-""" This code mirrors the utils of the original winogrande task """
+"""This code mirrors the utils of the original winogrande task"""
 
 
 def doc_to_text(doc):

diff --git a/lm_eval/tasks/tmmluplus/default/_generate_configs.py b/lm_eval/tasks/tmmluplus/default/_generate_configs.py
@@ -1,6 +1,7 @@
 """
 Take in a YAML, and output all "other" splits with this YAML
 """
+
 import argparse
 import os
 

diff --git a/scripts/clean_training_data/README.md b/scripts/clean_training_data/README.md
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
     the match, splitting the training data into chunks
    3) Any chunks less than `minimum_slice_length` are removed
    4) Training data sets split into more than `too_dirty_cutoff` are considered
-    completey contaminated and removed
+    completely contaminated and removed
 
 OpenAI used:
 ```

diff --git a/scripts/make_table_results.py b/scripts/make_table_results.py
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
+
 import json
 import logging
 import os

diff --git a/scripts/make_table_tasks.py b/scripts/make_table_tasks.py
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
+
 import argparse
 import logging
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,7 +4,6 @@ @@
     @register_filter("decontaminate")
     class DecontaminationFilter(Filter):
         """
         A filter which evaluates
         """
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,6 +13,7 @@
		# limitations under the License.

		"""Registry of all instructions."""

		from lm_eval.tasks.ifeval import instructions


Expand Down