diff --git a/evaluation/data_loader.py b/evaluation/data_loader.py index a7c3f1cf..da23e513 100755 --- a/evaluation/data_loader.py +++ b/evaluation/data_loader.py @@ -11,6 +11,7 @@ def load_data(data_name, split, data_dir="./data"): if os.path.exists(data_file): examples = list(load_jsonl(data_file)) else: + dataset = None if data_name == "math": dataset = load_dataset( "competition_math", @@ -21,7 +22,6 @@ def load_data(data_name, split, data_dir="./data"): elif data_name == "gsm8k": dataset = load_dataset(data_name, split=split) elif data_name == "svamp": - # evaluate on training set + test set dataset = load_dataset("ChilleD/SVAMP", split="train") dataset = concatenate_datasets( [dataset, load_dataset("ChilleD/SVAMP", split="test")] @@ -33,16 +33,14 @@ def load_data(data_name, split, data_dir="./data"): ) # remove multi-answer examples elif data_name == "mawps": examples = [] - # four sub-tasks - for data_name in ["singleeq", "singleop", "addsub", "multiarith"]: - sub_examples = list(load_jsonl(f"{data_dir}/mawps/{data_name}.jsonl")) + for sub_task in ["singleeq", "singleop", "addsub", "multiarith"]: + sub_examples = list(load_jsonl(f"{data_dir}/mawps/{sub_task}.jsonl")) for example in sub_examples: - example["type"] = data_name + example["type"] = sub_task examples.extend(sub_examples) dataset = Dataset.from_list(examples) elif data_name == "mmlu_stem": dataset = load_dataset("hails/mmlu_no_train", "all", split="test") - # only keep stem subjects stem_subjects = [ "abstract_algebra", "astronomy", @@ -66,20 +64,19 @@ def load_data(data_name, split, data_dir="./data"): dataset = dataset.rename_column("subject", "type") dataset = dataset.filter(lambda x: x["type"] in stem_subjects) elif data_name == "carp_en": - dataset = load_jsonl(f"{data_dir}/carp_en/test.jsonl") + dataset = Dataset.from_list(load_jsonl(f"{data_dir}/carp_en/test.jsonl")) else: raise NotImplementedError(data_name) - examples = list(dataset) - examples = [lower_keys(example) for example in examples] - dataset = Dataset.from_list(examples) - os.makedirs(f"{data_dir}/{data_name}", exist_ok=True) - dataset.to_json(data_file) + if dataset is not None: + examples = [lower_keys(example) for example in dataset] + os.makedirs(f"{data_dir}/{data_name}", exist_ok=True) + dataset.to_json(data_file) - # add 'idx' in the first column if "idx" not in examples[0]: examples = [{"idx": i, **example} for i, example in enumerate(examples)] - # dedepulicate & sort examples = sorted(examples, key=lambda x: x["idx"]) + return examples +