-
Notifications
You must be signed in to change notification settings - Fork 269
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
da32235
commit a13ef55
Showing
3 changed files
with
116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from helm.benchmark.adaptation.common_adapter_specs import get_generation_adapter_spec | ||
from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs, get_classification_metric_specs | ||
from helm.benchmark.run_spec import RunSpec, run_spec_function | ||
from helm.benchmark.scenarios.scenario import ScenarioSpec | ||
|
||
|
||
@run_spec_function("imdb_ptbr")
def get_imdb_ptbr_spec() -> RunSpec:
    """Build the run spec for the IMDB PT-BR sentiment classification scenario.

    The adapter is a generation adapter prompted in Portuguese: the
    instructions describe the task and include one "positivo" and one
    "negativo" worked example. Model output is scored with the exact-match
    metrics plus the classification metrics.

    Returns:
        The RunSpec registered under the name "imdb_ptbr".
    """
    scenario_spec = ScenarioSpec(
        class_name="helm.benchmark.scenarios.imdb_ptbr_scenario.IMDB_PTBRScenario",
        args={},
    )

    # Written as explicit pieces so the prompt text does not depend on how the
    # source file happens to be indented.
    instructions = (
        'Classifique a resenha do usuário sobre o filme como "positivo" ou "negativo".\n'
        "Resenha: Tudo sobre o filme é maravilhoso. Atuações, trilha sonora, fotografia. Amei tudo!\n"
        "Classe: positivo\n"
        "Resenha: Achei um filme bem fraco, não gostei da história.\n"
        "Classe: negativo\n"
    )

    adapter_spec = get_generation_adapter_spec(
        instructions=instructions,
        input_noun="Resenha",
        output_noun="Classe",
    )

    return RunSpec(
        name="imdb_ptbr",
        scenario_spec=scenario_spec,
        adapter_spec=adapter_spec,
        metric_specs=get_exact_match_metric_specs() + get_classification_metric_specs(),
        groups=["imdb_ptbr"],
    )


# Backward-compatible alias: the function was originally published under this
# name, which was carried over from the TweetSentBR run spec.
get_tweetsentbr_spec = get_imdb_ptbr_spec
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from typing import Any, List, Dict | ||
from pathlib import Path | ||
from datasets import load_dataset | ||
from helm.common.hierarchical_logger import hlog | ||
from helm.benchmark.scenarios.scenario import ( | ||
Scenario, | ||
Instance, | ||
Reference, | ||
TRAIN_SPLIT, | ||
TEST_SPLIT, | ||
CORRECT_TAG, | ||
Input, | ||
Output, | ||
) | ||
|
||
|
||
class IMDB_PTBRScenario(Scenario):
    """
    Sentiment classification on a Brazilian Portuguese translation of IMDB.

    The IMDB dataset is a widely-used benchmark dataset for natural language processing (NLP),
    particularly for text classification and sentiment analysis.
    This is a translated version that is meant to evaluate PT-BR models.
    It consists of movie reviews from the Internet Movie Database (IMDB) and
    includes both positive and negative sentiments labeled for supervised learning.

    The data is loaded from the Hugging Face dataset "maritaca-ai/imdb_pt".
    Integer labels are mapped to the strings "negativo" (0) and "positivo" (1),
    and each instance carries exactly one reference, tagged as correct.
    """

    # NOTE(review): "simple_classification" looks copied from another scenario,
    # while the matching run spec is named "imdb_ptbr". Left unchanged because
    # other code may key on this value - confirm before renaming.
    name = "simple_classification"
    description = "Classify movie reviews between positive or negative."
    tags = ["classification"]

    def _build_instances(self, examples: Any, helm_split: str) -> List[Instance]:
        """Convert raw examples into instances tagged with `helm_split`.

        Args:
            examples: Iterable of mappings with a "text" field and an integer
                "label" field (0 or 1).
            helm_split: Split name stored on every produced instance.

        Returns:
            One Instance per example, in the same order as `examples`.
        """
        label_names = {0: "negativo", 1: "positivo"}
        # Only the correct class is emitted as a reference, and it is the only
        # reference on the instance.
        return [
            Instance(
                input=Input(text=example["text"]),
                references=[
                    Reference(Output(text=label_names[example["label"]]), tags=[CORRECT_TAG]),
                ],
                split=helm_split,
            )
            for example in examples
        ]

    def process_dataset(self, dataset: Any, split: str) -> List[Instance]:
        """Build instances from `dataset[split]`, tagging each one with `split`.

        Args:
            dataset: Mapping from split name to an iterable of examples.
            split: Key to read from `dataset`; also used as the instance split.

        Returns:
            The instances built from that split.
        """
        return self._build_instances(dataset[split], split)

    def get_instances(self, output_path: str) -> List[Instance]:
        """Load the dataset and return the train instances followed by the test instances.

        Args:
            output_path: Directory under which a "data" subdirectory is used as
                the download cache.

        Returns:
            All instances from the "train" and "test" dataset splits. A split
            that is absent from the loaded dataset is logged and skipped.
        """
        cache_dir = str(Path(output_path) / "data")
        dataset = load_dataset("maritaca-ai/imdb_pt", cache_dir=cache_dir)
        # Dataset split key -> split constant recorded on the instances.
        splits: Dict[str, str] = {
            "train": TRAIN_SPLIT,
            "test": TEST_SPLIT,
        }

        instances: List[Instance] = []
        for split, helm_split in splits.items():
            # Check the loaded dataset itself; testing membership in `splits`
            # while iterating over it can never fail.
            if split not in dataset:
                hlog(f"{split} split doesn't exist, skipping")
                continue
            instances.extend(self._build_instances(dataset[split], helm_split))

        return instances
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import pytest | ||
from tempfile import TemporaryDirectory | ||
|
||
from helm.benchmark.scenarios.imdb_ptbr_scenario import IMDB_PTBRScenario | ||
from helm.benchmark.scenarios.scenario import TRAIN_SPLIT, CORRECT_TAG, Output, Reference | ||
|
||
|
||
# Marked as a scenario test because it loads the real dataset through
# IMDB_PTBRScenario.get_instances rather than using a local fixture.
@pytest.mark.scenarios
def test_imdb_ptbr_scenario():
    """Check the size, split, text and label of the loaded IMDB PT-BR instances."""
    imdb_ptbr = IMDB_PTBRScenario()
    with TemporaryDirectory() as tmpdir:
        instances = imdb_ptbr.get_instances(tmpdir)

    # The instance list is fully built in memory, so the checks do not need
    # the temporary directory to still exist.
    assert len(instances) == 30000
    assert instances[0].split == TRAIN_SPLIT

    sample = instances[10]
    assert sample.input.text.startswith(
        "Foi ótimo ver algumas das minhas estrelas favoritas de 30 anos atrás, incluindo John Ritter, Ben Gazarra e Audrey Hepburn."
    )
    assert len(sample.input.text) == 1549

    assert sample.references == [
        Reference(
            output=Output(text="negativo"),
            tags=[CORRECT_TAG],
        )
    ]