Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding IMDB_PTBR Scenario #3284

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/helm/benchmark/run_specs/imdb_ptbr_run_specs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from helm.benchmark.adaptation.common_adapter_specs import get_generation_adapter_spec
from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs, get_classification_metric_specs
from helm.benchmark.run_spec import RunSpec, run_spec_function
from helm.benchmark.scenarios.scenario import ScenarioSpec


@run_spec_function("imdb_ptbr")
def get_tweetsentbr_spec() -> RunSpec:
    """Build the run spec that is registered under the name "imdb_ptbr".

    The spec points at the IMDB PT-BR scenario class, uses a generation adapter
    whose Portuguese instructions contain one positive and one negative worked
    example, and combines the exact-match and classification metric specs.

    NOTE(review): the function name mentions "tweetsentbr" although everything it
    builds is for "imdb_ptbr" -- this looks like a copy-paste leftover. The name
    is kept as-is so the module's public names do not change.

    Returns:
        The RunSpec for the "imdb_ptbr" group.
    """
    # The instruction text is sent to the model verbatim, so it is kept
    # flush-left: any leading whitespace here would become part of the prompt.
    instructions = """Classifique a resenha do usuário sobre o filme como "positivo" ou "negativo".

Resenha: Tudo sobre o filme é maravilhoso. Atuações, trilha sonora, fotografia. Amei tudo!
Classe: positivo

Resenha: Achei um filme bem fraco, não gostei da história.
Classe: negativo
"""

    return RunSpec(
        name="imdb_ptbr",
        scenario_spec=ScenarioSpec(
            class_name="helm.benchmark.scenarios.imdb_ptbr_scenario.IMDB_PTBRScenario",
            args={},
        ),
        adapter_spec=get_generation_adapter_spec(
            instructions=instructions,
            input_noun="Resenha",
            output_noun="Classe",
        ),
        metric_specs=get_exact_match_metric_specs() + get_classification_metric_specs(),
        groups=["imdb_ptbr"],
    )
60 changes: 60 additions & 0 deletions src/helm/benchmark/scenarios/imdb_ptbr_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import Any, List, Dict
from pathlib import Path
from datasets import load_dataset
from helm.common.hierarchical_logger import hlog
from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
TRAIN_SPLIT,
TEST_SPLIT,
CORRECT_TAG,
Input,
Output,
)


class IMDB_PTBRScenario(Scenario):
    """
    The IMDB dataset is a widely-used benchmark dataset for natural language processing (NLP),
    particularly for text classification and sentiment analysis.
    This is a translated version that is meant to evaluate PT-BR models.
    It consists of movie reviews from the Internet Movie Database (IMDB) and
    includes both positive and negative sentiments labeled for supervised learning.

    Instances are built from the "maritaca-ai/imdb_pt" Hugging Face dataset. Each
    instance carries the review text as input and a single reference holding the
    gold label ("negativo" or "positivo") tagged with CORRECT_TAG.
    """

    # NOTE(review): "simple_classification" looks like it was copied from another
    # scenario; confirm whether this should be a scenario-specific name. It is left
    # unchanged here because how the framework uses `name` is not visible in this file.
    name = "simple_classification"
    description = "Classify movie reviews between positive or negative."
    tags = ["classification"]

    def _rows_to_instances(self, rows: Any, split: str) -> List[Instance]:
        """Convert an iterable of dataset rows into instances labelled with `split`.

        Args:
            rows: Iterable of mappings with a "text" entry and a "label" entry (0 or 1).
            split: Split label stored on every produced instance.

        Returns:
            One Instance per row, in iteration order.

        Raises:
            KeyError: If a row's label is not 0 or 1, or a row lacks "text"/"label".
        """
        label_names = {0: "negativo", 1: "positivo"}
        # Only the gold label is emitted as a reference, and it is the one tagged
        # CORRECT_TAG; the opposite class is not added as an incorrect reference.
        return [
            Instance(
                input=Input(text=row["text"]),
                references=[Reference(Output(text=label_names[row["label"]]), tags=[CORRECT_TAG])],
                split=split,
            )
            for row in rows
        ]

    def process_dataset(self, dataset: Any, split: str) -> List[Instance]:
        """Build instances from `dataset[split]`, labelling each with `split`.

        Args:
            dataset: Mapping from split name to an iterable of rows.
            split: Key to read from `dataset`; also stored as each instance's split.

        Returns:
            One Instance per row of `dataset[split]`.
        """
        return self._rows_to_instances(dataset[split], split)

    def get_instances(self, output_path: str) -> List[Instance]:
        """Load the dataset and return the train instances followed by the test instances.

        Args:
            output_path: Directory under which a "data" sub-directory is used as the
                dataset cache directory.

        Returns:
            All instances from the "train" and "test" dataset splits. A split that is
            absent from the loaded dataset is logged and skipped.
        """
        cache_dir = str(Path(output_path) / "data")
        dataset = load_dataset("maritaca-ai/imdb_pt", cache_dir=cache_dir)
        # Maps the dataset's own split names to the split constants stored on instances.
        splits: Dict[str, str] = {
            "train": TRAIN_SPLIT,
            "test": TEST_SPLIT,
        }
        instances: List[Instance] = []
        for dataset_split, instance_split in splits.items():
            # Check against the loaded dataset (not against `splits` itself, which
            # would always contain the key) so a missing split is actually skipped.
            if dataset_split not in dataset:
                hlog(f"{dataset_split} split doesn't exist, skipping")
                continue
            # Index the dataset by its own split name and tag with the mapped constant.
            instances.extend(self._rows_to_instances(dataset[dataset_split], instance_split))

        return instances
27 changes: 27 additions & 0 deletions src/helm/benchmark/scenarios/test_imdb_ptbr_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
from tempfile import TemporaryDirectory

from helm.benchmark.scenarios.imdb_ptbr_scenario import IMDB_PTBRScenario
from helm.benchmark.scenarios.scenario import TRAIN_SPLIT, CORRECT_TAG, Output, Reference


@pytest.mark.scenarios
def test_imdb_ptbr_scenario():
    """Check the instance count, the first split, and one known example of the scenario."""
    scenario = IMDB_PTBRScenario()
    expected_prefix = (
        "Foi ótimo ver algumas das minhas estrelas favoritas de 30 anos atrás, "
        "incluindo John Ritter, Ben Gazarra e Audrey Hepburn."
    )
    expected_references = [
        Reference(
            output=Output(text="negativo"),
            tags=[CORRECT_TAG],
        )
    ]

    with TemporaryDirectory() as output_dir:
        instances = scenario.get_instances(output_dir)

        # Overall size and ordering: the first instance comes from the train split.
        assert len(instances) == 30000
        assert instances[0].split == TRAIN_SPLIT

        # Spot-check one fixed example: text prefix, text length, and gold label.
        sample = instances[10]
        assert sample.input.text.startswith(expected_prefix)
        assert len(sample.input.text) == 1549
        assert sample.references == expected_references
Loading