Skip to content

Commit

Permalink
adding the imdb_ptbr scenario
Browse files Browse the repository at this point in the history
  • Loading branch information
thallysonjsa committed Jan 20, 2025
1 parent da32235 commit a13ef55
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 0 deletions.
30 changes: 30 additions & 0 deletions src/helm/benchmark/run_specs/imdb_ptbr_run_specs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from helm.benchmark.adaptation.common_adapter_specs import get_generation_adapter_spec
from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs, get_classification_metric_specs
from helm.benchmark.run_spec import RunSpec, run_spec_function
from helm.benchmark.scenarios.scenario import ScenarioSpec


@run_spec_function("imdb_ptbr")
def get_imdb_ptbr_spec() -> RunSpec:
    """Build the run spec for the IMDB PT-BR sentiment classification scenario.

    The adapter instructs the model, in Portuguese, to classify a movie review
    as "positivo" or "negativo". Two worked examples (one per class) are
    embedded directly in the instructions text.

    Returns:
        A RunSpec named "imdb_ptbr" in the "imdb_ptbr" group, scored with the
        exact-match metric specs plus the classification metric specs.
    """
    scenario_spec = ScenarioSpec(class_name="helm.benchmark.scenarios.imdb_ptbr_scenario.IMDB_PTBRScenario", args={})

    # The instructions are a runtime prompt: keep the text (including the
    # trailing newline before the closing quotes) exactly as written.
    adapter_spec = get_generation_adapter_spec(
        instructions="""Classifique a resenha do usuário sobre o filme como "positivo" ou "negativo".
Resenha: Tudo sobre o filme é maravilhoso. Atuações, trilha sonora, fotografia. Amei tudo!
Classe: positivo
Resenha: Achei um filme bem fraco, não gostei da história.
Classe: negativo
""",
        input_noun="Resenha",
        output_noun="Classe",
    )

    return RunSpec(
        name="imdb_ptbr",
        scenario_spec=scenario_spec,
        adapter_spec=adapter_spec,
        metric_specs=get_exact_match_metric_specs() + get_classification_metric_specs(),
        groups=["imdb_ptbr"],
    )


# Backward-compatible alias: this function was originally published under a
# name that referred to a different scenario (tweetsentbr).
get_tweetsentbr_spec = get_imdb_ptbr_spec
60 changes: 60 additions & 0 deletions src/helm/benchmark/scenarios/imdb_ptbr_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from typing import Any, List, Dict
from pathlib import Path
from datasets import load_dataset
from helm.common.hierarchical_logger import hlog
from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
TRAIN_SPLIT,
TEST_SPLIT,
CORRECT_TAG,
Input,
Output,
)


class IMDB_PTBRScenario(Scenario):
    """
    Sentiment classification on a Brazilian Portuguese translation of IMDB.

    The IMDB dataset is a widely-used benchmark dataset for natural language processing (NLP),
    particularly for text classification and sentiment analysis.
    This is a translated version that is meant to evaluate PT-BR models.
    It consists of movie reviews from the Internet Movie Database (IMDB) and
    includes both positive and negative sentiments labeled for supervised learning.

    The data is loaded from the Hugging Face dataset "maritaca-ai/imdb_pt".
    Integer labels are mapped to the Portuguese class names "negativo" (0)
    and "positivo" (1).
    """

    # Identifier of this scenario. It was previously "simple_classification",
    # which did not match the "imdb_ptbr" run spec and looked like a copy/paste
    # leftover from another scenario.
    name = "imdb_ptbr"
    description = "Classify movie reviews between positive or negative."
    tags = ["classification"]

    def process_dataset(self, dataset: Any, split: str) -> List[Instance]:
        """Convert one split of the dataset into HELM instances.

        Args:
            dataset: Mapping from split name to an iterable of examples; each
                example is indexed by "text" (the review) and "label" (0 or 1).
            split: Key used to select the examples in ``dataset``; the same
                value is recorded as the split of every produced instance.

        Returns:
            One Instance per example, in dataset order.

        Raises:
            KeyError: If ``split`` is not in ``dataset`` or a label is not 0 or 1.
        """
        label_names = {0: "negativo", 1: "positivo"}
        instances: List[Instance] = []
        for example in dataset[split]:
            # Only the gold class is attached as a reference, tagged with
            # CORRECT_TAG; the other class is not listed as an incorrect
            # reference.
            references = [
                Reference(Output(text=label_names[example["label"]]), tags=[CORRECT_TAG]),
            ]
            instances.append(
                Instance(
                    input=Input(text=example["text"]),
                    references=references,
                    split=split,
                )
            )
        return instances

    def get_instances(self, output_path: str) -> List[Instance]:
        """Download the dataset and return the train and test instances.

        Args:
            output_path: Directory for this scenario's files; the dataset is
                cached under its "data" subdirectory.

        Returns:
            All train instances followed by all test instances. A split that
            is missing from the loaded dataset is logged and skipped.
        """
        cache_dir = str(Path(output_path) / "data")
        dataset = load_dataset("maritaca-ai/imdb_pt", cache_dir=cache_dir)

        # Dataset split name -> HELM split constant.
        splits: Dict[str, str] = {
            "train": TRAIN_SPLIT,
            "test": TEST_SPLIT,
        }

        instances: List[Instance] = []
        for dataset_split, helm_split in splits.items():
            # Check against the loaded dataset. The previous check tested the
            # keys of the dict being iterated, so it could never be true.
            if dataset_split not in dataset:
                hlog(f"{dataset_split} split doesn't exist, skipping")
                continue
            # process_dataset looks examples up under the same name it stamps
            # on the instances, so hand it the examples keyed by the HELM split.
            instances.extend(self.process_dataset({helm_split: dataset[dataset_split]}, helm_split))

        return instances
26 changes: 26 additions & 0 deletions src/helm/benchmark/scenarios/test_imdb_ptbr_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
from tempfile import TemporaryDirectory

from helm.benchmark.scenarios.imdb_ptbr_scenario import IMDB_PTBRScenario
from helm.benchmark.scenarios.scenario import TRAIN_SPLIT, CORRECT_TAG, Output, Reference


# Marked as a scenario test because it downloads the real dataset; the marker
# lets such tests be selected or deselected with `-m`.
@pytest.mark.scenarios
def test_imdb_ptbr_scenario():
    """Load the IMDB PT-BR scenario and spot-check its instances.

    Checks the total instance count, that the first instance belongs to the
    train split, and the text prefix, text length and gold reference of the
    instance at index 10.
    """
    imdb_ptbr = IMDB_PTBRScenario()
    with TemporaryDirectory() as tmpdir:
        instances = imdb_ptbr.get_instances(tmpdir)

    # The checks below only use the in-memory instances, so they run after
    # the temporary directory has been released.
    assert len(instances) == 30000
    assert instances[0].split == TRAIN_SPLIT

    assert instances[10].input.text.startswith(
        "Foi ótimo ver algumas das minhas estrelas favoritas de 30 anos atrás, incluindo John Ritter, Ben Gazarra e Audrey Hepburn."
    )
    assert len(instances[10].input.text) == 1549

    assert instances[10].references == [
        Reference(
            output=Output(text="negativo"),
            tags=[CORRECT_TAG],
        )
    ]

0 comments on commit a13ef55

Please sign in to comment.