Skip to content

Commit

Permalink
[Feature] Support G-Pass@k and LiveMathBench (#1772)
Browse files Browse the repository at this point in the history
* support G-Pass@k and LiveMathBench

* fix bugs

* fix comments of GPassKEvaluator

* update saved details of GPassKEvaluator

* update saved details of GPassKEvaluator

* fix eval api configs & update openai_api for ease of debugging

* update huggingface path

* fix method name of G-Pass@k

* fix default value of eval_model_name

* refactor G-Pass@k evaluator

* log generation params for each backend

* fix evaluation resume

* add NotImplementedError
  • Loading branch information
jnanliu authored Dec 30, 2024
1 parent 42b54d6 commit 8e8d4f1
Show file tree
Hide file tree
Showing 10 changed files with 531 additions and 331 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from mmengine.config import read_base

with read_base():
from .livemathbench_gen_caed8f import livemathbench_datasets # noqa: F401, F403
from .livemathbench_gen_9befbf import livemathbench_datasets # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer

from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator


# Generation config for LiveMathBench (version 202412).
#
# Each item's `prompt` column is sent to the model as a single HUMAN turn and
# the `answer` column is used as the reference; predictions are scored by
# LiveMathBenchEvaluator.
livemathbench_dataset = {
    'type': LiveMathBenchDataset,
    # Left empty in this config.
    # NOTE(review): presumably resolved to a default location by the dataset
    # loader or filled in by the user -- confirm against LiveMathBenchDataset.
    'path': '',
    # NOTE(review): `k` / `replication` also appear in the evaluator settings
    # below (16 is the largest evaluator k, replication is 3 in both); they
    # look like they must be kept in sync -- confirm before changing one.
    'k': 16,
    'replication': 3,
    'dataset_splits': ['CNMO', 'CCEE', 'AMC', 'WLPMC'],
    'dataset_languages': ['cn', 'en'],
    'cot': True,
    'version': '202412',
    'abbr': 'LiveMathBench-v202412',
    'reader_cfg': {
        'input_columns': ['prompt'],
        'output_column': 'answer',
    },
    'infer_cfg': {
        # The template forwards the dataset-provided prompt verbatim.
        'prompt_template': {
            'type': PromptTemplate,
            'template': {
                'round': [
                    {'role': 'HUMAN', 'prompt': '{prompt}'},
                ],
            },
        },
        'retriever': {'type': ZeroRetriever},
        'inferencer': {
            'type': GenInferencer,
            'max_out_len': 8192,
        },
    },
    'eval_cfg': {
        'evaluator': {
            'type': LiveMathBenchEvaluator,
            # Model name and endpoint lists are unset in this config.
            # NOTE(review): presumably these must be supplied before
            # evaluation can run -- confirm against LiveMathBenchEvaluator.
            'model_name': '',
            'url': [],
            'use_extract_model': False,
            'extract_url': [],
            'extract_model_name': '',
            'k': [4, 8, 16],
            'replication': 3,
            'thresholds': [0.0, 0.25, 0.5, 0.75, 1.0],
        },
    },
}

# Name imported by the aggregating config through `read_base()`.
livemathbench_datasets = [livemathbench_dataset]
Loading

0 comments on commit 8e8d4f1

Please sign in to comment.