Skip to content

Commit

Permalink
Merge branch 'main' into feat/2048-dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
andreaskoepf authored Mar 5, 2025
2 parents cd02e7d + d1e505a commit d9e2d66
Show file tree
Hide file tree
Showing 133 changed files with 1,768 additions and 993 deletions.
17 changes: 15 additions & 2 deletions eval/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,23 @@ Options:
- `--size`: Default dataset size (default: 100)
- `--seed`: Default dataset seed (default: 42)
- `--include-params`: Include all configuration parameters (default: False)
- `--category`: Only include datasets from this category (default: None)

### Running Evaluations
#### Generating Config for a Specific Category

To generate a configuration file containing only datasets from a specific category:

```bash
python generate_config.py --category algorithmic --output algorithmic_datasets.yaml --model "anthropic/claude-3.5-sonnet"
```

To run evaluations:
This will create a configuration file that includes only datasets in the "algorithmic" category. This is useful when you want to focus your evaluation on a specific type of reasoning task.

Example categories include math, arithmetic, reasoning, and algorithmic. The category is automatically extracted from the dataset's module name (e.g., from `reasoning_gym.math.dataset_name`, it extracts "math").

To see all available categories, run the script without the `--category` option; it prints every category at the end of execution.

### Running Evaluations

```bash
python eval.py --config configs/your_config.yaml
Expand Down
2 changes: 1 addition & 1 deletion eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ async def main_async():
print("Warning: OPENROUTER_API_KEY environment variable is not set")
print("Please set it using: export OPENROUTER_API_KEY=your-api-key")
print("Or provide it directly with --api-key")
print("Continuing without API key...")
return 1

# Load configuration
config_path = args.config
Expand Down
30 changes: 25 additions & 5 deletions eval/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
--size SIZE Default dataset size (default: 100)
--seed SEED Default dataset seed (default: 42)
--include-params Include all configuration parameters (default: False)
--category CATEGORY Only include datasets from this category (default: None)
"""

import argparse
Expand All @@ -35,14 +36,27 @@ def extract_category(module_name):
return "other"


def generate_config(model, provider, size, seed, include_params):
"""Generate configuration with all registered datasets."""
def generate_config(model, provider, size, seed, include_params, category=None):
"""Generate configuration with all registered datasets.
Args:
model: Model name
provider: Provider name
size: Default dataset size
seed: Default dataset seed
include_params: Whether to include all configuration parameters
category: If specified, only include datasets from this category
"""
# Group datasets by category
categories = defaultdict(list)

for dataset_name, (dataset_cls, config_cls) in DATASETS.items():
# Extract category from module name
category = extract_category(dataset_cls.__module__)
dataset_category = extract_category(dataset_cls.__module__)

# Skip if a specific category was requested and this doesn't match
if category and dataset_category != category:
continue

# Create dataset entry
dataset_entry = {"dataset": dataset_name}
Expand All @@ -62,7 +76,7 @@ def generate_config(model, provider, size, seed, include_params):
dataset_entry["params"] = params

# Add to appropriate category
categories[category].append(dataset_entry)
categories[dataset_category].append(dataset_entry)

# Create configuration structure
config = {
Expand Down Expand Up @@ -90,12 +104,18 @@ def main():
parser.add_argument("--size", type=int, default=100, help="Default dataset size")
parser.add_argument("--seed", type=int, default=42, help="Default dataset seed")
parser.add_argument("--include-params", action="store_true", help="Include all configuration parameters")
parser.add_argument("--category", help="Only include datasets from this category")

args = parser.parse_args()

# Generate configuration
config = generate_config(
model=args.model, provider=args.provider, size=args.size, seed=args.seed, include_params=args.include_params
model=args.model,
provider=args.provider,
size=args.size,
seed=args.seed,
include_params=args.include_params,
category=args.category,
)

# Write to file
Expand Down
1 change: 1 addition & 0 deletions notebooks/codeio/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
raw_files/
output/
Loading

0 comments on commit d9e2d66

Please sign in to comment.