
Commit

added linting checks
joesharratt1229 committed Jan 31, 2025
1 parent 4fea3c3 commit 37375f0
Showing 2 changed files with 13 additions and 11 deletions.
6 changes: 3 additions & 3 deletions reasoning_gym/games/countdown.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from random import Random
-from typing import List, Optional, Tuple, Dict, Any
+from typing import Any, Dict, List, Optional, Tuple
 
 import sympy
 from sympy import Symbol, symbols
@@ -158,7 +158,7 @@ def _generate_expression(self, rng: Random) -> Tuple[str, List[int], int]:
                 continue
 
         raise ValueError(f"Failed to generate valid expression after {max_attempts} attempts")
-
+
     def score_answer(self, answer: Optional[str], metadata: Dict[str, Any]) -> float:
         """Determine if the solution provided solves the problem"""
         reward = 0.0
@@ -168,7 +168,7 @@ def score_answer(self, answer: Optional[str], metadata: Dict[str, Any]) -> float:
             solved = user_answer == metadata["target"]
             if solved:
                 reward = 1.0
-            elif (len(answer.strip()) > 0): # encourage partial solutions
+            elif len(answer.strip()) > 0:  # encourage partial solutions
                 reward = 0.05
             else:
                 reward = 0.01
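The elif rewrite above is behavior-preserving: it only drops parentheses flagged by the linter. For context, the reward contract that score_answer implements: 1.0 for an expression that hits the target, 0.05 for any non-empty wrong attempt, 0.01 for an empty or unparseable answer, and 0.0 when no answer is given. A minimal sketch of exercising that contract, assuming a CountdownConfig/CountdownDataset pair exported by this module (the constructor and config fields are assumptions, not taken from this commit):

# Sketch only: construction details are assumed; check the module for actual names.
from reasoning_gym.games.countdown import CountdownConfig, CountdownDataset

dataset = CountdownDataset(CountdownConfig(seed=42, size=1))  # hypothetical config fields
item = dataset[0]
meta = item["metadata"]

assert dataset.score_answer(answer=meta["expression"], metadata=meta) == 1.0  # solved exactly
assert dataset.score_answer(answer="45+2", metadata=meta) == 0.05  # wrong value, valid attempt
assert dataset.score_answer(answer="", metadata=meta) == 0.01  # empty answer
assert dataset.score_answer(answer=None, metadata=meta) == 0.0  # no answer at all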
18 changes: 10 additions & 8 deletions tests/test_countdown.py
@@ -64,14 +64,16 @@ def test_countdown_game_items():
 
         # Verify expression evaluates correctly
         expr = item["metadata"]["expression"]
-
-        #check score
-        assert dataset.score_answer(answer=expr, metadata=item["metadata"]) == 1.0 #correct answer
-        assert dataset.score_answer(answer="45+2", metadata=item["metadata"]) == 0.05 #wrong answer but an attempt
-        assert dataset.score_answer(answer="a wrong solution", metadata=item["metadata"]) == 0.01 #wrong answer but incorrectly formatted
-        assert dataset.score_answer(answer="", metadata=item["metadata"]) == 0.01 #wrong answer but empty string
-        assert dataset.score_answer(answer=None, metadata=item["metadata"]) == 0.0 #no answer
-
+
+        # check score
+        assert dataset.score_answer(answer=expr, metadata=item["metadata"]) == 1.0  # correct answer
+        assert dataset.score_answer(answer="45+2", metadata=item["metadata"]) == 0.05  # wrong answer but an attempt
+        assert (
+            dataset.score_answer(answer="a wrong solution", metadata=item["metadata"]) == 0.01
+        )  # wrong answer but incorrectly formatted
+        assert dataset.score_answer(answer="", metadata=item["metadata"]) == 0.01  # wrong answer but empty string
+        assert dataset.score_answer(answer=None, metadata=item["metadata"]) == 0.0  # no answer
+
         try:
             result = eval(expr)  # Safe here since we control expression generation
             assert result == item["metadata"]["target"]
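A note on the retained context line: the test evaluates expr with eval, and the inline comment justifies it because expression generation is controlled by the dataset itself. If expressions ever came from an untrusted source, the same check could walk the AST and allow only the four arithmetic operators. A sketch of that alternative follows; it is an illustration, not something this commit introduces:

import ast
import operator

# Allow only the binary operators countdown-style expressions are built from.
_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
}

def safe_eval(expr: str) -> float:
    """Evaluate an arithmetic string by walking its AST instead of calling eval()."""
    def walk(node: ast.AST) -> float:
        if isinstance(node, ast.Expression):
            return walk(node.body)
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        raise ValueError(f"Unsupported expression: {expr!r}")
    return walk(ast.parse(expr, mode="eval"))

assert safe_eval("45 + 2 * 3") == 51  # operator precedence handled by the parser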
