Skip to content

Commit

Permalink
Minor question template & score_answer improvements (#261)
Browse files Browse the repository at this point in the history
* math prompt improvements
* ignore brackets in complex_arithmetic results
* improve additional instruction in prompt of polynomial_equations
* more strict tests for score_answer in polynomial_equations
* simplify special reward handling
* fix test_intermediate_integration
* fix sokoban dataset
* add common dataset score_answer consistency test
  • Loading branch information
andreaskoepf authored Mar 4, 2025
1 parent 061282e commit 5d7fbac
Show file tree
Hide file tree
Showing 106 changed files with 394 additions and 498 deletions.
4 changes: 4 additions & 0 deletions reasoning_gym/algebra/complex_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ def parse_string_to_complex(answer: str) -> complex:
# Normalize the answer string by removing spaces and converting to lowercase
answer = answer.replace(" ", "").lower()

# remove brackets
while len(answer) > 1 and answer[0] == "(" and answer[-1] == ")":
answer = answer[1:-1]

# Convert mathematical notation 'i' to Python's 'j' for complex numbers
answer = answer.replace("i", "j")

Expand Down
15 changes: 6 additions & 9 deletions reasoning_gym/algebra/intermediate_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,10 @@ def __init__(self, config: IntermediateIntegrationConfig):
"Evaluate the indefinite integral: ∫ {integrand} dx",
]
self.added_instruction = """
In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
3. Use `exp(x)` or `E**(x)` for the exponential function (i.e. use capital E for Euler's number).
"""

def _get_outer_constant(self, rng: random.Random) -> int:
Expand Down Expand Up @@ -245,7 +246,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
"""Determine if the solution provided solves the problem"""
reward = 0.0
metadata = entry["metadata"]
if answer is not None:
if isinstance(answer, str):
try:
var = metadata["variable"]
x = sympy.Symbol(var)
Expand All @@ -258,12 +259,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Check mathematical equivalence through simplification
if sympy.simplify(derivative - integrand) == 0:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
22 changes: 13 additions & 9 deletions reasoning_gym/algebra/polynomial_equations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ class PolynomialEquationsConfig:
seed: Optional[int] = None
size: int = 500
# reward function hyperparameters
penalty_missing_factor = 0.1
penalty_extra_factor = 0.05
penalty_missing_factor = 0.5
penalty_extra_factor = 0.5
exp_distance_factor = -10.0

def validate(self) -> None:
"""Validate configuration parameters."""
Expand Down Expand Up @@ -62,12 +63,15 @@ def __init__(self, config: PolynomialEquationsConfig):
"Solve the polynomial equation for real {variable}:\n{polynomial_expanded} = 0",
]
self.added_instruction = """
In solving the equations, please abide by the following instruction:
## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc.
## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005".
## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "".
## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers.
## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p.
In solving equations, please follow these instructions:
1. Provide all answers as comma-separated decimal values. For example: "-0.3773, 0.4005"
2. For solutions that can be expressed in exact form (like "u = 2 + sqrt(4560)/172" and "u = 2 - sqrt(4560)/172"), convert them to decimal form in your final answer.
3. If there are no real values that satisfy the equation, report your answer as an empty string: ""
4. Format your answer based on the number of solutions:
- For 1 solution: a single decimal number
- For 2 solutions: two comma-separated decimal numbers
- For 3 or more solutions: all values as comma-separated decimal numbers
5. Round all decimal values to 4 decimal places (rounding down when the 5th decimal place is 5 or greater).
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -238,7 +242,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Remove matched oracle solution
oracle_solutions.pop(matched_distance_index)
# Exponential decay reward
total_reward += math.exp(-matched_distance)
total_reward += math.exp(matched_distance * self.config.exp_distance_factor)
else:
# Extra predicted solution
extra_solutions += 1
Expand Down
18 changes: 6 additions & 12 deletions reasoning_gym/algebra/polynomial_multiplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ def __init__(self, config: PolynomialMultiplicationConfig):
"Calculate the following: {polynomial_expr}",
]
self.added_instruction = """
In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers.
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -106,10 +106,9 @@ def __getitem__(self, idx: int) -> dict:

return {
"question": question,
"answer": product,
"answer": str(product),
"metadata": {
"polynomial_expr": str(polynomial_expr),
"result": str(product),
"variables": list(product.free_symbols),
},
}
Expand Down Expand Up @@ -147,21 +146,16 @@ def _generate_polynomial(self, rng: random.Random, monomials: Optional[list]):

def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 0.0
metadata = entry["metadata"]
if answer is not None:
try:
predicted_poly = sp.parse_expr(answer)
target_poly = sp.parse_expr(metadata["result"])
target_poly = sp.parse_expr(entry["answer"])

# Check whether the two parsed expressions compare equal with == (no simplification of the difference is performed).
if predicted_poly == target_poly:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except Exception:
reward = 0.01
reward = 0.0
return reward


Expand Down
12 changes: 4 additions & 8 deletions reasoning_gym/algebra/simple_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def __init__(self, config: SimpleIntegrationConfig):
"Evaluate the indefinite integral: ∫ {integrand} dx",
]
self.added_instruction = """
In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -103,12 +103,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Check mathematical equivalence through simplification
if sympy.simplify(derivative - integrand) == 0:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
7 changes: 2 additions & 5 deletions reasoning_gym/algorithmic/ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,9 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if answer == None:
return 0.0
if answer != entry["answer"]:
return 0.01
else:
if answer == entry["answer"]:
return 1.0 # Yay
return 0.0


# Register the dataset
Expand Down
6 changes: 3 additions & 3 deletions reasoning_gym/algorithmic/binary_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# check if answer is python list of lists (NOTE(review): eval() executes arbitrary code from the answer string; consider ast.literal_eval)
answer = self._matrix_to_str(eval(answer))
if answer == oracle_answer:
return 0.5
except Exception as e:
return 0.01
return 0.1
except Exception:
return 0.0
return 0.0

def __getitem__(self, idx: int) -> dict:
Expand Down
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/cryptarithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
Returns:
float: The computed score between 0.0 and 1.0.
"""
if not answer:
if not isinstance(answer, str):
return 0.0

correct_mapping = {}
Expand Down
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/game_of_life.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
ans_arr = json.loads(answer)
correct_arr = json.loads(entry["answer"])
except Exception:
return 0.01
return 0.0

total_cells = 0
correct_cells = 0
Expand Down
9 changes: 5 additions & 4 deletions reasoning_gym/algorithmic/graph_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,13 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
try:
danswer = json.loads(answer)
solved, failure = verify_graph_coloring_solution(entry["metadata"]["puzzle"], danswer)
if not solved:
return 0.01 # json was parsable but solution incorrect
else:
if solved:
return 1.0 # Yay
else:
return 0.01 # json parsable
except Exception:
return 0.0
pass
return 0.0


register_dataset("graph_color", GraphColorDataset, GraphColorConfig)
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/group_anagrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
if answer_str == oracle_str:
reward = 1.0
else:
reward = 0.01
reward = 0.01 # json parsable
except Exception:
reward = 0.0
return reward
Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/jugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,11 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
danswer = json.loads(answer)
valid, _ = verify_solution(entry["metadata"]["puzzle"], danswer)
if not valid:
return 0.01
return 0.01 # json parsable
else:
return 1.0 # Yay
except Exception as e:
return 0.01
return 0.0


register_dataset("jugs", JugsDataset, JugsConfig)
18 changes: 8 additions & 10 deletions reasoning_gym/algorithmic/letter_jumble.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def partial(self, expected_answer, model_answer):

# Each word in the expected answer is worth an equal fraction of 1.0
total_words = len(expected_words)
score_per_word = 1.0 / total_words if total_words else 0
score_per_word = 1.0 / total_words if total_words > 0 else 0

# Calculate scores word by word
scores = []
Expand All @@ -142,18 +142,16 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if not answer:
if not isinstance(answer, str):
return 0.0

oracle_answer = entry["answer"].strip().lower()
if answer:
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score
return 0.01
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score


register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
2 changes: 0 additions & 2 deletions reasoning_gym/algorithmic/manipulate_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,6 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:

if oracle_answer in answer:
return len(oracle_answer) / len(answer)
else:
return 0.01

return 0.0

Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/palindrome_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
- Correct answer (palindrome with only correct letters in the correct quantities) gives 1.0
- An answer that is a palindrome, but not with the same letters as provided, gives 0.05
- An answer that is a string, but not a palindrome gives 0.02
- An empty string gives 0.01.
- An empty string gives 0.0
- None gives 0.0.
"""
if answer is None or not isinstance(answer, str):
return 0.0 # No answer given

if answer == "":
return 0.01
return 0.0

metadata = entry["metadata"]
answer = answer.strip().lower()
Expand Down
3 changes: 1 addition & 2 deletions reasoning_gym/algorithmic/palindrome_partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
oracle = self.to_set_of_tuples(entry["metadata"]["solution"])
if answer == oracle:
return 1.0
return 0.01
except Exception:
return 0.0
pass
return 0.0

def _generate_palindrome_letters(self, rng: Random, length: int) -> list[str]:
Expand Down
4 changes: 1 addition & 3 deletions reasoning_gym/algorithmic/pool_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _average_pool(self, matrix: np.ndarray, pool_size: int) -> np.ndarray:
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
"""Score the answer based on the metadata"""

if not answer:
if not isinstance(answer, str):
return 0.0

reward = 0.0
Expand All @@ -91,8 +91,6 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 1.0
elif oracle_answer.shape == answer.shape:
reward = 0.1
else:
reward = 0.01
except Exception:
pass
return reward
Expand Down
12 changes: 5 additions & 7 deletions reasoning_gym/algorithmic/ransom_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,12 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if answer == None:
return 0.0
if isinstance(answer, str):
s_answer = answer.strip()
if s_answer == str(entry["answer"]):
return 1.0

s_answer = answer.strip()
if not s_answer == str(entry["answer"]):
return 0.01
else:
return 1.0
return 0.0


register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig)
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/sentence_reordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
else:
reward = 0.05
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/spell_backward.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ def __getitem__(self, idx: int) -> dict:
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 0.0
expected_answer = entry["answer"]
if answer is not None:
if isinstance(answer, str):
try:
if expected_answer.lower() == answer.lower():
reward = 1.0
else:
reward = 0.05
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
Loading

0 comments on commit 5d7fbac

Please sign in to comment.