Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor question template & score_answer improvements #261

Merged
merged 11 commits
Mar 4, 2025
4 changes: 4 additions & 0 deletions reasoning_gym/algebra/complex_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ def parse_string_to_complex(answer: str) -> complex:
# Normalize the answer string by removing spaces and converting to lowercase
answer = answer.replace(" ", "").lower()

# remove brackets
while len(answer) > 1 and answer[0] == "(" and answer[-1] == ")":
answer = answer[1:-1]

# Convert mathematical notation 'i' to Python's 'j' for complex numbers
answer = answer.replace("i", "j")

Expand Down
15 changes: 6 additions & 9 deletions reasoning_gym/algebra/intermediate_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,10 @@ def __init__(self, config: IntermediateIntegrationConfig):
"Evaluate the indefinite integral: ∫ {integrand} dx",
]
self.added_instruction = """
In addition, when doing calculation, use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
3. Use `exp(x)` or `E**(x)` for the exponential function (i.e. use capital E for Euler's number).
"""

def _get_outer_constant(self, rng: random.Random) -> int:
Expand Down Expand Up @@ -245,7 +246,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
"""Determine if the solution provided solves the problem"""
reward = 0.0
metadata = entry["metadata"]
if answer is not None:
if isinstance(answer, str):
try:
var = metadata["variable"]
x = sympy.Symbol(var)
Expand All @@ -258,12 +259,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Check mathematical equivalence through simplification
if sympy.simplify(derivative - integrand) == 0:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
22 changes: 13 additions & 9 deletions reasoning_gym/algebra/polynomial_equations.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ class PolynomialEquationsConfig:
seed: Optional[int] = None
size: int = 500
# reward function hyperparameters
penalty_missing_factor = 0.1
penalty_extra_factor = 0.05
penalty_missing_factor = 0.5
penalty_extra_factor = 0.5
exp_distance_factor = -10.0

def validate(self) -> None:
"""Validate configuration parameters."""
Expand Down Expand Up @@ -62,12 +63,15 @@ def __init__(self, config: PolynomialEquationsConfig):
"Solve the polynomial equation for real {variable}:\n{polynomial_expanded} = 0",
]
self.added_instruction = """
In solving the equations, please abide by the following instruction:
## 1. All answers should be comma-separated. For example "-0.3773, 0.4005" etc.
## 2. In cases where your answer is b = 2 + sqrt(4560) / 172 and b = 2 - sqrt(4560) / 172. Since b can be 2 numbers, resolve your answer like this instead, "-0.3773, 0.4005".
## 3. If there are no real values of i that satisfy the equation, report your answer as empty string, "".
## 4. If there are 2 answers, resolve the answers as comma-separated floats of 2 numbers, if 3 answers, make it comma-separated floats of 3 numbers.
## 5. Resolve all numbers as floats in the string of comma-separated numbers. Round the floats higher than 4 decimal place(d.p) down to 4 d.p.
In solving equations, please follow these instructions:
1. Provide all answers as comma-separated decimal values. For example: "-0.3773, 0.4005"
2. For solutions that can be expressed in exact form (like "u = 2 + sqrt(4560)/172" and "u = 2 - sqrt(4560)/172"), convert them to decimal form in your final answer.
3. If there are no real values that satisfy the equation, report your answer as an empty string: ""
4. Format your answer based on the number of solutions:
- For 1 solution: a single decimal number
- For 2 solutions: two comma-separated decimal numbers
- For 3 or more solutions: all values as comma-separated decimal numbers
5. Round all decimal values to 4 decimal places (rounding down when the 5th decimal place is 5 or greater).
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -238,7 +242,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Remove matched oracle solution
oracle_solutions.pop(matched_distance_index)
# Exponential decay reward
total_reward += math.exp(-matched_distance)
total_reward += math.exp(matched_distance * self.config.exp_distance_factor)
else:
# Extra predicted solution
extra_solutions += 1
Expand Down
18 changes: 6 additions & 12 deletions reasoning_gym/algebra/polynomial_multiplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ def __init__(self, config: PolynomialMultiplicationConfig):
"Calculate the following: {polynomial_expr}",
]
self.added_instruction = """
In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps and even in reporting answers.
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -106,10 +106,9 @@ def __getitem__(self, idx: int) -> dict:

return {
"question": question,
"answer": product,
"answer": str(product),
"metadata": {
"polynomial_expr": str(polynomial_expr),
"result": str(product),
"variables": list(product.free_symbols),
},
}
Expand Down Expand Up @@ -147,21 +146,16 @@ def _generate_polynomial(self, rng: random.Random, monomials: Optional[list]):

def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 0.0
metadata = entry["metadata"]
if answer is not None:
try:
predicted_poly = sp.parse_expr(answer)
target_poly = sp.parse_expr(metadata["result"])
target_poly = sp.parse_expr(entry["answer"])

# Check if the difference simplifies to zero (i.e. they are equivalent).
if predicted_poly == target_poly:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except Exception:
reward = 0.01
reward = 0.0
return reward


Expand Down
12 changes: 4 additions & 8 deletions reasoning_gym/algebra/simple_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def __init__(self, config: SimpleIntegrationConfig):
"Evaluate the indefinite integral: ∫ {integrand} dx",
]
self.added_instruction = """
In addition, When doing calculation, Use the following instructions together with your mathematical ingenuity to solve the integral problems
## 1. Use ** instead ^ to represent powers. For example 7*X**2 instead of 7*X^2.
## 2. Always use * when doing all sorts of multiplcation in your reasoning steps. For example Use [-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C] instead of [-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C].
When performing calculations, please follow these guidelines:
1. Use ** instead of ^ to represent exponents. For example, write 7*X**2 instead of 7*X^2.
2. Always include the * symbol for all multiplication operations in your reasoning steps. For example, write `-3*X**3*sin(X) - 9*X**2*cos(X) + 18*X*sin(X) + 18*cos(X) + C` instead of `-3x3sin(x) - 9x2cos(x) + 18xsin(x) + 18cos(x) + C`.
"""
super().__init__(config=config, seed=config.seed, size=config.size)

Expand Down Expand Up @@ -103,12 +103,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# Check mathematical equivalence through simplification
if sympy.simplify(derivative - integrand) == 0:
reward = 1.0
elif answer.strip():
reward = 0.05
else:
reward = 0.01
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
7 changes: 2 additions & 5 deletions reasoning_gym/algorithmic/ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,9 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if answer == None:
return 0.0
if answer != entry["answer"]:
return 0.01
else:
if answer == entry["answer"]:
return 1.0 # Yay
return 0.0


# Register the dataset
Expand Down
6 changes: 3 additions & 3 deletions reasoning_gym/algorithmic/binary_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
# check if answer is python list of lists
answer = self._matrix_to_str(eval(answer))
if answer == oracle_answer:
return 0.5
except Exception as e:
return 0.01
return 0.1
except Exception:
return 0.0
return 0.0

def __getitem__(self, idx: int) -> dict:
Expand Down
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/cryptarithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
Returns:
float: The computed score between 0.0 and 1.0.
"""
if not answer:
if not isinstance(answer, str):
return 0.0

correct_mapping = {}
Expand Down
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/game_of_life.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
ans_arr = json.loads(answer)
correct_arr = json.loads(entry["answer"])
except Exception:
return 0.01
return 0.0

total_cells = 0
correct_cells = 0
Expand Down
9 changes: 5 additions & 4 deletions reasoning_gym/algorithmic/graph_color.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,12 +228,13 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
try:
danswer = json.loads(answer)
solved, failure = verify_graph_coloring_solution(entry["metadata"]["puzzle"], danswer)
if not solved:
return 0.01 # json was parsable but solution incorrect
else:
if solved:
return 1.0 # Yay
else:
return 0.01 # json parsable
except Exception:
return 0.0
pass
return 0.0


register_dataset("graph_color", GraphColorDataset, GraphColorConfig)
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/group_anagrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
if answer_str == oracle_str:
reward = 1.0
else:
reward = 0.01
reward = 0.01 # json parsable
except Exception:
reward = 0.0
return reward
Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/jugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,11 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
danswer = json.loads(answer)
valid, _ = verify_solution(entry["metadata"]["puzzle"], danswer)
if not valid:
return 0.01
return 0.01 # json parsable
else:
return 1.0 # Yay
except Exception as e:
return 0.01
return 0.0


register_dataset("jugs", JugsDataset, JugsConfig)
18 changes: 8 additions & 10 deletions reasoning_gym/algorithmic/letter_jumble.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def partial(self, expected_answer, model_answer):

# Each word in the expected answer is worth an equal fraction of 1.0
total_words = len(expected_words)
score_per_word = 1.0 / total_words if total_words else 0
score_per_word = 1.0 / total_words if total_words > 0 else 0

# Calculate scores word by word
scores = []
Expand All @@ -142,18 +142,16 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if not answer:
if not isinstance(answer, str):
return 0.0

oracle_answer = entry["answer"].strip().lower()
if answer:
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score
return 0.01
answer = answer.strip().lower()
if answer == oracle_answer:
return 1.0 # Perfect score!
else:
partial_score = self.partial(oracle_answer, answer)
return partial_score


register_dataset("letter_jumble", LetterJumbleDataset, LetterJumbleConfig)
2 changes: 0 additions & 2 deletions reasoning_gym/algorithmic/manipulate_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,6 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:

if oracle_answer in answer:
return len(oracle_answer) / len(answer)
else:
return 0.01

return 0.0

Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/palindrome_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
- Correct answer (palindrome with only correct letters in the correct quantities) gives 1.0
- An answer that is a palindrome, but not with the same letters as provided, gives 0.05
- An answer that is a string, but not a palindrome gives 0.02
- An empty string gives 0.01.
- An empty string gives 0.0
- None gives 0.0.
"""
if answer is None or not isinstance(answer, str):
return 0.0 # No answer given

if answer == "":
return 0.01
return 0.0

metadata = entry["metadata"]
answer = answer.strip().lower()
Expand Down
3 changes: 1 addition & 2 deletions reasoning_gym/algorithmic/palindrome_partitioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,8 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
oracle = self.to_set_of_tuples(entry["metadata"]["solution"])
if answer == oracle:
return 1.0
return 0.01
except Exception:
return 0.0
pass
return 0.0

def _generate_palindrome_letters(self, rng: Random, length: int) -> list[str]:
Expand Down
4 changes: 1 addition & 3 deletions reasoning_gym/algorithmic/pool_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _average_pool(self, matrix: np.ndarray, pool_size: int) -> np.ndarray:
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
"""Score the answer based on the metadata"""

if not answer:
if not isinstance(answer, str):
return 0.0

reward = 0.0
Expand All @@ -91,8 +91,6 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 1.0
elif oracle_answer.shape == answer.shape:
reward = 0.1
else:
reward = 0.01
except Exception:
pass
return reward
Expand Down
12 changes: 5 additions & 7 deletions reasoning_gym/algorithmic/ransom_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,12 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
float: The computed score between 0.0 and 1.0.
"""

if answer == None:
return 0.0
if isinstance(answer, str):
s_answer = answer.strip()
if s_answer == str(entry["answer"]):
return 1.0

s_answer = answer.strip()
if not s_answer == str(entry["answer"]):
return 0.01
else:
return 1.0
return 0.0


register_dataset("ransom_note", RansomNoteDataset, RansomNoteConfig)
2 changes: 1 addition & 1 deletion reasoning_gym/algorithmic/sentence_reordering.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
else:
reward = 0.05
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
4 changes: 2 additions & 2 deletions reasoning_gym/algorithmic/spell_backward.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ def __getitem__(self, idx: int) -> dict:
def score_answer(self, answer: Optional[str], entry: dict[str, Any]) -> float:
reward = 0.0
expected_answer = entry["answer"]
if answer is not None:
if isinstance(answer, str):
try:
if expected_answer.lower() == answer.lower():
reward = 1.0
else:
reward = 0.05
except:
reward = 0.01
reward = 0.0
return reward


Expand Down
Loading