From 057b9f203481356d7f5386a9dcec9a0dc560b464 Mon Sep 17 00:00:00 2001 From: Andreas Koepf Date: Sun, 2 Feb 2025 22:18:54 +0100 Subject: [PATCH] auto-load simple/intermediate integration tasks, stable order for n_queens (set was not stable) --- GALLERY.md | 113 ++++++++++++++++++++++++++++-- reasoning_gym/algebra/__init__.py | 10 ++- reasoning_gym/games/n_queens.py | 13 ++-- tests/test_n_queens.py | 6 +- 4 files changed, 127 insertions(+), 15 deletions(-) diff --git a/GALLERY.md b/GALLERY.md index e134f23c..be5a3a65 100644 --- a/GALLERY.md +++ b/GALLERY.md @@ -17,6 +17,7 @@ This gallery shows examples from all available datasets using their default conf - [fraction_simplification](#fraction_simplification) - [game_of_life](#game_of_life) - [gcd](#gcd) +- [intermediate_integration](#intermediate_integration) - [lcm](#lcm) - [leg_counting](#leg_counting) - [letter_counting](#letter_counting) @@ -35,6 +36,7 @@ This gallery shows examples from all available datasets using their default conf - [sentence_reordering](#sentence_reordering) - [simple_equations](#simple_equations) - [simple_geometry](#simple_geometry) +- [simple_integration](#simple_integration) - [spell_backward](#spell_backward) - [sudoku](#sudoku) - [syllogism](#syllogism) @@ -746,6 +748,48 @@ Metadata: {'numbers': [297, 30], 'result': 3} ```` +### intermediate_integration +Generates intermediate integration problem - either + by substitution or by parts + +Default configuration: +```python +problem_types = ('substitution', 'by_parts') +substitution_types = ('linear', 'trigonometric', 'exponential', 'radical') +by_parts_types = ('polynomial_exp_trig', 'log_inverse_trig', 'cyclic', 'repeated_parts') +seed = 42 +size = 500 +linear_lower_bound = 1 +linear_upper_bound = 10 +min_linear_degree = 2 +max_linear_degree = 4 +outer_constant_min = 1 +outer_constant_max = 3 +min_poly_degree = 1 +max_poly_degree = 3 +symbols = ('x', 'X') +operators = ('+', '-') +``` + +Example tasks: +```` +Example 1: +Question: Find 
the indefinite integral: ∫ -3*exp(3*x + 9) dx +Answer: -exp(3*x + 9) + C +Metadata: {'integrand': '-3*exp(3*x + 9)', 'problem_type': 'substitution', 'variable': 'x', 'type': 'exponential', 'expected_answer_expression': -exp(3*x + 9)} + +Example 2: +Question: Evaluate the indefinite integral: ∫ -6*sin(2*X + 10)*cos(2*X + 10)**4 dx +Answer: 3*cos(2*X + 10)**5/5 + C +Metadata: {'integrand': '-6*sin(2*X + 10)*cos(2*X + 10)**4', 'problem_type': 'substitution', 'variable': 'X', 'type': 'trigonometric', 'expected_answer_expression': 3*cos(2*X + 10)**5/5} + +Example 3: +Question: Find the indefinite integral: ∫ 2*asin(x) dx +Answer: 2*Integral(asin(x), x) + C +Metadata: {'integrand': '2*asin(x)', 'problem_type': 'by_parts', 'variable': 'x', 'type': 'log_inverse_trig', 'expected_answer_expression': 2*Integral(asin(x), x)} + +```` + ### lcm Generates Least Common Multiple (LCM) tasks @@ -1030,8 +1074,15 @@ No two queens attack each other if they are not in the same row, column, or diag Place a queen by replacing an underscore (_) with a Q. 
-Answer: {'_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _'} -Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 1} +Answer: _ _ _ _ _ _ Q _ +_ Q _ _ _ _ _ _ +_ _ _ Q _ _ _ _ +Q _ _ _ _ _ _ _ +_ _ _ _ _ _ _ Q +_ _ _ _ Q _ _ _ +_ _ Q _ _ _ _ _ +_ _ _ _ _ Q _ _ +Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solutions': [[['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 1, 'valid_answers': ['_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _']} Example 2: Question: Solve this N Queens puzzle: @@ -1050,8 +1101,15 @@ No two queens 
attack each other if they are not in the same row, column, or diag Place a queen by replacing an underscore (_) with a Q. -Answer: {'_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ Q _ _\n_ _ _ _ _ _ _ Q\n_ _ Q _ _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _'} -Metadata: {'puzzle': [['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']], 'solution': [[['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']]], 'num_removed': 3} +Answer: _ Q _ _ _ _ _ _ +_ _ _ Q _ _ _ _ +_ _ _ _ _ Q _ _ +_ _ _ _ _ _ _ Q +_ _ Q _ _ _ _ _ +Q _ _ _ _ _ _ _ +_ _ _ _ _ _ Q _ +_ _ _ _ Q _ _ _ +Metadata: {'puzzle': [['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']], 'solutions': [[['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']]], 'num_removed': 3, 'valid_answers': ['_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ Q _ _\n_ _ _ _ _ _ _ Q\n_ _ Q _ _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ 
_ _ _ _ Q _\n_ _ _ _ Q _ _ _']} Example 3: Question: Solve this N Queens puzzle: @@ -1070,8 +1128,15 @@ No two queens attack each other if they are not in the same row, column, or diag Place a queen by replacing an underscore (_) with a Q. -Answer: {'_ _ _ _ _ _ _ Q\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ Q _ _ _\n_ Q _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\nQ _ _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _'} -Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', 
'_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 5} +Answer: _ _ _ _ Q _ _ _ +_ Q _ _ _ _ _ _ +_ _ _ _ _ _ _ Q +Q _ _ _ _ _ _ _ +_ _ _ Q _ _ _ _ +_ _ _ _ _ _ Q _ +_ _ Q _ _ _ _ _ +_ _ _ _ _ Q _ _ +Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solutions': [[['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 5, 'valid_answers': ['_ _ _ _ Q _ _ _\n_ Q _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\nQ _ _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ _ Q\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ 
Q _\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _']} ```` @@ -1512,6 +1577,42 @@ Metadata: {'n_sides': 6, 'known_angles': [143.0, 148.0, 39.0, 55.0, 107.0], 'sum ```` +### simple_integration +Generates simple integration problems with one variable + +Default configuration: +```python +min_terms = 2 +max_terms = 5 +min_degree = 1 +max_degree = 10 +min_bounds = 1 +max_bounds = 10 +operators = ('+', '-') +symbols = ('x', 'X') +seed = 42 +size = 500 +``` + +Example tasks: +```` +Example 1: +Question: Find the indefinite integral: ∫ 70*x**6 + 12*x**2/5 dx +Answer: 10*x**7 + 4*x**3/5 + C +Metadata: {'integrand': '70*x**6 + 12*x**2/5', 'variable': 'x', 'expected_answer_expression': 10*x**7 + 4*x**3/5} + +Example 2: +Question: Find the indefinite integral: ∫ 49*x**6/10 + 48*x**5 - 4*x - 10/9 dx +Answer: 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9 + C +Metadata: {'integrand': '49*x**6/10 + 48*x**5 - 4*x - 10/9', 'variable': 'x', 'expected_answer_expression': 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9} + +Example 3: +Question: Find the indefinite integral: ∫ -28*X**3 + 8*X dx +Answer: -7*X**4 + 4*X**2 + C +Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expression': -7*X**4 + 4*X**2} + +```` + ### spell_backward Generates tasks to spell words backward diff --git a/reasoning_gym/algebra/__init__.py b/reasoning_gym/algebra/__init__.py index 69d4b91e..fc7a867a 100644 --- a/reasoning_gym/algebra/__init__.py +++ b/reasoning_gym/algebra/__init__.py @@ -1,9 +1,15 @@ +from .intermediate_integration import IntermediateIntegrationConfig, IntermediateIntegrationDataset from .polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsDataset from .simple_equations import SimpleEquationsConfig, SimpleEquationsDataset +from .simple_integration import SimpleIntegrationConfig, SimpleIntegrationDataset __all__ = [ - "SimpleEquationsDataset", - "SimpleEquationsConfig", + "IntermediateIntegrationConfig", + "IntermediateIntegrationDataset", 
"PolynomialEquationsConfig", "PolynomialEquationsDataset", + "SimpleEquationsDataset", + "SimpleEquationsConfig", + "SimpleIntegrationConfig", + "SimpleIntegrationDataset", ] diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py index 0af85a0d..1fef6c62 100644 --- a/reasoning_gym/games/n_queens.py +++ b/reasoning_gym/games/n_queens.py @@ -136,16 +136,21 @@ def __getitem__(self, idx: int) -> dict: # Filter all solutions that are intractable from the puzzle's starting state valid_solutions = [board for board in self._solutions if self._is_tractable_solution(puzzle, board)] - valid_solutions_str = {self._board_to_string(board) for board in valid_solutions} + valid_solutions_str = sorted({self._board_to_string(board) for board in valid_solutions}) return { "question": QUESTION_TEMPLATE.format(puzzle=puzzle_str, n=len(puzzle), num_removed=num_removed), - "answer": valid_solutions_str, - "metadata": {"puzzle": puzzle, "solution": valid_solutions, "num_removed": num_removed}, + "answer": rng.choice(valid_solutions_str), # choose arbitrary answer (e.g. 
for SFT) + "metadata": { + "puzzle": puzzle, + "solutions": valid_solutions, + "num_removed": num_removed, + "valid_answers": valid_solutions_str, + }, } def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float: - valid_solutions = entry["answer"] + valid_solutions = entry["metadata"]["valid_answers"] reward = 0.0 if answer is not None: if answer in valid_solutions: diff --git a/tests/test_n_queens.py b/tests/test_n_queens.py index 946685be..16911220 100644 --- a/tests/test_n_queens.py +++ b/tests/test_n_queens.py @@ -110,15 +110,15 @@ def test_nqueens_score_answer(): # Test a few items for i in range(len(dataset)): item = dataset[i] - + # Test correct answer gets score 1.0 valid_answer = item["metadata"]["valid_answers"][0] assert dataset.score_answer(valid_answer, item) == 1.0 - + # Test invalid answer gets score 0.01 invalid_answer = "_ _ _ _\n_ _ _ _\n_ _ _ _\n_ _ _ _" assert dataset.score_answer(invalid_answer, item) == 0.01 - + # Test None answer gets score 0.0 assert dataset.score_answer(None, item) == 0.0