auto-load simple/intermediate integration tasks, stable order for n_queens (set was not stable)
open-thought · Feb 2, 2025 · 057b9f2 · 057b9f2
1 parent 7517738
commit 057b9f2
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 15 deletions.
diff --git a/GALLERY.md b/GALLERY.md
@@ -17,6 +17,7 @@ This gallery shows examples from all available datasets using their default conf
 - [fraction_simplification](#fraction_simplification)
 - [game_of_life](#game_of_life)
 - [gcd](#gcd)
+- [intermediate_integration](#intermediate_integration)
 - [lcm](#lcm)
 - [leg_counting](#leg_counting)
 - [letter_counting](#letter_counting)
@@ -35,6 +36,7 @@ This gallery shows examples from all available datasets using their default conf
 - [sentence_reordering](#sentence_reordering)
 - [simple_equations](#simple_equations)
 - [simple_geometry](#simple_geometry)
+- [simple_integration](#simple_integration)
 - [spell_backward](#spell_backward)
 - [sudoku](#sudoku)
 - [syllogism](#syllogism)
@@ -746,6 +748,48 @@ Metadata: {'numbers': [297, 30], 'result': 3}
 
 ````
 
+### intermediate_integration
+Generates intermediate integration problem - either
+    by substitution or by parts
+
+Default configuration:
+```python
+problem_types = ('substitution', 'by_parts')
+substitution_types = ('linear', 'trigonometric', 'exponential', 'radical')
+by_parts_types = ('polynomial_exp_trig', 'log_inverse_trig', 'cyclic', 'repeated_parts')
+seed = 42
+size = 500
+linear_lower_bound = 1
+linear_upper_bound = 10
+min_linear_degree = 2
+max_linear_degree = 4
+outer_constant_min = 1
+outer_constant_max = 3
+min_poly_degree = 1
+max_poly_degree = 3
+symbols = ('x', 'X')
+operators = ('+', '-')
+```
+
+Example tasks:
+````
+Example 1:
+Question: Find the indefinite integral: ∫ -3*exp(3*x + 9) dx
+Answer: -exp(3*x + 9) + C
+Metadata: {'integrand': '-3*exp(3*x + 9)', 'problem_type': 'substitution', 'variable': 'x', 'type': 'exponential', 'expected_answer_expression': -exp(3*x + 9)}
+
+Example 2:
+Question: Evaluate the indefinite integral: ∫ -6*sin(2*X + 10)*cos(2*X + 10)**4 dx
+Answer: 3*cos(2*X + 10)**5/5 + C
+Metadata: {'integrand': '-6*sin(2*X + 10)*cos(2*X + 10)**4', 'problem_type': 'substitution', 'variable': 'X', 'type': 'trigonometric', 'expected_answer_expression': 3*cos(2*X + 10)**5/5}
+
+Example 3:
+Question: Find the indefinite integral: ∫ 2*asin(x) dx
+Answer: 2*Integral(asin(x), x) + C
+Metadata: {'integrand': '2*asin(x)', 'problem_type': 'by_parts', 'variable': 'x', 'type': 'log_inverse_trig', 'expected_answer_expression': 2*Integral(asin(x), x)}
+
+````
+
 ### lcm
 Generates Least Common Multiple (LCM) tasks
 
@@ -1030,8 +1074,15 @@ No two queens attack each other if they are not in the same row, column, or diag
 
 Place a queen by replacing an underscore (_) with a Q.
 
-Answer: {'_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _'}
-Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 1}
+Answer: _ _ _ _ _ _ Q _
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+_ _ _ _ Q _ _ _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solutions': [[['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 1, 'valid_answers': ['_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _']}
 
 Example 2:
 Question: Solve this N Queens puzzle:
@@ -1050,8 +1101,15 @@ No two queens attack each other if they are not in the same row, column, or diag
 
 Place a queen by replacing an underscore (_) with a Q.
 
-Answer: {'_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ Q _ _\n_ _ _ _ _ _ _ Q\n_ _ Q _ _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _'}
-Metadata: {'puzzle': [['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']], 'solution': [[['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']]], 'num_removed': 3}
+Answer: _ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+_ _ _ _ _ Q _ _
+_ _ _ _ _ _ _ Q
+_ _ Q _ _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ _ _ Q _ _ _
+Metadata: {'puzzle': [['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']], 'solutions': [[['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']]], 'num_removed': 3, 'valid_answers': ['_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ Q _ _\n_ _ _ _ _ _ _ Q\n_ _ Q _ _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _']}
 
 Example 3:
 Question: Solve this N Queens puzzle:
@@ -1070,8 +1128,15 @@ No two queens attack each other if they are not in the same row, column, or diag
 
 Place a queen by replacing an underscore (_) with a Q.
 
-Answer: {'_ _ _ _ _ _ _ Q\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ Q _ _ _\n_ Q _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\nQ _ _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _'}
-Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 5}
+Answer: _ _ _ _ Q _ _ _
+_ Q _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+Q _ _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solutions': [[['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 5, 'valid_answers': ['_ _ _ _ Q _ _ _\n_ Q _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\nQ _ _ _ _ _ _ _\n_ _ _ Q _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ Q _\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ _ Q\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _', '_ _ _ _ _ _ _ Q\n_ Q _ _ _ _ _ _\n_ _ _ Q _ _ _ _\nQ _ _ _ _ _ _ _\n_ _ _ _ _ _ Q _\n_ _ _ _ Q _ _ _\n_ _ Q _ _ _ _ _\n_ _ _ _ _ Q _ _']}
 
 ````
 
@@ -1512,6 +1577,42 @@ Metadata: {'n_sides': 6, 'known_angles': [143.0, 148.0, 39.0, 55.0, 107.0], 'sum
 
 ````
 
+### simple_integration
+Generates simple integration problems with one variable
+
+Default configuration:
+```python
+min_terms = 2
+max_terms = 5
+min_degree = 1
+max_degree = 10
+min_bounds = 1
+max_bounds = 10
+operators = ('+', '-')
+symbols = ('x', 'X')
+seed = 42
+size = 500
+```
+
+Example tasks:
+````
+Example 1:
+Question: Find the indefinite integral: ∫ 70*x**6 + 12*x**2/5 dx
+Answer: 10*x**7 + 4*x**3/5 + C
+Metadata: {'integrand': '70*x**6 + 12*x**2/5', 'variable': 'x', 'expected_answer_expression': 10*x**7 + 4*x**3/5}
+
+Example 2:
+Question: Find the indefinite integral: ∫ 49*x**6/10 + 48*x**5 - 4*x - 10/9 dx
+Answer: 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9 + C
+Metadata: {'integrand': '49*x**6/10 + 48*x**5 - 4*x - 10/9', 'variable': 'x', 'expected_answer_expression': 7*x**7/10 + 8*x**6 - 2*x**2 - 10*x/9}
+
+Example 3:
+Question: Find the indefinite integral: ∫ -28*X**3 + 8*X dx
+Answer: -7*X**4 + 4*X**2 + C
+Metadata: {'integrand': '-28*X**3 + 8*X', 'variable': 'X', 'expected_answer_expression': -7*X**4 + 4*X**2}
+
+````
+
 ### spell_backward
 Generates tasks to spell words backward
 

diff --git a/reasoning_gym/algebra/__init__.py b/reasoning_gym/algebra/__init__.py
@@ -1,9 +1,15 @@
+from .intermediate_integration import IntermediateIntegrationConfig, IntermediateIntegrationDataset
 from .polynomial_equations import PolynomialEquationsConfig, PolynomialEquationsDataset
 from .simple_equations import SimpleEquationsConfig, SimpleEquationsDataset
+from .simple_integration import SimpleIntegrationConfig, SimpleIntegrationDataset
 
 __all__ = [
-    "SimpleEquationsDataset",
-    "SimpleEquationsConfig",
+    "IntermediateIntegrationConfig",
+    "IntermediateIntegrationDataset",
     "PolynomialEquationsConfig",
     "PolynomialEquationsDataset",
+    "SimpleEquationsDataset",
+    "SimpleEquationsConfig",
+    "SimpleIntegrationConfig",
+    "SimpleIntegrationDataset",
 ]
diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py
@@ -136,16 +136,21 @@ def __getitem__(self, idx: int) -> dict:
 
         # Filter all solutions that are intractable from the puzzle's starting state
         valid_solutions = [board for board in self._solutions if self._is_tractable_solution(puzzle, board)]
-        valid_solutions_str = {self._board_to_string(board) for board in valid_solutions}
+        valid_solutions_str = sorted({self._board_to_string(board) for board in valid_solutions})
 
         return {
             "question": QUESTION_TEMPLATE.format(puzzle=puzzle_str, n=len(puzzle), num_removed=num_removed),
-            "answer": valid_solutions_str,
-            "metadata": {"puzzle": puzzle, "solution": valid_solutions, "num_removed": num_removed},
+            "answer": rng.choice(valid_solutions_str),  # choose arbitrary answer (e.g. for SFT)
+            "metadata": {
+                "puzzle": puzzle,
+                "solutions": valid_solutions,
+                "num_removed": num_removed,
+                "valid_answers": valid_solutions_str,
+            },
         }
 
     def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
-        valid_solutions = entry["answer"]
+        valid_solutions = entry["metadata"]["valid_answers"]
         reward = 0.0
         if answer is not None:
             if answer in valid_solutions:

diff --git a/tests/test_n_queens.py b/tests/test_n_queens.py
@@ -110,15 +110,15 @@ def test_nqueens_score_answer():
     # Test a few items
     for i in range(len(dataset)):
         item = dataset[i]
-        
+
         # Test correct answer gets score 1.0
         valid_answer = item["metadata"]["valid_answers"][0]
         assert dataset.score_answer(valid_answer, item) == 1.0
-        
+
         # Test invalid answer gets score 0.01
         invalid_answer = "_ _ _ _\n_ _ _ _\n_ _ _ _\n_ _ _ _"
         assert dataset.score_answer(invalid_answer, item) == 0.01
-        
+
         # Test None answer gets score 0.0
         assert dataset.score_answer(None, item) == 0.0