fix: Unify Prompts (#254)

* remove cot * fix prompt template * fix pool matrix * spiral matrix fixed
open-thought · Mar 3, 2025 · 01e1c8f · 01e1c8f
1 parent 49db4ed
commit 01e1c8f
Show file tree

Hide file tree

Showing 31 changed files with 65 additions and 354 deletions.
diff --git a/reasoning_gym/algorithmic/ab.py b/reasoning_gym/algorithmic/ab.py
@@ -102,19 +102,7 @@ def __getitem__(self, idx: int) -> dict:
     B# #B ... becomes ... nothing
 
 In other words, whenever two neighboring tokens have their '#' facing each other,
-they must be rewritten according to the corresponding rule. For example, the
-first example shown here is computed as:
-
-    B# A# #B #A B# =
-    B# #B A# #A B# =
-    A# #A B# =
-    B#
-
-The steps were:
-1. We replaced `A# #B` by `#B A#`.
-2. We replaced `B# #B` by nothing.
-3. We replaced `A# #A` by nothing.
-The final result was just `B#`.
+they must be rewritten according to the corresponding rule.
 
 Now, consider the following program:
 

diff --git a/reasoning_gym/algorithmic/base_conversion.py b/reasoning_gym/algorithmic/base_conversion.py
@@ -10,19 +10,6 @@
 
 If the target base is > 10, use lowercase letters a-z for digits above 9.
 
-Example:
-- Input: Convert the base-9 number 440 to base-5
-- Output: 2420
-- Explanation
-    - First, we convert the base-9 number 440 to base-10: 4 * 9**2 + 4 * 9**1 + 0 * 9**0 = 324 + 36 + 0 = 360
-    - Next, we convert the base-10 number 360 to base-5:
-        - 360 // 5 = 72 remainder 0
-        - 72 // 5 = 14 remainder 2
-        - 14 // 5 = 2 remainder 4
-        - 2 // 5 = 0 remainder 2
-    - Reading the remainders in reverse order gives us the base-5 number 2 4 2 0
-    - Hence, the final answer is 2420
-
 Now, convert the {source_name} number {source_repr} to {target_name}
 """
 

diff --git a/reasoning_gym/algorithmic/binary_alternation.py b/reasoning_gym/algorithmic/binary_alternation.py
@@ -15,10 +15,6 @@
 
 Any two characters may be swapped, even if they are not adjacent.
 
-Example:
-- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
-- Output: 1
-
 Now, determine the minimum number of swaps to make the following binary string alternating: {string}
 """
 

diff --git a/reasoning_gym/algorithmic/binary_matrix.py b/reasoning_gym/algorithmic/binary_matrix.py
@@ -13,22 +13,7 @@
 
 QUESTION_TEMPLATE = """Given a square matrix, your job is to find the taxicab (Manhattan) distance of the nearest 0 for each cell.
 
-Example:
-- Input: Find the distance to the nearest 0 for each cell in the matrix below:
-0 0 0
-0 1 0
-1 1 1
-- Output:
-0 0 0
-0 1 0
-1 2 1
-- Explanation
-    - Each cell with a 0 has a distance of 0 to itself.
-    - The cell at (1, 1) has a distance of 1 to the nearest 0 (any of the three 0's at (1, 0), (0, 1), (1, 2)).
-    - The cell at (2, 0) has a distance of 1 to the nearest 0 (the 0 at (1, 0)).
-    - The cell at (2, 1) has a distance of 2 to the nearest 0 (any of the two 0's at (1, 0), (1, 2))
-    - The cell at (2, 2) has a distance of 1 to the nearest 0 (the 0 at (1, 2)).
-    - Hence, the final answer is the matrix is the output shown above, where each cell contains the distance to the nearest 0, in the same format as the input matrix.
+The output should be a matrix of the same size as the input matrix, where each cell contains the distance to the nearest 0.
 
 Find the distance to the nearest 0 for each cell in the matrix below:
 {matrix}

diff --git a/reasoning_gym/algorithmic/cryptarithm.py b/reasoning_gym/algorithmic/cryptarithm.py
@@ -17,26 +17,6 @@
 
 from ..factory import ProceduralDataset, register_dataset
 
-EXAMPLE_CASE = """- Input:
-  BASE
-+ BALL
-------
- GAMES
-
-- Output: B=7, A=4, S=8, E=3, L=5, M=9, G=1
-- Explanation:
-    * BASE + BALL = GAMES, two 4-digit numbers sum to 5 digits, so G = 1.
-    * Units: E + L = S (no carry).
-    * Tens: S + L = E + 10 (carry 1). Substitute S = E + L to get E + 2L = E + 10, so L = 5.
-    * Since S = E + 5 and S is one digit, E < 5.
-    * Hundreds: 2A + 1 = M (with carry).
-    * Thousands: 2B = A + 10 (carry makes G = 1). So A = 2B - 10.
-    * Try B = 7: Then A = 4 and M = 2(4) + 1 = 9.
-    * With E < 5, try E = 3: Then S = 8.
-    * Solution: B = 7, A = 4, S = 8, E = 3, L = 5, M = 9, G = 1
-    * Verify: BASE (7483) + BALL (7455) = GAMES (14938).
-"""
-
 
 @dataclass
 class CryptarithmConfig:
@@ -45,7 +25,6 @@ class CryptarithmConfig:
     min_words: int = 2  # Minimum number of addends
     max_words: int = 3  # Maximum number of addends
     allow_leading_zero: bool = False
-    include_example: bool = True
     seed: Optional[int] = None
     size: int = 500  # Number of puzzle instances to generate
 
@@ -189,8 +168,6 @@ def int_to_letter_str(num: int) -> str:
             )
             + 'Provide a comma separated mapping from letters to digits that satisfies the equation in your final answer. Output format: "A=1,B=2,C=3" (without quotes)\n'
         )
-        if self.config.include_example:
-            question_str += "\nHere's an example:\n" + EXAMPLE_CASE
 
         # 8) Create a human-readable answer, e.g. "A=1,B=0,C=9,..."
         sorted_letter_keys = sorted(letter_to_digit.keys())

diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py
@@ -21,12 +21,7 @@
 
 Your job is to group the anagrams together. You can return the answer in any order.
 
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
-    - There is no string in the input that can be rearranged to form "bat".
-    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+The output is a list of lists of strings, where each inner list contains a group of anagrams, e.g. [["eat", "tea"], ["tan", "nat"]].
 
 Group the following list of words into anagrams:
 {words}

diff --git a/reasoning_gym/algorithmic/isomorphic_strings.py b/reasoning_gym/algorithmic/isomorphic_strings.py
@@ -18,19 +18,6 @@
 
 No two characters may map to the same character, but a character may map to itself.
 
-Example 1:
-Input: egg add
-Output: True
-Explanation: The strings s and t can be made identical by:
-    - Mapping 'e' to 'a'.
-    - Mapping 'g' to 'd'.
-
-Example 2:
-Input: foo bar
-Output: False
-Explanation:
-    - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
-
 Return True if the following two strings are isomorphic, or False otherwise:
 {s} {t}
 """

diff --git a/reasoning_gym/algorithmic/letter_jumble.py b/reasoning_gym/algorithmic/letter_jumble.py
@@ -15,20 +15,7 @@
 
 The order of the words in the sentence is preserved. Moreover, the style of the sentence is preserved (i.e. punctuation, capitalization, new lines, etc.).
 
-Example:
-- Input: Unscramble these words: raendgmeins yWh nya hilcd anc od hatt
-- Output: meanderings Why any child can do that
-- Explanation
-    - We unscramble each of the words independently.
-    - raendgmeins -> meanderings
-    - yWh -> Why
-    - nya -> any
-    - hilcd -> child
-    - anc -> can
-    - od -> do
-    - hatt -> that
-    - The final answer is: meanderings Why any child can do that
-    - Notice that the order of the words is preserved, no new words / symbols (e.g. new lines) are added.
+Your output should be a sentence with the words unscrambled.
 
 Now, unscramble these words: {words}
 """

diff --git a/reasoning_gym/algorithmic/palindrome_generation.py b/reasoning_gym/algorithmic/palindrome_generation.py
@@ -11,13 +11,7 @@
 
 If there are multiple possible answers, only respond with one of them. You must use all the letters provided.
 
-Example:
-- Input: Form a valid palindrome using the following letters: a, a, b
-- Output: aba
-- Explanation:
-    - The phrase aba reads the same forwards and backwards.
-    - The output answer is a valid palindrome using all the letters provided.
-    - The answer is a string, rather than a list of characters.
+Your output should be a single string, with no spaces or punctuation.
 
 Now, form a valid palindrome using the following letters: {letters}
 """

diff --git a/reasoning_gym/algorithmic/palindrome_partitioning.py b/reasoning_gym/algorithmic/palindrome_partitioning.py
@@ -18,13 +18,7 @@
 
 You may return all possible palindrome partitionings in any order.
 
-Example:
-- Input: Partition the following string into palindromes: aab
-- Output: [["a","a","b"],["aa","b"]]
-- Explanation:
-    - One way to partition the string is "a" | "a" | "b", where each substring is a palindrome.
-    - Another way to partition the string is "aa" | "b", where again each substring is a palindrome.
-    - Therefore, the final result is a list of the two palindrome partitions.
+Your output should be a list of lists, where each list represents a palindrome partition, e.g. [["a","a","b"],["aa","b"]].
 
 Partition the following string into palindromes: {string}
 """

diff --git a/reasoning_gym/algorithmic/pool_matrix.py b/reasoning_gym/algorithmic/pool_matrix.py
@@ -11,25 +11,8 @@
 QUESTION_TEMPLATE = """Your job is to perform max/average pooling on the given matrix.
 The stride is equal to the kernel size, meaning there is no overlap between the pooling regions.
 
-Example 1:
-- Input: Perform max pooling on the following matrix with a kernel size of 2:
-1 2 3 4
-5 6 7 8
-9 10 11 12
-13 14 15 16
-- Output:
-6 8
-14 16
-
-Example 2:
-- Input: Perform average pooling on the following matrix with a kernel size of 2:
-1 2 3 4
-5 6 7 8
-9 10 11 12
-13 14 15 16
-- Output:
-3.5 5.5
-11.5 13.5
+Your output should be a matrix in the same format as the input matrix.
+The output matrix is smaller than the input matrix when the kernel size is greater than 1, and its elements may be floating-point numbers.
 
 Perform {pool_type} pooling on the following matrix with a kernel size of {pool_size}:
 {matrix}

diff --git a/reasoning_gym/algorithmic/rotate_matrix.py b/reasoning_gym/algorithmic/rotate_matrix.py
@@ -13,17 +13,7 @@
 
 QUESTION_TEMPLATE = """Given a square matrix, your job is to rotate it clockwise.
 
-Example:
-
-Input: Rotate the matrix below by 90 degrees clockwise:
-1 2 3
-4 5 6
-7 8 9
-
-Output:
-7 4 1
-8 5 2
-9 6 3
+Your output should be a matrix in the same format as the input.
 
 Rotate the matrix below by {degrees} degrees clockwise:
 {matrix}

diff --git a/reasoning_gym/algorithmic/rotten_oranges.py b/reasoning_gym/algorithmic/rotten_oranges.py
@@ -21,13 +21,6 @@
 Your task is to determine the minimum number of minutes that must elapse until no cell has a fresh orange.
 If this is impossible, return -1.
 
-Example:
-- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
-    2 1 1
-    1 1 0
-    0 1 1
-- Output: 4
-
 Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
 {matrix}
 """

diff --git a/reasoning_gym/algorithmic/spiral_matrix.py b/reasoning_gym/algorithmic/spiral_matrix.py
@@ -12,19 +12,14 @@
 
 QUESTION_TEMPLATE = """Given a matrix, your job is to generate a list of elements in spiral order, starting from the top-left element.
 
-Example:
-- Input: For the matrix below, what is the list of elements in spiral order?
-1 2 3
-4 5 6
-7 8 9
-- Output: 1 2 3 6 9 8 7 4 5
-- Explanation:
-    - We start from the top-left element (1) and move right until we reach the end of the row: 1 2 3
-    - Then, we move down until we reach the last column: 1 2 3 6 9
-    - Next, we move left until we reach the first column: 1 2 3 6 9 8 7
-    - Then, we move up until we reach the second row (i.e. one below the previously traversed row): 1 2 3 6 9 8 7 4
-    - Finally, we move right until we reach the second to last column: 1 2 3 6 9 8 7 4 5
-    - The output format is a space-separated list of elements in spiral order (as opposed to a python list)
+The spiral order is clockwise, starting from the top-left corner. More precisely:
+- Start from the top-left corner and move right.
+- Move down towards the bottom-right corner.
+- Move left towards the bottom-left corner.
+- Move up towards the top-left corner.
+- Repeat the steps for the inner elements of the matrix until every entry is visited.
+
+Your output should be a space-separated list of integers, e.g. 1 2 3 4 5 6
 
 For the matrix below, what is the list of elements in spiral order?
 {matrix}

diff --git a/reasoning_gym/algorithmic/string_insertion.py b/reasoning_gym/algorithmic/string_insertion.py
@@ -18,15 +18,7 @@
 
 Once you have inserted a character, you have to skip over the substring and the inserted character and continue the search from the next character.
 
-Example
-- Input: DDABCDEEDEAB
-- Output: DDABCDAEEDEABD
-- Explanation:
-    - There are two inserted characters: DDABCD[A]EEDEAB[D] (shown in square brackets)
-    - First, we insert A after ABCD.
-    - Even though with the newly inserted 'A' we can obtain the substring BCD[A], we can't use it to insert another character.
-    - Lastly, we insert D after DEAB.
-    - Therefore, the final answer is DDABCDAEEDEABD (represented as a string, instead of a list of characters).
+Your output should be a string that has been modified according to the pattern.
 
 Given the following string, provide the answer after inserting the characters according to the pattern: {string}
 """

diff --git a/reasoning_gym/algorithmic/string_manipulation.py b/reasoning_gym/algorithmic/string_manipulation.py
@@ -17,18 +17,7 @@
 Once you have applied a rule, repeat the process with the new string until no further transformations can be performed (i.e. the string doesn't change), or a state is repeated.
 If a state is repeated, the process is terminated, and the repeated state is discarded (i.e. is not considered as the final answer) and the state before the repeated state is considered as the final answer.
 
-Example:
-- Input:
-    - String: abbac
-    - Rules:
-        1. If the string prefix is 'ab', replace it with 'ca'.
-        2. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end.
-        3. If the string ends with 'aa', replace it with 'cc'.
-- Output: bbbacc
-- Explanation:
-    - In the first iteration, rule 1 is applied to the string abbac, resulting in cabac
-    - In the second iteration, rule 1 doesn't apply, but rule 2 is applied to the string cabac, resulting in bbbacc
-    - In the third iteration, none of the rules (1, 2, 3) apply, so the process is terminated, and the final answer is bbbacc
+Your output should be the final transformed string after applying all the rules.
 
 Transform the following string according to the above list of rules:
 {string}

diff --git a/reasoning_gym/algorithmic/string_splitting.py b/reasoning_gym/algorithmic/string_splitting.py
@@ -23,17 +23,6 @@
 The output should be the count of each machine and part type after the rules have been exhaustively applied in the following order: A B C X Y Z.
 For example 1 0 1 5 4 3 means that you have 1 machine A, 0 machine B, 1 machine C, 5 part X, 4 part Y, and 3 part Z.
 
-Example:
-- Input: You have 2 machines A, 0 machines B, and 1 machine C.
-- Output: 0 0 1 2 0 2
-- Explanation
-    0. Initial state: 2 0 1 0 0 0
-    1. We can apply rule 1 and trade 1 machine A for 2 part X and 1 part Y: 1 0 1 2 1 0
-    2. Starting over, we can apply rule 1 again: 0 0 1 4 2 0
-    3. In the next iteration, we can apply rule 5 and trade 1 part X and 1 part Y for 1 part Z: 0 0 1 3 1 1
-    4. In the next iteration, we can apply rule 5 again: 0 0 1 2 0 2
-    5. We can't apply any more rules, so the final answer is 0 0 1 2 0 2
-
 Now, you have {A_machine} machine A, {B_machine} machine B, and {C_machine} machine C. Provide the count of each machine and part type after applying the above rules.
 """
 

diff --git a/reasoning_gym/algorithmic/string_synthesis.py b/reasoning_gym/algorithmic/string_synthesis.py
@@ -23,16 +23,6 @@
 The output should be the count of each block type after the rules have been applied in the order they are listed above.
 For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C).
 
-Example:
-- Input: You have 2 [A], 3 [B], and 3 [C].
-- Output: 0 0 0 2 1 0 0 0 0
-- Explanation:
-    0. Initial state: 2 3 3 0 0 0 0 0 0
-    1. We can apply Rule 1 and obtain 1 {{A}}. New state: 1 2 2 1 0 0 0 0 0
-    2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0
-    3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0
-    4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0
-
 Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules.
 """
 

diff --git a/reasoning_gym/algorithmic/word_sorting.py b/reasoning_gym/algorithmic/word_sorting.py
@@ -21,16 +21,7 @@ class TextTransformation(StrEnum):
 
 QUESTION_TEMPLATE = """Your task is to sort words in ascending or descending order using ASCII/Unicode ordering.
 
-Example:
-- Input: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list: freely, idea, indemnify, last, END, solving
-- Output: END, freely, idea, indemnify, last, solving
-- Explanation:
-    - Uppercase letters come before lowercase letters, hence why "END" comes first.
-    - "freely" comes before "idea" because "f" comes before "i".
-    - "idea" comes before "indemnify" because even though they both start with "i", "d" comes before "n".
-    - "indemnify" comes before "last" because "i" comes before "l".
-    - "last" comes before "solving" because "l" comes before "s".
-    - Finally, the output is provided as a comma separated list of the sorted words.
+Your output should be a comma-separated list of words, e.g. word_1, word_2, word_3
 
 Now, sort these words in {direction} order (using ASCII/Unicode ordering) and return them as a comma-separated list: {words}
 """