From 01e1c8f9af9710042d7db82f1a6c08dcabd9108f Mon Sep 17 00:00:00 2001
From: Zafir Stojanovski <zaf.stojano@gmail.com>
Date: Mon, 3 Mar 2025 21:55:53 +0100
Subject: [PATCH] fix: Unify Prompts (#254)

* remove cot
* fix prompt template
* fix pool matrix
* spiral matrix fixed
---
 reasoning_gym/algorithmic/ab.py               | 14 +--------
 reasoning_gym/algorithmic/base_conversion.py  | 13 --------
 .../algorithmic/binary_alternation.py         |  4 ---
 reasoning_gym/algorithmic/binary_matrix.py    | 17 +---------
 reasoning_gym/algorithmic/cryptarithm.py      | 23 --------------
 reasoning_gym/algorithmic/group_anagrams.py   |  7 +----
 .../algorithmic/isomorphic_strings.py         | 13 --------
 reasoning_gym/algorithmic/letter_jumble.py    | 15 +--------
 .../algorithmic/palindrome_generation.py      |  8 +----
 .../algorithmic/palindrome_partitioning.py    |  8 +----
 reasoning_gym/algorithmic/pool_matrix.py      | 21 ++-----------
 reasoning_gym/algorithmic/rotate_matrix.py    | 12 +------
 reasoning_gym/algorithmic/rotten_oranges.py   |  7 -----
 reasoning_gym/algorithmic/spiral_matrix.py    | 21 +++++--------
 reasoning_gym/algorithmic/string_insertion.py | 10 +-----
 .../algorithmic/string_manipulation.py        | 13 +-------
 reasoning_gym/algorithmic/string_splitting.py | 11 -------
 reasoning_gym/algorithmic/string_synthesis.py | 10 ------
 reasoning_gym/algorithmic/word_sorting.py     | 11 +------
 reasoning_gym/arithmetic/leg_counting.py      | 10 ------
 reasoning_gym/arithmetic/number_format.py     |  7 +----
 reasoning_gym/arithmetic/power_function.py    | 14 ---------
 reasoning_gym/cognition/rectangle_count.py    | 28 +----------------
 reasoning_gym/games/countdown.py              |  1 +
 reasoning_gym/games/emoji_mystery.py          | 21 +++++++------
 reasoning_gym/games/mahjong.py                |  8 +----
 reasoning_gym/games/n_queens.py               | 15 +--------
 reasoning_gym/games/tower_of_hanoi.py         | 11 +++----
 reasoning_gym/geometry/advanced_geometry.py   | 16 ++++------
 reasoning_gym/graphs/shortest_path.py         | 19 +++---------
 reasoning_gym/logic/propositional_logic.py    | 31 +++++++++----------
 31 files changed, 65 insertions(+), 354 deletions(-)

diff --git a/reasoning_gym/algorithmic/ab.py b/reasoning_gym/algorithmic/ab.py
index 139e4af8..9ec679af 100644
--- a/reasoning_gym/algorithmic/ab.py
+++ b/reasoning_gym/algorithmic/ab.py
@@ -102,19 +102,7 @@ def __getitem__(self, idx: int) -> dict:
     B# #B ... becomes ... nothing
 
 In other words, whenever two neighbor tokens have their '#' facing each-other,
-they must be rewritten according to the corresponding rule. For example, the
-first example shown here is computed as:
-
-    B# A# #B #A B# =
-    B# #B A# #A B# =
-    A# #A B# =
-    B#
-
-The steps were:
-1. We replaced `A# #B` by `#B A#`.
-2. We replaced `B# #B` by nothing.
-3. We replaced `A# #A` by nothing.
-The final result was just `B#`.
+they must be rewritten according to the corresponding rule.
 
 Now, consider the following program:
 
diff --git a/reasoning_gym/algorithmic/base_conversion.py b/reasoning_gym/algorithmic/base_conversion.py
index bd897a89..5ab5b5b5 100644
--- a/reasoning_gym/algorithmic/base_conversion.py
+++ b/reasoning_gym/algorithmic/base_conversion.py
@@ -10,19 +10,6 @@
 
 If the target base is > 10, use lowercase letters a-z for digits above 9.
 
-Example:
-- Input: Convert the base-9 number 440 to base-5
-- Output: 2420
-- Explanation
-    - First, we convert the base-9 number 440 to base-10: 4 * 9**2 + 4 * 9**1 + 0 * 9**0 = 324 + 36 + 0 = 360
-    - Next, we convert the base-10 number 360 to base-5:
-        - 360 // 5 = 72 remainder 0
-        - 72 // 5 = 14 remainder 2
-        - 14 // 5 = 2 remainder 4
-        - 2 // 5 = 0 remainder 2
-    - Reading the remainders in reverse order gives us the base-5 number 2 4 2 0
-    - Hence, the final answer is 2420
-
 Now, convert the {source_name} number {source_repr} to {target_name}
 """
 
diff --git a/reasoning_gym/algorithmic/binary_alternation.py b/reasoning_gym/algorithmic/binary_alternation.py
index ca204c6d..ea50b0c8 100644
--- a/reasoning_gym/algorithmic/binary_alternation.py
+++ b/reasoning_gym/algorithmic/binary_alternation.py
@@ -15,10 +15,6 @@
 
 Any two characters may be swapped, even if they are not adjacent.
 
-Example:
-- Input: Determine the minimum number of swaps to make the following binary string alternating: 111000
-- Output: 1
-
 Now, determine the minimum number of swaps to make the following binary string alternating: {string}
 """
 
diff --git a/reasoning_gym/algorithmic/binary_matrix.py b/reasoning_gym/algorithmic/binary_matrix.py
index 92317e78..4584a7fb 100644
--- a/reasoning_gym/algorithmic/binary_matrix.py
+++ b/reasoning_gym/algorithmic/binary_matrix.py
@@ -13,22 +13,7 @@
 
 QUESTION_TEMPLATE = """Given a square matrix, your job is to find the taxicab (Manhattan) distance of the nearest 0 for each cell.
 
-Example:
-- Input: Find the distance to the nearest 0 for each cell in the matrix below:
-0 0 0
-0 1 0
-1 1 1
-- Output:
-0 0 0
-0 1 0
-1 2 1
-- Explanation
-    - Each cell with a 0 has a distance of 0 to itself.
-    - The cell at (1, 1) has a distance of 1 to the nearest 0 (any of the three 0's at (1, 0), (0, 1), (1, 2)).
-    - The cell at (2, 0) has a distance of 1 to the nearest 0 (the 0 at (1, 0)).
-    - The cell at (2, 1) has a distance of 2 to the nearest 0 (any of the two 0's at (1, 0), (1, 2))
-    - The cell at (2, 2) has a distance of 1 to the nearest 0 (the 0 at (1, 2)).
-    - Hence, the final answer is the matrix is the output shown above, where each cell contains the distance to the nearest 0, in the same format as the input matrix.
+The output should be a matrix of the same size as the input matrix, where each cell contains the distance to the nearest 0.
 
 Find the distance to the nearest 0 for each cell in the matrix below:
 {matrix}
diff --git a/reasoning_gym/algorithmic/cryptarithm.py b/reasoning_gym/algorithmic/cryptarithm.py
index 15264254..f0946278 100644
--- a/reasoning_gym/algorithmic/cryptarithm.py
+++ b/reasoning_gym/algorithmic/cryptarithm.py
@@ -17,26 +17,6 @@
 
 from ..factory import ProceduralDataset, register_dataset
 
-EXAMPLE_CASE = """- Input:
-  BASE
-+ BALL
-------
- GAMES
-
-- Output: B=7, A=4, S=8, E=3, L=5, M=9, G=1
-- Explanation:
-    * BASE + BALL = GAMES, two 4-digit numbers sum to 5 digits, so G = 1.
-    * Units: E + L = S (no carry).
-    * Tens: S + L = E + 10 (carry 1). Substitute S = E + L to get E + 2L = E + 10, so L = 5.
-    * Since S = E + 5 and S is one digit, E < 5.
-    * Hundreds: 2A + 1 = M (with carry).
-    * Thousands: 2B = A + 10 (carry makes G = 1). So A = 2B - 10.
-    * Try B = 7: Then A = 4 and M = 2(4) + 1 = 9.
-    * With E < 5, try E = 3: Then S = 8.
-    * Solution: B = 7, A = 4, S = 8, E = 3, L = 5, M = 9, G = 1
-    * Verify: BASE (7483) + BALL (7455) = GAMES (14938).
-"""
-
 
 @dataclass
 class CryptarithmConfig:
@@ -45,7 +25,6 @@ class CryptarithmConfig:
     min_words: int = 2  # Minimum number of addends
     max_words: int = 3  # Maximum number of addends
     allow_leading_zero: bool = False
-    include_example: bool = True
     seed: Optional[int] = None
     size: int = 500  # Number of puzzle instances to generate
 
@@ -189,8 +168,6 @@ def int_to_letter_str(num: int) -> str:
             )
             + 'Provide a comma separated mapping from letters to digits that satisfies the equation in your final answer. Output format: "A=1,B=2,C=3" (without quotes)\n'
         )
-        if self.config.include_example:
-            question_str += "\nHere's an example:\n" + EXAMPLE_CASE
 
         # 8) Create a human-readable answer, e.g. "A=1,B=0,C=9,..."
         sorted_letter_keys = sorted(letter_to_digit.keys())
diff --git a/reasoning_gym/algorithmic/group_anagrams.py b/reasoning_gym/algorithmic/group_anagrams.py
index caf46357..b6630ac0 100644
--- a/reasoning_gym/algorithmic/group_anagrams.py
+++ b/reasoning_gym/algorithmic/group_anagrams.py
@@ -21,12 +21,7 @@
 
 Your job is to group the anagrams together. You can return the answer in any order.
 
-Example:
-Input: ["eat", "tea", "tan", "ate", "nat", "bat"]
-Output: [["bat"], ["nat", "tan"], ["ate", "eat", "tea"]]
-Explanation:
-    - There is no string in the input that can be rearranged to form "bat".
-    - The strings "nat" and "tan" are anagrams as they can be rearranged to form each other.
+The output is a list of lists of strings, where each outer list contains a group of anagrams, e.g. [["eat", "tea"], ["tan", "nat"]].
 
 Group the following list of words into anagrams:
 {words}
diff --git a/reasoning_gym/algorithmic/isomorphic_strings.py b/reasoning_gym/algorithmic/isomorphic_strings.py
index 3b4a59e5..bba46343 100644
--- a/reasoning_gym/algorithmic/isomorphic_strings.py
+++ b/reasoning_gym/algorithmic/isomorphic_strings.py
@@ -18,19 +18,6 @@
 
 No two characters may map to the same character, but a character may map to itself.
 
-Example 1:
-Input: egg add
-Output: True
-Explanation: The strings s and t can be made identical by:
-    - Mapping 'e' to 'a'.
-    - Mapping 'g' to 'd'.
-
-Example 2:
-Input: foo bar
-Output: False
-Explanation:
-    - The strings cannot be made identical as 'o' needs to be mapped to both 'a' and 'r'.
-
 Return True if the following two strings are isomorphic, or False otherwise:
 {s} {t}
 """
diff --git a/reasoning_gym/algorithmic/letter_jumble.py b/reasoning_gym/algorithmic/letter_jumble.py
index 3aab43f8..5917e55c 100644
--- a/reasoning_gym/algorithmic/letter_jumble.py
+++ b/reasoning_gym/algorithmic/letter_jumble.py
@@ -15,20 +15,7 @@
 
 The order of the words in the sentence is preserved. Moreover, the style of the sentence is preserved (i.e. punctuation, capitalization, new lines, etc.).
 
-Example:
-- Input: Unscramble these words: raendgmeins yWh nya hilcd anc od hatt
-- Output: meanderings Why any child can do that
-- Explanation
-    - We unscramble each of the words independently.
-    - raendgmeins -> meanderings
-    - yWh -> Why
-    - nya -> any
-    - hilcd -> child
-    - anc -> can
-    - od -> do
-    - hatt -> that
-    - The final answer is: meanderings Why any child can do that
-    - Notice that the order of the words is preserved, no new words / symbols (e.g. new lines) are added.
+Your output should be a sentence with the words unscrambled.
 
 Now, unscramble these words: {words}
 """
diff --git a/reasoning_gym/algorithmic/palindrome_generation.py b/reasoning_gym/algorithmic/palindrome_generation.py
index 0e54579a..2f7b5fb5 100644
--- a/reasoning_gym/algorithmic/palindrome_generation.py
+++ b/reasoning_gym/algorithmic/palindrome_generation.py
@@ -11,13 +11,7 @@
 
 If there are multiple possible answers, only respond with one of them. You must use all the letters provided.
 
-Example:
-- Input: Form a valid palindrome using the following letters: a, a, b
-- Output: aba
-- Explanation:
-    - The phrase aba reads the same forwards and backwards.
-    - The output answer is a valid palindrome using all the letters provided.
-    - The answer is a string, rather than a list of characters.
+Your output should be a single string, with no spaces or punctuation.
 
 Now, form a valid palindrome using the following letters: {letters}
 """
diff --git a/reasoning_gym/algorithmic/palindrome_partitioning.py b/reasoning_gym/algorithmic/palindrome_partitioning.py
index e0d41870..8a0c07b3 100644
--- a/reasoning_gym/algorithmic/palindrome_partitioning.py
+++ b/reasoning_gym/algorithmic/palindrome_partitioning.py
@@ -18,13 +18,7 @@
 
 You may return all possible palindrome partitioning in any order.
 
-Example:
-- Input: Partition the following string into palindromes: aab
-- Output: [["a","a","b"],["aa","b"]]
-- Explanation:
-    - One way to partition the string is "a" | "a" | "b", where each substring is a palindrome.
-    - Another way to partition the string is "aa" | "b", where again each substring is a palindrome.
-    - Therefore, the final result is a list of the two palindrome partitions.
+Your output should be a list of lists, where each list represents a palindrome partition, e.g. [["a","a","b"],["aa","b"]].
 
 Partition the following string into palindromes: {string}
 """
diff --git a/reasoning_gym/algorithmic/pool_matrix.py b/reasoning_gym/algorithmic/pool_matrix.py
index 002c0c0c..bf839ad4 100644
--- a/reasoning_gym/algorithmic/pool_matrix.py
+++ b/reasoning_gym/algorithmic/pool_matrix.py
@@ -11,25 +11,8 @@
 QUESTION_TEMPLATE = """Your job is to perform max/average pooling on the given matrix.
 The stride is equal to the kernel size, meaning there is no overlap between the pooling regions.
 
-Example 1:
-- Input: Perform max pooling on the following matrix with a kernel size of 2:
-1 2 3 4
-5 6 7 8
-9 10 11 12
-13 14 15 16
-- Output:
-6 8
-14 16
-
-Example 2:
-- Input: Perform average pooling on the following matrix with a kernel size of 2:
-1 2 3 4
-5 6 7 8
-9 10 11 12
-13 14 15 16
-- Output:
-3.5 5.5
-11.5 13.5
+Your output should be a matrix in the same format as the input matrix.
+The output matrix is smaller than the input matrix when the kernel size is greater than 1, and its elements may be floating-point numbers.
 
 Perform {pool_type} pooling on the following matrix with a kernel size of {pool_size}:
 {matrix}
diff --git a/reasoning_gym/algorithmic/rotate_matrix.py b/reasoning_gym/algorithmic/rotate_matrix.py
index adeaa47c..2154243f 100644
--- a/reasoning_gym/algorithmic/rotate_matrix.py
+++ b/reasoning_gym/algorithmic/rotate_matrix.py
@@ -13,17 +13,7 @@
 
 QUESTION_TEMPLATE = """Given a square matrix, your job is to rotate it clockwise.
 
-Example:
-
-Input: Rotate the matrix below by 90 degrees clockwise:
-1 2 3
-4 5 6
-7 8 9
-
-Output:
-7 4 1
-8 5 2
-9 6 3
+Your output should be a matrix in the same format as the input.
 
 Rotate the matrix below by {degrees} degrees clockwise:
 {matrix}
diff --git a/reasoning_gym/algorithmic/rotten_oranges.py b/reasoning_gym/algorithmic/rotten_oranges.py
index 92e35a20..3b849c4d 100644
--- a/reasoning_gym/algorithmic/rotten_oranges.py
+++ b/reasoning_gym/algorithmic/rotten_oranges.py
@@ -21,13 +21,6 @@
 Your task is determine the minimum number of minutes that must elapse until no cell has a fresh orange.
 If this is impossible, return -1.
 
-Example:
-- Input: Determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
-    2 1 1
-    1 1 0
-    0 1 1
-- Output: 4
-
 Now, determine the minimum number of minutes that must elapse until no cell in the grid below has a fresh orange:
 {matrix}
 """
diff --git a/reasoning_gym/algorithmic/spiral_matrix.py b/reasoning_gym/algorithmic/spiral_matrix.py
index 17aff844..63492a37 100644
--- a/reasoning_gym/algorithmic/spiral_matrix.py
+++ b/reasoning_gym/algorithmic/spiral_matrix.py
@@ -12,19 +12,14 @@
 
 QUESTION_TEMPLATE = """Given a matrix, your job is to generate a list of elements in spiral order, starting from the top-left element.
 
-Example:
-- Input: For the matrix below, what is the list of elements in spiral order?
-1 2 3
-4 5 6
-7 8 9
-- Output: 1 2 3 6 9 8 7 4 5
-- Explanation:
-    - We start from the top-left element (1) and move right until we reach the end of the row: 1 2 3
-    - Then, we move down until we reach the last column: 1 2 3 6 9
-    - Next, we move left until we reach the first column: 1 2 3 6 9 8 7
-    - Then, we move up until we reach the second row (i.e. one below the previously traversed row): 1 2 3 6 9 8 7 4
-    - Finally, we move right until we reach the second to last column: 1 2 3 6 9 8 7 4 5
-    - The output format is a space-separated list of elements in spiral order (as opposed to a python list)
+The spiral order is clockwise, starting from the top-left corner. More precisely:
+- Start from the top-left corner and move right.
+- Move down towards the bottom-right corner.
+- Move left towards the bottom-left corner.
+- Move up towards the top-left corner.
+- Repeat the steps for the inner elements of the matrix until every entry is visited.
+
+Your output should be a space-separated list of integers, e.g. 1 2 3 4 5 6
 
 For the matrix below, what is the list of elements in spiral order?
 {matrix}
diff --git a/reasoning_gym/algorithmic/string_insertion.py b/reasoning_gym/algorithmic/string_insertion.py
index d09b8a92..1d597364 100644
--- a/reasoning_gym/algorithmic/string_insertion.py
+++ b/reasoning_gym/algorithmic/string_insertion.py
@@ -18,15 +18,7 @@
 
 Once you have inserted a character, you have to skip over the substring and the inserted character and continue the search from the next character.
 
-Example
-- Input: DDABCDEEDEAB
-- Output: DDABCDAEEDEABD
-- Explanation:
-    - Theere are two inserted characters: DDABCD[A]EEDEAB[D] (shown in square brackets)
-    - First, we insert A after ABCD.
-    - Even though with the newly inserted 'A' we can obtain the substring BCD[A], we can't use it to insert another character.
-    - Lastly, we insert D after DEAB.
-    - Therefore, the final answer is DDABCDAEEDEABD (represented as a string, instead of a list of characters).
+Your output should be a string that has been modified according to the pattern.
 
 Given the following string, provide the answer after inserting the characters according to the pattern: {string}
 """
diff --git a/reasoning_gym/algorithmic/string_manipulation.py b/reasoning_gym/algorithmic/string_manipulation.py
index b382921f..434a241e 100644
--- a/reasoning_gym/algorithmic/string_manipulation.py
+++ b/reasoning_gym/algorithmic/string_manipulation.py
@@ -17,18 +17,7 @@
 Once you have applied a rule, repeat the process with the new string until no further transformations can be performed (i.e. the string doesn't change), or a state is repeated.
 If a state is repeated, the process is terminated, and the repeated state is discarded (i.e. is not considered as the final answer) and the state before the repeated state is considered as the final answer.
 
-Example:
-- Input:
-    - String: abbac
-    - Rules:
-        1. If the string prefix is 'ab', replace it with 'ca'.
-        2. If the string prefix is 'ca', replace it with 'bb' and append 'c' to the end.
-        3. If the string ends with 'aa', replace it with 'cc'.
-- Output: bbbacc
-- Explanation:
-    - In the first iteration, rule 1 is applied to the string abbac, resulting in cabac
-    - In the second interation, rule 1 doesn't apply, but rule 2 is applied to the string cabac, resulting in bbbacc
-    - In the third iteration, none of the rules (1, 2, 3) apply, so the process is terminated, and the final answer is bbbacc
+Your output should be the final transformed string after applying all the rules.
 
 Transform the following string according to the above list of rules:
 {string}
diff --git a/reasoning_gym/algorithmic/string_splitting.py b/reasoning_gym/algorithmic/string_splitting.py
index da3b82e0..6679e812 100644
--- a/reasoning_gym/algorithmic/string_splitting.py
+++ b/reasoning_gym/algorithmic/string_splitting.py
@@ -23,17 +23,6 @@
 The output should be the count of each machine and part type after the rules have been exhaustively applied in the following order: A B C X Y Z.
 For example 1 0 1 5 4 3 means that you have 1 machine A, 0 machine B, 1 machine C, 5 part X, 4 part Y, and 3 part Z.
 
-Example:
-- Input: You have 2 machines A, 0 machines B, and 1 machine C.
-- Output: 0 0 1 2 0 2
-- Explanation
-    0. Initial state: 2 0 1 0 0 0
-    1. We can apply rule 1 and trade 1 machine A for 2 part X and 1 part Y: 1 0 1 2 1 0
-    2. Starting over, we can apply rule 1 again: 0 0 1 4 2 0
-    3. In the next iteration, we can apply rule 5 and trade 1 part X and 1 part Y for 1 part Z: 0 0 1 3 1 1
-    4. In the next iteration, we can apply rule 5 again: 0 0 1 2 0 2
-    5. We can't apply any more rules, so the final answer is 0 0 1 2 0 2
-
 Now, you have {A_machine} machine A, {B_machine} machine B, and {C_machine} machine C. Provide the count of each machine and part type after applying the above rules.
 """
 
diff --git a/reasoning_gym/algorithmic/string_synthesis.py b/reasoning_gym/algorithmic/string_synthesis.py
index c78ed35b..63cafa98 100644
--- a/reasoning_gym/algorithmic/string_synthesis.py
+++ b/reasoning_gym/algorithmic/string_synthesis.py
@@ -23,16 +23,6 @@
 The output should be the count of each block type after the rules have been applied in the order they are listed above.
 For example 1 0 3 0 2 0 0 0 1 means that you have 1 [A] 0 [B] 3 [C] 0 {{A}} 2 {{B}} 0 {{C}} 0 (A) 0 (B) 1 (C).
 
-Example:
-- Input: You have 2 [A], 3 [B], and 3 [C].
-- Output: 0 0 0 2 1 0 0 0 0
-- Explanation:
-    0. Initial state: 2 3 3 0 0 0 0 0 0
-    1. We can apply Rule 1 and obtain 1 {{A}}. New state: 1 2 2 1 0 0 0 0 0
-    2. We can apply Rule 1 again and obtain 1 {{A}}. New state 0 1 1 2 0 0 0 0 0
-    3. We can apply Rule 3 and obtain 1 {{B}}. New state 0 0 0 2 1 0 0 0 0
-    4. No more rules can be applied. The answer is 0 0 0 2 1 0 0 0 0
-
 Now, you have {A_square} [A], {B_square} [B], and {C_square} [C] blocks. Provide the count of each block type after applying the above rules.
 """
 
diff --git a/reasoning_gym/algorithmic/word_sorting.py b/reasoning_gym/algorithmic/word_sorting.py
index 61c8b61d..fc65c976 100644
--- a/reasoning_gym/algorithmic/word_sorting.py
+++ b/reasoning_gym/algorithmic/word_sorting.py
@@ -21,16 +21,7 @@ class TextTransformation(StrEnum):
 
 QUESTION_TEMPLATE = """Your task is to sort words in ascending or descending order using ASCII/Unicode ordering.
 
-Example:
-- Input: Sort these words in ascending order (using ASCII/Unicode ordering) and return them as a comma-separated list: freely, idea, indemnify, last, END, solving
-- Output: END, freely, idea, indemnify, last, solving
-- Explanation:
-    - Uppercase letters come before lowercase letters, hence why "END" comes first.
-    - "freely" comes before "idea" because "f" comes before "i".
-    - "idea" comes before "indemnify" because even though they both start with "i", "d" comes before "n".
-    - "indemnify" comes before "last" because "i" comes before "l".
-    - "last" comes before "solving" because "l" comes before "s".
-    - Finally, the output is provided as a comma separated list of the sorted words.
+Your output should be a comma-separated list of words, e.g. word_1, word_2, word_3
 
 Now, sort these words in {direction} order (using ASCII/Unicode ordering) and return them as a comma-separated list: {words}
 """
diff --git a/reasoning_gym/arithmetic/leg_counting.py b/reasoning_gym/arithmetic/leg_counting.py
index f7da2d35..3acc2f32 100644
--- a/reasoning_gym/arithmetic/leg_counting.py
+++ b/reasoning_gym/arithmetic/leg_counting.py
@@ -56,16 +56,6 @@
 
 QUESTION_TEMPLATE = """Your task is to count how many legs there are in total when given a list of animals.
 
-Example:
-- Input: How many legs are there in total if you have 1 duck, 2 deers, 1 spider, 3 cows?
-- Output: 30
-- Explanation:
-    - Ducks have 2 legs each, so 1 duck has 2 legs.
-    - Deers have 4 legs each, so 2 deers have 8 legs.
-    - Spiders have 8 legs each, so 1 spider has 8 legs.
-    - Cows have 4 legs each, so 3 cows have 12 legs.
-    - Therefore, the total number of legs is 2 + 8 + 8 + 12 = 30
-
 Now, how many legs are there in total if you have {animals}?
 """
 
diff --git a/reasoning_gym/arithmetic/number_format.py b/reasoning_gym/arithmetic/number_format.py
index 36e66c1d..0c2b79d5 100644
--- a/reasoning_gym/arithmetic/number_format.py
+++ b/reasoning_gym/arithmetic/number_format.py
@@ -8,12 +8,7 @@
 
 QUESTION_TEMPLATE = """Your task is to pick the largest/smallest number out of several options.
 
-Example
-- Input: Pick the largest number of the following candidates: 857575.23 8.975554e+05 887,555.62
-- Output: 8.975554e+05
-- Explanation:
-    - Sorting the numbers written in various notations we get: 857575.23 < 887,555.62 < 8.975554e+05
-    - Therefore, the largest number is 8.975554e+05
+Your output should be only the number of interest.
 
 Now, pick the {size} number of the following candidates: {numbers}
 """
diff --git a/reasoning_gym/arithmetic/power_function.py b/reasoning_gym/arithmetic/power_function.py
index a4fb93c7..879f4848 100644
--- a/reasoning_gym/arithmetic/power_function.py
+++ b/reasoning_gym/arithmetic/power_function.py
@@ -9,20 +9,6 @@
 
 QUESTION_TEMPLATE = """Your task is to compute an exponentiation of a number.
 
-Example:
-- Input: Compute 2^3
-- Output: 8
-- Explanation:
-    - 2^3 = 2 * 2 * 2 = 8
-    - Therefore, the final answer is 8
-
-Example:
-- Input: Compute 412.5^3
-- Output: 70189453.125
-- Explanation:
-    - 412.5^3 = 412.5 * 412.5 * 412.5 = 70189453.125
-    - Therefore, the final answer is 70189453.125
-
 Compute {base}^{exponent}
 """
 
diff --git a/reasoning_gym/cognition/rectangle_count.py b/reasoning_gym/cognition/rectangle_count.py
index 5a86d467..b258b615 100644
--- a/reasoning_gym/cognition/rectangle_count.py
+++ b/reasoning_gym/cognition/rectangle_count.py
@@ -8,33 +8,7 @@
 
 Single rectangles are outlined with a '#', overlapping rectangles (max 2) are shown with '█'.
 
-Example:
-- Input: How many rectangles are in the grid below?
-
-              ####
-              #  #
-              ####
-
-
-
-
-
-
-
-
-
-
- #########
- #       █##
- #       █ #
- ########█ #
-         # #
-         ###
-- Output: 3
-- Explanation:
-    - The first rectangle is the 3x4 rectangle in the top right.
-    - The other two rectangles are overlapping in the bottom left corner.
-    - Therefore, the final answer is 3.
+Your output should be a single number, representing the total count of rectangles.
 
 Now, it's your turn. How many rectangles do you see in the grid below?
 {puzzle}
diff --git a/reasoning_gym/games/countdown.py b/reasoning_gym/games/countdown.py
index 88ad913d..751c67d7 100644
--- a/reasoning_gym/games/countdown.py
+++ b/reasoning_gym/games/countdown.py
@@ -9,6 +9,7 @@
 from ..factory import ProceduralDataset, register_dataset
 
 QUESTION_FORMAT_TEMPLATE = """{question}
+
 Final answer format instructions:
 1. Provide your solution as a arithmetic expression (no '=' sign).
 2. Do not include the target number in the expression.
diff --git a/reasoning_gym/games/emoji_mystery.py b/reasoning_gym/games/emoji_mystery.py
index 59b20b16..27f48d87 100644
--- a/reasoning_gym/games/emoji_mystery.py
+++ b/reasoning_gym/games/emoji_mystery.py
@@ -118,8 +118,8 @@
     "🤍",
 ]
 
-
-hint_function = """
+# Keep the hint function in a separate variable to control the visibility of the hint
+hint_function = """Here is a hint:
 ```python
 def variance_selector_to_byte(variation_selector):
     variation_selector_codepoint = ord(variation_selector)
@@ -129,6 +129,7 @@ def variance_selector_to_byte(variation_selector):
         return variation_selector_codepoint - 0xE0100 + 16
     else:
         return None
+
 def decode(encoded_sentence):
     decoded_bytes = []
     variation_selectors_part = encoded_sentence[1:]
@@ -141,14 +142,14 @@ def decode(encoded_sentence):
 """
 
 
-QUESTION_TEMPLATE = "\n".join(
-    [
-        "The following emoji is encoded with a sentence.",
-        "Decode the following sentence from the emoji: {sentence}",
-        "Here is a hint: {hint_function}",
-        "Return the secret sentence as your final answer.",
-    ]
-)
+QUESTION_TEMPLATE = """A sentence is encoded in the following emoji.
+
+Decode the following sentence from the emoji: {sentence}
+
+{hint_function}
+
+Return the secret sentence as your final answer.
+"""
 
 
 @dataclass
diff --git a/reasoning_gym/games/mahjong.py b/reasoning_gym/games/mahjong.py
index 7816320b..77071057 100644
--- a/reasoning_gym/games/mahjong.py
+++ b/reasoning_gym/games/mahjong.py
@@ -19,13 +19,7 @@
 6. "Peng" takes precedence over "Chi".
 7. The card that is removed does not affect the result determination of the current round.
 
-Example:
-- Input: Given the initial cards ABBCCDDEEFFGH, what is the result at the end of performing the following rounds of operations:
-Round 1: Add a B card and remove an E card.
-Round 2: Add a C card and remove an H card.
-Round 3: Add an E card and remove a D card.
-Round 4: Add a D card and remove an F card.
-- Output: Chi
+Your output should be one of the following: "Peng", "Chi", or "Pass" (without quotes).
 
 Now, given the initial cards {cards}, what is the result at the end of performing the following rounds of operations:
 {operations}
diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py
index 61f6ea66..8fcecfd4 100644
--- a/reasoning_gym/games/n_queens.py
+++ b/reasoning_gym/games/n_queens.py
@@ -20,20 +20,7 @@
 
 You can place a queen by replacing an underscore (_) with a Q.
 
-Example:
-- Input: Given the below board of size 4 x 4 your job is to place 2 queen(s) on the board such that no two queens attack each other.
-_ Q _ _
-_ _ _ _
-_ _ _ _
-_ _ Q _
-- Output:
-_ Q _ _
-_ _ _ Q
-Q _ _ _
-_ _ Q _
-- Explanation
-    - None of the queens attack each other vertically, horizontally, or diagonally.
-    - The added queens are marked with Q at the positions (1, 3) and (2, 0).
+Your output should also be a board in the same format as the input, with queens placed on the board by replacing underscores with the letter Q.
 
 Given the below board of size {n} x {n} your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
 {puzzle}
diff --git a/reasoning_gym/games/tower_of_hanoi.py b/reasoning_gym/games/tower_of_hanoi.py
index 47afbb05..052d72bf 100644
--- a/reasoning_gym/games/tower_of_hanoi.py
+++ b/reasoning_gym/games/tower_of_hanoi.py
@@ -13,16 +13,13 @@
 - Only one disk can be moved at a time.
 - A larger disk cannot be placed on top of a smaller disk.
 - All disks must be on a peg at all times.
-Example:
-Move disk 1 from Peg 1 to Peg 3
-Move disk 2 from Peg 1 to Peg 2
-Move disk 1 from Peg 3 to Peg 2
 
 Provide the sequence of moves.
+
 Formatting guidelines:
-Each instruction should be placed on a single line.
-Each line should be formatted as 'Move disk X from Peg Y to Peg Z'
-Do not include any other text or formatting.
+- Each instruction should be placed on a single line.
+- Each line should be formatted as 'Move disk X from Peg Y to Peg Z'
+- Do not include any other text or formatting.
 """
 
 
diff --git a/reasoning_gym/geometry/advanced_geometry.py b/reasoning_gym/geometry/advanced_geometry.py
index 90c8b463..816401e4 100644
--- a/reasoning_gym/geometry/advanced_geometry.py
+++ b/reasoning_gym/geometry/advanced_geometry.py
@@ -36,16 +36,12 @@ def validate(self):
         assert len(self.task_types) > 0, "Must specify at least one task type."
 
 
-# Join format instructions into a single string
-GEOMETRY_FORMAT_INSTRUCTIONS = "\n".join(
-    [
-        "For all geometry problems:",
-        "1. Give coordinates in the form (x, y)",
-        "2. Round decimal answers to 3 decimal places",
-        "3. Use the degree symbol ° for angles",
-        "4. Return only th angle, coordinates, or radius as your answer.",
-    ]
-)
+GEOMETRY_FORMAT_INSTRUCTIONS = """For all geometry problems:
+1. Give coordinates in the form (x, y)
+2. Round decimal answers to 3 decimal places
+3. Use the degree symbol ° for angles
+4. Return only the angle, coordinates, or radius as your answer.
+"""
 
 
 class AdvancedGeometryDataset(ProceduralDataset):
diff --git a/reasoning_gym/graphs/shortest_path.py b/reasoning_gym/graphs/shortest_path.py
index 0b6f4def..d915e114 100644
--- a/reasoning_gym/graphs/shortest_path.py
+++ b/reasoning_gym/graphs/shortest_path.py
@@ -16,23 +16,12 @@
 - X: a blocked cell
 
 Therefore, you need to find the shortest path from * to #, moving only through open cells.
+
+You may only move in four directions: up, down, left, and right.
+
 If there is no path from * to #, simply write "infeasible" (without quotes).
 
-Example 1:
-- Input: Find the length of the shortest path from * to # in the following grid:
-    X X X X X
-    X * O O X
-    X O X O X
-    X X X O #
-- Output: right right down down right
-
-Example 2:
-- Input: Find the length of the shortest path from * to # in the following grid:
-    X X X X X
-    X * O O X
-    X O X O X
-    X X X X #
-- Output: infeasible
+Your output should be a sequence of directions that leads from * to #, e.g. right right down down up left
 
 Now, find the length of the shortest path from * to # in the following grid:
 {grid}
diff --git a/reasoning_gym/logic/propositional_logic.py b/reasoning_gym/logic/propositional_logic.py
index dec8a5a7..8732bc1b 100644
--- a/reasoning_gym/logic/propositional_logic.py
+++ b/reasoning_gym/logic/propositional_logic.py
@@ -62,22 +62,21 @@ class Operator(StrEnum):
     IFF = "↔"
 
 
-QUESTION_FORMAT = "\n".join(
-    [
-        "The following question is a propositional logic reasoning question.",
-        "In the question we provide a list of premises",
-        "The task is to infer a correct conclusion from the premise.",
-        "FORMAT INSTRUCTIONS:",
-        "Return the conclusion logic statement, as your final answer.",
-        "Use the following notation to denote symbols",
-        "OR = \u2228",
-        "AND = \u2227",
-        "IMPLIES = \u2192",
-        "IFF = \u2194",
-        "NOT = \u00ac",
-        "Here is the question:",
-    ]
-)
+QUESTION_FORMAT = """The following question is a propositional logic reasoning question.
+
+In the question we provide a list of premises. The task is to infer a correct conclusion from the premises.
+
+FORMAT INSTRUCTIONS:
+- Return the conclusion logic statement as your final answer.
+- Use the following notation to denote symbols:
+    - OR = \u2228
+    - AND = \u2227
+    - IMPLIES = \u2192
+    - IFF = \u2194
+    - NOT = \u00ac
+
+Here is the question:
+"""
 
 
 @dataclass