Merge pull request #47 from zafstojano/feat/n-queens

feat(env): N Queens
open-thought · Feb 2, 2025 · 3dd5a4d · 3dd5a4d
2 parents c4c0897 + 1912c57
commit 3dd5a4d
Show file tree

Hide file tree

Showing 5 changed files with 408 additions and 0 deletions.
diff --git a/GALLERY.md b/GALLERY.md
@@ -36,6 +36,7 @@ This gallery shows examples from all available datasets using their default conf
 - [simple_geometry](#simple_geometry)
 - [spell_backward](#spell_backward)
 - [sudoku](#sudoku)
+- [n_queens](#n_queens)
 - [syllogism](#syllogism)
 - [time_intervals](#time_intervals)
 - [tower_of_hanoi](#tower_of_hanoi)
@@ -1545,6 +1546,130 @@ Metadata: {'puzzle': [[0, 0, 1, 2, 3, 0, 0, 0, 9], [3, 0, 0, 1, 8, 5, 6, 7, 2],
 
 ````
 
+
+### n_queens
+
+Generates N-Queens puzzles with configurable board size and number of starting queens
+
+Default configuration:
+```python
+n = 8
+min_remove = 1
+max_remove = 7
+size = 500
+```
+
+Example tasks:
+````
+Example 1
+Question: Solve this N Queens puzzle:
+_ _ _ _ _ _ Q _
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+_ _ _ _ Q _ _ _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+The board size is 8x8 and your job is to place 1 queen(s) on the board such that no two queens attack each other.
+
+No two queens attack each other if they are not in the same row, column, or diagonal.
+
+Place a queen by replacing an underscore (_) with a Q.
+
+Answer 1:
+_ _ _ _ _ _ Q _
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+_ _ _ _ Q _ _ _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 1}
+
+Example 2
+Question: Solve this N Queens puzzle:
+_ Q _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ Q _ _
+_ _ _ _ _ _ _ Q
+_ _ _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ _ _ Q _ _ _
+
+The board size is 8x8 and your job is to place 3 queen(s) on the board such that no two queens attack each other.
+
+No two queens attack each other if they are not in the same row, column, or diagonal.
+
+Place a queen by replacing an underscore (_) with a Q.
+
+Answer 1:
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+_ _ _ _ _ Q _ _
+_ _ _ _ _ _ _ Q
+_ _ Q _ _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ _ _ Q _ _ _
+
+Metadata: {'puzzle': [['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']], 'solution': [[['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_']]], 'num_removed': 3}
+
+Example 3
+Question: Solve this N Queens puzzle:
+_ _ _ _ _ _ _ _
+_ Q _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+The board size is 8x8 and your job is to place 5 queen(s) on the board such that no two queens attack each other.
+
+No two queens attack each other if they are not in the same row, column, or diagonal.
+
+Place a queen by replacing an underscore (_) with a Q.
+
+Answer 1:
+_ _ _ _ Q _ _ _
+_ Q _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+Q _ _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+Answer 2:
+_ _ _ _ _ _ Q _
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ _ Q
+_ _ _ _ Q _ _ _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+Answer 3:
+_ _ _ _ _ _ _ Q
+_ Q _ _ _ _ _ _
+_ _ _ Q _ _ _ _
+Q _ _ _ _ _ _ _
+_ _ _ _ _ _ Q _
+_ _ _ _ Q _ _ _
+_ _ Q _ _ _ _ _
+_ _ _ _ _ Q _ _
+
+Metadata: {'puzzle': [['_', '_', '_', '_', '_', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], 'solution': [[['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']], [['_', '_', '_', '_', '_', '_', '_', 'Q'], ['_', 'Q', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', 'Q', '_', '_', '_', '_'], ['Q', '_', '_', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', '_', 'Q', '_'], ['_', '_', '_', '_', 'Q', '_', '_', '_'], ['_', '_', 'Q', '_', '_', '_', '_', '_'], ['_', '_', '_', '_', '_', 'Q', '_', '_']]], 'num_removed': 5}
+````
+
+
 ### syllogism
 Generates syllogism reasoning tasks
 

diff --git a/README.md b/README.md
@@ -126,6 +126,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
 - `MiniSudokuDataset`: Generate 4x4 Mini Sudoku puzzles with configurable difficulty
 - `MazeDataset`: Generate a maze with a start and a goal
 - `CountdownDataset`: Generate number game tasks where numbers and operators must be combined to reach a target value
+- `NQueensDataset`: Generate N-Queens puzzles with configurable board size and number of starting queens
 
 ## Future Generator Ideas
 

diff --git a/reasoning_gym/games/__init__.py b/reasoning_gym/games/__init__.py
@@ -10,6 +10,7 @@
 from .game_of_life import GameOfLifeConfig, GameOfLifeDataset
 from .maze import MazeConfig, MazeDataset
 from .mini_sudoku import MiniSudokuConfig, MiniSudokuDataset
+from .n_queens import NQueensDataset
 from .sudoku import SudokuConfig, SudokuDataset
 from .tower_of_hanoi import HanoiConfig, HanoiDataset
 
@@ -26,4 +27,5 @@
     "GameOfLifeDataset",
     "HanoiConfig",
     "HanoiDataset",
+    "NQueensDataset",
 ]
diff --git a/reasoning_gym/games/n_queens.py b/reasoning_gym/games/n_queens.py
@@ -0,0 +1,158 @@
+"""N Queens puzzle generator
+
+A generalization of the 8-queens puzzle to any board size.
+https://en.wikipedia.org/wiki/Eight_queens_puzzle
+"""
+
+from copy import deepcopy
+from dataclasses import dataclass
+from random import Random
+from typing import Dict, List, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+MIN_BOARD_SIZE = 4
+MAX_BOARD_SIZE = 12
+
+QUESTION_TEMPLATE = """Solve this N Queens puzzle:
+{puzzle}
+
+The board size is {n}x{n} and your job is to place {num_removed} queen(s) on the board such that no two queens attack each other.
+
+No two queens attack each other if they are not in the same row, column, or diagonal.
+
+Place a queen by replacing an underscore (_) with a Q.
+"""
+
+
+@dataclass
+class NQueensConfig:
+    """Configuration for N Queens puzzle generation"""
+
+    n: int = 8  # Board size
+    min_remove: int = 1  # Minimum number of queens to remove from solved board
+    max_remove: int = 7  # Maximum number of queens to remove from solved board
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Validate configuration parameters"""
+        assert MIN_BOARD_SIZE <= self.n <= MAX_BOARD_SIZE, f"n must be between {MIN_BOARD_SIZE} and {MAX_BOARD_SIZE}"
+        assert 1 <= self.min_remove <= self.max_remove, "min_remove must be between 1 and max_remove"
+        assert self.min_remove <= self.max_remove <= self.n, "max_remove must be between min_remove and n"
+
+
+class NQueensDataset(ProceduralDataset):
+    """Generates N Queens puzzles with configurable difficulty"""
+
+    def __init__(self, config: NQueensConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+        self._solutions = self._get_all_solutions(config.n)
+
+    def __len__(self) -> int:
+        return self.config.size
+
+    def __iter__(self):
+        self._current_idx = 0
+        return self
+
+    def __next__(self):
+        if self._current_idx >= self.config.size:
+            raise StopIteration
+        item = self[self._current_idx]
+        self._current_idx += 1
+        return item
+
+    def _get_all_solutions(self, n: int) -> List[List[List[str]]]:
+        """Get all solutions for the N Queens puzzle"""
+
+        visited_cols = set()
+        visited_pos_diag = set()
+        visited_neg_diag = set()
+
+        res = []
+        board = [["_"] * n for _ in range(n)]
+
+        def backtrack(row: int):
+            if row == n:
+                res.append(deepcopy(board))
+                return
+
+            for col in range(n):
+                if col in visited_cols or (row + col) in visited_pos_diag or (row - col) in visited_neg_diag:
+                    continue
+
+                visited_cols.add(col)
+                visited_pos_diag.add(row + col)
+                visited_neg_diag.add(row - col)
+                board[row][col] = "Q"
+                backtrack(row + 1)
+                visited_cols.remove(col)
+                visited_pos_diag.remove(row + col)
+                visited_neg_diag.remove(row - col)
+                board[row][col] = "_"
+
+        backtrack(0)
+        return res
+
+    def _create_puzzle(self, solved_board: List[List[str]], num_removed: int, rng: Random) -> List[List[str]]:
+        """Create puzzle by removing queens from solved board"""
+        puzzle = deepcopy(solved_board)
+        queens = [(i, j) for i in range(len(puzzle)) for j in range(len(puzzle)) if puzzle[i][j] == "Q"]
+        rng.shuffle(queens)
+        for i in range(num_removed):
+            x, y = queens[i]
+            puzzle[x][y] = "_"
+        return puzzle
+
+    def _board_to_string(self, board: List[List[str]]) -> str:
+        """Convert board to string representation"""
+        return "\n".join(" ".join(x for x in row) for row in board)
+
+    def _string_to_board(self, board_str: str) -> List[List[str]]:
+        """Convert string representation to board"""
+        return [list(row.split()) for row in board_str.strip().split("\n")]
+
+    def _is_tractable_solution(self, puzzle: List[List[str]], solution: List[List[str]]) -> bool:
+        """Check if a solution is achievable from the starting state of the puzzle"""
+        for r in range(len(puzzle)):
+            for c in range(len(puzzle)):
+                if puzzle[r][c] == "Q" and solution[r][c] != "Q":
+                    return False
+        return True
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single N Queens puzzle"""
+        rng = Random(self.seed + idx)
+
+        # Randomly select a valid solution
+        solved_board = rng.choice(self._solutions)
+
+        # Create puzzle by removing queens
+        num_removed = rng.randint(self.config.min_remove, self.config.max_remove)
+        puzzle = self._create_puzzle(solved_board, num_removed, rng)
+        puzzle_str = self._board_to_string(puzzle)
+
+        # Filter all solutions that are intractable from the puzzle's starting state
+        valid_solutions = [board for board in self._solutions if self._is_tractable_solution(puzzle, board)]
+        valid_solutions_str = {self._board_to_string(board) for board in valid_solutions}
+
+        return {
+            "question": QUESTION_TEMPLATE.format(puzzle=puzzle_str, n=len(puzzle), num_removed=num_removed),
+            "answer": valid_solutions_str,
+            "metadata": {"puzzle": puzzle, "solution": valid_solutions, "num_removed": num_removed},
+        }
+
+    def score_answer(self, answer: Optional[str], entry: Dict[str, any]) -> float:
+        valid_solutions = entry["answer"]
+        reward = 0.0
+        if answer is not None:
+            if answer in valid_solutions:
+                reward = 1.0
+            else:
+                reward = 0.01
+        return reward
+
+
+register_dataset("n_queens", NQueensDataset, NQueensConfig)