diff --git a/GALLERY.md b/GALLERY.md
index be5a3a65..d48a3aa5 100644
--- a/GALLERY.md
+++ b/GALLERY.md
@@ -32,6 +32,7 @@ This gallery shows examples from all available datasets using their default conf
 - [prime_factorization](#prime_factorization)
 - [propositional_logic](#propositional_logic)
 - [quantum_lock](#quantum_lock)
+- [largest_island](#largest_island)
 - [rubiks_cube](#rubiks_cube)
 - [sentence_reordering](#sentence_reordering)
 - [simple_equations](#simple_equations)
@@ -1407,6 +1408,93 @@ Metadata: {'difficulty': 10, 'solution_path': ['B', 'B', 'B', 'B', 'B', 'B', 'B'
 
 ````
 
+### largest_island
+
+Generate a grid with islands and find the largest one
+
+Default configuration:
+```python
+rows = 10
+cols = 10
+max_num_islands = 5
+max_island_size = 10
+```
+
+Example tasks:
+````
+Example 1:
+Question: You are given the following 10 x 10 binary matrix grid:
+0 0 0 1 0 0 0 0 0 0
+1 1 0 1 0 0 0 0 0 1
+0 1 0 1 1 0 0 0 0 1
+0 1 0 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 0 1
+0 0 0 0 0 0 0 0 1 1
+0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 0 0 1 0
+1 1 0 1 1 0 0 0 1 1
+1 1 1 1 1 0 0 0 0 0
+
+An island is a group of 1's (representing land) connected 4-directionally (horizontal or vertical).
+You may assume all four edges of the grid are surrounded by water.
+
+The area of an island is the number of cells with a value 1 in the island.
+
+Return the maximum area of an island in grid. If there is no island, return 0.
+
+Answer: 10
+
+Metadata: {'grid': [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [1, 1, 0, 1, 0, 0, 0, 0, 0, 1], [0, 1, 0, 1, 1, 0, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [1, 1, 0, 1, 1, 0, 0, 0, 1, 1], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], 'solution': 10}
+
+Example 2:
+Question: You are given the following 10 x 10 binary matrix grid:
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+
+An island is a group of 1's (representing land) connected 4-directionally (horizontal or vertical).
+You may assume all four edges of the grid are surrounded by water.
+
+The area of an island is the number of cells with a value 1 in the island.
+
+Return the maximum area of an island in grid. If there is no island, return 0.
+
+Answer: 0
+
+Metadata: {'grid': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'solution': 0}
+
+Example 3:
+Question: You are given the following 10 x 10 binary matrix grid:
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+1 1 0 0 0 0 0 0 0 0
+1 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 1 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 0 0
+0 0 0 0 0 0 0 0 1 0
+0 0 0 0 0 0 0 0 0 0
+
+An island is a group of 1's (representing land) connected 4-directionally (horizontal or vertical).
+You may assume all four edges of the grid are surrounded by water.
+
+The area of an island is the number of cells with a value 1 in the island.
+
+Return the maximum area of an island in grid. If there is no island, return 0.
+
+Answer: 3
+
+Metadata: {'grid': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'solution': 3}
+````
+
 ### rubiks_cube
 
 Generates RubiksCube tasks
diff --git a/README.md b/README.md
index a1aa3791..d771db66 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,7 @@ See the [Dataset Gallery](GALLERY.md) for a complete list of available datasets
 
 - `FamilyRelationshipsDataset`: Generate family relationship reasoning tasks with family trees
 - `QuantumLockDataset`: Generates puzzles which involve stateful arithmetic and a correct sequence of operations
+- `LargestIslandDataset`: Generate a grid with islands and find the largest one
 
 ### Game Tasks
 
diff --git a/reasoning_gym/graphs/__init__.py b/reasoning_gym/graphs/__init__.py
index 6bbe7d67..ee722e38 100644
--- a/reasoning_gym/graphs/__init__.py
+++ b/reasoning_gym/graphs/__init__.py
@@ -1,4 +1,5 @@
 from .family_relationships import FamilyRelationshipsConfig, FamilyRelationshipsDataset
+from .largest_island import LargestIslandConfig, LargestIslandDataset
 from .quantum_lock import QuantumLockConfig, QuantumLockDataset
 
 __all__ = [
@@ -6,4 +7,6 @@
     "FamilyRelationshipsDataset",
     "QuantumLockConfig",
     "QuantumLockDataset",
+    "LargestIslandConfig",
+    "LargestIslandDataset",
 ]
diff --git a/reasoning_gym/graphs/largest_island.py b/reasoning_gym/graphs/largest_island.py
new file mode 100644
index 00000000..ee2615e7
--- /dev/null
+++ b/reasoning_gym/graphs/largest_island.py
@@ -0,0 +1,156 @@
+"""Find the largest island in a grid of 1s and 0s.
+
+A popular Leetcode problem:
+https://leetcode.com/problems/max-area-of-island/description/
+"""
+
+from collections import deque
+from dataclasses import dataclass
+from random import Random
+from typing import List, Optional
+
+from ..factory import ProceduralDataset, register_dataset
+
+MIN_MAP_DIM = 1
+
+QUESTION_TEMPLATE = """You are given the following {rows} x {cols} binary matrix grid:
+{grid}
+
+An island is a group of 1's (representing land) connected 4-directionally (horizontal or vertical).
+You may assume all four edges of the grid are surrounded by water.
+
+The area of an island is the number of cells with a value 1 in the island.
+
+Return the maximum area of an island in grid. If there is no island, return 0.
+"""
+
+
+@dataclass
+class LargestIslandConfig:
+    """Configuration for Largest Island dataset generation"""
+
+    rows: int = 10  # Number of rows in the grid
+    cols: int = 10  # Number of columns in the grid
+    max_num_islands: int = (
+        5  # Maximum number of islands (actual max might be smaller due to merging of islands during random walk)
+    )
+    max_island_size: int = (
+        10  # Maximum size of an island (actual max might be larger due to merging of islands during random walk)
+    )
+
+    size: int = 500  # Virtual dataset size
+    seed: Optional[int] = None
+
+    def validate(self):
+        """Validate configuration parameters, raising AssertionError on the first invalid value"""
+        assert MIN_MAP_DIM <= self.rows, f"rows must be at least {MIN_MAP_DIM}"
+        assert MIN_MAP_DIM <= self.cols, f"cols must be at least {MIN_MAP_DIM}"
+        assert 0 <= self.max_num_islands, "max_num_islands must be non-negative"
+        assert 0 <= self.max_island_size, "max_island_size must be non-negative"
+
+
+class LargestIslandDataset(ProceduralDataset):
+    """Generates Largest Island exercises with configurable difficulty"""
+
+    def __init__(self, config: LargestIslandConfig):
+        super().__init__(config=config, seed=config.seed, size=config.size)
+
+    def __len__(self) -> int:
+        return self.config.size
+
+    def __iter__(self):
+        self._current_idx = 0
+        return self
+
+    def __next__(self):
+        if self._current_idx >= self.config.size:
+            raise StopIteration
+        item = self[self._current_idx]
+        self._current_idx += 1
+        return item
+
+    def _is_valid_cell(self, r: int, c: int) -> bool:
+        """Check whether (r, c) lies inside a grid of the configured dimensions"""
+        return 0 <= r < self.config.rows and 0 <= c < self.config.cols
+
+    def _create_grid(self, rng: Random) -> List[List[int]]:
+        """Create a random grid of islands using a random walk algorithm"""
+        grid = [[0] * self.config.cols for _ in range(self.config.rows)]
+        directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Up, Down, Left, Right
+
+        def create_island():
+            r, c = rng.randint(0, self.config.rows - 1), rng.randint(0, self.config.cols - 1)
+            capped_size = min(rng.randint(0, self.config.max_island_size), self.config.rows * self.config.cols)
+            for _ in range(capped_size):
+                grid[r][c] = 1
+                rng.shuffle(directions)
+                # Step to a random water neighbour; if there is none the walk stays on the current cell
+                for dr, dc in directions:
+                    new_r, new_c = r + dr, c + dc
+                    if self._is_valid_cell(new_r, new_c) and grid[new_r][new_c] == 0:
+                        r, c = new_r, new_c
+                        break
+
+        num_islands = rng.randint(0, self.config.max_num_islands)
+        for _ in range(num_islands):
+            create_island()
+
+        return grid
+
+    def _get_largest_island(self, grid: List[List[int]]) -> int:
+        """Return the area of the largest 4-directionally connected island in the grid (0 if none)"""
+        # Bounds come from the grid itself so that any rectangular grid can be solved,
+        # not only grids that match the configured dimensions
+        rows = len(grid)
+        cols = len(grid[0]) if rows else 0
+        directions = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Up, Down, Left, Right
+        visited = set()
+
+        def bfs(r, c):
+            area = 1
+            visited.add((r, c))
+            queue = deque([(r, c)])
+            while queue:
+                r, c = queue.popleft()
+                for dr, dc in directions:
+                    new_r, new_c = r + dr, c + dc
+                    in_bounds = 0 <= new_r < rows and 0 <= new_c < cols
+                    if in_bounds and (new_r, new_c) not in visited and grid[new_r][new_c] == 1:
+                        area += 1
+                        visited.add((new_r, new_c))
+                        queue.append((new_r, new_c))
+            return area
+
+        max_area = 0
+        for r in range(rows):
+            for c in range(cols):
+                if grid[r][c] == 1 and (r, c) not in visited:
+                    max_area = max(max_area, bfs(r, c))
+
+        return max_area
+
+    def _grid_to_string(self, grid: List[List[int]]) -> str:
+        """Convert grid to a string representation"""
+        return "\n".join(" ".join(str(cell) for cell in row) for row in grid)
+
+    def _string_to_board(self, grid_str: str) -> List[List[int]]:
+        """Convert string representation to a grid"""
+        return [[int(cell) for cell in row.split()] for row in grid_str.split("\n")]
+
+    def __getitem__(self, idx: int) -> dict:
+        """Generate a single Largest Island question"""
+        rng = Random(self.seed + idx)
+
+        grid = self._create_grid(rng)
+        grid_str = self._grid_to_string(grid)
+
+        answer = self._get_largest_island(grid)
+
+        return {
+            "question": QUESTION_TEMPLATE.format(rows=self.config.rows, cols=self.config.cols, grid=grid_str),
+            "answer": str(answer),
+            "metadata": {"grid": grid, "solution": answer},
+        }
+
+
+register_dataset("largest_island", LargestIslandDataset, LargestIslandConfig)
diff --git a/tests/test_largest_island.py b/tests/test_largest_island.py
new file mode 100644
index 00000000..70475dd6
--- /dev/null
+++ b/tests/test_largest_island.py
@@ -0,0 +1,133 @@
+"""Tests for Largest Island puzzle generation"""
+
+import pytest
+
+from reasoning_gym.graphs.largest_island import LargestIslandConfig, LargestIslandDataset
+
+
+def test_largest_island_config_validation():
+    """Test that invalid configs raise appropriate errors"""
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(rows=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(rows=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(cols=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(cols=0)  # Zero not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(max_num_islands=-1)  # Negative not allowed
+        config.validate()
+
+    with pytest.raises(AssertionError):
+        config = LargestIslandConfig(max_island_size=-1)  # Negative not allowed
+        config.validate()
+
+
+def test_largest_island_dataset_deterministic():
+    """Test that dataset generates same items with same seed"""
+    config = LargestIslandConfig(seed=42, size=10)
+    dataset1 = LargestIslandDataset(config)
+    dataset2 = LargestIslandDataset(config)
+
+    for i in range(len(dataset1)):
+        assert dataset1[i] == dataset2[i]
+
+
+def test_largest_island_dataset_items():
+    """Test basic properties of generated items"""
+    config = LargestIslandConfig(rows=8, cols=8, max_num_islands=1, max_island_size=5, size=10, seed=42)
+    dataset = LargestIslandDataset(config)
+
+    for i in range(len(dataset)):
+        item = dataset[i]
+        # Check item structure
+        assert isinstance(item, dict)
+        assert "question" in item
+        assert "answer" in item
+        assert "metadata" in item
+
+        # Check metadata
+        assert "grid" in item["metadata"]
+        assert "solution" in item["metadata"]
+
+        grid = item["metadata"]["grid"]
+        solution = item["metadata"]["solution"]
+
+        # Verify grid dimensions
+        assert len(grid) == 8
+        assert all(len(row) == 8 for row in grid)
+        # With at most one island no merging can occur, so max_island_size is a hard bound
+        assert 0 <= solution <= 5
+
+
+def test_largest_island_dataset_iteration():
+    """Test that iteration respects dataset size"""
+    config = LargestIslandConfig(size=5, seed=42)
+    dataset = LargestIslandDataset(config)
+
+    items = list(dataset)
+    assert len(items) == config.size
+
+    # Test multiple iterations yield same items
+    assert items == list(dataset)
+
+
+def test_largest_island_grid_generation():
+    """Test that generated grids are valid"""
+    config = LargestIslandConfig(rows=10, cols=10, max_num_islands=1, max_island_size=3, size=5, seed=42)
+    dataset = LargestIslandDataset(config)
+
+    for i in range(len(dataset)):
+        item = dataset[i]
+        # A single random-walk island can never exceed max_island_size
+        assert item["metadata"]["solution"] <= 3
+        for row in item["metadata"]["grid"]:
+            assert all(cell in {0, 1} for cell in row)
+
+
+def test_largest_island_answer():
+    """Test the _get_largest_island method"""
+    config = LargestIslandConfig(rows=5, cols=5, seed=42)
+    dataset = LargestIslandDataset(config)
+
+    grid = [
+        [1, 1, 0, 0, 0],
+        [1, 0, 0, 0, 1],
+        [0, 0, 0, 1, 1],
+        [0, 0, 0, 1, 1],
+        [0, 0, 0, 1, 1],
+    ]
+    assert dataset._get_largest_island(grid) == 7
+
+    # Test empty grid
+    grid = [
+        [0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0],
+        [0, 0, 0, 0, 0],
+    ]
+    assert dataset._get_largest_island(grid) == 0
+
+    # Test neighboring grids are only horizontally or vertically connected (not diagonally)
+    grid = [
+        [1, 1, 1, 0, 0],
+        [1, 1, 1, 0, 0],
+        [1, 1, 1, 0, 0],
+        [0, 0, 0, 1, 1],
+        [0, 0, 0, 1, 1],
+    ]
+    assert dataset._get_largest_island(grid) == 9
+
+    # Test that the bounds are taken from the grid itself rather than from the config
+    grid = [[1, 1, 1, 1, 1, 1, 1]]
+    assert dataset._get_largest_island(grid) == 7