post merge lint

open-thought · Feb 2, 2025 · f396d3d · f396d3d
1 parent 02cfa95
commit f396d3d
Show file tree

Hide file tree

Showing 6 changed files with 198 additions and 64 deletions.
diff --git a/GALLERY.md b/GALLERY.md
@@ -3,6 +3,7 @@ This gallery shows examples from all available datasets using their default conf
 
 ## Available Datasets
 - [advanced_geometry](#advanced_geometry)
+- [aiw](#aiw)
 - [base_conversion](#base_conversion)
 - [basic_arithmetic](#basic_arithmetic)
 - [bf](#bf)
@@ -73,6 +74,50 @@ Metadata: {'A': (6, 7), 'B': (-7, -5), 'C': (2, -3), 'incircle_radius_exact': 's
 
 ````
 
+### aiw
+A procedural dataset inspired by the "Alice in Wonderland" paper.
+
+    The dataset is inspired by the following paper:
+       @inproceedings{nezhurina2024alice,
+       title={Alice in Wonderland: Simple Tasks Reveal Severe Generalization and
+              Basic Reasoning Deficits in State-Of-the-Art Large Language Models},
+       author={Marianna Nezhurina and Lucia Cipolina-Kun and Mehdi Cherti and
+              Jenia Jitsev},
+       booktitle={NeurIPS 2024 Workshop on Scientific Methods for Understanding
+                  Deep Learning},
+       year={2024},
+       url={https://openreview.net/forum?id=Mkl7dzjYiW}
+       }
+
+Default configuration:
+```python
+male_names = ['James', 'John', 'Robert', 'Michael', 'William', 'David', 'Richard', 'Joseph', 'Thomas', 'Charles', 'Bob']
+female_names = ['Mary', 'Patricia', 'Jennifer', 'Linda', 'Elizabeth', 'Barbara', 'Susan', 'Jessica', 'Sarah', 'Margaret', 'Alice']
+task_types = [<TaskType.SIBLINGS: 'siblings'>, <TaskType.FRIENDS: 'friends'>, <TaskType.COLLEAGUES: 'colleagues'>]
+seed = 42
+size = 10
+max_entities = 6
+```
+
+Example tasks:
+````
+Example 1:
+Question: Patricia has 6 male colleagues and she also has 3 female colleagues. These are all colleagues that Patricia has. All these mentioned persons around Patricia are colleagues of each other. James has 2 male colleagues and 2 female colleagues in total. All these mentioned persons around James are colleagues of each other. The people in the circle around James do not have other colleagues aside - with the only exception of Matilda. She is colleague of James and she is also colleague of Patricia, being part of Patricia's circle. How many female colleagues does Matilda have?
+Answer: 4
+Metadata: {'task_type': 'colleagues'}
+
+Example 2:
+Question: Elizabeth has 4 brothers and she also has 3 sisters. How many sisters does Elizabeth's brother have?
+Answer: 4
+Metadata: {'task_type': 'siblings'}
+
+Example 3:
+Question: Sarah has 6 male friends and she also has 1 female friends. They all are friends with each other and have no other friends aside. How many female friends does Thomas, a male friend of Sarah, have?
+Answer: 2
+Metadata: {'task_type': 'friends'}
+
+````
+
 ### base_conversion
 Generates base conversion tasks
 
@@ -1548,7 +1593,7 @@ Metadata: {'task_type': 'datetime_tz', 'start_time': datetime.datetime(2964, 6,
 Example 2:
 Question: A video call started at 09:44 and ended at 12:22. How long was the call? Answer in HH:MM.
 Answer: 02:38
-Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 1, 9, 44), 'end_time': datetime.datetime(2025, 2, 1, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
+Metadata: {'task_type': 'time', 'start_time': datetime.datetime(2025, 2, 2, 9, 44), 'end_time': datetime.datetime(2025, 2, 2, 12, 22), 'format': '%H:%M', 'expected_format': 'HH:MM'}
 
 Example 3:
 Question: Calculate the time difference between Sat Dec 22 2677 and Thu Mar 21 2678. Express the result in D days.

diff --git a/reasoning_gym/geometry/advanced_geometry.py b/reasoning_gym/geometry/advanced_geometry.py
@@ -21,11 +21,13 @@ class AdvancedGeometryConfig:
 
     # Probability or list of tasks we want to generate
     # For demonstration, we have three categories:
-    task_types: List[str] = field(default_factory=lambda: [
-        "orthocenter",
-        "incircle_radius",
-        "angle_measure",
-    ])
+    task_types: List[str] = field(
+        default_factory=lambda: [
+            "orthocenter",
+            "incircle_radius",
+            "angle_measure",
+        ]
+    )
 
     def validate(self):
         assert self.min_coord < self.max_coord, "min_coord must be < max_coord."

diff --git a/reasoning_gym/graphs/family_relationships.py b/reasoning_gym/graphs/family_relationships.py
@@ -65,22 +65,100 @@ class FamilyRelationshipsConfig:
 
     min_family_size: int = 4
     max_family_size: int = 8
-    male_names: List[str] = field(default_factory=lambda: [
-        "James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph",
-        "Thomas", "Charles", "Peter", "Daniel", "Matthew", "Christopher", "Andrew",
-        "George", "Edward", "Benjamin", "Henry", "Samuel", "Alexander", "Oliver",
-        "Jack", "Harry", "Jacob", "Noah", "Ethan", "Lucas", "Mason", "Logan",
-        "Sebastian", "Theodore", "Owen", "Liam", "Aiden", "Kai", "Jayden", "Zion",
-        "Phoenix", "Atlas", "Axel", "Ryder", "Finn"
-    ])
-    female_names: List[str] = field(default_factory=lambda: [
-        "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth", "Barbara", "Susan",
-        "Jessica", "Sarah", "Karen", "Emma", "Lisa", "Anna", "Margaret", "Victoria",
-        "Charlotte", "Sophia", "Isabella", "Olivia", "Ava", "Mia", "Emily",
-        "Abigail", "Amelia", "Eleanor", "Grace", "Alice", "Lucy", "Chloe",
-        "Sophie", "Lily", "Hannah", "Zoe", "Luna", "Nova", "Aria", "Willow",
-        "Aurora", "Sage", "River", "Winter", "Sky", "Rain"
-    ])
+    male_names: List[str] = field(
+        default_factory=lambda: [
+            "James",
+            "John",
+            "Robert",
+            "Michael",
+            "William",
+            "David",
+            "Richard",
+            "Joseph",
+            "Thomas",
+            "Charles",
+            "Peter",
+            "Daniel",
+            "Matthew",
+            "Christopher",
+            "Andrew",
+            "George",
+            "Edward",
+            "Benjamin",
+            "Henry",
+            "Samuel",
+            "Alexander",
+            "Oliver",
+            "Jack",
+            "Harry",
+            "Jacob",
+            "Noah",
+            "Ethan",
+            "Lucas",
+            "Mason",
+            "Logan",
+            "Sebastian",
+            "Theodore",
+            "Owen",
+            "Liam",
+            "Aiden",
+            "Kai",
+            "Jayden",
+            "Zion",
+            "Phoenix",
+            "Atlas",
+            "Axel",
+            "Ryder",
+            "Finn",
+        ]
+    )
+    female_names: List[str] = field(
+        default_factory=lambda: [
+            "Mary",
+            "Patricia",
+            "Jennifer",
+            "Linda",
+            "Elizabeth",
+            "Barbara",
+            "Susan",
+            "Jessica",
+            "Sarah",
+            "Karen",
+            "Emma",
+            "Lisa",
+            "Anna",
+            "Margaret",
+            "Victoria",
+            "Charlotte",
+            "Sophia",
+            "Isabella",
+            "Olivia",
+            "Ava",
+            "Mia",
+            "Emily",
+            "Abigail",
+            "Amelia",
+            "Eleanor",
+            "Grace",
+            "Alice",
+            "Lucy",
+            "Chloe",
+            "Sophie",
+            "Lily",
+            "Hannah",
+            "Zoe",
+            "Luna",
+            "Nova",
+            "Aria",
+            "Willow",
+            "Aurora",
+            "Sage",
+            "River",
+            "Winter",
+            "Sky",
+            "Rain",
+        ]
+    )
     seed: Optional[int] = None
     size: int = 500
 

diff --git a/reasoning_gym/logic/__init__.py b/reasoning_gym/logic/__init__.py
@@ -6,10 +6,13 @@
 - Syllogisms
 """
 
+from .aiw import AliceInWonderlandConfig, AliceInWonderlandDataset
 from .propositional_logic import PropositionalLogicConfig, PropositionalLogicDataset
 from .syllogisms import SyllogismConfig, SyllogismDataset, Term
 
 __all__ = [
+    "AliceInWonderlandConfig",
+    "AliceInWonderlandDataset",
     "PropositionalLogicConfig",
     "PropositionalLogicDataset",
     "SyllogismConfig",

diff --git a/reasoning_gym/logic/aiw.py b/reasoning_gym/logic/aiw.py
@@ -1,14 +1,15 @@
 from dataclasses import dataclass, field
-from typing import List, Optional
 from enum import Enum
 from random import Random
 from string import Template
+from typing import List, Optional
 
 from ..factory import ProceduralDataset, register_dataset
 
 
 class TaskType(Enum):
     """Defines the type of task for the Alice in Wonderland dataset."""
+
     SIBLINGS = "siblings"
     FRIENDS = "friends"
     COLLEAGUES = "colleagues"  # Added colleagues task
@@ -26,21 +27,39 @@ class AliceInWonderlandConfig:
         size (int): Number of samples in the dataset.
         max_entities (int): Max number of siblings/friends/colleagues in questions.
     """
+
     male_names: List[str] = field(
         default_factory=lambda: [
-            "James", "John", "Robert", "Michael", "William", "David",
-            "Richard", "Joseph", "Thomas", "Charles", "Bob"
+            "James",
+            "John",
+            "Robert",
+            "Michael",
+            "William",
+            "David",
+            "Richard",
+            "Joseph",
+            "Thomas",
+            "Charles",
+            "Bob",
         ]
     )
     female_names: List[str] = field(
         default_factory=lambda: [
-            "Mary", "Patricia", "Jennifer", "Linda", "Elizabeth",
-            "Barbara", "Susan", "Jessica", "Sarah", "Margaret", "Alice"
+            "Mary",
+            "Patricia",
+            "Jennifer",
+            "Linda",
+            "Elizabeth",
+            "Barbara",
+            "Susan",
+            "Jessica",
+            "Sarah",
+            "Margaret",
+            "Alice",
         ]
     )
     task_types: List[TaskType] = field(
-        default_factory=lambda: [
-            TaskType.SIBLINGS, TaskType.FRIENDS, TaskType.COLLEAGUES]  # Added Colleagues
+        default_factory=lambda: [TaskType.SIBLINGS, TaskType.FRIENDS, TaskType.COLLEAGUES]  # Added Colleagues
     )
     seed: Optional[int] = None
     size: int = 10
@@ -57,19 +76,19 @@ def validate(self) -> None:
 
 class AliceInWonderlandDataset(ProceduralDataset):
     """
-     A procedural dataset inspired by the "Alice in Wonderland" paper.
-
-     The dataset is inspired by the following paper:
-        @inproceedings{nezhurina2024alice,
-        title={Alice in Wonderland: Simple Tasks Reveal Severe Generalization and
-               Basic Reasoning Deficits in State-Of-the-Art Large Language Models},
-        author={Marianna Nezhurina and Lucia Cipolina-Kun and Mehdi Cherti and
-               Jenia Jitsev},
-        booktitle={NeurIPS 2024 Workshop on Scientific Methods for Understanding
-                   Deep Learning},
-        year={2024},
-        url={https://openreview.net/forum?id=Mkl7dzjYiW}
-        }
+    A procedural dataset inspired by the "Alice in Wonderland" paper.
+
+    The dataset is inspired by the following paper:
+       @inproceedings{nezhurina2024alice,
+       title={Alice in Wonderland: Simple Tasks Reveal Severe Generalization and
+              Basic Reasoning Deficits in State-Of-the-Art Large Language Models},
+       author={Marianna Nezhurina and Lucia Cipolina-Kun and Mehdi Cherti and
+              Jenia Jitsev},
+       booktitle={NeurIPS 2024 Workshop on Scientific Methods for Understanding
+                  Deep Learning},
+       year={2024},
+       url={https://openreview.net/forum?id=Mkl7dzjYiW}
+       }
 
     """
 
@@ -152,14 +171,10 @@ def _get_aiw(self, rng: Random) -> dict:
                 num_female=num_female,
             )
         elif task_type == TaskType.COLLEAGUES:
-            num_male_colleagues_alice_circle = rng.randint(
-                1, self.config.max_entities)
-            num_female_colleagues_alice_circle = rng.randint(
-                1, self.config.max_entities)
-            num_male_colleagues_bob_circle = rng.randint(
-                1, self.config.max_entities)
-            num_female_colleagues_bob_circle = rng.randint(
-                1, self.config.max_entities)
+            num_male_colleagues_alice_circle = rng.randint(1, self.config.max_entities)
+            num_female_colleagues_alice_circle = rng.randint(1, self.config.max_entities)
+            num_male_colleagues_bob_circle = rng.randint(1, self.config.max_entities)
+            num_female_colleagues_bob_circle = rng.randint(1, self.config.max_entities)
 
             answer = num_female_colleagues_alice_circle + 1
             template = rng.choice(self.templates[TaskType.COLLEAGUES])
@@ -169,16 +184,10 @@ def _get_aiw(self, rng: Random) -> dict:
                 num_male_colleagues_alice_circle=num_male_colleagues_alice_circle,
                 num_female_colleagues_alice_circle=num_female_colleagues_alice_circle,
                 num_male_colleagues_bob_circle=num_male_colleagues_bob_circle,
-                num_female_colleagues_bob_circle=num_female_colleagues_bob_circle
+                num_female_colleagues_bob_circle=num_female_colleagues_bob_circle,
             )
 
-        return {
-            "question": question,
-            "answer": answer,
-            "metadata": {
-                "task_type": task_type.value
-            }
-        }
+        return {"question": question, "answer": answer, "metadata": {"task_type": task_type.value}}
 
     def __getitem__(self, idx: int) -> dict:
         rng = Random(self.seed + idx)

diff --git a/tests/test_aiw.py b/tests/test_aiw.py
@@ -14,8 +14,7 @@ def test_aiw_config_validation():
         config.validate()
 
     with pytest.raises(AssertionError):
-        config = AliceInWonderlandConfig(
-            female_names=["Mary", "Jane"])  # No Alice
+        config = AliceInWonderlandConfig(female_names=["Mary", "Jane"])  # No Alice
         config.validate()
 
     with pytest.raises(AssertionError):
@@ -56,8 +55,7 @@ def test_aiw_items():
         # Verify question task type characteristics
         task_type = item["metadata"]["task_type"]
         if task_type == TaskType.SIBLINGS.value:
-            assert any(phrase in item["question"]
-                       for phrase in ["brothers", "sisters"])
+            assert any(phrase in item["question"] for phrase in ["brothers", "sisters"])
         elif task_type == TaskType.FRIENDS.value:
             assert "friends" in item["question"]
         elif task_type == TaskType.COLLEAGUES:
@@ -95,5 +93,4 @@ def test_aiw_random_ranges():
         numbers = [int(n) for n in question.split() if n.isdigit()]
 
         # Check all numbers are in a reasonable range (the assertion below accepts 1-12)
-        assert all(
-            1 <= n <= 12 for n in numbers), f"Numbers out of range: {numbers}"
+        assert all(1 <= n <= 12 for n in numbers), f"Numbers out of range: {numbers}"