Merge pull request #10 from haidark/mathquiz

Initial Mathquiz Implementation
haidark · Jul 28, 2024 · b417867 · b417867
2 parents 8db3ae3 + 23a3c62
commit b417867
Show file tree

Hide file tree

Showing 9 changed files with 287 additions and 19 deletions.
diff --git a/configs/chess.yaml b/configs/chess.yaml
@@ -9,7 +9,8 @@ game:
     - name: chess_player
       args:
         id: gpt4 white
-        role: White
+        roles: 
+          - White
         optimize: false
         dataset: chess_dataset
         dataset_args:
@@ -39,7 +40,8 @@ game:
     - name: chess_player
       args:
         id: gpt4 black
-        role: Black
+        roles: 
+          - Black
         optimize: false
         dataset: chess_dataset
         dataset_args:

diff --git a/configs/mathquiz.yaml b/configs/mathquiz.yaml
@@ -0,0 +1,41 @@
+manager:
+  args:
+    max_rounds: 10
+    win_conditions:
+      - StudentCorrect
+game:
+  name: mathquiz
+  players:
+    - name: mathquiz_teacher
+      args:
+        id: gpt4 teacher
+        roles: 
+          - TeacherGenerateQuestion
+          - TeacherAnswerQuestion
+        lm:
+          type: AzureOpenAI
+          args:
+            api_base: https://allam-swn-gpt-01.openai.azure.com/
+            api_version: 2023-07-01-preview
+            deployment_id: gpt-4o-900ptu
+            max_tokens: 800
+            temperature: 0.8
+            top_p: 0.95
+            frequency_penalty: 0
+            presence_penalty: 0
+    - name: mathquiz_student
+      args:
+        id: gpt4 student
+        roles: 
+          - StudentAnswerQuestion
+        lm:
+          type: AzureOpenAI
+          args:
+            api_base: https://allam-swn-gpt-01.openai.azure.com/
+            api_version: 2023-07-01-preview
+            deployment_id: gpt-4o-900ptu
+            max_tokens: 800
+            temperature: 0.8
+            top_p: 0.95
+            frequency_penalty: 0
+            presence_penalty: 0
diff --git a/main.py b/main.py
@@ -27,7 +27,7 @@ def main():
 
     game_manager = GameManager(config)
 
-    logger.info("Starting a new game of chess.")
+    logger.info("Starting a new game!")
 
     final_state = game_manager.start()
 

diff --git a/zero_sum_eval/game_manager.py b/zero_sum_eval/game_manager.py
@@ -34,10 +34,10 @@ def _init_players(self):
                 player_config["name"],
                 **player_config["args"],
             )
-            if player.role not in self.games[0].roles: 
+            if player.roles[0] not in self.games[0].roles: 
                 raise ValueError(f"Role {player.role} is not defined in {self.games[0].__class__.__name__}")
-
-            self.players[player.role] = player
+            for role in player.roles:
+                self.players[role] = player
 
     def start(self):
         return self.do_eval(self.games[0])
@@ -52,8 +52,8 @@ def do_eval(self, game_state):
                 break
             game_status = game_state.query_game()
             player = self.players[game_status.roles[0]]
-            logger.info(f"{player.id} turn {turn_count}:\n{game_state.display()}")
             game_state = self.do_turn(game_status, player)
+            logger.info(f"{player.id} turn {turn_count}:\n{game_state.display()}")
             round_count += 1
         return game_state
 

diff --git a/zero_sum_eval/games/chess/chess_player.py b/zero_sum_eval/games/chess/chess_player.py
@@ -63,8 +63,9 @@ def forward(self, board_state, role, history):
 
 @PLAYER_REGISTRY.register("chess", "chess_player")
 class ChessPlayer(Player):
-    def _build_module(self, **module_args):
-        return ChessCoT(**module_args)
+    def _build_modules(self, **module_args):
+        self.main_module = ChessCoT(**module_args)
+        return [self.main_module]
 
     def make_move(self, game_state):
         """
@@ -77,7 +78,7 @@ def make_move(self, game_state):
         str: The move made by the player
         """
         export = game_state.export()
-        trace = self.module(board_state=export['environment'],
+        trace = self.main_module(board_state=export['environment'],
                                     role=export['roles'][0], 
                                     history=game_state.formatted_move_history()) 
         return trace.move

diff --git a/zero_sum_eval/games/mathquiz/__init__.py b/zero_sum_eval/games/mathquiz/__init__.py
@@ -0,0 +1,3 @@
+# This file is used to import the classes from the mathquiz module
+from .mathquiz_player import MathQuizTeacher, MathQuizStudent
+from .mathquiz_game import MathQuizGame
diff --git a/zero_sum_eval/games/mathquiz/mathquiz_game.py b/zero_sum_eval/games/mathquiz/mathquiz_game.py
@@ -0,0 +1,124 @@
+from zero_sum_eval.game_state import GameState
+from random import randint
+from zero_sum_eval.registry import GAME_REGISTRY
+
+
+@GAME_REGISTRY.register("mathquiz")
+class MathQuizGame(GameState):
+    """
+    This is a two player game in which a teacher poses a math question and a student answers it.
+    In each round:
+        1. the environment is initialized with a target number
+        2. The first player to move creates a math question with the answer as the target number
+        and proves that the question is valid by answering it.
+        3. If the first player succeeds, the second player is given a chance to answer the question.
+        4. The game continues for a fixed number of rounds.
+
+    The roles for this game are:
+        TeacherGenerateQuestion
+        TeacherAnswerQuestion
+        StudentAnswerQuestion
+
+    The environment for this game is:
+        question: a math question
+        teacher_answer: the teacher's answer to the math question
+        student_answer: the student's answer to the math question
+
+    update_game and query_game never modify this object; each returns a new
+    MathQuizGame built through initialize().
+    """
+    def __init__(self, roles=None, environment=None, context=None, target=None):
+        """
+        Parameters:
+        roles (list[str] | None): Roles still to act, next role first. Derived from the
+            environment when omitted.
+        environment (dict | None): Dict with the keys 'question', 'teacher_answer' and
+            'student_answer'. Defaults to all three set to None.
+        context (dict | None): Dict with the keys 'history' and 'message'. Defaults to an
+            empty history and no message.
+        target (str | None): The number the question must evaluate to. A random integer
+            in [1, 1000] is drawn (and stored as a string) when omitted.
+        """
+        super().__init__()
+        self.environment = environment if environment is not None else \
+            {"question": None, "teacher_answer": None, "student_answer": None}
+        self.roles = self.get_next_roles(self.environment) if roles is None else roles
+        self.context = context if context is not None else {"history": [], "message": None}
+        self.target = target if target is not None else str(randint(1, 1000))
+
+    def initialize(self, roles=None, environment=None, context=None, target=None):
+        """Build a new MathQuizGame from the given parts (same defaults as __init__)."""
+        return MathQuizGame(
+            roles=roles,
+            environment=environment,
+            context=context,
+            target=target
+        )
+
+    def update_game(self, move):
+        """
+        Record a move for the role that is currently acting.
+
+        Parameters:
+        move (str): The question text or the answer produced by the acting player
+
+        Returns:
+        MathQuizGame: A new state with the move stored in the environment. The roles are
+            carried over unchanged, so roles[0] of the result is the role that just moved.
+        """
+        # Shallow copies: the top-level dicts are new, nested values (e.g. the
+        # history list) are still shared with this state.
+        new_context = self.context.copy()
+        new_environment = self.environment.copy()
+        if self.roles[0] == "TeacherGenerateQuestion":
+            new_environment['question'] = move
+        elif self.roles[0] == "TeacherAnswerQuestion":
+            new_environment['teacher_answer'] = move
+        elif self.roles[0] == "StudentAnswerQuestion":
+            new_environment['student_answer'] = move
+
+        return self.initialize(
+            roles=self.roles,
+            environment=new_environment,
+            context=new_context,
+            target=self.target
+        )
+
+    def query_game(self):  
+        """
+        Advance to the next role and attach a status message.
+
+        Returns:
+        MathQuizGame: A new state whose roles come from get_next_roles() and whose
+            context['message'] is the validate_game() verdict on this state, or a
+            "You will move as ..." prompt when there is no verdict.
+        """
+        new_context = self.context.copy()
+        new_roles = self.get_next_roles(self.environment)
+        # Validation runs against this state's roles, i.e. before the roles advance.
+        msg = self.validate_game() 
+        new_context['message'] = msg if msg is not None else f"You will move as {new_roles[0]}" 
+
+        return self.initialize(
+            environment=self.environment,
+            context=new_context,
+            roles=new_roles,
+            target=self.target
+        )
+
+    def verify_answer(self, answer):
+        """Return True when the answer, compared as a string, equals the target."""
+        return str(answer) == str(self.target)
+
+    def validate_game(self):
+        """
+        Grade the answer stored for the current role (roles[0]).
+
+        Returns:
+        str | None: None for TeacherGenerateQuestion (nothing to grade) and for a correct
+            teacher answer; "TeacherIncorrect" for a wrong teacher answer;
+            "StudentCorrect" or "StudentIncorrect" for the student's answer.
+        """
+        current_role = self.roles[0]
+        if current_role == "TeacherGenerateQuestion":
+            return None
+        elif current_role == "TeacherAnswerQuestion":
+            if self.verify_answer(self.environment['teacher_answer']):
+                return None
+            else:
+                return "TeacherIncorrect"
+        elif current_role == "StudentAnswerQuestion":
+            # Grade the student's own answer. Checking teacher_answer here would
+            # report the teacher's result no matter what the student replied.
+            if self.verify_answer(self.environment['student_answer']):
+                return "StudentCorrect"
+            else:
+                return "StudentIncorrect"
+        return None
+
+    def get_next_roles(self, environment):
+        """
+        Work out which roles still have to act for the given environment.
+
+        Parameters:
+        environment (dict): Environment dict with 'question' and 'teacher_answer' keys
+
+        Returns:
+        list[str]: Remaining roles, next role first
+        """
+        if environment['question'] is None:
+            return ['TeacherGenerateQuestion', 'TeacherAnswerQuestion', 'StudentAnswerQuestion']
+        elif environment['teacher_answer'] is None:
+            return ['TeacherAnswerQuestion', 'StudentAnswerQuestion']
+        else:
+            return ['StudentAnswerQuestion']
+
+    def export(self):
+        """
+        Produce the view of the game that the acting player is allowed to see.
+
+        Returns:
+        dict: 'role' is the current role and 'context' is the context dict. 'environment'
+            is the target number for TeacherGenerateQuestion and the question text for
+            both answering roles, so an answering player never sees the target.
+
+        Raises:
+        ValueError: If the current role is not one of the three known roles
+        """
+        current_role = self.roles[0]
+        if current_role == "TeacherGenerateQuestion":
+            return {
+                'role': self.roles[0],
+                'environment': self.target,
+                'context': self.context
+            }
+        elif current_role  in ("TeacherAnswerQuestion", "StudentAnswerQuestion"):
+            return {
+                'role': self.roles[0],
+                'environment': self.environment['question'],
+                'context': self.context
+            }
+        else:
+            raise ValueError("Invalid role")
+
+    def display(self):
+        """Return a multi-line summary of the acting role, message, environment and target."""
+        display_str = f"Role to Act: {self.roles[0]}\nMessage: {self.context['message']}\n"
+        display_str += f"{self.environment}\n"
+        display_str += f"Target: {self.target}\n"
+        return display_str
+
+
+# Running this module directly does nothing; there is no standalone entry point.
+if __name__ == "__main__":
+    pass
diff --git a/zero_sum_eval/games/mathquiz/mathquiz_player.py b/zero_sum_eval/games/mathquiz/mathquiz_player.py
@@ -0,0 +1,92 @@
+import dspy
+from zero_sum_eval.player import Player
+from zero_sum_eval.registry import PLAYER_REGISTRY
+
+# DSPy signature for the question-writing step (wrapped by GenerateQuestionCoT).
+# NOTE(review): per the DSPy Signatures docs, the class docstring and the `desc`
+# strings are used to build the LM prompt, so editing them changes runtime behavior.
+class GenerateQuestion(dspy.Signature):
+    """Given a target number, generate a challenging math question with the target number as the answer. Make sure not to include the answer in the question."""
+
+    # Input: the number the generated question should evaluate to.
+    target_number = dspy.InputField(desc="target number")
+    # Output: the question text.
+    math_question = dspy.OutputField(desc="math question with the target number as the answer")
+
+# DSPy signature for the question-answering step (wrapped by AnswerQuestionCoT).
+# NOTE(review): per the DSPy Signatures docs, the class docstring and the `desc`
+# strings are used to build the LM prompt, so editing them changes runtime behavior.
+class AnswerQuestion(dspy.Signature):
+    """Given a math question, give the answer to the question as a number only"""
+
+    # Input: the question text to solve.
+    math_question = dspy.InputField(desc="math question")
+    # Output: the answer, requested as a bare number.
+    answer = dspy.OutputField(desc="answer to the math question with number only")
+
+class GenerateQuestionCoT(dspy.Module):
+    """Module that runs the GenerateQuestion signature through dspy.ChainOfThought."""
+
+    def __init__(self):
+        super().__init__()
+        self.cot_question = dspy.ChainOfThought(GenerateQuestion)
+
+    def forward(self, target_number):
+        """Return the chain-of-thought output for the given target number."""
+        return self.cot_question(target_number=target_number)
+
+class AnswerQuestionCoT(dspy.Module):
+    """Module that runs the AnswerQuestion signature through dspy.ChainOfThought."""
+
+    def __init__(self):
+        super().__init__()
+        self.cot_answer = dspy.ChainOfThought(AnswerQuestion)
+
+    def forward(self, math_question):
+        """Return the chain-of-thought output for the given math question."""
+        return self.cot_answer(math_question=math_question)
+
+@PLAYER_REGISTRY.register("mathquiz", "mathquiz_teacher")
+class MathQuizTeacher(Player):
+    """Player that handles both teacher roles: writing the question and answering it."""
+
+    def _build_modules(self, **module_args):
+        """Create the question and answer modules and return them as a list."""
+        self.question_module = GenerateQuestionCoT()
+        self.answer_module = AnswerQuestionCoT()
+        return [self.question_module, self.answer_module]
+
+    def make_move(self, game_state):
+        """
+        Produce the teacher's move for the role that is currently acting.
+
+        Parameters:
+        game_state (GameState): The current state of the game
+
+        Returns:
+        str: The generated question for TeacherGenerateQuestion, or the answer
+            for TeacherAnswerQuestion
+
+        Raises:
+        ValueError: If the acting role is not a teacher role
+        """
+        exported = game_state.export()
+        role = game_state.roles[0]
+        # Both modules run with this player's own language model.
+        with dspy.context(lm=self.llm_model):
+            if role == "TeacherGenerateQuestion":
+                return self.question_module(exported['environment']).math_question
+            if role == "TeacherAnswerQuestion":
+                return self.answer_module(exported['environment']).answer
+            raise ValueError(f"Invalid role for teacher: {role}")
+
+
+@PLAYER_REGISTRY.register("mathquiz", "mathquiz_student")
+class MathQuizStudent(Player):
+    """Player that handles the student role: answering the teacher's question."""
+
+    def _build_modules(self, **module_args):
+        """Create the answer module and return it as a one-element list."""
+        self.answer_module = AnswerQuestionCoT()
+        return [self.answer_module]
+
+    def make_move(self, game_state):
+        """
+        Produce the student's answer to the current question.
+
+        Parameters:
+        game_state (GameState): The current state of the game
+
+        Returns:
+        str: The answer given by the student
+
+        Raises:
+        ValueError: If the acting role is not StudentAnswerQuestion
+        """
+        exported = game_state.export()
+        role = game_state.roles[0]
+        # The module runs with this player's own language model.
+        with dspy.context(lm=self.llm_model):
+            if role != "StudentAnswerQuestion":
+                raise ValueError(f"Invalid role for student: {role}")
+            return self.answer_module(exported['environment']).answer
+
+
+