
Commit

checking
emmalin-7 committed Dec 22, 2024
1 parent 0704f1f commit f3e6053
Showing 12 changed files with 150 additions and 114 deletions.
2 changes: 1 addition & 1 deletion agential/training/agent_optimizer/__init__.py
@@ -1 +1 @@
"""ReAct Agent."""
"""Prompt Optimizer Agent."""
106 changes: 53 additions & 53 deletions agential/training/agent_optimizer/agent.py
@@ -6,36 +6,36 @@

from typing import Any, Dict, Optional

from agential.agents.base.agent import BaseAgent
from agential.agents.react.output import ReActOutput
from agential.agents.react.prompts import (
REACT_INSTRUCTION_AMBIGNQ,
REACT_INSTRUCTION_FEVER,
REACT_INSTRUCTION_GSM8K,
REACT_INSTRUCTION_HOTPOTQA,
REACT_INSTRUCTION_HUMANEVAL,
REACT_INSTRUCTION_MBPP,
REACT_INSTRUCTION_SVAMP,
REACT_INSTRUCTION_TABMWP,
REACT_INSTRUCTION_TRIVIAQA,
from agential.training.agent_optimizer.agent import BaseAgent
from agential.training.agent_optimizer.output import PromptOptimizerOutput
from agential.training.agent_optimizer.prompts import (
PROMPT_OPTIMIZER_INSTRUCTION_AMBIGNQ,
PROMPT_OPTIMIZER_INSTRUCTION_FEVER,
PROMPT_OPTIMIZER_INSTRUCTION_GSM8K,
PROMPT_OPTIMIZER_INSTRUCTION_HOTPOTQA,
PROMPT_OPTIMIZER_INSTRUCTION_HUMANEVAL,
PROMPT_OPTIMIZER_INSTRUCTION_MBPP,
PROMPT_OPTIMIZER_INSTRUCTION_SVAMP,
PROMPT_OPTIMIZER_INSTRUCTION_TABMWP,
PROMPT_OPTIMIZER_INSTRUCTION_TRIVIAQA,
)
from agential.agents.react.strategies.base import ReActBaseStrategy
from agential.agents.react.strategies.code import ReActHEvalStrategy, ReActMBPPStrategy
from agential.agents.react.strategies.math import (
ReActGSM8KStrategy,
ReActSVAMPStrategy,
ReActTabMWPStrategy,
from agential.training.agent_optimizer.strategies.base import PromptOptimizerBaseStrategy, ReActBaseStrategy
from agential.training.agent_optimizer.strategies.code import PromptOptimizerHEvalStrategy, PromptOptimizerMBPPStrategy
from agential.training.agent_optimizer.strategies.math import (
PromptOptimizerGSM8KStrategy,
PromptOptimizerSVAMPStrategy,
PromptOptimizerTabMWPStrategy,
)
from agential.agents.react.strategies.qa import (
ReActAmbigNQStrategy,
ReActFEVERStrategy,
ReActHotQAStrategy,
ReActTriviaQAStrategy,
from agential.training.agent_optimizer.strategies.qa import (
PromptOptimizerAmbigNQStrategy,
PromptOptimizerFEVERStrategy,
PromptOptimizerHotQAStrategy,
PromptOptimizerTriviaQAStrategy,
)
from agential.constants import BENCHMARK_FEWSHOTS, Benchmarks, FewShotType
from agential.core.llm import BaseLLM

REACT_BENCHMARK_FEWSHOTS = {
PROMPT_OPTIMIZER_BENCHMARK_FEWSHOTS = {
Benchmarks.HOTPOTQA: [FewShotType.REACT],
Benchmarks.FEVER: [FewShotType.REACT],
Benchmarks.TRIVIAQA: [FewShotType.REACT],
@@ -49,31 +49,31 @@

REACT_PROMPTS = {
Benchmarks.HOTPOTQA: {
"prompt": REACT_INSTRUCTION_HOTPOTQA,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_HOTPOTQA,
},
Benchmarks.FEVER: {
"prompt": REACT_INSTRUCTION_FEVER,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_FEVER,
},
Benchmarks.TRIVIAQA: {
"prompt": REACT_INSTRUCTION_TRIVIAQA,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_TRIVIAQA,
},
Benchmarks.AMBIGNQ: {
"prompt": REACT_INSTRUCTION_AMBIGNQ,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_AMBIGNQ,
},
Benchmarks.GSM8K: {
"prompt": REACT_INSTRUCTION_GSM8K,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_GSM8K,
},
Benchmarks.SVAMP: {
"prompt": REACT_INSTRUCTION_SVAMP,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_SVAMP,
},
Benchmarks.TABMWP: {
"prompt": REACT_INSTRUCTION_TABMWP,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_TABMWP,
},
Benchmarks.HUMANEVAL: {
"prompt": REACT_INSTRUCTION_HUMANEVAL,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_HUMANEVAL,
},
Benchmarks.MBPP: {
"prompt": REACT_INSTRUCTION_MBPP,
"prompt": PROMPT_OPTIMIZER_INSTRUCTION_MBPP,
},
}
REACT_FEWSHOTS: Dict[str, Dict] = {
@@ -88,20 +88,20 @@
Benchmarks.MBPP: {},
}
REACT_STRATEGIES = {
Benchmarks.HOTPOTQA: ReActHotQAStrategy,
Benchmarks.FEVER: ReActFEVERStrategy,
Benchmarks.TRIVIAQA: ReActTriviaQAStrategy,
Benchmarks.AMBIGNQ: ReActAmbigNQStrategy,
Benchmarks.GSM8K: ReActGSM8KStrategy,
Benchmarks.SVAMP: ReActSVAMPStrategy,
Benchmarks.TABMWP: ReActTabMWPStrategy,
Benchmarks.HUMANEVAL: ReActHEvalStrategy,
Benchmarks.MBPP: ReActMBPPStrategy,
Benchmarks.HOTPOTQA: PromptOptimizerHotQAStrategy,
Benchmarks.FEVER: PromptOptimizerFEVERStrategy,
Benchmarks.TRIVIAQA: PromptOptimizerTriviaQAStrategy,
Benchmarks.AMBIGNQ: PromptOptimizerAmbigNQStrategy,
Benchmarks.GSM8K: PromptOptimizerGSM8KStrategy,
Benchmarks.SVAMP: PromptOptimizerSVAMPStrategy,
Benchmarks.TABMWP: PromptOptimizerTabMWPStrategy,
Benchmarks.HUMANEVAL: PromptOptimizerHEvalStrategy,
Benchmarks.MBPP: PromptOptimizerMBPPStrategy,
}


class AgentOptimizer(BaseAgent):
"""AgentOptimizer class for optimizing the agent's performance."""
class PromptOptimizer(BaseAgent):
"""PromptOptimizer class for optimizing the agent's performance."""

def __init__(
self,
@@ -116,8 +116,8 @@ def __init__(
super().__init__(llm=llm, benchmark=benchmark, testing=testing)


class AgentOptimizer(BaseAgent):
"""AgentOptimizer agent.
class PromptOptimizer(BaseAgent):
"""PromptOptimizer agent.
Attributes:
llm (BaseLLM): An instance of a language model used for generating initial answers
@@ -137,7 +137,7 @@ def __init__(
"""Initialization."""
super().__init__(llm=llm, benchmark=benchmark, testing=testing)

self.strategy = AgentOptimizer.get_strategy(
self.strategy = PromptOptimizer.get_strategy(
benchmark=self.benchmark,
llm=self.llm,
testing=self.testing,
@@ -161,7 +161,7 @@ def get_fewshots(
if benchmark not in REACT_FEWSHOTS:
raise ValueError(f"Benchmark '{benchmark}' few-shots not found for ReAct.")

if fewshot_type not in REACT_BENCHMARK_FEWSHOTS[benchmark]:
if fewshot_type not in PROMPT_OPTIMIZER_BENCHMARK_FEWSHOTS[benchmark]:
raise ValueError(
f"Benchmark '{benchmark}' few-shot type not supported for ReAct."
)
@@ -187,7 +187,7 @@ def get_prompts(benchmark: str, **kwargs: Any) -> Dict[str, str]:
return REACT_PROMPTS[benchmark]

@staticmethod
def get_strategy(benchmark: str, **kwargs: Any) -> ReActBaseStrategy:
def get_strategy(benchmark: str, **kwargs: Any) -> PromptOptimizerBaseStrategy:
"""Returns an instance of the appropriate ReAct strategy based on the provided benchmark.
Args:
@@ -212,7 +212,7 @@ def generate(
additional_keys: Dict[str, str] = {},
fewshot_type: str = "",
reset: bool = True,
) -> ReActOutput:
) -> PromptOptimizerOutput:
"""Processes a given question through ReAct.
Iteratively applies the think-act-observe cycle to generate an answer for the question.
@@ -232,11 +232,11 @@
"""
if not prompt or not examples:
if not fewshot_type:
fewshot_type = REACT_BENCHMARK_FEWSHOTS[self.benchmark][0]
fewshots = AgentOptimizer.get_fewshots(
fewshot_type = PROMPT_OPTIMIZER_BENCHMARK_FEWSHOTS[self.benchmark][0]
fewshots = PromptOptimizer.get_fewshots(
benchmark=self.benchmark, fewshot_type=fewshot_type
)
prompts = AgentOptimizer.get_prompts(benchmark=self.benchmark)
prompts = PromptOptimizer.get_prompts(benchmark=self.benchmark)
examples = fewshots["examples"]
prompt = prompts["prompt"]

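
The agent.py diff above shows the renamed PromptOptimizer class, its benchmark-keyed strategy/prompt/few-shot lookups, and the fallback logic in generate. Below is a minimal usage sketch inferred only from the signatures visible in this diff; the concrete LLM wrapper, its constructor arguments, the question keyword, and the fields on PromptOptimizerOutput are assumptions rather than confirmed API.

from agential.constants import Benchmarks
from agential.core.llm import LLM  # hypothetical concrete wrapper around BaseLLM
from agential.training.agent_optimizer.agent import PromptOptimizer

# Assumed constructor; the diff only shows that a BaseLLM instance is required.
llm = LLM("gpt-4o")

optimizer = PromptOptimizer(llm=llm, benchmark=Benchmarks.HOTPOTQA)

# With no explicit prompt/examples, generate falls back to the benchmark's default
# few-shot type and instruction template, as shown in the generate method above.
output = optimizer.generate(
    question="Which magazine was started first, Arthur's Magazine or First for Women?",  # assumed keyword
)
print(output)  # a PromptOptimizerOutput instance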
9 changes: 4 additions & 5 deletions agential/training/agent_optimizer/functional.py
@@ -6,9 +6,8 @@

from tiktoken import Encoding

from agential.agents.react.output import ReActStepOutput
from agential.training.agent_optimizer.output import PromptOptimizerStepOutput, PromptOptimizerOutput
from agential.core.llm import BaseLLM, Response
from agential.training.agent_optimizer.output import PromptOptimizerOutput
from agential.training.agent_optimizer.prompts import OPT_PROMPT


@@ -278,14 +277,14 @@ def parse_code_action(action: str) -> Tuple[str, str]:
return action_type, query


def accumulate_metrics(steps: List[ReActStepOutput]) -> Dict[str, Any]:
"""Accumulate total metrics from a list of ReActStepOutput objects.
def accumulate_metrics(steps: List[PromptOptimizerStepOutput]) -> Dict[str, Any]:
"""Accumulate total metrics from a list of PromptOptimizerStepOutput objects.
This function calculates and aggregates various metrics across all steps in the input list.
It sums up token counts, costs, and time measurements for both thought and action components.
Args:
steps (List[ReActStepOutput]): A list of ReActStepOutput objects representing individual steps.
steps (List[PromptOptimizerStepOutput]): A list of PromptOptimizerStepOutput objects representing individual steps.
Returns:
Dict[str, Any]: A dictionary containing the following accumulated metrics:
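
For reference, the aggregation that accumulate_metrics describes above could look roughly like the sketch below; the field names on PromptOptimizerStepOutput and its nested response objects (thought_response, action_response, prompt_tokens, and so on) are assumptions based on the docstring, not the actual model definition.

from typing import Any, Dict, List

def accumulate_metrics_sketch(steps: List[Any]) -> Dict[str, Any]:
    """Sum token, cost, and time metrics over thought and action responses (sketch)."""
    totals = {
        "total_prompt_tokens": 0,
        "total_completion_tokens": 0,
        "total_cost": 0.0,
        "total_time": 0.0,
    }
    for step in steps:
        # Assumed attributes; the real PromptOptimizerStepOutput may differ.
        for response in (step.thought_response, step.action_response):
            totals["total_prompt_tokens"] += response.prompt_tokens
            totals["total_completion_tokens"] += response.completion_tokens
            totals["total_cost"] += response.total_cost
            totals["total_time"] += response.total_time
    return totals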
5 changes: 3 additions & 2 deletions agential/training/agent_optimizer/memory.py
@@ -6,8 +6,8 @@
from agential.agents.base.modules.memory import BaseMemory


class AgentOptimizerMemory(BaseMemory):
"""AgentOptimizer Memory implementation.
class PromptOptimizerMemory(BaseMemory):
"""PromptOptimizer Memory implementation.
Attributes:
memories (Dict[str, List[Dict[str, Any]]]): A dictionary of memories.
@@ -104,6 +104,7 @@ def remove_function(
if func_info["name"] == name:
self.functions_list.remove(func_info)


def load_memories(self, question: str) -> Dict[str, Any]:
"""Load all memories and return as a dictionary.
4 changes: 2 additions & 2 deletions agential/training/agent_optimizer/output.py
@@ -1,4 +1,4 @@
"""ReAct structured output module."""
"""PromptOptimizer structured output module."""

from typing import Any, Dict, List

@@ -9,7 +9,7 @@


class PromptOptimizerStepOutput(BaseModel):
"""Agent Optimizer step Pydantic output class.
"""Prompt Optimizer step Pydantic output class.
Attributes:
thought (str): The thought process of the agent.
18 changes: 9 additions & 9 deletions agential/training/agent_optimizer/prompts.py
@@ -173,7 +173,7 @@
# ======================================================================== HOTPOTQA ======================================================================== #


REACT_INSTRUCTION_HOTPOTQA = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
PROMPT_OPTIMIZER_INSTRUCTION_HOTPOTQA = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the last passage successfully found by Search.
(3) Finish[answer], which returns the answer and finishes the task.
@@ -189,7 +189,7 @@
# ======================================================================== FEVER ======================================================================== #


REACT_INSTRUCTION_FEVER = """Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. Thought can reason about the current situation, and Action can be two types:
PROMPT_OPTIMIZER_INSTRUCTION_FEVER = """Determine if there is Observation that SUPPORTS or REFUTES a Claim, or if there is NOT ENOUGH INFORMATION. Thought can reason about the current situation, and Action can be two types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the last passage successfully found by Search.
(3) Finish[answer], which returns the answer and finishes the task.
@@ -205,7 +205,7 @@
# ======================================================================== AMBIGNQ ======================================================================== #


REACT_INSTRUCTION_AMBIGNQ = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be two types:
PROMPT_OPTIMIZER_INSTRUCTION_AMBIGNQ = """Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be two types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the last passage successfully found by Search.
(3) Finish[answer], which returns the answer and finishes the task.
@@ -221,7 +221,7 @@
# ======================================================================== TRIVIAQA ======================================================================== #


REACT_INSTRUCTION_TRIVIAQA = """Answer a trivia question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
PROMPT_OPTIMIZER_INSTRUCTION_TRIVIAQA = """Answer a trivia question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the last passage successfully found by Search.
(3) Finish[answer], which returns the answer and finishes the task.
@@ -237,7 +237,7 @@
# ======================================================================== GSM8K ======================================================================== #


REACT_INSTRUCTION_GSM8K = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
PROMPT_OPTIMIZER_INSTRUCTION_GSM8K = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
(1) Calculate[code], which implements code to answer the math question, saving the answer as the `answer` variable.
(2) Finish[code], which returns the code to answer the math question and finishes the task, saving the answer as the `answer` variable.
You have a maximum of {max_steps} steps.
@@ -253,7 +253,7 @@
# ======================================================================== SVAMP ======================================================================== #


REACT_INSTRUCTION_SVAMP = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
PROMPT_OPTIMIZER_INSTRUCTION_SVAMP = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
(1) Calculate[code], which implements code to answer the math question, saving the answer as the `answer` variable.
(2) Finish[code], which returns the code to answer the math question and finishes the task, saving the answer as the `answer` variable.
You have a maximum of {max_steps} steps.
@@ -269,7 +269,7 @@
# ======================================================================== TABMWP ======================================================================== #


REACT_INSTRUCTION_TABMWP = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
PROMPT_OPTIMIZER_INSTRUCTION_TABMWP = """Answer a math question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be two types:
(1) Calculate[code], which implements code to answer the math question, saving the answer as the `answer` variable.
(2) Finish[code], which returns the code to answer the math question and finishes the task, saving the answer as the `answer` variable.
You have a maximum of {max_steps} steps.
@@ -285,7 +285,7 @@
# ======================================================================== HUMANEVAL ======================================================================== #


REACT_INSTRUCTION_HUMANEVAL = """Answer a coding question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
PROMPT_OPTIMIZER_INSTRUCTION_HUMANEVAL = """Answer a coding question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
(1) Implement[<insert your code here>], which implements the function to answer the question.
(2) Test[<insert your code here>], which implements assert statement test cases to test the implemented code.
(3) Finish[<insert your answer here>], which returns the code implementation and finishes the task.
@@ -302,7 +302,7 @@
# ======================================================================== MBPP ======================================================================== #


REACT_INSTRUCTION_MBPP = """Answer a coding question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
PROMPT_OPTIMIZER_INSTRUCTION_MBPP = """Answer a coding question with interleaving Thought, Action, Observation steps. Thought can reason about the current question and plan the retrieval steps, and Action can be three types:
(1) Implement[code], which implements the function to answer the question.
(2) Test[code], which implements assert statement test cases to test the implemented code.
(3) Finish[answer], which returns the code implementation and finishes the task.
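
Each renamed instruction constant above is a format string; only the {max_steps} placeholder is visible in these excerpts, so the other keyword names in the sketch below (examples, question, scratchpad) are assumptions that may need to match the template's actual placeholders.

from agential.training.agent_optimizer.prompts import (
    PROMPT_OPTIMIZER_INSTRUCTION_GSM8K,
)

# Fill in the template. Extra keyword arguments are ignored by str.format, but any
# placeholder the template defines and we omit here would raise a KeyError.
prompt = PROMPT_OPTIMIZER_INSTRUCTION_GSM8K.format(
    max_steps=6,
    examples="<few-shot examples>",  # assumed placeholder name
    question="Janet has 3 apples and buys 5 more. How many does she have?",  # assumed
    scratchpad="",  # assumed placeholder name
)
print(prompt)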
3 changes: 1 addition & 2 deletions agential/training/agent_optimizer/strategies/base.py
@@ -1,12 +1,11 @@
"""Base ReAct Agent strategy class."""
"""Base Prompt Optimizer Agent strategy class."""

from abc import abstractmethod
from typing import Any, Dict, Optional, Tuple

from tiktoken import Encoding

from agential.agents.base.strategies import BaseAgentStrategy
from agential.agents.react.output import ReActOutput
from agential.core.llm import BaseLLM, Response
from agential.training.agent_optimizer.output import PromptOptimizerOutput

5 of the 12 changed files are not shown above.
