All-Hands-AI · AlexCuadron · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py
@@ -19,6 +19,7 @@
     'delegator_agent',
     'dummy_agent',
     'browsing_agent',
+    'supervisor_agent',
 ]
 
 for agent in all_microagents.values():

diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -1,4 +1,3 @@
-import json
 import os
 from collections import deque
 
@@ -9,8 +8,10 @@
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
+from openhands.core.utils import json
 from openhands.events.action import (
     Action,
     AgentDelegateAction,
@@ -85,6 +86,17 @@ def __init__(
         Parameters:
         - llm (LLM): The llm to be used by this agent
         """
+
+        # FIXME: hard-coded override -- the llm/config arguments are replaced below.
+        llm_config = LLMConfig(
+            model='litellm_proxy/claude-3-5-sonnet-20241022',
+            api_key='REDACTED',  # NOTE(review): credential embedded in source
+            temperature=0.0,
+            base_url='https://llm-proxy.app.all-hands.dev',
+        )
+        llm = LLM(llm_config)
+        # TODO: Remove this once we have a real AgentConfig
+        config = AgentConfig()
         super().__init__(llm, config)
         self.pending_actions: deque[Action] = deque()
         self.reset()
@@ -369,6 +381,11 @@ def step(self, state: State) -> Action:
         - MessageAction(content) - Message action to run (e.g. ask for clarification)
         - AgentFinishAction() - end the interaction
         """
+
+        # While when_to_stop is negative, adopt the value a supervisor passed in state.inputs
+        if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None):
+            self.when_to_stop: int = state.inputs['when_to_stop']
+
         # Continue with pending actions if any
         if self.pending_actions:
             return self.pending_actions.popleft()
@@ -469,6 +486,14 @@ def _get_messages(self, state: State) -> list[Message]:
             else:
                 raise ValueError(f'Unknown event type: {type(event)}')
 
+            if state.inputs.get('next_step', ''):
+                messages_to_add = [
+                    Message(
+                        role='user',
+                        content=[TextContent(text=state.inputs['next_step'])],
+                    )
+                ]
+
             # Check pending tool call action messages and see if they are complete
             _response_ids_to_remove = []
             for (
@@ -501,6 +526,13 @@ def _get_messages(self, state: State) -> list[Message]:
                         self.prompt_manager.enhance_message(message)
                     messages.append(message)
 
+        if state.inputs.get('next_step', ''):
+            messages.append(
+                Message(
+                    role='user', content=[TextContent(text=state.inputs['next_step'])]
+                )
+            )
+
         if self.llm.is_caching_prompt_active():
             # NOTE: this is only needed for anthropic
             # following logic here:

diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
@@ -445,7 +445,9 @@ def __init__(self):
     ),
 )
 
-_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
+_FINISH_DESCRIPTION = (
+    """Finish the interaction when the task is successfully complete."""
+)
 
 FinishTool = ChatCompletionToolParam(
     type='function',
@@ -455,6 +457,18 @@ def __init__(self):
     ),
 )
 
+_HELP_DESCRIPTION = (  # description text attached to the `help` tool below
+    """Request assistance when the assistant cannot proceed further with the task."""
+)
+
+HelpTool = ChatCompletionToolParam(
+    type='function',
+    function=ChatCompletionToolParamFunctionChunk(
+        name='help',  # becomes AgentFinishAction(outputs={'fixed': False})
+        description=_HELP_DESCRIPTION,
+    ),
+)
+
 
 def combine_thought(action: Action, thought: str) -> Action:
     if not hasattr(action, 'thought'):
@@ -497,7 +511,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                     inputs=arguments,
                 )
             elif tool_call.function.name == 'finish':
-                action = AgentFinishAction()
+                action = AgentFinishAction(outputs={'fixed': True})
+            elif tool_call.function.name == 'help':
+                action = AgentFinishAction(outputs={'fixed': False})
             elif tool_call.function.name == 'edit_file':
                 action = FileEditAction(**arguments)
             elif tool_call.function.name == 'str_replace_editor':
@@ -555,7 +571,7 @@ def get_tools(
     codeact_enable_llm_editor: bool = False,
     codeact_enable_jupyter: bool = False,
 ) -> list[ChatCompletionToolParam]:
-    tools = [CmdRunTool, FinishTool]
+    tools = [CmdRunTool, FinishTool, HelpTool]
     if codeact_enable_browsing:
         tools.append(WebReadTool)
         tools.append(BrowserTool)

diff --git a/openhands/agenthub/delegator_agent/agent.py b/openhands/agenthub/delegator_agent/agent.py
@@ -49,7 +49,6 @@ def step(self, state: State) -> Action:
 
         if not isinstance(last_observation, AgentDelegateObservation):
             raise Exception('Last observation is not an AgentDelegateObservation')
-
         goal, _ = state.get_current_user_intent()
         if self.current_delegate == 'study':
             self.current_delegate = 'coder'

diff --git a/openhands/agenthub/micro/coder/prompt.md b/openhands/agenthub/micro/coder/prompt.md
@@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/openhands/agenthub/micro/study_repo_for_task/prompt.md b/openhands/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}

diff --git a/openhands/agenthub/micro/verifier/prompt.md b/openhands/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,13 @@ explaining what the problem is.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/openhands/agenthub/supervisor_agent/__init__.py b/openhands/agenthub/supervisor_agent/__init__.py
@@ -0,0 +1,4 @@
+from openhands.agenthub.supervisor_agent.agent import SupervisorAgent
+from openhands.controller.agent import Agent
+
+Agent.register('SupervisorAgent', SupervisorAgent)
diff --git a/openhands/agenthub/supervisor_agent/agent.py b/openhands/agenthub/supervisor_agent/agent.py
@@ -0,0 +1,180 @@
+import json
+import logging
+import re
+from typing import Any, Dict, List
+
+from openhands.agenthub.supervisor_agent.prompt import get_prompt
+from openhands.controller.agent import Agent
+from openhands.controller.state.state import State
+from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.message import Message, TextContent
+from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
+from openhands.events.observation.delegate import AgentDelegateObservation
+from openhands.events.observation.observation import Observation
+from openhands.llm.llm import LLM
+from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
+from openhands.runtime.plugins.jupyter import JupyterRequirement
+from openhands.runtime.plugins.requirement import PluginRequirement
+
+
+class SupervisorAgent(Agent):
+    """Plan a task with the LLM and delegate its execution to CodeActAgent.
+
+    On the first step the agent asks the LLM for a high-level plan and
+    delegates the task to ``CodeActAgent``. If the delegate later reports
+    ``fixed=False``, the agent shows the delegate's trajectory to the LLM, asks
+    for a revised plan and delegates again; in every other case it finishes.
+    """
+
+    VERSION = '1.0'
+
+    # Output key under which the delegate returns its JSON-encoded trajectory.
+    # NOTE(review): the spelling must match whatever fills the delegate outputs
+    # (not visible in this file), so it is kept as-is.
+    _TRAJECTORY_KEY = 'trayectory'
+    # Matches the plan the LLM wraps in <steps>...</steps>, across lines.
+    _STEPS_PATTERN = re.compile(r'<steps>(.*?)</steps>', re.DOTALL)
+
+    current_delegate: str = ''
+    suggested_approaches: List[Dict[str, List[str]]] = []
+    suggested_approach_index: int = -1  # -1 Because we increment it before using it
+    results: Dict[str, List[Any]] = {'search': [], 'code': []}
+    condensed_information: str = ''
+    does_it_needs_a_test: bool = False
+    task: str = ''
+    test_command: str = ''
+    time_to_stop: int = 60  # Every 60 iterations, we stop and evaluate the approach
+    phase: int = 0
+    steps: str = ''
+
+    sandbox_plugins: list[PluginRequirement] = [
+        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
+        # AgentSkillsRequirement provides a lot of Python functions,
+        # and it needs to be initialized before Jupyter for Jupyter to use those functions.
+        AgentSkillsRequirement(),
+        JupyterRequirement(),
+    ]
+
+    # Add class attribute for tried_direct_code
+    tried_direct_code: bool = False
+
+    # Add class attribute for augmented_task
+    augmented_task: str = ''
+
+    def __init__(self, llm: LLM, config: AgentConfig):
+        """Initialize the Supervisor Agent with an LLM
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        - config (AgentConfig): The configuration for this agent
+        """
+        # Use the LLM and config supplied by the caller; no model name or API
+        # key is hard-coded here.
+        super().__init__(llm, config)
+        self.logger = logging.getLogger(__name__)
+        self.llm_config: LLMConfig = llm.config
+        # Per-instance containers, so agents do not share the mutable
+        # class-level defaults.
+        self.suggested_approaches = []
+        self.results = {'search': [], 'code': []}
+
+    def step(self, state: State) -> Action:
+        """Return the next action: delegate to CodeActAgent or finish.
+
+        Parameters:
+        - state (State): Current state; its history and user intent are read
+
+        Returns:
+        - AgentDelegateAction - on the first call, and whenever the last
+          delegate observation reports ``fixed=False``
+        - AgentFinishAction - in every other case
+        """
+        self.logger.debug('Starting step with state: %s', state)
+        self.logger.debug('LLM config: %s', self.llm_config)
+
+        task, _ = state.get_current_user_intent()
+        self.task = task or ''
+
+        if self.phase == 0:
+            self.phase += 1
+            prompt = get_prompt(self.task, prompt_type='high_level_task')
+            return self._plan_and_delegate(prompt)
+
+        # Most recent observation in the history, if any.
+        last_observation = next(
+            (e for e in reversed(state.history) if isinstance(e, Observation)),
+            None,
+        )
+        if not isinstance(last_observation, AgentDelegateObservation):
+            return AgentFinishAction()
+        # A missing 'fixed' flag counts as fixed, so the agent finishes.
+        if last_observation.outputs.get('fixed', True):
+            return AgentFinishAction()
+
+        trajectory = self._deserialize_trajectory(
+            last_observation.outputs[self._TRAJECTORY_KEY]
+        )
+        prompt = get_prompt(
+            self.task,
+            'right_track',
+            trajectory=trajectory,
+            plan=self.steps,
+        )
+        return self._plan_and_delegate(prompt)
+
+    def get_response(self, prompt: str) -> str:
+        """Send ``prompt`` as a single user message and return the reply text.
+
+        Returns an empty string when the completion carries no content.
+        """
+        message = Message(role='user', content=[TextContent(text=prompt)])
+        response = self.llm.completion(
+            messages=self.llm.format_messages_for_llm(message)
+        )
+        # ``or ''`` keeps the declared ``str`` return type when content is None.
+        return response['choices'][0]['message']['content'] or ''
+
+    def _plan_and_delegate(self, prompt: str) -> AgentDelegateAction:
+        """Ask the LLM for a plan, store it in ``self.steps`` and delegate it."""
+        match = self._STEPS_PATTERN.search(self.get_response(prompt))
+        # Fall back to the task itself when the reply has no <steps> section.
+        self.steps = match.group(1).strip('"') if match else self.task
+        return AgentDelegateAction(
+            agent='CodeActAgent',
+            inputs={
+                'task': self.task,
+                'plan': self.steps,
+                'when_to_stop': self.time_to_stop,
+            },
+        )
+
+    @staticmethod
+    def _deserialize_trajectory(trajectory_str: str) -> list[Message]:
+        """Rebuild ``Message`` objects from a JSON-encoded list of message dicts.
+
+        String content becomes one text part; list content keeps every part
+        that has a ``text`` field; missing or null content yields no parts.
+        """
+        messages: list[Message] = []
+        for msg_dict in json.loads(trajectory_str):
+            raw_content = msg_dict.get('content')
+            if isinstance(raw_content, list):
+                texts = [
+                    part['text']
+                    for part in raw_content
+                    if isinstance(part, dict) and 'text' in part
+                ]
+            elif raw_content is None:
+                texts = []
+            else:
+                texts = [raw_content]
+            messages.append(
+                Message(
+                    role=msg_dict.get('role'),
+                    content=[TextContent(text=text) for text in texts],
+                    tool_call_id=msg_dict.get('tool_call_id'),
+                    name=msg_dict.get('name'),
+                )
+            )
+        return messages