All-Hands-AI · ryanhoangt · Sep 5, 2024 · Sep 5, 2024 · Sep 6, 2024 · Sep 6, 2024
diff --git a/agenthub/__init__.py b/agenthub/__init__.py
@@ -9,6 +9,7 @@
 
 from agenthub import (  # noqa: E402
     browsing_agent,
+    coact_agent,
     codeact_agent,
     codeact_swe_agent,
     delegator_agent,
@@ -23,6 +24,7 @@
     'delegator_agent',
     'dummy_agent',
     'browsing_agent',
+    'coact_agent',
 ]
 
 for agent in all_microagents.values():

diff --git a/agenthub/coact_agent/README.md b/agenthub/coact_agent/README.md
@@ -0,0 +1,12 @@
+# CoAct Multi-Agent Framework
+
+This folder implements a multi-agent workflow inspired by the CoAct framework ([paper](https://arxiv.org/abs/2406.13381)), which provides a robust structure for defining, planning, and executing tasks using multiple agents.
+
+## Agents
+
+1. `CoActPlannerAgent`:
+    - is responsible for exploring and creating a global plan. It can replan if there are issues with the previous one.
+    - has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent).
+2. `CoActExecutorAgent`:
+    - is responsible for executing the proposed plan. If it runs into issues with the plan, it can request a new one.
+    - also has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent).
diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py
@@ -0,0 +1,10 @@
+from agenthub.coact_agent.executor.executor_agent import (
+    LocalExecutorAgent as CoActExecutorAgent,
+)
+from agenthub.coact_agent.planner.planner_agent import (
+    GlobalPlannerAgent as CoActPlannerAgent,
+)
+from openhands.controller.agent import Agent
+
+Agent.register('CoActPlannerAgent', CoActPlannerAgent)  # GlobalPlannerAgent alias
+Agent.register('CoActExecutorAgent', CoActExecutorAgent)  # LocalExecutorAgent alias
diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py
@@ -0,0 +1,71 @@
+import re
+
+from agenthub.codeact_agent.action_parser import (
+    CodeActActionParserAgentDelegate,
+    CodeActActionParserCmdRun,
+    CodeActActionParserFinish,
+    CodeActActionParserIPythonRunCell,
+    CodeActActionParserMessage,
+    CodeActResponseParser,
+)
+from openhands.controller.action_parser import ActionParser
+from openhands.events.action import (
+    Action,
+    AgentFinishAction,
+)
+
+
+class ExecutorResponseParser(CodeActResponseParser):
+    """Response parser for the CoAct executor. Supported actions:
+    - CmdRunAction(command) - bash command to run
+    - IPythonRunCellAction(code) - IPython code to run
+    - AgentDelegateAction(agent, inputs) - delegate action for (sub)task
+    - MessageAction(content) - Message action to run (e.g. ask for clarification)
+    - AgentFinishAction() - end the interaction (also built for <execute_request>)
+    """
+
+    def __init__(self):
+        super().__init__()
+        # NOTE: the order of the entries in self.action_parsers matters.
+        self.action_parsers = [
+            CodeActActionParserFinish(),
+            CodeActActionParserCmdRun(),
+            CodeActActionParserIPythonRunCell(),
+            CodeActActionParserAgentDelegate(),
+            CoActActionParserRequest(),
+        ]
+        self.default_parser = CodeActActionParserMessage()
+
+    def parse_response(self, response) -> str:
+        """Return the first choice's text, closing any unterminated execute tag."""
+        content = response.choices[0].message.content
+        if content is None:
+            return ''
+        # Append the closing tag whenever only the opening one is present.
+        for tag in ('bash', 'ipython', 'browse', 'request'):
+            opening, closing = f'<execute_{tag}>', f'</execute_{tag}>'
+            if opening in content and closing not in content:
+                content += closing
+        return content
+
+
+class CoActActionParserRequest(ActionParser):
+    """Parse an <execute_request> block into an AgentFinishAction carrying it."""
+
+    def __init__(self):
+        self.request = None  # last match from check_condition()
+
+    def check_condition(self, action_str: str) -> bool:
+        """Return True if action_str contains an <execute_request> block."""
+        pattern = r'<execute_request>(.*?)</execute_request>'  # non-greedy
+        self.request = re.search(pattern, action_str, re.DOTALL)
+        return self.request is not None
+
+    def parse(self, action_str: str) -> Action:
+        """Build the finish action; check_condition() must have matched first."""
+        assert (
+            self.request is not None
+        ), 'self.request should not be None when parse is called'
+        replan_request = self.request.group(1).strip()
+        outputs = {'content': replan_request}
+        return AgentFinishAction(thought=replan_request, outputs=outputs)
diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py
@@ -0,0 +1,22 @@
+import os
+
+from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser
+from agenthub.codeact_agent.codeact_agent import CodeActAgent
+from openhands.core.config import AgentConfig
+from openhands.llm.llm import LLM
+from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
+from openhands.utils.prompt import PromptManager
+
+
+class LocalExecutorAgent(CodeActAgent):
+    VERSION = '1.0'
+
+    def __init__(self, llm: LLM, config: AgentConfig) -> None:
+        """Set up a CodeActAgent that uses the executor's parser and prompt dir."""
+        super().__init__(llm, config)
+        self.action_parser = ExecutorResponseParser()
+        self.prompt_manager = PromptManager(
+            prompt_dir=os.path.dirname(__file__),
+            agent_skills_docs=AgentSkillsRequirement.documentation,
+            micro_agent=self.micro_agent,
+        )
diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2
@@ -0,0 +1,59 @@
+{% set MINIMAL_SYSTEM_PREFIX %}
+You are an autonomous intelligent programming agent playing the role of a subordinate employee responsible for local planning and execution of specific tasks in a multi-tier task execution structure, tasked with software development. You will be given coding-based tasks. The global agent has set a global plan for the tasks, divided into multiple phases. These phase plans will be given to you as a whole. Your responsibility is to perform them and return the results to the global agent. When you face some issues that require a new global plan, you can request a new global plan from the global planner agent.
+
+Here's the information you'll have:
+* The broken-down phase list: These are the tasks you're trying to complete now.
+* The current codebase: This is what you need to navigate through and make the changes to complete the tasks given by the global agent.
+
+The agent can use a Python environment with <execute_ipython>, e.g.:
+<execute_ipython>
+print("Hello World!")
+</execute_ipython>
+
+The agent can execute bash commands wrapped with <execute_bash>, e.g. <execute_bash> ls </execute_bash>.
+The agent is not allowed to run interactive commands. For commands that may run indefinitely,
+the output should be redirected to a file and the command run in the background, e.g. <execute_bash> python3 app.py > server.log 2>&1 & </execute_bash>
+
+If a command execution result says "Command timed out. Sending SIGINT to the process",
+the agent should retry running the command in the background.
+
+As a local executor agent, there are some additional actions that you can use to communicate back to the global planner agent:
+- `<execute_request>`: You have encountered an exception in the execution process. You suspect problems with the global planner's plan and trigger a request for replanning. Explain why you decide to request a new global plan using this action.
+
+{% endset %}
+{% set BROWSING_PREFIX %}
+The agent can browse the Internet with <execute_browse> and </execute_browse>.
+For example, <execute_browse> Tell me the usa's president using google search </execute_browse>.
+Or <execute_browse> Tell me what is in http://example.com </execute_browse>.
+{% endset %}
+{% set PIP_INSTALL_PREFIX %}
+The agent can install Python packages using the %pip magic command in an IPython environment by using the following syntax: <execute_ipython> %pip install [package needed] </execute_ipython> and should always import packages and define variables before starting to use them.
+{% endset %}
+{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %}
+{% set COMMAND_DOCS %}
+Apart from the standard Python library, the agent can also use the following functions (already imported) in <execute_ipython> environment:
+{{ agent_skills_docs }}
+IMPORTANT:
+- `open_file` only returns the first 100 lines of the file by default! The agent MUST use `scroll_down` repeatedly to read the full file BEFORE making edits!
+- The agent shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the agent would like to add the line '        print(x)', it must fully write the line out, with all leading spaces before the code!
+- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+- Any code issued should be less than 50 lines to avoid context being cut off!
+- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content!
+- For `edit_file_by_replace` NEVER provide empty parameters!
+- For `edit_file_by_replace` the file must be read fully before any replacements!
+{% endset %}
+{% set SYSTEM_SUFFIX %}
+Responses should be concise.
+The agent should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block.
+Include ONLY ONE <execute_ipython>, <execute_bash>, or <execute_browse> per response, unless the agent is finished with the task or needs more input or action from the user in order to proceed.
+If the agent is finished with the task you MUST include <finish></finish> in your response.
+IMPORTANT: Execute code using <execute_ipython>, <execute_bash>, or <execute_browse> whenever possible.
+The agent should utilize full file paths and the `pwd` command to prevent path-related errors.
+The agent must avoid apologies and thanks in its responses.
+Remember to execute ALL the phases of the global plan and only return the summary of the whole process to the global agent.
+
+{% endset %}
+{# Combine all parts without newlines between them #}
+{{ SYSTEM_PREFIX -}}
+{{- COMMAND_DOCS -}}
+{{- SYSTEM_SUFFIX }}