diff --git a/agenthub/__init__.py b/agenthub/__init__.py index d91f2d3ec23a..7016445f90e9 100644 --- a/agenthub/__init__.py +++ b/agenthub/__init__.py @@ -9,6 +9,7 @@ from agenthub import ( # noqa: E402 browsing_agent, + coact_agent, codeact_agent, codeact_swe_agent, delegator_agent, @@ -23,6 +24,7 @@ 'delegator_agent', 'dummy_agent', 'browsing_agent', + 'coact_agent', ] for agent in all_microagents.values(): diff --git a/agenthub/coact_agent/README.md b/agenthub/coact_agent/README.md new file mode 100644 index 000000000000..fa8e7f568e98 --- /dev/null +++ b/agenthub/coact_agent/README.md @@ -0,0 +1,36 @@ +# CoAct Multi-Agent Framework + +This folder implements a multi-agent workflow inspired by the CoAct framework ([paper](https://arxiv.org/abs/2406.13381)), that provides a robust structure for defining, planning, and executing tasks using multiple agents. + +## Agents + +1. `CoActPlannerAgent`: + - is responsible for exploring and creating a global plan. It can replan if there are issues with the previous one. + - has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent). +2. `CoActExecutorAgent`: + - is responsible for executing the proposed plan. Facing issues with the plan, it can request for a new one. + - also has full capabilities of [CodeActAgent](https://github.com/All-Hands-AI/OpenHands/tree/main/agenthub/codeact_agent). + + +## Plan structure +```markdown +The user message is: <> +# Phases +## Phase 1 +- reason: <> +- description: <> +- expected_state: <> + <> + +<> + +## Phase 2 +- reason: ... +- description: ... +- expected_state: ... + ... + +...|... + +## Phase ... 
+``` diff --git a/agenthub/coact_agent/__init__.py b/agenthub/coact_agent/__init__.py new file mode 100644 index 000000000000..be2273f2d69c --- /dev/null +++ b/agenthub/coact_agent/__init__.py @@ -0,0 +1,10 @@ +from agenthub.coact_agent.executor.executor_agent import ( + LocalExecutorAgent as CoActExecutorAgent, +) +from agenthub.coact_agent.planner.planner_agent import ( + GlobalPlannerAgent as CoActPlannerAgent, +) +from openhands.controller.agent import Agent + +Agent.register('CoActPlannerAgent', CoActPlannerAgent) +Agent.register('CoActExecutorAgent', CoActExecutorAgent) diff --git a/agenthub/coact_agent/executor/action_parser.py b/agenthub/coact_agent/executor/action_parser.py new file mode 100644 index 000000000000..b15eb3bc6e29 --- /dev/null +++ b/agenthub/coact_agent/executor/action_parser.py @@ -0,0 +1,71 @@ +import re + +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) +from openhands.controller.action_parser import ActionParser +from openhands.events.action import ( + Action, + AgentFinishAction, +) + + +class ExecutorResponseParser(CodeActResponseParser): + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. 
ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__(self): + # Need pay attention to the item order in self.action_parsers + super().__init__() + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + CoActActionParserRequest(), + ] + self.default_parser = CodeActActionParserMessage() + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + if action is None: + return '' + for action_suffix in ['bash', 'ipython', 'browse', 'request']: + if ( + f'' in action + and f'' not in action + ): + action += f'' + return action + + +class CoActActionParserRequest(ActionParser): + def __init__(self): + self.request = None + + def check_condition(self, action_str: str) -> bool: + self.request = re.search( + r'(.*)', action_str, re.DOTALL + ) + return self.request is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.request is not None + ), 'self.request should not be None when parse is called' + + replan_request = self.request.group(1).strip() + return AgentFinishAction( + thought=replan_request, + outputs={'content': replan_request}, + ) diff --git a/agenthub/coact_agent/executor/executor_agent.py b/agenthub/coact_agent/executor/executor_agent.py new file mode 100644 index 000000000000..8f081fe185e0 --- /dev/null +++ b/agenthub/coact_agent/executor/executor_agent.py @@ -0,0 +1,23 @@ +import os + +from agenthub.coact_agent.executor.action_parser import ExecutorResponseParser +from agenthub.codeact_agent.codeact_agent import CodeActAgent +from openhands.core.config import AgentConfig +from openhands.llm.llm import LLM +from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement +from openhands.utils.prompt import PromptManager + + +class LocalExecutorAgent(CodeActAgent): + VERSION = '1.0' + + def __init__(self, llm: LLM, config: AgentConfig) -> None: + 
super().__init__(llm, config) + + self.action_parser = ExecutorResponseParser() + self.prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__)), + agent_skills_docs=AgentSkillsRequirement.documentation, + micro_agent=self.micro_agent, + ) + self.stop_sequences.append('') diff --git a/agenthub/coact_agent/executor/system_prompt.j2 b/agenthub/coact_agent/executor/system_prompt.j2 new file mode 100644 index 000000000000..ccc5e99f21ee --- /dev/null +++ b/agenthub/coact_agent/executor/system_prompt.j2 @@ -0,0 +1,62 @@ +{% set MINIMAL_SYSTEM_PREFIX %} +You are an autonomous intelligent programming agent playing the role of a subordinate employee responsible for local planning and execution of specific tasks in a multi-tier task execution structure, tasked with software development. You will be given coding-based tasks. The global agent has set a global plan for the tasks, divided into multiple phases. These phase plans will be given to you as a whole. Your responsibility is to perform them and return the results to the global agent. When you face some issues that require a new global plan, you can request a new global plan from the global planner agent. + +Here's the information you'll have: +* The user message and the broken-down phase list: These are the tasks you're trying to complete now. Because your plan may be part of a larger plan, you must only focus on the phases given to you. +* The current codebase: This is what you need to navigate through and make the changes to complete the tasks given by the global agent. + +The agent can use a Python environment with , e.g.: + +print("Hello World!") + + +The agent can execute bash commands wrapped with , e.g. ls . +If a bash command returns exit code `-1`, this means the process is not yet finished. +The assistant must then send a second . 
The second can be empty +(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process, +or it can contain the text `ctrl+c` to interrupt the process. + +For commands that may run indefinitely, the output should be redirected to a file and the command run +in the background, e.g. python3 app.py > server.log 2>&1 & +If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background. + +As a local executor agent, there are some additional actions that you can use to communicate back to the global planner agent: +- ``: You have encountered an exception in the execution process. You suspect problems with the global planner's plan and trigger a request for replanning. Explain why you decide to request a new global plan using this action. + +{% endset %} +{% set BROWSING_PREFIX %} +The agent can browse the Internet with and . +For example, Tell me the usa's president using google search . +Or Tell me what is in http://example.com . +{% endset %} +{% set PIP_INSTALL_PREFIX %} +The agent can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. +{% endset %} +{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + BROWSING_PREFIX + PIP_INSTALL_PREFIX %} +{% set COMMAND_DOCS %} +Apart from the standard Python library, the agent can also use the following functions (already imported) in environment: +{{ agent_skills_docs }} +IMPORTANT: +- `open_file` only returns the first 100 lines of the file by default! The agent MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! +- The agent shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. 
If the agent would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! +- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +- Any code issued should be less than 50 lines to avoid context being cut off! +- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! +- For `edit_file_by_replace` NEVER provide empty parameters! +- For `edit_file_by_replace` the file must be read fully before any replacements! +{% endset %} +{% set SYSTEM_SUFFIX %} +Responses should be concise. +The agent should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , , or per response, unless the agent is finished with the task or needs more input or action from the user in order to proceed. +If the agent is finished with the task you MUST include in your response. +IMPORTANT: Execute code using , , or whenever possible. +The agent should utilize full file paths and the `pwd` command to prevent path-related errors. +The agent must avoid apologies and thanks in its responses. +Remember to execute ALL the phases of the global plan and only return the summary of the whole process to the global agent. Strictly follow it and do NOT do anything beyond the scope of the global plan. + +{% endset %} +{# Combine all parts without newlines between them #} +{{ SYSTEM_PREFIX -}} +{{- COMMAND_DOCS -}} +{{- SYSTEM_SUFFIX }} diff --git a/agenthub/coact_agent/executor/user_prompt.j2 b/agenthub/coact_agent/executor/user_prompt.j2 new file mode 100644 index 000000000000..020d015a4228 --- /dev/null +++ b/agenthub/coact_agent/executor/user_prompt.j2 @@ -0,0 +1,290 @@ +{% set DEFAULT_EXAMPLE %} +--- START OF EXAMPLE 1 --- + +USER: The user message is: "Create a list of numbers from 1 to 10, and display them in a web page at port 5000. 
There's an existing Python file `app.py` that you can use." +Execute the following plan to fulfill it: +# Phases +## Phase 1 +- description: Edit the existing Python file `app.py` to insert the code for starting a web server, at the end of the file. +- reason: The existing Python file is missing the code to start the web server. Normally, the server is started using the `app.run(port=5000)` command. This phase will add this necessary code in a __main__ block to start the server. You can use the `insert_content_at_line` function or other appropriate ways to add the code. +- expected_state: The Python file `app.py` is updated with the code to start the web server. + /workspace/app.py + +8| +9|if __name__ == '__main__': +10| app.run(port=5000) + +## Phase 2 +- description: Run the Python file `app.py` to display the list of numbers on a web page at port 5000. +- reason: After updating the Python file, we need to run it to display the list of numbers on a web page. The command a shell command can be: `python3 app.py`. +- expected_state: The list of numbers is displayed on a web page at port 5000. + +ASSISTANT: +Let's start with the first phase: editing the Python file `app.py` to insert the code for starting a web server at line 8. I will open the file at that line to check its current content: + +open_file('/workspace/app.py', 8) + + +USER: +OBSERVATION: +[File: /workspace/app.py (8 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +(this is the end of the file) + +ASSISTANT: +As expected, the file is missing the code to start the web server. 
Now I will write the task from the plan and save it to the file `app.py`: + +EDITED_CODE=""" +if __name__ == '__main__': + app.run(port=5000)""" + +insert_content_at_line( + 'app.py', + 8, + EDITED_CODE, +) + + +USER: +OBSERVATION: +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 9). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +I have updated the Python file `app.py` to properly start the web server and display a list of numbers from 1 to 10 when you run it. Let's proceed to phase 2 to run the Python file: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 121[1]+ Exit 1 python3 app.py > server.log 2>&1 + +ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log: + +cat server.log + + +USER: +OBSERVATION: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +ASSISTANT: +It seems that Flask is not installed. 
Let me install Flask for you: + +pip install flask + + +USER: +OBSERVATION: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +ASSISTANT: +Now that Flask is installed, let me run the Python file again: + +python3 app.py > server.log 2>&1 & + + +USER: +OBSERVATION: +[1] 124 + +ASSISTANT: +Let me check the server log again: + +cat server.log + + +USER: +OBSERVATION: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. The tasks are now complete. +Summary: +- Edited the existing Python file `app.py` to insert the code for starting a web server. +- Ran the Python file to display the list of numbers on a web page at port 5000. + + +--- END OF EXAMPLE 1 --- + +--- START OF EXAMPLE 2 --- + +USER: The user message is: "Now browse the newly started server's homepage and show me the content." 
+Execute the following plan to fulfill it: +# Phases +## Phase 1 +- description: Browse the server's homepage at http://127.0.0.1:5000 +- reason: To verify that the server is running and displaying the list of numbers correctly, we need to browse the server's homepage. We can use the `execute_browse` function to access the URL. +- expected_state: The content of the server's homepage is retrieved. + +ASSISTANT: +Sure! Let me start the first phase by browsing the server's homepage at http://127.0.0.1:5000: + +Get the content on "http://127.0.0.1:5000" + + +USER: +OBSERVATION: +[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". The tasks are now complete. +Summary: +- Browsed the server's homepage at http://127.0.0.1:5000 and retrieved the content "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". + + +--- END OF EXAMPLE 2 --- + +--- START OF EXAMPLE 3 --- + +USER: The user message is: "Now kill the server, make it display the numbers in a table format." +Execute the following plan to fulfill it: +# Phases +## Phase 1 +- description: Stop the server running on port 5000. +- reason: The server needs to be stopped before modifying the Python code. +- expected_state: The server is stopped. +## Phase 2 +- description: Modify the Python code to display the numbers in a table format. +- reason: The current code returns the numbers as a string, which is incorrect. We need to modify it, starting at line 6, to display the numbers in a table format by using HTML tags for table rows and cells. +- expected_state: The Python code is updated to display the numbers in a table format. + /workspace/app.py + +6| numbers = list(range(1, 11)) +7| return "<table>" + "".join([f"<tr><td>{num}</td></tr>" for num in numbers]) + "</table>" +
+## Phase 3 +- description: Run the Python file again to display the numbers in a table format on the web page at port 5000. +- reason: After modifying the Python code, we need to run the file again to display the numbers in the expected table format. +- expected_state: The numbers are displayed in a table format on the web page. + +ASSISTANT: +Sure! Let me execute the first phase to stop the server first: + +kill 124 + + +USER: +OBSERVATION: +[1]+ Terminated python3 app.py > server.log 2>&1 + +ASSISTANT: +The server has been stopped. Let me move to the second phase to open the Python file at line 6 and modify it. + +open_file('app.py', 6) + + +USER: +[File: /workspace/app.py (10 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) + +ASSISTANT: +I should edit the file to display the numbers in a table format. I should include correct indentation. Let me update the file: + +edit_file_by_replace( + 'app.py', + to_replace=" return str(numbers)", + new_content=" return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>'", +) +
+ +USER: +Observation: +[File: /workspace/app.py (10 lines total after edit)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return '<table>' + ''.join([f'<tr><td>{i}</td></tr>' for i in numbers]) + '</table>' +8| +9|if __name__ == '__main__': +10| app.run(port=5000) +(this is the end of the file) +[File updated (edited at line 7). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.] + +ASSISTANT: +Now, let's move the third phase: to run the updated file: + +python3 app.py > server.log 2>&1 & + + +USER: +Observation: +[1] 126 + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. The tasks are now complete. +Summary: +- Stopped the server running on port 5000. +- Modified the Python code to display the numbers in a table format. +- Ran the Python file again to display the numbers in a table format on the web page at port 5000. + + +--- END OF EXAMPLE 3 --- +{% endset %} +Here is an example of how you can interact with the environment for task solving: +{{ DEFAULT_EXAMPLE }} +{% if micro_agent %} +--- BEGIN OF GUIDELINE --- +The following information may assist you in completing your task: + +{{ micro_agent }} +--- END OF GUIDELINE --- +{% endif %} + +NOW, LET'S START!
diff --git a/agenthub/coact_agent/planner/action_parser.py b/agenthub/coact_agent/planner/action_parser.py new file mode 100644 index 000000000000..955e02a17442 --- /dev/null +++ b/agenthub/coact_agent/planner/action_parser.py @@ -0,0 +1,100 @@ +import re + +from agenthub.codeact_agent.action_parser import ( + CodeActActionParserAgentDelegate, + CodeActActionParserCmdRun, + CodeActActionParserFinish, + CodeActActionParserIPythonRunCell, + CodeActActionParserMessage, + CodeActResponseParser, +) +from openhands.controller.action_parser import ActionParser +from openhands.events.action import ( + Action, + AgentDelegateAction, +) + + +class PlannerResponseParser(CodeActResponseParser): + """Parser action: + - CmdRunAction(command) - bash command to run + - IPythonRunCellAction(code) - IPython code to run + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + - MessageAction(content) - Message action to run (e.g. ask for clarification) + - AgentFinishAction() - end the interaction + """ + + def __init__(self, initial_task_str=None): + # Need pay attention to the item order in self.action_parsers + super().__init__() + self.action_parsers = [ + CodeActActionParserFinish(), + CodeActActionParserCmdRun(), + CodeActActionParserIPythonRunCell(), + CodeActActionParserAgentDelegate(), + CoActActionParserGlobalPlan(initial_task_str=initial_task_str), + ] + self.default_parser = CodeActActionParserMessage() + + def parse_response(self, response) -> str: + action = response.choices[0].message.content + if action is None: + return '' + for action_suffix in [ + 'bash', + 'ipython', + 'browse', + 'global_plan', + 'decide', + 'revise', + 'overrule', + 'collation', + ]: + if ( + f'' in action + and f'' not in action + ): + action += f'' + return action + + +class CoActActionParserGlobalPlan(ActionParser): + """Parser action: + - AgentDelegateAction(agent, inputs) - delegate action for (sub)task + """ + + def __init__( + self, + initial_task_str: list | None = None, 
+ ): + self.global_plan: re.Match | None = None + self.initial_task_str = initial_task_str or [''] + + def check_condition(self, action_str: str) -> bool: + self.global_plan = re.search( + r'(.*)', action_str, re.DOTALL + ) + return self.global_plan is not None + + def parse(self, action_str: str) -> Action: + assert ( + self.global_plan is not None + ), 'self.global_plan should not be None when parse is called' + thought = action_str.replace(self.global_plan.group(0), '').strip() + global_plan_actions = self.global_plan.group(1).strip() + + # Some extra processing when doing swe-bench eval: extract text up to and including '--- END ISSUE ---' + issue_text_pattern = re.compile(r'(.*--- END ISSUE ---)', re.DOTALL) + issue_text_match = issue_text_pattern.match(self.initial_task_str[0]) + + if issue_text_match: + self.initial_task_str[0] = issue_text_match.group(1) + + return AgentDelegateAction( + agent='CoActExecutorAgent', + thought=thought, + inputs={ + 'task': f'The user message is: {self.initial_task_str[0]}.\nExecute the following plan to fulfill it:\n{global_plan_actions}' + }, + action_suffix='global_plan', + ) diff --git a/agenthub/coact_agent/planner/planner_agent.py b/agenthub/coact_agent/planner/planner_agent.py new file mode 100644 index 000000000000..cbacb8faf777 --- /dev/null +++ b/agenthub/coact_agent/planner/planner_agent.py @@ -0,0 +1,51 @@ +import os + +from agenthub.coact_agent.planner.action_parser import PlannerResponseParser +from agenthub.codeact_agent.codeact_agent import CodeActAgent +from openhands.core.config import AgentConfig +from openhands.llm.llm import LLM +from openhands.runtime.plugins.agent_skills.agentskills import ( + DOCUMENTATION_DICT as AGENTSKILLS_DOCS_DICT, +) +from openhands.utils.prompt import PromptManager + + +class GlobalPlannerAgent(CodeActAgent): + VERSION = '1.0' + + def __init__(self, llm: LLM, config: AgentConfig) -> None: + super().__init__(llm, config) + + self.action_parser = PlannerResponseParser( + 
initial_task_str=self.initial_task_str + ) + + # Planner agent can do everything except file-editing operations + planner_agentskills_exclude = [ + 'create_file', + 'edit_file_by_replace', + 'insert_content_at_line', + 'append_file', + ] + planner_agentskills = [ + v + for k, v in AGENTSKILLS_DOCS_DICT.items() + if k not in planner_agentskills_exclude + ] + executor_editing_agentskills = [ + v + for k, v in AGENTSKILLS_DOCS_DICT.items() + if k in planner_agentskills_exclude + ] + self.prompt_manager = PromptManager( + prompt_dir=os.path.join(os.path.dirname(__file__)), + agent_skills_docs=''.join(planner_agentskills), + system_extra_vars={ + 'executor_editing_agent_skills_docs': ''.join( + executor_editing_agentskills + ) + }, + micro_agent=self.micro_agent, + ) + + self.stop_sequences.append('') diff --git a/agenthub/coact_agent/planner/system_prompt.j2 b/agenthub/coact_agent/planner/system_prompt.j2 new file mode 100644 index 000000000000..2dc430ba9fc6 --- /dev/null +++ b/agenthub/coact_agent/planner/system_prompt.j2 @@ -0,0 +1,95 @@ +{% set MINIMAL_SYSTEM_PREFIX %} +You are an autonomous intelligent agent playing the role of a strategic leader in a multi-tier task execution structure, tasked with performing programming tasks. You will be given coding-related tasks. Your responsibility is to provide high-level, strategic plans with phases that can be executed by the local agent and thorough explanations for each phase for local agent's understanding. + +Here's the information you'll have: +* The user's objective: This is the task you're trying to complete. +* The user's codebase: This is what you need to navigate and use to define the plan. + +The structure of the global plan should be as follows: + +# Phases +## Phase 1 +- reason: <> +- description: <> +- expected_state: <> + <> + +<> + +## Phase 2 +- reason: ... +- description: ... +- expected_state: ... + ... + +...|... + +## Phase ... 
+ + +You can use a Python environment with , e.g.: + +print("Hello World!") + + +The agent can execute bash commands wrapped with , e.g. ls . +If a bash command returns exit code `-1`, this means the process is not yet finished. +The assistant must then send a second . The second can be empty +(which will retrieve any additional logs), or it can contain text to be sent to STDIN of the running process, +or it can contain the text `ctrl+c` to interrupt the process. + +For commands that may run indefinitely, the output should be redirected to a file and the command run +in the background, e.g. python3 app.py > server.log 2>&1 & +If a command execution result says "Command timed out. Sending SIGINT to the process", the assistant should retry running the command in the background. + +As a planner agent, there are some additional actions that you can use to communicate and delegate tasks to the local agent: +- ``: Construct a multi-stage global plan, providing separate subtask descriptions and expected states for each phase. +{% endset %} +{% set PIP_INSTALL_PREFIX %} +The agent can install Python packages using the %pip magic command in an IPython environment by using the following syntax: %pip install [package needed] and should always import packages and define variables before starting to use them. +{% endset %} +{% set SYSTEM_PREFIX = MINIMAL_SYSTEM_PREFIX + PIP_INSTALL_PREFIX %} +{% set COMMAND_DOCS %} +Apart from the standard Python library, the agent can also use the following functions (already imported) in environment: +{{ planner_agent_skills_docs }} +IMPORTANT: +- Prefer to use these functions to interact with the codebase if possible, instead of using bash commands like `cat`, which are less efficient when the files are very large. +- `open_file` only returns the first 100 lines of the file by default! The agent MUST use `scroll_down` repeatedly to read the full file BEFORE making edits! +- NEVER use MORE THAN 5 consecutive `scroll_down` commands. 
When you use `open_file` and see that the file is too large, try to use search functions above to locate the relevant code, which can save you a lot of time. +- Don't be too tempted to construct your own functions, instead prefer to use above provided functions to interact with the codebase. + +IMPORTANT FOR CREATING FUNCTION_CALLS FOR LOCAL AGENT: +- The call shall adhere to THE `edit_file_by_replace`, `append_file` and `insert_content_at_line` FUNCTIONS REQUIRING PROPER INDENTATION. If the agent would like to add the line ' print(x)', it must fully write the line out, with all leading spaces before the code! +- Indentation is important and code that is not indented correctly will fail and require fixing before it can be run. +- Any code issued should be less than 50 lines to avoid context being cut off! +- After EVERY `create_file` the method `append_file` shall be used to write the FIRST content! +- For `edit_file_by_replace` NEVER provide empty parameters! +- For `edit_file_by_replace` the file must be read fully before any replacements! +{% endset %} +{% set EXECUTOR_SKILLS %} +Importantly, apart from the above functions, the local executor agent can use the following additional ones (already imported) in environment: +{{ executor_editing_agent_skills_docs }} +Create plans with phases that use those functions and always include all necessary parameters for editing-related functions so that the local agent doesn't need to waste resources to find them again. +{% endset %} +{% set SYSTEM_SUFFIX %} +{% endset %} +{% set SYSTEM_SUFFIX %} +Responses should be concise. +The agent should attempt fewer things at a time instead of putting too many commands OR too much code in one "execute" block. +Include ONLY ONE , or per response, unless the agent is finished with the task or needs more input or action from the user in order to proceed. +If the agent is finished with the task you MUST include in your response. +IMPORTANT: Execute code using , or whenever possible. 
+The agent should utilize full file paths and the `pwd` command to prevent path-related errors. +The agent must avoid apologies and thanks in its responses. +The planner agent can only use to browse the Internet if it's required to come up with the plan. To delegate the plan, please only use . +The planner agent can navigate the codebase to locate the necessary information to construct the plan. +After the local agent successfully completes the plan without any issues, you can collate the result and if it is good, you can exit. +Remember to create an as-detailed-as-possible plan for the local agent to follow, but with minimal number of phases. +Do NOT repeat what you have done into the plan, only provide the minimal required steps remaining for the local agent to follow to resolve the task. For example, if you have examined the codebase with a lot of steps and found the relevant code, just provide them with the plan to edit it. +The planner agent must NEVER perform the plan by itself and must delegate the plan to the local agent using . Do not go beyond this scope. + +{% endset %} +{# Combine all parts without newlines between them #} +{{ SYSTEM_PREFIX -}} +{{- COMMAND_DOCS -}} +{{- SYSTEM_SUFFIX }} diff --git a/agenthub/coact_agent/planner/user_prompt.j2 b/agenthub/coact_agent/planner/user_prompt.j2 new file mode 100644 index 000000000000..5cf7b3bbd40d --- /dev/null +++ b/agenthub/coact_agent/planner/user_prompt.j2 @@ -0,0 +1,181 @@ +{% set DEFAULT_EXAMPLE %} +--- START OF EXAMPLE 1 --- + +USER: I want to go to the user 'Every-Hands-AI' github profile using browser and check how many repos they have, then write the number to the new 'repo_count.txt' file. Use a plan that has the least number of steps. + +ASSISTANT: +I have enough information, let's create a plan to achieve the task: + +# Phases +## Phase 1 +- reason: As the first step, we need to identify the number of repositories in the user's GitHub profile. 
+- description: Browse the GitHub profile of the user 'Every-Hands-AI' to find the number of repositories. +- expected_state: The number of repositories is identified. +## Phase 2 +- reason: The number of repositories should be stored in a file as requested by the user. +- description: Create a new file `repo_count.txt` and write the number of repositories to it. +- expected_state: The file `repo_count.txt` is created and contains the number of repositories. + + +USER: +OBSERVATION: +LocalExecutorAgent finishes task with content: The GitHub profile 'Every-Hands-AI' does not exist or is not accessible. Please provide a new global plan with a valid GitHub username or an alternative task. + +ASSISTANT: +It seems the GitHub profile 'Every-Hands-AI' does not exist or is not accessible. Please provide a valid GitHub username or an alternative task. + +USER: +Sorry, the correct github handle is "All-Hands-AI". + +ASSISTANT: +Let's create a new plan to achieve the task: + +# Phases +## Phase 1 +- reason: As the first step, we need to identify the number of repositories in the user's GitHub profile. +- description: Browse the GitHub profile of the user 'All-Hands-AI' to find the number of repositories. +- expected_state: The number of repositories is identified. +## Phase 2 +- reason: The number of repositories should be stored in a file as requested by the user. +- description: Create a new file `repo_count.txt` and write the number of repositories to it. +- expected_state: The file `repo_count.txt` is created and contains the number of repositories. + + +USER: +OBSERVATION: +LocalExecutorAgent finishes task with content: The file repo_count.txt has been successfully updated with the number of repositories. The tasks are now complete. +Summary: +- Browsed the GitHub profile of the user 'All-Hands-AI' and found that the number of repositories is 2. +- Appended the number of repositories to the file repo_count.txt. + +ASSISTANT: +The tasks have been successfully completed. 
The file repo_count.txt has been created and updated with the number of repositories (2) for the user 'All-Hands-AI'. +If you have any further requests, feel free to ask. Otherwise, I will consider the task complete. + + +--- END OF EXAMPLE 1 --- + +--- START OF EXAMPLE 2 --- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. There's an existing Python file `app.py` that you can use. + +ASSISTANT: Let's check the existing Python file `app.py`: + +open_file('app.py') + + +USER: +OBSERVATION: +[File: /workspace/app.py (8 lines total)] +(this is the beginning of the file) +1|from flask import Flask +2|app = Flask(__name__) +3| +4|@app.route('/') +5|def index(): +6| numbers = list(range(1, 11)) +7| return str(numbers) +8| +(this is the end of the file) + +ASSISTANT: +Great! Seems like the existing Python file `app.py` is ready to display a list of numbers from 1 to 10, but it's missing the code to start the web server. Let me create a plan to achieve the task: + +# Phases +## Phase 1 +- reason: The existing Python file is missing the code to start the web server. Normally, the server is started using the `app.run(port=5000)` command. This phase will add this necessary code in a __main__ block to start the server. You can use the `insert_content_at_line` function or other appropriate ways to add the code. +- description: Edit the existing Python file `app.py` to insert the code for starting a web server, at the end of the file. +- expected_state: The Python file `app.py` is updated with the code to start the web server. + /workspace/app.py + +8| +9|if __name__ == '__main__': +10| app.run(port=5000) + +## Phase 2 +- reason: After updating the Python file, we need to run it to display the list of numbers on a web page. The shell command can be: `python3 app.py`. +- description: Run the Python file `app.py` to display the list of numbers on a web page at port 5000. 
+- expected_state: The list of numbers is displayed on a web page at port 5000. + + +USER: +OBSERVATION: +LocalExecutorAgent finishes task with content: The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. The tasks are now complete. +Summary: +- Edited the existing Python file `app.py` to insert the code for starting a web server. +- Ran the Python file to display the list of numbers on a web page at port 5000. + +ASSISTANT: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! + +USER: Now browse the newly started server's homepage and show me the content. + +ASSISTANT: +Sure, let me create a plan to achieve the task: + +# Phases +## Phase 1 +- reason: To verify that the server is running and displaying the list of numbers correctly, we need to browse the server's homepage. We can use the `execute_browse` function to access the URL. +- description: Browse the server's homepage at http://127.0.0.1:5000 +- expected_state: The content of the server's homepage is retrieved. + + +USER: +OBSERVATION: +LocalExecutorAgent finishes task with content: The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". The tasks are now complete. +Summary: +- Browsed the server's homepage at http://127.0.0.1:5000 and retrieved the content "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". + +ASSISTANT: +The content of the server's homepage is "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]". If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +ASSISTANT: +Sure! Let me create a plan to achieve the task: + +# Phases +## Phase 1 +- reason: The server needs to be stopped before modifying the Python code. +- description: Stop the server running on port 5000. +- expected_state: The server is stopped. 
+## Phase 2 +- reason: The current code returns the numbers as a string, which is incorrect. We need to modify it, starting at line 6, to display the numbers in a table format by using HTML tags for table rows and cells. +- description: Modify the Python code to display the numbers in a table format. +- expected_state: The Python code is updated to display the numbers in a table format. + /workspace/app.py + +6| numbers = list(range(1, 11)) +7| return "<table>" + "".join([f"<tr><td>{num}</td></tr>" for num in numbers]) + "</table>" +
+## Phase 3 +- reason: After modifying the Python code, we need to run the file again to display the numbers in the expected table format. +- description: Run the Python file again to display the numbers in a table format on the web page at port 5000. +- expected_state: The numbers are displayed in a table format on the web page. +
+ +USER: +OBSERVATION: +LocalExecutorAgent finishes task with content: The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. The tasks are now complete. +Summary: +- Stopped the server running on port 5000. +- Modified the Python code to display the numbers in a table format. +- Ran the Python file again to display the numbers in a table format on the web page at port 5000. + +ASSISTANT: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! + + +--- END OF EXAMPLE 2 --- +{% endset %} +Here is an example of how you can interact with the environment for task solving: +{{ DEFAULT_EXAMPLE }} +{% if micro_agent %} +--- BEGIN OF GUIDELINE --- +The following information may assist you in completing your task: + +{{ micro_agent }} +--- END OF GUIDELINE --- +{% endif %} + +NOW, LET'S START! diff --git a/agenthub/codeact_agent/action_parser.py b/agenthub/codeact_agent/action_parser.py index 4bfdb63ca4e0..2c1717e86f30 100644 --- a/agenthub/codeact_agent/action_parser.py +++ b/agenthub/codeact_agent/action_parser.py @@ -74,7 +74,7 @@ def parse(self, action_str: str) -> Action: self.finish_command is not None ), 'self.finish_command should not be None when parse is called' thought = action_str.replace(self.finish_command.group(0), '').strip() - return AgentFinishAction(thought=thought) + return AgentFinishAction(thought=thought, outputs={'content': thought}) class CodeActActionParserCmdRun(ActionParser): @@ -159,7 +159,9 @@ def parse(self, action_str: str) -> Action: thought = action_str.replace(self.agent_delegate.group(0), '').strip() browse_actions = self.agent_delegate.group(1).strip() task = f'{thought}. 
I should start with: {browse_actions}' - return AgentDelegateAction(agent='BrowsingAgent', inputs={'task': task}) + return AgentDelegateAction( + agent='BrowsingAgent', inputs={'task': task}, action_suffix='browse' + ) class CodeActActionParserMessage(ActionParser): diff --git a/agenthub/codeact_agent/codeact_agent.py b/agenthub/codeact_agent/codeact_agent.py index d384ae568232..f3df700e315c 100644 --- a/agenthub/codeact_agent/codeact_agent.py +++ b/agenthub/codeact_agent/codeact_agent.py @@ -92,6 +92,12 @@ def __init__( agent_skills_docs=AgentSkillsRequirement.documentation, micro_agent=self.micro_agent, ) + self.stop_sequences = [ + '
', + '
', + '
', + ] + self.initial_task_str = [''] def action_to_str(self, action: Action) -> str: if isinstance(action, CmdRunAction): @@ -101,7 +107,7 @@ def action_to_str(self, action: Action) -> str: elif isinstance(action, IPythonRunCellAction): return f'{action.thought}\n\n{action.code}\n' elif isinstance(action, AgentDelegateAction): - return f'{action.thought}\n\n{action.inputs["task"]}\n' + return f'{action.thought}\n\n{action.inputs["task"]}\n' elif isinstance(action, MessageAction): return action.content elif isinstance(action, AgentFinishAction) and action.source == 'agent': @@ -197,11 +203,7 @@ def step(self, state: State) -> Action: messages = self._get_messages(state) params = { 'messages': self.llm.format_messages_for_llm(messages), - 'stop': [ - '
', - '
', - '', - ], + 'stop': self.stop_sequences, } response = self.llm.completion(**params) @@ -209,6 +211,15 @@ def step(self, state: State) -> Action: return self.action_parser.parse(response) def _get_messages(self, state: State) -> list[Message]: + delegated_task = state.inputs.get('task') + if delegated_task is not None: + # CodeActAgent is delegated a task + delegated_task = str(delegated_task) + self.initial_task_str[0] = delegated_task + delegated_task = '\n' + delegated_task + else: + delegated_task = '' + messages: list[Message] = [ Message( role='system', @@ -223,7 +234,7 @@ def _get_messages(self, state: State) -> list[Message]: role='user', content=[ TextContent( - text=self.prompt_manager.initial_user_message, + text=self.prompt_manager.initial_user_message + delegated_task, cache_prompt=self.llm.is_caching_prompt_active(), # if the user asks the same query, ) ], @@ -239,6 +250,10 @@ def _get_messages(self, state: State) -> list[Message]: else: raise ValueError(f'Unknown event type: {type(event)}') + if message and message.role == 'user' and not self.initial_task_str[0]: + # first user message + self.initial_task_str[0] = message.content[0].text + # add regular message if message: # handle error if the message is the SAME role as the previous message diff --git a/evaluation/swe_bench/run_infer.py b/evaluation/swe_bench/run_infer.py index 62333662c583..bac809e3df2b 100644 --- a/evaluation/swe_bench/run_infer.py +++ b/evaluation/swe_bench/run_infer.py @@ -40,11 +40,14 @@ AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, 'CodeActSWEAgent': codeact_user_response, + 'CoActPlannerAgent': codeact_user_response, } +codeact_inst_suffix = 'When you think you have fixed the issue through code changes, please run the following command: exit .\n' AGENT_CLS_TO_INST_SUFFIX = { - 'CodeActAgent': 'When you think you have fixed the issue through code changes, please run the following command: exit .\n', - 'CodeActSWEAgent': 'When you think you 
have fixed the issue through code changes, please run the following command: exit .\n', + 'CodeActAgent': codeact_inst_suffix, + 'CodeActSWEAgent': codeact_inst_suffix, + 'CoActPlannerAgent': codeact_inst_suffix, } @@ -67,6 +70,28 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata): f'--- BEGIN HINTS ---\n{instance.hints_text}\n--- END HINTS ---\n' ) instruction += CODEACT_SWE_PROMPT.format(workspace_dir_name=workspace_dir_name) + elif metadata.agent_class == 'CoActPlannerAgent': + instruction = ( + 'We are currently solving the following issue within our repository. Here is the issue text:\n' + '--- BEGIN ISSUE ---\n' + f'{instance.problem_statement}\n' + '--- END ISSUE ---\n\n' + ) + instruction += """ +Let's come up with 2 global plans sequentially to address the issue. +- First, examine the codebase and locate the relevant code for the issue. Then we'll come up with the FIRST detailed plan with all the edits to resolve it. +- After the local agent finishes executing the first plan, navigate the codebase again and come up with the SECOND detailed plan to create exactly ONE unit test at the correct location to verify the change has actually resolved the issue. As the LAST phase, explicitly tell the executor to execute it after creating that test case. If the test failed and after debugging it the local executor believes the previous fixes are incorrect, request for a new plan and include the error with explanation for that request. +- If the local agent fails to execute the test you suggested and request for a new plan, examine the +codebase again and come up with the last third plan to fix the issue and unit test. 
+""" + instruction += ( + 'IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.\n' + 'You only have 3 plans, so use them wisely.\n' + 'Do NOT try to perform the fix for the failed test by yourself as a planner agent, use the third plan instead.\n' + 'Remember to ONLY delegate the plan after you find out the proper positions to make the code changes.\n' + 'You should NOT modify any existing test case files. If needed, you can add new test cases in a NEW file to reproduce the issue.\n' + 'You SHOULD INCLUDE PROPER INDENTATION in your edit commands.\n' + ) else: # Testing general agents instruction = ( diff --git a/frontend/src/components/Workspace.tsx b/frontend/src/components/Workspace.tsx index 4d4d5ecf5cf1..3d9e1f530802 100644 --- a/frontend/src/components/Workspace.tsx +++ b/frontend/src/components/Workspace.tsx @@ -25,6 +25,8 @@ function Workspace() { const extraTabsMap: { [key: string]: TabOption[] } = { CodeActAgent: [TabOption.JUPYTER], PlannerAgent: [TabOption.PLANNER], + CoActPlannerAgent: [TabOption.JUPYTER], + CoActExecutorAgent: [TabOption.JUPYTER], }; const extraTabs = extraTabsMap[AGENT] || []; const showTabs = [...baseTabs, ...extraTabs]; diff --git a/openhands/events/action/agent.py b/openhands/events/action/agent.py index f49f573ed698..d645df7e0706 100644 --- a/openhands/events/action/agent.py +++ b/openhands/events/action/agent.py @@ -74,6 +74,7 @@ class AgentDelegateAction(Action): inputs: dict thought: str = '' action: str = ActionType.DELEGATE + action_suffix: str = '' @property def message(self) -> str: diff --git a/openhands/runtime/plugins/agent_skills/agentskills.py b/openhands/runtime/plugins/agent_skills/agentskills.py index dd34e3878d8f..4d02162e7f2a 100644 --- a/openhands/runtime/plugins/agent_skills/agentskills.py +++ b/openhands/runtime/plugins/agent_skills/agentskills.py @@ -12,6 +12,7 @@ __all__ = file_ops.__all__ + file_reader.__all__ DOCUMENTATION = '' +DOCUMENTATION_DICT = {} for 
func_name in __all__: func = globals()[func_name] @@ -23,3 +24,4 @@ fn_signature = f'{func.__name__}' + str(signature(func)) DOCUMENTATION += f'{fn_signature}:\n{cur_doc}\n\n' + DOCUMENTATION_DICT[func_name] = f'{fn_signature}:\n{cur_doc}\n\n' diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py index 8b9dd81e8bfd..3d2995dbff9c 100644 --- a/openhands/utils/prompt.py +++ b/openhands/utils/prompt.py @@ -24,6 +24,8 @@ def __init__( prompt_dir: str, agent_skills_docs: str, micro_agent: MicroAgent | None = None, + user_extra_vars: dict | None = None, # Additional user template variables + system_extra_vars: dict | None = None, # Additional system template variables ): self.prompt_dir: str = prompt_dir self.agent_skills_docs: str = agent_skills_docs @@ -32,6 +34,9 @@ def __init__( self.user_template: Template = self._load_template('user_prompt') self.micro_agent: MicroAgent | None = micro_agent + self.user_extra_vars = user_extra_vars or {} + self.system_extra_vars = system_extra_vars or {} + def _load_template(self, template_name: str) -> Template: template_path = os.path.join(self.prompt_dir, f'{template_name}.j2') if not os.path.exists(template_path): @@ -43,6 +48,7 @@ def _load_template(self, template_name: str) -> Template: def system_message(self) -> str: rendered = self.system_template.render( agent_skills_docs=self.agent_skills_docs, + **self.system_extra_vars, ).strip() return rendered @@ -58,6 +64,7 @@ def initial_user_message(self) -> str: into a more specialized agent that is tailored to the user's task. 
""" rendered = self.user_template.render( - micro_agent=self.micro_agent.content if self.micro_agent else None + micro_agent=self.micro_agent.content if self.micro_agent else None, + **self.user_extra_vars, ) return rendered.strip() diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log index d0d2de3ab32e..16d1e7fbd3b2 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_002.log @@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] +[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@opendevin-vm:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}] ## Format Your response MUST be in JSON format. 
It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log index 65df2e8c13ac..1df092592a08 100644 --- a/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/DelegatorAgent/test_edits/prompt_003.log @@ -37,7 +37,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": "/workspace/bad.txt"}}] +[{"source": "agent", "action": "run", "args": {"command": "ls", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "bad.txt\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@opendevin-vm:/workspace $ ", "extras": {"command_id": -1, "command": "ls", "exit_code": 0}}, {"source": "agent", "action": "read", "args": {"path": "bad.txt", "start": 0, "end": -1, "thought": ""}}, {"source": "agent", "observation": "read", "content": "This is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n", "extras": {"path": 
"/workspace/bad.txt"}}] ## Format Your response MUST be in JSON format. It must be an object, and it must contain two fields: diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_edits/prompt_007.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_edits/prompt_007.log index 1da481be87ce..dee1bb9b0dfd 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_edits/prompt_007.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_edits/prompt_007.log @@ -66,7 +66,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "user", "action": "message", "args": {"content": "Fix typos in bad.txt. Do not ask me for confirmation at any point.", "images_urls": null, "wait_for_response": false}}, {"source": "agent", "action": "delegate", "args": {"agent": "TypoFixerAgent", "inputs": {"task": "Fix typos in bad.txt"}, "thought": ""}}, {"source": "agent", "observation": "delegate", "content": "TypoFixerAgent finishes task with summary: Typos fixed in 'bad.txt':\n1. 'typoo' changed to 'typo'\n2. 'mor' changed to 'more'\n\nOriginal content:\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nFixed content:\nThis is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "extras": {"outputs": {"summary": "Typos fixed in 'bad.txt':\n1. 'typoo' changed to 'typo'\n2. 'mor' changed to 'more'\n\nOriginal content:\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nFixed content:\nThis is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"}}}] +[{"source": "user", "action": "message", "args": {"content": "Fix typos in bad.txt. 
Do not ask me for confirmation at any point.", "images_urls": null, "wait_for_response": false}}, {"source": "agent", "action": "delegate", "args": {"agent": "TypoFixerAgent", "inputs": {"task": "Fix typos in bad.txt"}, "thought": "", "action_suffix": ""}}, {"source": "agent", "observation": "delegate", "content": "TypoFixerAgent finishes task with summary: Typos fixed in 'bad.txt':\n1. 'typoo' changed to 'typo'\n2. 'mor' changed to 'more'\n\nOriginal content:\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nFixed content:\nThis is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n", "extras": {"outputs": {"summary": "Typos fixed in 'bad.txt':\n1. 'typoo' changed to 'typo'\n2. 'mor' changed to 'more'\n\nOriginal content:\nThis is a stupid typoo.\nReally?\nNo mor typos!\nEnjoy!\n\nFixed content:\nThis is a stupid typo.\nReally?\nNo more typos!\nEnjoy!\n"}}}] If the last item in the history is an error, you should try to fix it. If you cannot fix it, call the `reject` action. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log index d4affba2f202..38cce237bc05 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_003.log @@ -24,7 +24,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. 
-[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@docker-desktop:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}] +[{"source": "agent", "action": "run", "args": {"command": "git status", "thought": "", "blocking": false, "keep_prompt": true, "is_confirmed": "confirmed"}}, {"source": "agent", "observation": "run", "content": "fatal: not a git repository (or any parent up to mount point /)\r\nStopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).\r\n\r\n[Python Interpreter: /openhands/poetry/openhands-ai-5O4_aCHf-py3.11/bin/python]\nopenhands@opendevin-vm:/workspace $ ", "extras": {"command_id": -1, "command": "git status", "exit_code": 128}}] If the last item in the history is an error, you should try to fix it. diff --git a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log index 7aaaa838d7ee..4236bf520125 100644 --- a/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log +++ b/tests/integration/mock/eventstream_runtime/ManagerAgent/test_simple_task_rejection/prompt_004.log @@ -66,7 +66,7 @@ as well as observations you've made. This only includes the MOST RECENT actions and observations--more may have happened before that. They are time-ordered, with your most recent action at the bottom. -[{"source": "user", "action": "message", "args": {"content": "Write a git commit message for the current staging area. 
Do not ask me for confirmation at any point.", "images_urls": null, "wait_for_response": false}}, {"source": "agent", "action": "delegate", "args": {"agent": "CommitWriterAgent", "inputs": {}, "thought": ""}}, {"source": "agent", "observation": "delegate", "content": "CommitWriterAgent finishes task with reason: This is not a valid git repository. Unable to proceed with analyzing staged changes and generating a commit message.", "extras": {"outputs": {"reason": "This is not a valid git repository. Unable to proceed with analyzing staged changes and generating a commit message."}}}] +[{"source": "user", "action": "message", "args": {"content": "Write a git commit message for the current staging area. Do not ask me for confirmation at any point.", "images_urls": null, "wait_for_response": false}}, {"source": "agent", "action": "delegate", "args": {"agent": "CommitWriterAgent", "inputs": {}, "thought": "", "action_suffix": ""}}, {"source": "agent", "observation": "delegate", "content": "CommitWriterAgent finishes task with reason: This is not a valid git repository. Unable to proceed with analyzing staged changes and generating a commit message.", "extras": {"outputs": {"reason": "This is not a valid git repository. Unable to proceed with analyzing staged changes and generating a commit message."}}}] If the last item in the history is an error, you should try to fix it. If you cannot fix it, call the `reject` action. 
diff --git a/tests/unit/test_prompt_caching.py b/tests/unit/test_prompt_caching.py index 7acf01413096..54a72da89665 100644 --- a/tests/unit/test_prompt_caching.py +++ b/tests/unit/test_prompt_caching.py @@ -41,7 +41,7 @@ def test_get_messages_with_reminder(codeact_agent, mock_event_stream): codeact_agent.reset() messages = codeact_agent._get_messages( - Mock(history=mock_event_stream, max_iterations=5, iteration=0) + Mock(history=mock_event_stream, max_iterations=5, iteration=0, inputs=dict()) ) assert ( @@ -83,7 +83,7 @@ def test_get_messages_prompt_caching(codeact_agent, mock_event_stream): codeact_agent.reset() messages = codeact_agent._get_messages( - Mock(history=mock_event_stream, max_iterations=10, iteration=5) + Mock(history=mock_event_stream, max_iterations=10, iteration=5, inputs=dict()) ) # Check that only the last two user messages have cache_prompt=True @@ -138,7 +138,7 @@ def test_get_messages_with_cmd_action(codeact_agent, mock_event_stream): codeact_agent.reset() messages = codeact_agent._get_messages( - Mock(history=mock_event_stream, max_iterations=5, iteration=0) + Mock(history=mock_event_stream, max_iterations=5, iteration=0, inputs=dict()) ) # Assert the presence of key elements in the messages @@ -192,6 +192,7 @@ def test_prompt_caching_headers(codeact_agent, mock_event_stream): mock_state.history = mock_short_term_history mock_state.max_iterations = 5 mock_state.iteration = 0 + mock_state.inputs = dict() codeact_agent.reset() diff --git a/tests/unit/test_security.py b/tests/unit/test_security.py index f4c0503f5818..ffe7e37a74ea 100644 --- a/tests/unit/test_security.py +++ b/tests/unit/test_security.py @@ -250,6 +250,7 @@ def test_unsafe_bash_command(temp_dir: str): arguments={ 'agent': 'VerifierAgent', 'inputs': {'task': 'verify this task'}, + 'action_suffix': '', }, ), ),