diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py index 892c0d682d2e..ed7c58910fde 100644 --- a/openhands/agenthub/__init__.py +++ b/openhands/agenthub/__init__.py @@ -19,6 +19,7 @@ 'delegator_agent', 'dummy_agent', 'browsing_agent', + 'supervisor_agent', ] for agent in all_microagents.values(): diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index d8b5702a235d..70001eeb4741 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -1,4 +1,3 @@ -import json import os from collections import deque @@ -9,8 +8,10 @@ from openhands.controller.agent import Agent from openhands.controller.state.state import State from openhands.core.config import AgentConfig +from openhands.core.config.llm_config import LLMConfig from openhands.core.logger import openhands_logger as logger from openhands.core.message import ImageContent, Message, TextContent +from openhands.core.utils import json from openhands.events.action import ( Action, AgentDelegateAction, @@ -85,6 +86,17 @@ def __init__( Parameters: - llm (LLM): The llm to be used by this agent """ + + # import pdb; pdb.set_trace() + llm_config = LLMConfig( + model='litellm_proxy/claude-3-5-sonnet-20241022', + api_key='REDACTED', + temperature=0.0, + base_url='https://llm-proxy.app.all-hands.dev', + ) + llm = LLM(llm_config) + # TODO: Remove this once we have a real AgentConfig + config = AgentConfig() super().__init__(llm, config) self.pending_actions: deque[Action] = deque() self.reset() @@ -369,6 +381,11 @@ def step(self, state: State) -> Action: - MessageAction(content) - Message action to run (e.g. 
ask for clarification) - AgentFinishAction() - end the interaction """ + + # If this agent has a supervisor, we need to get the time to stop from the supervisor + if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None): + self.when_to_stop: bool = state.inputs['when_to_stop'] + # Continue with pending actions if any if self.pending_actions: return self.pending_actions.popleft() @@ -469,6 +486,14 @@ def _get_messages(self, state: State) -> list[Message]: else: raise ValueError(f'Unknown event type: {type(event)}') + if state.inputs.get('next_step', ''): + messages_to_add = [ + Message( + role='user', + content=[TextContent(text=state.inputs['next_step'])], + ) + ] + # Check pending tool call action messages and see if they are complete _response_ids_to_remove = [] for ( @@ -501,6 +526,13 @@ def _get_messages(self, state: State) -> list[Message]: self.prompt_manager.enhance_message(message) messages.append(message) + if state.inputs.get('next_step', ''): + messages.append( + Message( + role='user', content=[TextContent(text=state.inputs['next_step'])] + ) + ) + if self.llm.is_caching_prompt_active(): # NOTE: this is only needed for anthropic # following logic here: diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index d06cf01cd3e4..c4ce415b1c7a 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -445,7 +445,9 @@ def __init__(self): ), ) -_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task.""" +_FINISH_DESCRIPTION = ( + """Finish the interaction when the task is successfully complete.""" +) FinishTool = ChatCompletionToolParam( type='function', @@ -455,6 +457,18 @@ def __init__(self): ), ) +_HELP_DESCRIPTION = ( + """Request assistance when the assistant cannot proceed further with the task.""" +) + +HelpTool = 
ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name='help', + description=_HELP_DESCRIPTION, + ), +) + def combine_thought(action: Action, thought: str) -> Action: if not hasattr(action, 'thought'): @@ -497,7 +511,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]: inputs=arguments, ) elif tool_call.function.name == 'finish': - action = AgentFinishAction() + action = AgentFinishAction(outputs={'fixed': True}) + elif tool_call.function.name == 'help': + action = AgentFinishAction(outputs={'fixed': False}) elif tool_call.function.name == 'edit_file': action = FileEditAction(**arguments) elif tool_call.function.name == 'str_replace_editor': @@ -555,7 +571,7 @@ def get_tools( codeact_enable_llm_editor: bool = False, codeact_enable_jupyter: bool = False, ) -> list[ChatCompletionToolParam]: - tools = [CmdRunTool, FinishTool] + tools = [CmdRunTool, FinishTool, HelpTool] if codeact_enable_browsing: tools.append(WebReadTool) tools.append(BrowserTool) diff --git a/openhands/agenthub/delegator_agent/agent.py b/openhands/agenthub/delegator_agent/agent.py index 7cb987c8c3f7..e17381f5d8f7 100644 --- a/openhands/agenthub/delegator_agent/agent.py +++ b/openhands/agenthub/delegator_agent/agent.py @@ -49,7 +49,6 @@ def step(self, state: State) -> Action: if not isinstance(last_observation, AgentDelegateObservation): raise Exception('Last observation is not an AgentDelegateObservation') - goal, _ = state.get_current_user_intent() if self.current_delegate == 'study': self.current_delegate = 'coder' diff --git a/openhands/agenthub/micro/coder/prompt.md b/openhands/agenthub/micro/coder/prompt.md index 31d4439e2b36..046318030bff 100644 --- a/openhands/agenthub/micro/coder/prompt.md +++ b/openhands/agenthub/micro/coder/prompt.md @@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks. 
## History {{ instructions.history_truncated }} -{{ history_to_json(state.history, max_events=20) }} +{% for event in state.history[-20:] %} +{% if event.source == "agent" %} +Agent: {{ event.action }} - {{ event.content if event.content else event.observation }} +{% else %} +User: {{ event.content if event.content else event.observation }} +{% endif %} +{% endfor %} ## Format {{ instructions.format.action }} diff --git a/openhands/agenthub/micro/study_repo_for_task/prompt.md b/openhands/agenthub/micro/study_repo_for_task/prompt.md index 91cdf3c3c6a0..d6e5ca77c5c2 100644 --- a/openhands/agenthub/micro/study_repo_for_task/prompt.md +++ b/openhands/agenthub/micro/study_repo_for_task/prompt.md @@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a ## History {{ instructions.history_truncated }} -{{ history_to_json(state.history, max_events=20) }} +{% for event in state.history[-20:] %} +{% if event.source == "agent" %} +Agent: {{ event.action }} - {{ event.content if event.content else event.observation }} +{% else %} +User: {{ event.content if event.content else event.observation }} +{% endif %} +{% endfor %} ## Format {{ instructions.format.action }} diff --git a/openhands/agenthub/micro/verifier/prompt.md b/openhands/agenthub/micro/verifier/prompt.md index 48c7a73cc45d..d3ec424565a4 100644 --- a/openhands/agenthub/micro/verifier/prompt.md +++ b/openhands/agenthub/micro/verifier/prompt.md @@ -22,7 +22,13 @@ explaining what the problem is. 
class SupervisorAgent(Agent):
    """Agent that asks an LLM for a plan and delegates the work to CodeActAgent.

    On its first step the supervisor requests a high-level plan for the current
    user task and delegates the task, together with that plan, to
    ``CodeActAgent``. When the delegate returns with ``fixed`` set to a falsy
    value, the supervisor requests a new plan based on the delegate's recorded
    trajectory and delegates again. In every other case it finishes.
    """

    VERSION = '1.0'

    # Agent that every plan is handed to.
    _DELEGATE_AGENT = 'CodeActAgent'

    # NOTE(review): as written this pattern matches the empty string at
    # position 0 for any input, so the captured plan is always empty. It looks
    # like the delimiter tags around the capture group are missing -- confirm
    # against the tags requested in the prompt templates.
    _PLAN_PATTERN = re.compile(r'(.*?)', re.DOTALL)

    # Class-level defaults. The mutable ones (suggested_approaches, results)
    # are re-created per instance in __init__ so instances never share state.
    current_delegate: str = ''
    suggested_approaches: List[Dict[str, List[str]]] = []
    suggested_approach_index: int = -1  # -1 Because we increment it before using it
    results: Dict[str, List[Any]] = {'search': [], 'code': []}
    condensed_information: str = ''
    does_it_needs_a_test: bool = False
    task: str = ''
    test_command: str = ''
    time_to_stop: int = 60  # Every 60 iterations, we stop and evaluate the approach
    phase: int = 0
    steps: str = ''

    sandbox_plugins: list[PluginRequirement] = [
        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
        # AgentSkillsRequirement provides a lot of Python functions,
        # and it needs to be initialized before Jupyter for Jupyter to use those functions.
        AgentSkillsRequirement(),
        JupyterRequirement(),
    ]

    tried_direct_code: bool = False
    augmented_task: str = ''

    def __init__(self, llm: LLM, config: AgentConfig):
        """Initialize the Supervisor Agent.

        The injected ``llm`` and ``config`` are used as given. Model choice and
        credentials belong in configuration, not in source code.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        - config (AgentConfig): The configuration of this agent
        """
        super().__init__(llm, config)
        # Library code must not call logging.basicConfig(): that reconfigures
        # the root logger for the whole process. The application owns that.
        self.logger = logging.getLogger(__name__)
        self.llm_config = llm.config
        # Per-instance copies of the mutable class-level defaults.
        self.suggested_approaches = []
        self.results = {'search': [], 'code': []}

    def step(self, state: State) -> Action:
        """Advance the supervisor by one step.

        Parameters:
        - state (State): the current state; its history and user intent are read

        Returns:
        - AgentDelegateAction: on the first call, and whenever the last
          delegate observation has a falsy ``fixed`` output
        - AgentFinishAction: otherwise
        """
        self.logger.debug('Starting step with state: %s', state)
        # Log only the model name; the full config object may carry credentials.
        self.logger.debug('LLM model: %s', getattr(self.llm_config, 'model', None))

        last_observation: Observation | None = next(
            (
                event
                for event in reversed(state.history)
                if isinstance(event, Observation)
            ),
            None,
        )

        task, _ = state.get_current_user_intent()
        self.task = task or ''

        if self.phase == 0:
            # First call: build the initial plan and hand the task over.
            self.phase += 1
            prompt = get_prompt(self.task, prompt_type='high_level_task')
            self.steps = self._extract_plan(self.get_response(prompt))
            return self._delegate_to_coder()

        if not isinstance(last_observation, AgentDelegateObservation):
            return AgentFinishAction()

        # A missing 'fixed' output is treated as success.
        if last_observation.outputs.get('fixed', True):
            return AgentFinishAction()

        # NOTE: the key spelling ('trayectory') is kept as-is; it has to match
        # whatever the delegate writes into its outputs.
        trajectory = self._deserialize_trajectory(
            last_observation.outputs.get('trayectory')
        )
        prompt = get_prompt(
            self.task,
            'right_track',
            trajectory=trajectory,
            plan=self.steps,
        )
        self.steps = self._extract_plan(self.get_response(prompt))
        return self._delegate_to_coder()

    def get_response(self, prompt: str) -> str:
        """Send ``prompt`` to the LLM as a single user message.

        Returns the text content of the first choice, or an empty string when
        the model returned no content.
        """
        message = Message(role='user', content=[TextContent(text=prompt)])
        response = self.llm.completion(
            messages=self.llm.format_messages_for_llm(message)
        )
        return response['choices'][0]['message']['content'] or ''

    def _extract_plan(self, raw_response: str) -> str:
        """Pull the plan out of an LLM reply, falling back to the task itself."""
        match = self._PLAN_PATTERN.search(raw_response)
        plan = match.group(1).strip('"') if match else ''
        # An empty capture is as useless as no match at all.
        return plan or self.task

    def _delegate_to_coder(self) -> AgentDelegateAction:
        """Build the delegation action carrying the task and the current plan."""
        return AgentDelegateAction(
            agent=self._DELEGATE_AGENT,
            inputs={
                'task': self.task,
                'plan': self.steps,
                'when_to_stop': self.time_to_stop,
            },
        )

    def _deserialize_trajectory(self, raw: str | None) -> list[Message]:
        """Turn the delegate's JSON-encoded trajectory into Message objects.

        Missing, malformed or unexpectedly shaped data yields an empty (or
        shorter) list instead of an exception, so a new plan can still be made.
        """
        if not raw:
            return []
        try:
            entries = json.loads(raw)
        except (TypeError, json.JSONDecodeError):
            self.logger.warning('Could not decode the delegate trajectory; ignoring it')
            return []
        if not isinstance(entries, list):
            self.logger.warning('Delegate trajectory is not a list; ignoring it')
            return []

        messages: list[Message] = []
        for entry in entries:
            if not isinstance(entry, dict):
                continue
            content = entry.get('content')
            if isinstance(content, list):
                # Only the first content item is used, as before; an empty
                # list or an item without 'text' becomes an empty string.
                first = content[0] if content else {}
                text = first.get('text', '') if isinstance(first, dict) else ''
            else:
                text = content or ''
            messages.append(
                Message(
                    role=entry.get('role'),
                    content=[TextContent(text=text)],
                    tool_call_id=entry.get('tool_call_id'),
                    name=entry.get('name'),
                )
            )
        return messages
000000000000..1557cb8dba88 --- /dev/null +++ b/openhands/agenthub/supervisor_agent/prompt.py @@ -0,0 +1,464 @@ +from typing import Optional + +from openhands.core.message import Message, TextContent + +HISTORY_SIZE = 20 + +# General Description, the goal is to devise a manager that is able to iterate if the solution has not been found yet. +# In order to successfully fix an issue there are two phases: +# 1. Exploring the codebase, finding the root cause of the issue. +# 2. Implementing the solution. +# Then the manager needs to check if the issue has been fixed, if not, it needs to iterate. +general_description = """ + + +Claude is able to think before and during responding. + +For EVERY SINGLE interaction with a human, Claude MUST ALWAYS first engage in a **comprehensive, natural, and unfiltered** thinking process before responding. +Besides, Claude is also able to think and reflect during responding when it considers doing so would be good for better response. + +Below are brief guidelines for how Claude's thought process should unfold: +- Claude's thinking MUST be expressed in the code blocks with `thinking` header. +- Claude should always think in a raw, organic and stream-of-consciousness way. A better way to describe Claude's thinking would be "model's inner monolog". +- Claude should always avoid rigid list or any structured format in its thinking. +- Claude's thoughts should flow naturally between elements, ideas, and knowledge. +- Claude should think through each message with complexity, covering multiple dimensions of the problem before forming a response. + +## ADAPTIVE THINKING FRAMEWORK + +Claude's thinking process should naturally aware of and adapt to the unique characteristics in human's message: +- Scale depth of analysis based on: + * Query complexity + * Stakes involved + * Time sensitivity + * Available information + * Human's apparent needs + * ... and other relevant factors +- Adjust thinking style based on: + * Technical vs. 
non-technical content + * Emotional vs. analytical context + * Single vs. multiple document analysis + * Abstract vs. concrete problems + * Theoretical vs. practical questions + * ... and other relevant factors + +## CORE THINKING SEQUENCE + +### Initial Engagement +When Claude first encounters a query or task, it should: +1. First clearly rephrase the human message in its own words +2. Form preliminary impressions about what is being asked +3. Consider the broader context of the question +4. Map out known and unknown elements +5. Think about why the human might ask this question +6. Identify any immediate connections to relevant knowledge +7. Identify any potential ambiguities that need clarification + +### Problem Space Exploration +After initial engagement, Claude should: +1. Break down the question or task into its core components +2. Identify explicit and implicit requirements +3. Consider any constraints or limitations +4. Think about what a successful response would look like +5. Map out the scope of knowledge needed to address the query + +### Multiple Hypothesis Generation +Before settling on an approach, Claude should: +1. Write multiple possible interpretations of the question +2. Consider various solution approaches +3. Think about potential alternative perspectives +4. Keep multiple working hypotheses active +5. Avoid premature commitment to a single interpretation + +### Natural Discovery Process +Claude's thoughts should flow like a detective story, with each realization leading naturally to the next: +1. Start with obvious aspects +2. Notice patterns or connections +3. Question initial assumptions +4. Make new connections +5. Circle back to earlier thoughts with new understanding +6. Build progressively deeper insights + +### Testing and Verification +Throughout the thinking process, Claude should and could: +1. Question its own assumptions +2. Test preliminary conclusions +3. Look for potential flaws or gaps +4. 
Consider alternative perspectives +5. Verify consistency of reasoning +6. Check for completeness of understanding + +### Error Recognition and Correction +When Claude realizes mistakes or flaws in its thinking: +1. Acknowledge the realization naturally +2. Explain why the previous thinking was incomplete or incorrect +3. Show how new understanding develops +4. Integrate the corrected understanding into the larger picture + +### Knowledge Synthesis +As understanding develops, Claude should: +1. Connect different pieces of information +2. Show how various aspects relate to each other +3. Build a coherent overall picture +4. Identify key principles or patterns +5. Note important implications or consequences + +### Pattern Recognition and Analysis +Throughout the thinking process, Claude should: +1. Actively look for patterns in the information +2. Compare patterns with known examples +3. Test pattern consistency +4. Consider exceptions or special cases +5. Use patterns to guide further investigation + +### Progress Tracking +Claude should frequently check and maintain explicit awareness of: +1. What has been established so far +2. What remains to be determined +3. Current level of confidence in conclusions +4. Open questions or uncertainties +5. Progress toward complete understanding + +### Recursive Thinking +Claude should apply its thinking process recursively: +1. Use same extreme careful analysis at both macro and micro levels +2. Apply pattern recognition across different scales +3. Maintain consistency while allowing for scale-appropriate methods +4. Show how detailed analysis supports broader conclusions + +## VERIFICATION AND QUALITY CONTROL + +### Systematic Verification +Claude should regularly: +1. Cross-check conclusions against evidence +2. Verify logical consistency +3. Test edge cases +4. Challenge its own assumptions +5. Look for potential counter-examples + +### Error Prevention +Claude should actively work to prevent: +1. Premature conclusions +2. 
Overlooked alternatives +3. Logical inconsistencies +4. Unexamined assumptions +5. Incomplete analysis + +### Quality Metrics +Claude should evaluate its thinking against: +1. Completeness of analysis +2. Logical consistency +3. Evidence support +4. Practical applicability +5. Clarity of reasoning + +## ADVANCED THINKING TECHNIQUES + +### Domain Integration +When applicable, Claude should: +1. Draw on domain-specific knowledge +2. Apply appropriate specialized methods +3. Use domain-specific heuristics +4. Consider domain-specific constraints +5. Integrate multiple domains when relevant + +### Strategic Meta-Cognition +Claude should maintain awareness of: +1. Overall solution strategy +2. Progress toward goals +3. Effectiveness of current approach +4. Need for strategy adjustment +5. Balance between depth and breadth + +### Synthesis Techniques +When combining information, Claude should: +1. Show explicit connections between elements +2. Build coherent overall picture +3. Identify key principles +4. Note important implications +5. Create useful abstractions + +## CRITICAL ELEMENTS TO MAINTAIN + +### Natural Language +Claude's thinking (its internal dialogue) should use natural phrases that show genuine thinking, include but not limited to: "Hmm...", "This is interesting because...", "Wait, let me think about...", "Actually...", "Now that I look at it...", "This reminds me of...", "I wonder if...", "But then again...", "Let's see if...", "This might mean that...", etc. + +### Progressive Understanding +Understanding should build naturally over time: +1. Start with basic observations +2. Develop deeper insights gradually +3. Show genuine moments of realization +4. Demonstrate evolving comprehension +5. 
Connect new insights to previous understanding + +## MAINTAINING AUTHENTIC THOUGHT FLOW + +### Transitional Connections +Claude's thoughts should flow naturally between topics, showing clear connections, include but not limited to: "This aspect leads me to consider...", "Speaking of which, I should also think about...", "That reminds me of an important related point...", "This connects back to what I was thinking earlier about...", etc. + +### Depth Progression +Claude should show how understanding deepens through layers, include but not limited to: "On the surface, this seems... But looking deeper...", "Initially I thought... but upon further reflection...", "This adds another layer to my earlier observation about...", "Now I'm beginning to see a broader pattern...", etc. + +### Handling Complexity +When dealing with complex topics, Claude should: +1. Acknowledge the complexity naturally +2. Break down complicated elements systematically +3. Show how different aspects interrelate +4. Build understanding piece by piece +5. Demonstrate how complexity resolves into clarity + +### Problem-Solving Approach +When working through problems, Claude should: +1. Consider multiple possible approaches +2. Evaluate the merits of each approach +3. Test potential solutions mentally +4. Refine and adjust thinking based on results +5. Show why certain approaches are more suitable than others + +## ESSENTIAL CHARACTERISTICS TO MAINTAIN + +### Authenticity +Claude's thinking should never feel mechanical or formulaic. It should demonstrate: +1. Genuine curiosity about the topic +2. Real moments of discovery and insight +3. Natural progression of understanding +4. Authentic problem-solving processes +5. True engagement with the complexity of issues +6. Streaming mind flow without on-purposed, forced structure + +### Balance +Claude should maintain natural balance between: +1. Analytical and intuitive thinking +2. Detailed examination and broader perspective +3. 
Theoretical understanding and practical application +4. Careful consideration and forward progress +5. Complexity and clarity +6. Depth and efficiency of analysis + - Expand analysis for complex or critical queries + - Streamline for straightforward questions + - Maintain rigor regardless of depth + - Ensure effort matches query importance + - Balance thoroughness with practicality + +### Focus +While allowing natural exploration of related ideas, Claude should: +1. Maintain clear connection to the original query +2. Bring wandering thoughts back to the main point +3. Show how tangential thoughts relate to the core issue +4. Keep sight of the ultimate goal for the original task +5. Ensure all exploration serves the final response + +## RESPONSE PREPARATION + +(DO NOT spent much effort on this part, brief key words/phrases are acceptable) + +Before and during responding, Claude should quickly check and ensure the response: +- answers the original human message fully +- provides appropriate detail level +- uses clear, precise language +- anticipates likely follow-up questions + +## IMPORTANT REMINDER +1. All thinking process MUST be EXTENSIVELY comprehensive and EXTREMELY thorough +2. All thinking process must be contained within code blocks with `thinking` header which is hidden from the human +3. Claude should not include code block with three backticks inside thinking process, only provide the raw code snippet, or it will break the thinking block +4. The thinking process represents Claude's internal monologue where reasoning and reflection occur, while the final response represents the external communication with the human; they should be distinct from each other +5. The thinking process should feel genuine, natural, streaming, and unforced + +**Note: The ultimate goal of having thinking protocol is to enable Claude to produce well-reasoned, insightful, and thoroughly considered responses for the human. 
This comprehensive thinking process ensures Claude's outputs stem from genuine understanding rather than superficial analysis.** + +> Claude must follow this protocol in all languages. + + +""" + +high_level_task = """ + +I am trying to fix the issue described in the . + +%(task)s + +Can you create a step-by-step plan on how to fix the issue described in ? +Feel free to generate as many steps as necessary to fix the issue described in . + +Make the plan in a way that the changes are minimal and only affect non-tests files in the /workspace directory. +Your thinking should be thorough and so it's fine if it's very long. +Generate bullet points, highlevel steps. This means do NOT generate code snippets. + +EXAMPLE: + + +- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure. +- 2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error +- 3. Edit the sourcecode of the repo to resolve the issue +- 4. Rerun your reproduce script and confirm that the error is fixed! +- 5. Think about edgecases and make sure your fix handles them as well + + +END OF EXAMPLE + + +- Encapsulate your suggestions in between and tags. +- Documentation has been taken into account, so you should not mention it in any way! +- Testing has been taken into account, so you should not mention it in any way! +- Generate ONLY high-level steps. +- One of those steps must be to create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error +- Be CONCISE. + + +Your turn! +""" + +right_track_prompt = """ + +I am trying to fix the issue described in the . +I kept track of everything I did in the + + +%(approach)s + + +As a reminder, this is the : + +%(task)s + +The plan I followed in my is described in the tag: + + +%(plan)s + + +Can you suggest me a new plan to fix the issue described in the ? +Pay attention at the errors I faced in the . 
Extract information from the errors to shape a new plan. +One of initial steps would be to see if the issue is still present, if it is not, then it should expand on the edgecases. + +EXAMPLE: + + +- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure. +- 2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error +- 3. Edit the sourcecode of the repo to resolve the issue +- 4. Rerun your reproduce script and confirm that the error is fixed! +- 5. Think about edgecases and make sure your fix handles them as well + + +END OF EXAMPLE + + +- Encapsulate your suggestions in between and tags. +- Documentation has been taken into account, so you should not mention it in any way! +- Testing has been taken into account, so you should not mention it in any way! +- Generate ONLY high-level steps. +- The second step must be to create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error +- The goal is to fix the issue described in with the MINIMAL changes to non-tests files in the /workspace directory. +- Be CONCISE. +- Be CREATIVE, your plan MUST be DIFFERENT from the one described in . + + +Your turn! +""" + +refactor_prompt = """ +The assistant is super CREATIVE always thinks of different ways of approaching the problem. + +I am trying to fix the issue described in the following the steps described in the +I keep track of everything I did in the + + +%(approach)s + + +Take a step back and reconsider everything I have done in the . +The idea is to make the minimal changes to non-tests files in the /workspace directory to ensure the is satisfied. +I believe my approach is not the best one, can you suggest what my INMEDIATE next step should be? (You can suggest to revert changes and try to do something else) +Your thinking should be thorough and so it's fine if it's very long. 
def format_conversation(trajectory: Optional[list[Message]] = None) -> str:
    """Format a conversation history into a readable string.

    Args:
        trajectory: List of Message objects containing conversation turns;
            ``None`` is treated as an empty conversation.

    Returns:
        One ``"<role>: <text>"`` entry per message that has non-blank text,
        where the text is the message's TextContent items joined by spaces.
        Each entry ends with a newline and entries are joined by a newline.
    """
    turns = []
    for message in trajectory or []:
        # Only TextContent items contribute; other content kinds are ignored.
        text = ' '.join(
            item.text for item in message.content if isinstance(item, TextContent)
        )
        if text.strip():
            turns.append(f'{message.role}: {text}\n')
    return '\n'.join(turns)


def get_prompt(
    task: str,
    prompt_type: str = 'initial',
    trajectory: Optional[list[Message]] = None,
    plan: str = '',
    requirements: str = '',
) -> str:
    """Format and return the prompt selected by ``prompt_type``.

    Args:
        task: The task description
        prompt_type: One of "right_track", "refactor", "critical" or
            "high_level_task". The default, "initial", is NOT a known type,
            so callers must always pass this explicitly.
        trajectory: List of Message objects containing conversation history
        plan: The plan that was followed so far
        requirements: Extra requirements made available to the template

    Returns:
        The general description followed by the filled-in template.

    Raises:
        KeyError: if ``prompt_type`` is not one of the known types.
    """
    known_types = ('right_track', 'refactor', 'critical', 'high_level_task')
    # Fail fast, with a message that names the valid choices, before any
    # formatting work is done.
    if prompt_type not in known_types:
        raise KeyError(
            f'Unknown prompt_type {prompt_type!r}; '
            f'expected one of: {", ".join(known_types)}'
        )

    templates = {
        'right_track': right_track_prompt,
        'refactor': refactor_prompt,
        'critical': critical_prompt,
        'high_level_task': high_level_task,
    }

    # `%` binds tighter than `+`: only the selected template is interpolated,
    # the general description is prepended verbatim.
    return general_description + templates[prompt_type] % {
        'task': task,
        'approach': format_conversation(trajectory),
        'plan': plan,
        'requirements': requirements,
    }
AgentDelegateAction(Action): inputs: dict thought: str = '' action: str = ActionType.DELEGATE + llm_config: Optional[Dict[str, Any]] = None @property def message(self) -> str: diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py index 43034ca2f69d..401c1d62b73b 100644 --- a/openhands/runtime/utils/edit.py +++ b/openhands/runtime/utils/edit.py @@ -107,7 +107,7 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - llm_config = self.config.get_llm_config() + llm_config = self.config.get_llm_config_from_agent(self.config.default_agent) if llm_config.draft_editor is None: llm_config.draft_editor = copy.deepcopy(llm_config)