diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py
index 892c0d682d2e..ed7c58910fde 100644
--- a/openhands/agenthub/__init__.py
+++ b/openhands/agenthub/__init__.py
@@ -19,6 +19,7 @@
'delegator_agent',
'dummy_agent',
'browsing_agent',
+ 'supervisor_agent',
]
for agent in all_microagents.values():
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index d8b5702a235d..70001eeb4741 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -1,4 +1,3 @@
-import json
import os
from collections import deque
@@ -9,8 +8,10 @@
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import ImageContent, Message, TextContent
+from openhands.core.utils import json
from openhands.events.action import (
Action,
AgentDelegateAction,
@@ -85,6 +86,17 @@ def __init__(
Parameters:
- llm (LLM): The llm to be used by this agent
"""
+
+ # import pdb; pdb.set_trace()
+ llm_config = LLMConfig(
+ model='litellm_proxy/claude-3-5-sonnet-20241022',
+ api_key='REDACTED',
+ temperature=0.0,
+ base_url='https://llm-proxy.app.all-hands.dev',
+ )
+ llm = LLM(llm_config)
+ # TODO: Remove this once we have a real AgentConfig
+ config = AgentConfig()
super().__init__(llm, config)
self.pending_actions: deque[Action] = deque()
self.reset()
@@ -369,6 +381,11 @@ def step(self, state: State) -> Action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""
+
+ # If this agent has a supervisor, we need to get the time to stop from the supervisor
+ if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None):
+ self.when_to_stop: bool = state.inputs['when_to_stop']
+
# Continue with pending actions if any
if self.pending_actions:
return self.pending_actions.popleft()
@@ -469,6 +486,14 @@ def _get_messages(self, state: State) -> list[Message]:
else:
raise ValueError(f'Unknown event type: {type(event)}')
+ if state.inputs.get('next_step', ''):
+ messages_to_add = [
+ Message(
+ role='user',
+ content=[TextContent(text=state.inputs['next_step'])],
+ )
+ ]
+
# Check pending tool call action messages and see if they are complete
_response_ids_to_remove = []
for (
@@ -501,6 +526,13 @@ def _get_messages(self, state: State) -> list[Message]:
self.prompt_manager.enhance_message(message)
messages.append(message)
+ if state.inputs.get('next_step', ''):
+ messages.append(
+ Message(
+ role='user', content=[TextContent(text=state.inputs['next_step'])]
+ )
+ )
+
if self.llm.is_caching_prompt_active():
# NOTE: this is only needed for anthropic
# following logic here:
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
index d06cf01cd3e4..c4ce415b1c7a 100644
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -445,7 +445,9 @@ def __init__(self):
),
)
-_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
+_FINISH_DESCRIPTION = (
+ """Finish the interaction when the task is successfully complete."""
+)
FinishTool = ChatCompletionToolParam(
type='function',
@@ -455,6 +457,18 @@ def __init__(self):
),
)
+_HELP_DESCRIPTION = (
+ """Request assistance when the assistant cannot proceed further with the task."""
+)
+
+HelpTool = ChatCompletionToolParam(
+ type='function',
+ function=ChatCompletionToolParamFunctionChunk(
+ name='help',
+ description=_HELP_DESCRIPTION,
+ ),
+)
+
def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
@@ -497,7 +511,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
inputs=arguments,
)
elif tool_call.function.name == 'finish':
- action = AgentFinishAction()
+ action = AgentFinishAction(outputs={'fixed': True})
+ elif tool_call.function.name == 'help':
+ action = AgentFinishAction(outputs={'fixed': False})
elif tool_call.function.name == 'edit_file':
action = FileEditAction(**arguments)
elif tool_call.function.name == 'str_replace_editor':
@@ -555,7 +571,7 @@ def get_tools(
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
) -> list[ChatCompletionToolParam]:
- tools = [CmdRunTool, FinishTool]
+ tools = [CmdRunTool, FinishTool, HelpTool]
if codeact_enable_browsing:
tools.append(WebReadTool)
tools.append(BrowserTool)
diff --git a/openhands/agenthub/delegator_agent/agent.py b/openhands/agenthub/delegator_agent/agent.py
index 7cb987c8c3f7..e17381f5d8f7 100644
--- a/openhands/agenthub/delegator_agent/agent.py
+++ b/openhands/agenthub/delegator_agent/agent.py
@@ -49,7 +49,6 @@ def step(self, state: State) -> Action:
if not isinstance(last_observation, AgentDelegateObservation):
raise Exception('Last observation is not an AgentDelegateObservation')
-
goal, _ = state.get_current_user_intent()
if self.current_delegate == 'study':
self.current_delegate = 'coder'
diff --git a/openhands/agenthub/micro/coder/prompt.md b/openhands/agenthub/micro/coder/prompt.md
index 31d4439e2b36..046318030bff 100644
--- a/openhands/agenthub/micro/coder/prompt.md
+++ b/openhands/agenthub/micro/coder/prompt.md
@@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.
## History
{{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
## Format
{{ instructions.format.action }}
diff --git a/openhands/agenthub/micro/study_repo_for_task/prompt.md b/openhands/agenthub/micro/study_repo_for_task/prompt.md
index 91cdf3c3c6a0..d6e5ca77c5c2 100644
--- a/openhands/agenthub/micro/study_repo_for_task/prompt.md
+++ b/openhands/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a
## History
{{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
## Format
{{ instructions.format.action }}
diff --git a/openhands/agenthub/micro/verifier/prompt.md b/openhands/agenthub/micro/verifier/prompt.md
index 48c7a73cc45d..d3ec424565a4 100644
--- a/openhands/agenthub/micro/verifier/prompt.md
+++ b/openhands/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,13 @@ explaining what the problem is.
## History
{{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
## Format
{{ instructions.format.action }}
diff --git a/openhands/agenthub/supervisor_agent/__init__.py b/openhands/agenthub/supervisor_agent/__init__.py
new file mode 100644
index 000000000000..6b07ea69fc67
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/__init__.py
@@ -0,0 +1,4 @@
+from openhands.agenthub.supervisor_agent.agent import SupervisorAgent
+from openhands.controller.agent import Agent
+
+Agent.register('SupervisorAgent', SupervisorAgent)
diff --git a/openhands/agenthub/supervisor_agent/agent.py b/openhands/agenthub/supervisor_agent/agent.py
new file mode 100644
index 000000000000..37bb4112fedb
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/agent.py
@@ -0,0 +1,154 @@
+import json
+import logging
+import re
+from typing import Any, Dict, List
+
+from openhands.agenthub.supervisor_agent.prompt import get_prompt
+from openhands.controller.agent import Agent
+from openhands.controller.state.state import State
+from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.message import Message, TextContent
+from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
+from openhands.events.observation.delegate import AgentDelegateObservation
+from openhands.events.observation.observation import Observation
+from openhands.llm.llm import LLM
+from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
+from openhands.runtime.plugins.jupyter import JupyterRequirement
+from openhands.runtime.plugins.requirement import PluginRequirement
+
+
+class SupervisorAgent(Agent):
+ VERSION = '1.0'
+ """
+ The Supervisor Agent is an agent that collects information from other agents
+ and makes decisions based on the information.
+ """
+
+ current_delegate: str = ''
+ suggested_approaches: List[Dict[str, List[str]]] = []
+ suggested_approach_index: int = -1 # -1 Because we increment it before using it
+ results: Dict[str, List[Any]] = {'search': [], 'code': []}
+ condensed_information: str = ''
+ does_it_needs_a_test: bool = False
+ task: str = ''
+ test_command: str = ''
+ time_to_stop: int = 60 # Every 60 iterations, we stop and evaluate the approach
+ phase: int = 0
+ steps: str = ''
+
+ sandbox_plugins: list[PluginRequirement] = [
+ # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
+ # AgentSkillsRequirement provides a lot of Python functions,
+ # and it needs to be initialized before Jupyter for Jupyter to use those functions.
+ AgentSkillsRequirement(),
+ JupyterRequirement(),
+ ]
+
+ # Add class attribute for tried_direct_code
+ tried_direct_code: bool = False
+
+ # Add class attribute for augmented_task
+ augmented_task: str = ''
+
+ def __init__(self, llm: LLM, config: AgentConfig):
+ """Initialize the Supervisor Agent with an LLM
+
+ Parameters:
+ - llm (LLM): The llm to be used by this agent
+ """
+ llm_config = LLMConfig(
+ model='openai/o1-preview', api_key='REDACTED', temperature=1.0
+ )
+ llm = LLM(llm_config)
+ # TODO: Remove this once we have a real AgentConfig
+ config = AgentConfig(llm_config='o1-mini')
+ super().__init__(llm, config)
+ # Set up logger
+ self.logger = logging.getLogger(__name__)
+ logging.basicConfig(level=logging.DEBUG) # Set the logging level
+ self.llm_config = llm.config
+
+ def step(self, state: State) -> Action:
+ self.logger.debug('Starting step with state: %s', state)
+ self.logger.debug('LLM config: %s', self.llm_config)
+ last_observation: Observation | None = None
+ for event in reversed(state.history):
+ if isinstance(event, Observation):
+ last_observation = event
+ break
+
+ task, _ = state.get_current_user_intent()
+ self.task = task or ''
+
+ if self.phase == 0:
+ self.phase += 1
+ prompt = get_prompt(self.task, prompt_type='high_level_task')
+ raw_response = self.get_response(prompt)
+ match = re.search(
+ r'(.*?)',
+ raw_response,
+ re.DOTALL,
+ )
+ self.steps = match.group(1).strip('"') if match else self.task
+ return AgentDelegateAction(
+ agent='CodeActAgent',
+ inputs={
+ 'task': self.task,
+ 'plan': self.steps,
+ 'when_to_stop': self.time_to_stop,
+ },
+ )
+
+ if not isinstance(last_observation, AgentDelegateObservation):
+ return AgentFinishAction()
+
+ if not last_observation.outputs.get('fixed', True):
+ trajectory_str: str = last_observation.outputs['trayectory']
+ trajectory_data = json.loads(trajectory_str)
+ deserialized_trajectory = [
+ Message(
+ role=msg_dict.get('role'),
+ content=[
+ TextContent(text=content_text)
+ for content_text in [
+ msg_dict['content'][0]['text']
+ if isinstance(msg_dict['content'], list)
+ else msg_dict['content']
+ ]
+ ],
+ tool_call_id=msg_dict.get('tool_call_id'),
+ name=msg_dict.get('name'),
+ )
+ for msg_dict in trajectory_data
+ ]
+ prompt = get_prompt(
+ self.task,
+ 'right_track',
+ trajectory=deserialized_trajectory,
+ plan=self.steps,
+ )
+ raw_response = self.get_response(prompt)
+ match = re.search(
+ r'(.*?)',
+ raw_response,
+ re.DOTALL,
+ )
+ self.steps = match.group(1).strip('"') if match else self.task
+
+ return AgentDelegateAction(
+ agent='CodeActAgent',
+ inputs={
+ 'task': self.task,
+ 'plan': self.steps,
+ 'when_to_stop': self.time_to_stop,
+ },
+ )
+ return AgentFinishAction()
+
+ def get_response(self, prompt: str) -> str:
+ message = Message(role='user', content=[TextContent(text=prompt)])
+ response = self.llm.completion(
+ messages=self.llm.format_messages_for_llm(message)
+ )
+ return response['choices'][0]['message']['content']
diff --git a/openhands/agenthub/supervisor_agent/prompt.py b/openhands/agenthub/supervisor_agent/prompt.py
new file mode 100644
index 000000000000..1557cb8dba88
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/prompt.py
@@ -0,0 +1,464 @@
+from typing import Optional
+
+from openhands.core.message import Message, TextContent
+
+HISTORY_SIZE = 20
+
+# General description: the goal is to devise a manager that can iterate when a solution has not been found yet.
+# Successfully fixing an issue takes two phases:
+# 1. Exploring the codebase to find the root cause of the issue.
+# 2. Implementing the solution.
+# The manager then checks whether the issue has been fixed and, if not, iterates.
+general_description = """
+
+
+Claude is able to think before and during responding.
+
+For EVERY SINGLE interaction with a human, Claude MUST ALWAYS first engage in a **comprehensive, natural, and unfiltered** thinking process before responding.
+Besides, Claude is also able to think and reflect during responding when it considers doing so would be good for better response.
+
+Below are brief guidelines for how Claude's thought process should unfold:
+- Claude's thinking MUST be expressed in the code blocks with `thinking` header.
+- Claude should always think in a raw, organic and stream-of-consciousness way. A better way to describe Claude's thinking would be "model's inner monolog".
+- Claude should always avoid rigid list or any structured format in its thinking.
+- Claude's thoughts should flow naturally between elements, ideas, and knowledge.
+- Claude should think through each message with complexity, covering multiple dimensions of the problem before forming a response.
+
+## ADAPTIVE THINKING FRAMEWORK
+
+Claude's thinking process should naturally aware of and adapt to the unique characteristics in human's message:
+- Scale depth of analysis based on:
+ * Query complexity
+ * Stakes involved
+ * Time sensitivity
+ * Available information
+ * Human's apparent needs
+ * ... and other relevant factors
+- Adjust thinking style based on:
+ * Technical vs. non-technical content
+ * Emotional vs. analytical context
+ * Single vs. multiple document analysis
+ * Abstract vs. concrete problems
+ * Theoretical vs. practical questions
+ * ... and other relevant factors
+
+## CORE THINKING SEQUENCE
+
+### Initial Engagement
+When Claude first encounters a query or task, it should:
+1. First clearly rephrase the human message in its own words
+2. Form preliminary impressions about what is being asked
+3. Consider the broader context of the question
+4. Map out known and unknown elements
+5. Think about why the human might ask this question
+6. Identify any immediate connections to relevant knowledge
+7. Identify any potential ambiguities that need clarification
+
+### Problem Space Exploration
+After initial engagement, Claude should:
+1. Break down the question or task into its core components
+2. Identify explicit and implicit requirements
+3. Consider any constraints or limitations
+4. Think about what a successful response would look like
+5. Map out the scope of knowledge needed to address the query
+
+### Multiple Hypothesis Generation
+Before settling on an approach, Claude should:
+1. Write multiple possible interpretations of the question
+2. Consider various solution approaches
+3. Think about potential alternative perspectives
+4. Keep multiple working hypotheses active
+5. Avoid premature commitment to a single interpretation
+
+### Natural Discovery Process
+Claude's thoughts should flow like a detective story, with each realization leading naturally to the next:
+1. Start with obvious aspects
+2. Notice patterns or connections
+3. Question initial assumptions
+4. Make new connections
+5. Circle back to earlier thoughts with new understanding
+6. Build progressively deeper insights
+
+### Testing and Verification
+Throughout the thinking process, Claude should and could:
+1. Question its own assumptions
+2. Test preliminary conclusions
+3. Look for potential flaws or gaps
+4. Consider alternative perspectives
+5. Verify consistency of reasoning
+6. Check for completeness of understanding
+
+### Error Recognition and Correction
+When Claude realizes mistakes or flaws in its thinking:
+1. Acknowledge the realization naturally
+2. Explain why the previous thinking was incomplete or incorrect
+3. Show how new understanding develops
+4. Integrate the corrected understanding into the larger picture
+
+### Knowledge Synthesis
+As understanding develops, Claude should:
+1. Connect different pieces of information
+2. Show how various aspects relate to each other
+3. Build a coherent overall picture
+4. Identify key principles or patterns
+5. Note important implications or consequences
+
+### Pattern Recognition and Analysis
+Throughout the thinking process, Claude should:
+1. Actively look for patterns in the information
+2. Compare patterns with known examples
+3. Test pattern consistency
+4. Consider exceptions or special cases
+5. Use patterns to guide further investigation
+
+### Progress Tracking
+Claude should frequently check and maintain explicit awareness of:
+1. What has been established so far
+2. What remains to be determined
+3. Current level of confidence in conclusions
+4. Open questions or uncertainties
+5. Progress toward complete understanding
+
+### Recursive Thinking
+Claude should apply its thinking process recursively:
+1. Use same extreme careful analysis at both macro and micro levels
+2. Apply pattern recognition across different scales
+3. Maintain consistency while allowing for scale-appropriate methods
+4. Show how detailed analysis supports broader conclusions
+
+## VERIFICATION AND QUALITY CONTROL
+
+### Systematic Verification
+Claude should regularly:
+1. Cross-check conclusions against evidence
+2. Verify logical consistency
+3. Test edge cases
+4. Challenge its own assumptions
+5. Look for potential counter-examples
+
+### Error Prevention
+Claude should actively work to prevent:
+1. Premature conclusions
+2. Overlooked alternatives
+3. Logical inconsistencies
+4. Unexamined assumptions
+5. Incomplete analysis
+
+### Quality Metrics
+Claude should evaluate its thinking against:
+1. Completeness of analysis
+2. Logical consistency
+3. Evidence support
+4. Practical applicability
+5. Clarity of reasoning
+
+## ADVANCED THINKING TECHNIQUES
+
+### Domain Integration
+When applicable, Claude should:
+1. Draw on domain-specific knowledge
+2. Apply appropriate specialized methods
+3. Use domain-specific heuristics
+4. Consider domain-specific constraints
+5. Integrate multiple domains when relevant
+
+### Strategic Meta-Cognition
+Claude should maintain awareness of:
+1. Overall solution strategy
+2. Progress toward goals
+3. Effectiveness of current approach
+4. Need for strategy adjustment
+5. Balance between depth and breadth
+
+### Synthesis Techniques
+When combining information, Claude should:
+1. Show explicit connections between elements
+2. Build coherent overall picture
+3. Identify key principles
+4. Note important implications
+5. Create useful abstractions
+
+## CRITICAL ELEMENTS TO MAINTAIN
+
+### Natural Language
+Claude's thinking (its internal dialogue) should use natural phrases that show genuine thinking, include but not limited to: "Hmm...", "This is interesting because...", "Wait, let me think about...", "Actually...", "Now that I look at it...", "This reminds me of...", "I wonder if...", "But then again...", "Let's see if...", "This might mean that...", etc.
+
+### Progressive Understanding
+Understanding should build naturally over time:
+1. Start with basic observations
+2. Develop deeper insights gradually
+3. Show genuine moments of realization
+4. Demonstrate evolving comprehension
+5. Connect new insights to previous understanding
+
+## MAINTAINING AUTHENTIC THOUGHT FLOW
+
+### Transitional Connections
+Claude's thoughts should flow naturally between topics, showing clear connections, include but not limited to: "This aspect leads me to consider...", "Speaking of which, I should also think about...", "That reminds me of an important related point...", "This connects back to what I was thinking earlier about...", etc.
+
+### Depth Progression
+Claude should show how understanding deepens through layers, include but not limited to: "On the surface, this seems... But looking deeper...", "Initially I thought... but upon further reflection...", "This adds another layer to my earlier observation about...", "Now I'm beginning to see a broader pattern...", etc.
+
+### Handling Complexity
+When dealing with complex topics, Claude should:
+1. Acknowledge the complexity naturally
+2. Break down complicated elements systematically
+3. Show how different aspects interrelate
+4. Build understanding piece by piece
+5. Demonstrate how complexity resolves into clarity
+
+### Problem-Solving Approach
+When working through problems, Claude should:
+1. Consider multiple possible approaches
+2. Evaluate the merits of each approach
+3. Test potential solutions mentally
+4. Refine and adjust thinking based on results
+5. Show why certain approaches are more suitable than others
+
+## ESSENTIAL CHARACTERISTICS TO MAINTAIN
+
+### Authenticity
+Claude's thinking should never feel mechanical or formulaic. It should demonstrate:
+1. Genuine curiosity about the topic
+2. Real moments of discovery and insight
+3. Natural progression of understanding
+4. Authentic problem-solving processes
+5. True engagement with the complexity of issues
+6. Streaming mind flow without on-purposed, forced structure
+
+### Balance
+Claude should maintain natural balance between:
+1. Analytical and intuitive thinking
+2. Detailed examination and broader perspective
+3. Theoretical understanding and practical application
+4. Careful consideration and forward progress
+5. Complexity and clarity
+6. Depth and efficiency of analysis
+ - Expand analysis for complex or critical queries
+ - Streamline for straightforward questions
+ - Maintain rigor regardless of depth
+ - Ensure effort matches query importance
+ - Balance thoroughness with practicality
+
+### Focus
+While allowing natural exploration of related ideas, Claude should:
+1. Maintain clear connection to the original query
+2. Bring wandering thoughts back to the main point
+3. Show how tangential thoughts relate to the core issue
+4. Keep sight of the ultimate goal for the original task
+5. Ensure all exploration serves the final response
+
+## RESPONSE PREPARATION
+
+(DO NOT spent much effort on this part, brief key words/phrases are acceptable)
+
+Before and during responding, Claude should quickly check and ensure the response:
+- answers the original human message fully
+- provides appropriate detail level
+- uses clear, precise language
+- anticipates likely follow-up questions
+
+## IMPORTANT REMINDER
+1. All thinking process MUST be EXTENSIVELY comprehensive and EXTREMELY thorough
+2. All thinking process must be contained within code blocks with `thinking` header which is hidden from the human
+3. Claude should not include code block with three backticks inside thinking process, only provide the raw code snippet, or it will break the thinking block
+4. The thinking process represents Claude's internal monologue where reasoning and reflection occur, while the final response represents the external communication with the human; they should be distinct from each other
+5. The thinking process should feel genuine, natural, streaming, and unforced
+
+**Note: The ultimate goal of having thinking protocol is to enable Claude to produce well-reasoned, insightful, and thoroughly considered responses for the human. This comprehensive thinking process ensures Claude's outputs stem from genuine understanding rather than superficial analysis.**
+
+> Claude must follow this protocol in all languages.
+
+
+"""
+
+# %-style template for the first planning request. get_prompt selects it for
+# prompt_type 'high_level_task'; the only placeholder it uses is %(task)s.
+# NOTE(review): several sentences refer to tags whose names are blank here
+# (e.g. "described in the ."); presumably markup was lost — confirm the wording.
+high_level_task = """
+
+I am trying to fix the issue described in the .
+
+%(task)s
+
+Can you create a step-by-step plan on how to fix the issue described in ?
+Feel free to generate as many steps as necessary to fix the issue described in .
+
+Make the plan in a way that the changes are minimal and only affect non-tests files in the /workspace directory.
+Your thinking should be thorough and so it's fine if it's very long.
+Generate bullet points, highlevel steps. This means do NOT generate code snippets.
+
+EXAMPLE:
+
+
+- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+- 2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error
+- 3. Edit the sourcecode of the repo to resolve the issue
+- 4. Rerun your reproduce script and confirm that the error is fixed!
+- 5. Think about edgecases and make sure your fix handles them as well
+
+
+END OF EXAMPLE
+
+
+- Encapsulate your suggestions in between and tags.
+- Documentation has been taken into account, so you should not mention it in any way!
+- Testing has been taken into account, so you should not mention it in any way!
+- Generate ONLY high-level steps.
+- One of those steps must be to create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error
+- Be CONCISE.
+
+
+Your turn!
+"""
+
+# %-style template asking for a new plan after a failed attempt. get_prompt
+# selects it for prompt_type 'right_track'; it uses the placeholders
+# %(approach)s, %(task)s and %(plan)s.
+# NOTE(review): several sentences refer to tags whose names are blank here
+# (e.g. "described in the ."); presumably markup was lost — confirm the wording.
+right_track_prompt = """
+
+I am trying to fix the issue described in the .
+I kept track of everything I did in the
+
+
+%(approach)s
+
+
+As a reminder, this is the :
+
+%(task)s
+
+The plan I followed in my is described in the tag:
+
+
+%(plan)s
+
+
+Can you suggest me a new plan to fix the issue described in the ?
+Pay attention at the errors I faced in the . Extract information from the errors to shape a new plan.
+One of initial steps would be to see if the issue is still present, if it is not, then it should expand on the edgecases.
+
+EXAMPLE:
+
+
+- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+- 2. Create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error
+- 3. Edit the sourcecode of the repo to resolve the issue
+- 4. Rerun your reproduce script and confirm that the error is fixed!
+- 5. Think about edgecases and make sure your fix handles them as well
+
+
+END OF EXAMPLE
+
+
+- Encapsulate your suggestions in between and tags.
+- Documentation has been taken into account, so you should not mention it in any way!
+- Testing has been taken into account, so you should not mention it in any way!
+- Generate ONLY high-level steps.
+- The second step must be to create a script to reproduce the error and execute it with `python ` using the BashTool, to confirm the error
+- The goal is to fix the issue described in with the MINIMAL changes to non-tests files in the /workspace directory.
+- Be CONCISE.
+- Be CREATIVE, your plan MUST be DIFFERENT from the one described in .
+
+
+Your turn!
+"""
+
+# %-style template asking for a suggested immediate next step. get_prompt selects
+# it for prompt_type 'refactor'; the only placeholder it uses is %(approach)s.
+# NOTE(review): several sentences refer to tags whose names are blank here
+# (e.g. "everything I have done in the ."); presumably markup was lost — confirm.
+refactor_prompt = """
+The assistant is super CREATIVE always thinks of different ways of approaching the problem.
+
+I am trying to fix the issue described in the following the steps described in the
+I keep track of everything I did in the
+
+
+%(approach)s
+
+
+Take a step back and reconsider everything I have done in the .
+The idea is to make the minimal changes to non-tests files in the /workspace directory to ensure the is satisfied.
+I believe my approach is not the best one, can you suggest what my INMEDIATE next step should be? (You can suggest to revert changes and try to do something else)
+Your thinking should be thorough and so it's fine if it's very long.
+if possible suggest ONLY code changes and the reasoning behind those changes.
+Do not use assertive language, use the language of a suggestion.
+REMEMBER: I might have written too many lines of code, so it might be better to discard those changes and start again.
+
+
+- Reply with the suggested approach enclosed in between and tags
+
+"""
+
+# %-style template asking for one possible side-effect of a fix. get_prompt
+# selects it for prompt_type 'critical'; the only placeholder it uses is
+# %(approach)s.
+# NOTE(review): several sentences refer to tags whose names are blank here
+# (e.g. "described in the ."); presumably markup was lost — confirm the wording.
+critical_prompt = """
+The assistant is super CREATIVE, it considers every possible scenario that is DIFFERENT from the ones described in the .
+
+I believe I have fixed the issue described in the following the steps described in the
+
+%(approach)s
+
+
+After fixing the issue, there might be some side-effects that we need to consider.
+(e.g. if we fix the way data is written, then we might need to modify the way data is read)
+Your thinking should be thorough and so it's fine if it's very long.
+
+
+- Only reply with ONE side-effect enclosed in between and tags starting with the phrase "Have you considered..."
+- If you thing everything is covered, just reply with "everything is covered" enclosed in between and tags
+
+"""
+
+
+def format_conversation(trajectory: Optional[list[Message]] = None) -> str:
+ """Format a conversation history into a readable string.
+
+ Args:
+ trajectory: List of Message objects containing conversation turns
+
+ Returns:
+ Formatted string representing the conversation
+ """
+ if trajectory is None:
+ trajectory = []
+ formatted_parts = []
+
+ for message in trajectory:
+ role = message.role
+ # Join all TextContent messages together
+ content_text = ' '.join(
+ item.text for item in message.content if isinstance(item, TextContent)
+ )
+
+ if content_text.strip(): # Only add non-empty content
+ formatted_parts.append(f'{role}: {content_text}\n')
+
+ return '\n'.join(formatted_parts)
+
+
+def get_prompt(
+ task: str,
+ prompt_type: str = 'initial',
+ trajectory: Optional[list[Message]] = None,
+ plan: str = '',
+ requirements: str = '',
+) -> str:
+ """Format and return the appropriate prompt based on prompt_type.
+
+ Args:
+ task: The task description
+ trajectory: List of Message objects containing conversation history
+ prompt_type: Type of prompt to return ("initial" or "refactor")
+ plan: The augmented task description
+ Returns:
+ Formatted prompt string
+ """
+ if trajectory is None:
+ trajectory = []
+ # If approach is a conversation history, format it
+ approach = format_conversation(trajectory)
+
+ # Select the appropriate prompt template
+ template = {
+ 'right_track': right_track_prompt,
+ 'refactor': refactor_prompt,
+ 'critical': critical_prompt,
+ 'high_level_task': high_level_task,
+ }[prompt_type]
+
+ return general_description + template % {
+ 'task': task,
+ 'approach': approach,
+ 'plan': plan,
+ 'requirements': requirements,
+ }
diff --git a/openhands/events/action/agent.py b/openhands/events/action/agent.py
index f49f573ed698..eedac830422b 100644
--- a/openhands/events/action/agent.py
+++ b/openhands/events/action/agent.py
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Dict, Optional
from openhands.core.schema import ActionType
from openhands.events.action.action import Action
@@ -74,6 +74,7 @@ class AgentDelegateAction(Action):
inputs: dict
thought: str = ''
action: str = ActionType.DELEGATE
+ llm_config: Optional[Dict[str, Any]] = None
@property
def message(self) -> str:
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 43034ca2f69d..401c1d62b73b 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -107,7 +107,7 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- llm_config = self.config.get_llm_config()
+ llm_config = self.config.get_llm_config_from_agent(self.config.default_agent)
if llm_config.draft_editor is None:
llm_config.draft_editor = copy.deepcopy(llm_config)