diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py
index 892c0d682d2e..ed7c58910fde 100644
--- a/openhands/agenthub/__init__.py
+++ b/openhands/agenthub/__init__.py
@@ -19,6 +19,7 @@
     'delegator_agent',
     'dummy_agent',
     'browsing_agent',
+    'supervisor_agent',
 ]
 
 for agent in all_microagents.values():
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
index d8b5702a235d..70001eeb4741 100644
--- a/openhands/agenthub/codeact_agent/codeact_agent.py
+++ b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -1,4 +1,3 @@
-import json
 import os
 from collections import deque
 
@@ -9,8 +8,10 @@
 from openhands.controller.agent import Agent
 from openhands.controller.state.state import State
 from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
 from openhands.core.logger import openhands_logger as logger
 from openhands.core.message import ImageContent, Message, TextContent
+from openhands.core.utils import json
 from openhands.events.action import (
     Action,
     AgentDelegateAction,
@@ -85,6 +86,17 @@ def __init__(
         Parameters:
         - llm (LLM): The llm to be used by this agent
         """
+
+        # import pdb; pdb.set_trace()
+        llm_config = LLMConfig(
+            model='litellm_proxy/claude-3-5-sonnet-20241022',
+            api_key='REDACTED',
+            temperature=0.0,
+            base_url='https://llm-proxy.app.all-hands.dev',
+        )
+        llm = LLM(llm_config)
+        # TODO: Remove this once we have a real AgentConfig
+        config = AgentConfig()
         super().__init__(llm, config)
         self.pending_actions: deque[Action] = deque()
         self.reset()
@@ -369,6 +381,11 @@ def step(self, state: State) -> Action:
         - MessageAction(content) - Message action to run (e.g. ask for clarification)
         - AgentFinishAction() - end the interaction
         """
+
+        # If this agent has a supervisor, we need to get the time to stop from the supervisor
+        if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None):
+            self.when_to_stop: bool = state.inputs['when_to_stop']
+
         # Continue with pending actions if any
         if self.pending_actions:
             return self.pending_actions.popleft()
@@ -469,6 +486,14 @@ def _get_messages(self, state: State) -> list[Message]:
             else:
                 raise ValueError(f'Unknown event type: {type(event)}')
 
+            if state.inputs.get('next_step', ''):
+                messages_to_add = [
+                    Message(
+                        role='user',
+                        content=[TextContent(text=state.inputs['next_step'])],
+                    )
+                ]
+
             # Check pending tool call action messages and see if they are complete
             _response_ids_to_remove = []
             for (
@@ -501,6 +526,13 @@ def _get_messages(self, state: State) -> list[Message]:
                         self.prompt_manager.enhance_message(message)
                     messages.append(message)
 
+        if state.inputs.get('next_step', ''):
+            messages.append(
+                Message(
+                    role='user', content=[TextContent(text=state.inputs['next_step'])]
+                )
+            )
+
         if self.llm.is_caching_prompt_active():
             # NOTE: this is only needed for anthropic
             # following logic here:
diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py
index d06cf01cd3e4..c4ce415b1c7a 100644
--- a/openhands/agenthub/codeact_agent/function_calling.py
+++ b/openhands/agenthub/codeact_agent/function_calling.py
@@ -445,7 +445,9 @@ def __init__(self):
     ),
 )
 
-_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
+_FINISH_DESCRIPTION = (
+    """Finish the interaction when the task is successfully complete."""
+)
 
 FinishTool = ChatCompletionToolParam(
     type='function',
@@ -455,6 +457,18 @@ def __init__(self):
     ),
 )
 
+_HELP_DESCRIPTION = (
+    """Request assistance when the assistant cannot proceed further with the task."""
+)
+
+HelpTool = ChatCompletionToolParam(
+    type='function',
+    function=ChatCompletionToolParamFunctionChunk(
+        name='help',
+        description=_HELP_DESCRIPTION,
+    ),
+)
+
 
 def combine_thought(action: Action, thought: str) -> Action:
     if not hasattr(action, 'thought'):
@@ -497,7 +511,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                     inputs=arguments,
                 )
             elif tool_call.function.name == 'finish':
-                action = AgentFinishAction()
+                action = AgentFinishAction(outputs={'fixed': True})
+            elif tool_call.function.name == 'help':
+                action = AgentFinishAction(outputs={'fixed': False})
             elif tool_call.function.name == 'edit_file':
                 action = FileEditAction(**arguments)
             elif tool_call.function.name == 'str_replace_editor':
@@ -555,7 +571,7 @@ def get_tools(
     codeact_enable_llm_editor: bool = False,
     codeact_enable_jupyter: bool = False,
 ) -> list[ChatCompletionToolParam]:
-    tools = [CmdRunTool, FinishTool]
+    tools = [CmdRunTool, FinishTool, HelpTool]
     if codeact_enable_browsing:
         tools.append(WebReadTool)
         tools.append(BrowserTool)
diff --git a/openhands/agenthub/delegator_agent/agent.py b/openhands/agenthub/delegator_agent/agent.py
index 7cb987c8c3f7..e17381f5d8f7 100644
--- a/openhands/agenthub/delegator_agent/agent.py
+++ b/openhands/agenthub/delegator_agent/agent.py
@@ -49,7 +49,6 @@ def step(self, state: State) -> Action:
 
         if not isinstance(last_observation, AgentDelegateObservation):
             raise Exception('Last observation is not an AgentDelegateObservation')
-
         goal, _ = state.get_current_user_intent()
         if self.current_delegate == 'study':
             self.current_delegate = 'coder'
diff --git a/openhands/agenthub/micro/coder/prompt.md b/openhands/agenthub/micro/coder/prompt.md
index 31d4439e2b36..046318030bff 100644
--- a/openhands/agenthub/micro/coder/prompt.md
+++ b/openhands/agenthub/micro/coder/prompt.md
@@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/openhands/agenthub/micro/study_repo_for_task/prompt.md b/openhands/agenthub/micro/study_repo_for_task/prompt.md
index 91cdf3c3c6a0..d6e5ca77c5c2 100644
--- a/openhands/agenthub/micro/study_repo_for_task/prompt.md
+++ b/openhands/agenthub/micro/study_repo_for_task/prompt.md
@@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/openhands/agenthub/micro/verifier/prompt.md b/openhands/agenthub/micro/verifier/prompt.md
index 48c7a73cc45d..d3ec424565a4 100644
--- a/openhands/agenthub/micro/verifier/prompt.md
+++ b/openhands/agenthub/micro/verifier/prompt.md
@@ -22,7 +22,13 @@ explaining what the problem is.
 
 ## History
 {{ instructions.history_truncated }}
-{{ history_to_json(state.history, max_events=20) }}
+{% for event in state.history[-20:] %}
+{% if event.source == "agent" %}
+Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
+{% else %}
+User: {{ event.content if event.content else event.observation }}
+{% endif %}
+{% endfor %}
 
 ## Format
 {{ instructions.format.action }}
diff --git a/openhands/agenthub/supervisor_agent/__init__.py b/openhands/agenthub/supervisor_agent/__init__.py
new file mode 100644
index 000000000000..6b07ea69fc67
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/__init__.py
@@ -0,0 +1,4 @@
+from openhands.agenthub.supervisor_agent.agent import SupervisorAgent
+from openhands.controller.agent import Agent
+
+Agent.register('SupervisorAgent', SupervisorAgent)
diff --git a/openhands/agenthub/supervisor_agent/agent.py b/openhands/agenthub/supervisor_agent/agent.py
new file mode 100644
index 000000000000..37bb4112fedb
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/agent.py
@@ -0,0 +1,154 @@
+import json
+import logging
+import re
+from typing import Any, Dict, List
+
+from openhands.agenthub.supervisor_agent.prompt import get_prompt
+from openhands.controller.agent import Agent
+from openhands.controller.state.state import State
+from openhands.core.config import AgentConfig
+from openhands.core.config.llm_config import LLMConfig
+from openhands.core.message import Message, TextContent
+from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
+from openhands.events.observation.delegate import AgentDelegateObservation
+from openhands.events.observation.observation import Observation
+from openhands.llm.llm import LLM
+from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
+from openhands.runtime.plugins.jupyter import JupyterRequirement
+from openhands.runtime.plugins.requirement import PluginRequirement
+
+
+class SupervisorAgent(Agent):
+    VERSION = '1.0'
+    """
+    The Supervisor Agent is an agent that collects information from other agents
+    and makes decisions based on the information.
+    """
+
+    current_delegate: str = ''
+    suggested_approaches: List[Dict[str, List[str]]] = []
+    suggested_approach_index: int = -1  # -1 Because we increment it before using it
+    results: Dict[str, List[Any]] = {'search': [], 'code': []}
+    condensed_information: str = ''
+    does_it_needs_a_test: bool = False
+    task: str = ''
+    test_command: str = ''
+    time_to_stop: int = 60  # Every 60 iterations, we stop and evaluate the approach
+    phase: int = 0
+    steps: str = ''
+
+    sandbox_plugins: list[PluginRequirement] = [
+        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
+        # AgentSkillsRequirement provides a lot of Python functions,
+        # and it needs to be initialized before Jupyter for Jupyter to use those functions.
+        AgentSkillsRequirement(),
+        JupyterRequirement(),
+    ]
+
+    # Add class attribute for tried_direct_code
+    tried_direct_code: bool = False
+
+    # Add class attribute for augmented_task
+    augmented_task: str = ''
+
+    def __init__(self, llm: LLM, config: AgentConfig):
+        """Initialize the Supervisor Agent with an LLM
+
+        Parameters:
+        - llm (LLM): The llm to be used by this agent
+        """
+        llm_config = LLMConfig(
+            model='openai/o1-preview', api_key='REDACTED', temperature=1.0
+        )
+        llm = LLM(llm_config)
+        # TODO: Remove this once we have a real AgentConfig
+        config = AgentConfig(llm_config='o1-mini')
+        super().__init__(llm, config)
+        # Set up logger
+        self.logger = logging.getLogger(__name__)
+        logging.basicConfig(level=logging.DEBUG)  # Set the logging level
+        self.llm_config = llm.config
+
+    def step(self, state: State) -> Action:
+        self.logger.debug('Starting step with state: %s', state)
+        self.logger.debug('LLM config: %s', self.llm_config)
+        last_observation: Observation | None = None
+        for event in reversed(state.history):
+            if isinstance(event, Observation):
+                last_observation = event
+                break
+
+        task, _ = state.get_current_user_intent()
+        self.task = task or ''
+
+        if self.phase == 0:
+            self.phase += 1
+            prompt = get_prompt(self.task, prompt_type='high_level_task')
+            raw_response = self.get_response(prompt)
+            match = re.search(
+                r'<steps>(.*?)</steps>',
+                raw_response,
+                re.DOTALL,
+            )
+            self.steps = match.group(1).strip('"') if match else self.task
+            return AgentDelegateAction(
+                agent='CodeActAgent',
+                inputs={
+                    'task': self.task,
+                    'plan': self.steps,
+                    'when_to_stop': self.time_to_stop,
+                },
+            )
+
+        if not isinstance(last_observation, AgentDelegateObservation):
+            return AgentFinishAction()
+
+        if not last_observation.outputs.get('fixed', True):
+            trajectory_str: str = last_observation.outputs['trayectory']
+            trajectory_data = json.loads(trajectory_str)
+            deserialized_trajectory = [
+                Message(
+                    role=msg_dict.get('role'),
+                    content=[
+                        TextContent(text=content_text)
+                        for content_text in [
+                            msg_dict['content'][0]['text']
+                            if isinstance(msg_dict['content'], list)
+                            else msg_dict['content']
+                        ]
+                    ],
+                    tool_call_id=msg_dict.get('tool_call_id'),
+                    name=msg_dict.get('name'),
+                )
+                for msg_dict in trajectory_data
+            ]
+            prompt = get_prompt(
+                self.task,
+                'right_track',
+                trajectory=deserialized_trajectory,
+                plan=self.steps,
+            )
+            raw_response = self.get_response(prompt)
+            match = re.search(
+                r'<steps>(.*?)</steps>',
+                raw_response,
+                re.DOTALL,
+            )
+            self.steps = match.group(1).strip('"') if match else self.task
+
+            return AgentDelegateAction(
+                agent='CodeActAgent',
+                inputs={
+                    'task': self.task,
+                    'plan': self.steps,
+                    'when_to_stop': self.time_to_stop,
+                },
+            )
+        return AgentFinishAction()
+
+    def get_response(self, prompt: str) -> str:
+        message = Message(role='user', content=[TextContent(text=prompt)])
+        response = self.llm.completion(
+            messages=self.llm.format_messages_for_llm(message)
+        )
+        return response['choices'][0]['message']['content']
diff --git a/openhands/agenthub/supervisor_agent/prompt.py b/openhands/agenthub/supervisor_agent/prompt.py
new file mode 100644
index 000000000000..1557cb8dba88
--- /dev/null
+++ b/openhands/agenthub/supervisor_agent/prompt.py
@@ -0,0 +1,464 @@
+from typing import Optional
+
+from openhands.core.message import Message, TextContent
+
+HISTORY_SIZE = 20
+
+# General Description, the goal is to devise a manager that is able to iterate if the solution has not been found yet.
+# In order to successfully fix an issue there are two phases:
+# 1. Exploring the codebase, finding the root cause of the issue.
+# 2. Implementing the solution.
+# Then the manager needs to check if the issue has been fixed, if not, it needs to iterate.
+general_description = """
+<anthropic_thinking_protocol>
+
+Claude is able to think before and during responding.
+
+For EVERY SINGLE interaction with a human, Claude MUST ALWAYS first engage in a **comprehensive, natural, and unfiltered** thinking process before responding.
+Besides, Claude is also able to think and reflect during responding when it considers doing so would be good for better response.
+
+Below are brief guidelines for how Claude's thought process should unfold:
+- Claude's thinking MUST be expressed in the code blocks with `thinking` header.
+- Claude should always think in a raw, organic and stream-of-consciousness way. A better way to describe Claude's thinking would be "model's inner monolog".
+- Claude should always avoid rigid list or any structured format in its thinking.
+- Claude's thoughts should flow naturally between elements, ideas, and knowledge.
+- Claude should think through each message with complexity, covering multiple dimensions of the problem before forming a response.
+
+## ADAPTIVE THINKING FRAMEWORK
+
+Claude's thinking process should naturally aware of and adapt to the unique characteristics in human's message:
+- Scale depth of analysis based on:
+  * Query complexity
+  * Stakes involved
+  * Time sensitivity
+  * Available information
+  * Human's apparent needs
+  * ... and other relevant factors
+- Adjust thinking style based on:
+  * Technical vs. non-technical content
+  * Emotional vs. analytical context
+  * Single vs. multiple document analysis
+  * Abstract vs. concrete problems
+  * Theoretical vs. practical questions
+  * ... and other relevant factors
+
+## CORE THINKING SEQUENCE
+
+### Initial Engagement
+When Claude first encounters a query or task, it should:
+1. First clearly rephrase the human message in its own words
+2. Form preliminary impressions about what is being asked
+3. Consider the broader context of the question
+4. Map out known and unknown elements
+5. Think about why the human might ask this question
+6. Identify any immediate connections to relevant knowledge
+7. Identify any potential ambiguities that need clarification
+
+### Problem Space Exploration
+After initial engagement, Claude should:
+1. Break down the question or task into its core components
+2. Identify explicit and implicit requirements
+3. Consider any constraints or limitations
+4. Think about what a successful response would look like
+5. Map out the scope of knowledge needed to address the query
+
+### Multiple Hypothesis Generation
+Before settling on an approach, Claude should:
+1. Write multiple possible interpretations of the question
+2. Consider various solution approaches
+3. Think about potential alternative perspectives
+4. Keep multiple working hypotheses active
+5. Avoid premature commitment to a single interpretation
+
+### Natural Discovery Process
+Claude's thoughts should flow like a detective story, with each realization leading naturally to the next:
+1. Start with obvious aspects
+2. Notice patterns or connections
+3. Question initial assumptions
+4. Make new connections
+5. Circle back to earlier thoughts with new understanding
+6. Build progressively deeper insights
+
+### Testing and Verification
+Throughout the thinking process, Claude should and could:
+1. Question its own assumptions
+2. Test preliminary conclusions
+3. Look for potential flaws or gaps
+4. Consider alternative perspectives
+5. Verify consistency of reasoning
+6. Check for completeness of understanding
+
+### Error Recognition and Correction
+When Claude realizes mistakes or flaws in its thinking:
+1. Acknowledge the realization naturally
+2. Explain why the previous thinking was incomplete or incorrect
+3. Show how new understanding develops
+4. Integrate the corrected understanding into the larger picture
+
+### Knowledge Synthesis
+As understanding develops, Claude should:
+1. Connect different pieces of information
+2. Show how various aspects relate to each other
+3. Build a coherent overall picture
+4. Identify key principles or patterns
+5. Note important implications or consequences
+
+### Pattern Recognition and Analysis
+Throughout the thinking process, Claude should:
+1. Actively look for patterns in the information
+2. Compare patterns with known examples
+3. Test pattern consistency
+4. Consider exceptions or special cases
+5. Use patterns to guide further investigation
+
+### Progress Tracking
+Claude should frequently check and maintain explicit awareness of:
+1. What has been established so far
+2. What remains to be determined
+3. Current level of confidence in conclusions
+4. Open questions or uncertainties
+5. Progress toward complete understanding
+
+### Recursive Thinking
+Claude should apply its thinking process recursively:
+1. Use same extreme careful analysis at both macro and micro levels
+2. Apply pattern recognition across different scales
+3. Maintain consistency while allowing for scale-appropriate methods
+4. Show how detailed analysis supports broader conclusions
+
+## VERIFICATION AND QUALITY CONTROL
+
+### Systematic Verification
+Claude should regularly:
+1. Cross-check conclusions against evidence
+2. Verify logical consistency
+3. Test edge cases
+4. Challenge its own assumptions
+5. Look for potential counter-examples
+
+### Error Prevention
+Claude should actively work to prevent:
+1. Premature conclusions
+2. Overlooked alternatives
+3. Logical inconsistencies
+4. Unexamined assumptions
+5. Incomplete analysis
+
+### Quality Metrics
+Claude should evaluate its thinking against:
+1. Completeness of analysis
+2. Logical consistency
+3. Evidence support
+4. Practical applicability
+5. Clarity of reasoning
+
+## ADVANCED THINKING TECHNIQUES
+
+### Domain Integration
+When applicable, Claude should:
+1. Draw on domain-specific knowledge
+2. Apply appropriate specialized methods
+3. Use domain-specific heuristics
+4. Consider domain-specific constraints
+5. Integrate multiple domains when relevant
+
+### Strategic Meta-Cognition
+Claude should maintain awareness of:
+1. Overall solution strategy
+2. Progress toward goals
+3. Effectiveness of current approach
+4. Need for strategy adjustment
+5. Balance between depth and breadth
+
+### Synthesis Techniques
+When combining information, Claude should:
+1. Show explicit connections between elements
+2. Build coherent overall picture
+3. Identify key principles
+4. Note important implications
+5. Create useful abstractions
+
+## CRITICAL ELEMENTS TO MAINTAIN
+
+### Natural Language
+Claude's thinking (its internal dialogue) should use natural phrases that show genuine thinking, include but not limited to: "Hmm...", "This is interesting because...", "Wait, let me think about...", "Actually...", "Now that I look at it...", "This reminds me of...", "I wonder if...", "But then again...", "Let's see if...", "This might mean that...", etc.
+
+### Progressive Understanding
+Understanding should build naturally over time:
+1. Start with basic observations
+2. Develop deeper insights gradually
+3. Show genuine moments of realization
+4. Demonstrate evolving comprehension
+5. Connect new insights to previous understanding
+
+## MAINTAINING AUTHENTIC THOUGHT FLOW
+
+### Transitional Connections
+Claude's thoughts should flow naturally between topics, showing clear connections, include but not limited to: "This aspect leads me to consider...", "Speaking of which, I should also think about...", "That reminds me of an important related point...", "This connects back to what I was thinking earlier about...", etc.
+
+### Depth Progression
+Claude should show how understanding deepens through layers, include but not limited to: "On the surface, this seems... But looking deeper...", "Initially I thought... but upon further reflection...", "This adds another layer to my earlier observation about...", "Now I'm beginning to see a broader pattern...", etc.
+
+### Handling Complexity
+When dealing with complex topics, Claude should:
+1. Acknowledge the complexity naturally
+2. Break down complicated elements systematically
+3. Show how different aspects interrelate
+4. Build understanding piece by piece
+5. Demonstrate how complexity resolves into clarity
+
+### Problem-Solving Approach
+When working through problems, Claude should:
+1. Consider multiple possible approaches
+2. Evaluate the merits of each approach
+3. Test potential solutions mentally
+4. Refine and adjust thinking based on results
+5. Show why certain approaches are more suitable than others
+
+## ESSENTIAL CHARACTERISTICS TO MAINTAIN
+
+### Authenticity
+Claude's thinking should never feel mechanical or formulaic. It should demonstrate:
+1. Genuine curiosity about the topic
+2. Real moments of discovery and insight
+3. Natural progression of understanding
+4. Authentic problem-solving processes
+5. True engagement with the complexity of issues
+6. Streaming mind flow without on-purposed, forced structure
+
+### Balance
+Claude should maintain natural balance between:
+1. Analytical and intuitive thinking
+2. Detailed examination and broader perspective
+3. Theoretical understanding and practical application
+4. Careful consideration and forward progress
+5. Complexity and clarity
+6. Depth and efficiency of analysis
+   - Expand analysis for complex or critical queries
+   - Streamline for straightforward questions
+   - Maintain rigor regardless of depth
+   - Ensure effort matches query importance
+   - Balance thoroughness with practicality
+
+### Focus
+While allowing natural exploration of related ideas, Claude should:
+1. Maintain clear connection to the original query
+2. Bring wandering thoughts back to the main point
+3. Show how tangential thoughts relate to the core issue
+4. Keep sight of the ultimate goal for the original task
+5. Ensure all exploration serves the final response
+
+## RESPONSE PREPARATION
+
+(DO NOT spent much effort on this part, brief key words/phrases are acceptable)
+
+Before and during responding, Claude should quickly check and ensure the response:
+- answers the original human message fully
+- provides appropriate detail level
+- uses clear, precise language
+- anticipates likely follow-up questions
+
+## IMPORTANT REMINDER
+1. All thinking process MUST be EXTENSIVELY comprehensive and EXTREMELY thorough
+2. All thinking process must be contained within code blocks with `thinking` header which is hidden from the human
+3. Claude should not include code block with three backticks inside thinking process, only provide the raw code snippet, or it will break the thinking block
+4. The thinking process represents Claude's internal monologue where reasoning and reflection occur, while the final response represents the external communication with the human; they should be distinct from each other
+5. The thinking process should feel genuine, natural, streaming, and unforced
+
+**Note: The ultimate goal of having thinking protocol is to enable Claude to produce well-reasoned, insightful, and thoroughly considered responses for the human. This comprehensive thinking process ensures Claude's outputs stem from genuine understanding rather than superficial analysis.**
+
+> Claude must follow this protocol in all languages.
+
+</anthropic_thinking_protocol>
+"""
+
+high_level_task = """
+
+I am trying to fix the issue described in the <pr_description>.
+
+%(task)s
+
+Can you create a step-by-step plan on how to fix the issue described in <pr_description>?
+Feel free to generate as many steps as necessary to fix the issue described in <pr_description>.
+
+Make the plan in a way that the changes are minimal and only affect non-tests files in the /workspace directory.
+Your thinking should be thorough and so it's fine if it's very long.
+Generate bullet points, highlevel steps. This means do NOT generate code snippets.
+
+EXAMPLE:
+
+<steps>
+- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+- 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error
+- 3. Edit the sourcecode of the repo to resolve the issue
+- 4. Rerun your reproduce script and confirm that the error is fixed!
+- 5. Think about edgecases and make sure your fix handles them as well
+</steps>
+
+END OF EXAMPLE
+
+<IMPORTANT>
+- Encapsulate your suggestions in between <steps> and </steps> tags.
+- Documentation has been taken into account, so you should not mention it in any way!
+- Testing has been taken into account, so you should not mention it in any way!
+- Generate ONLY high-level steps.
+- One of those steps must be to create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error
+- Be CONCISE.
+</IMPORTANT>
+
+Your turn!
+"""
+
+right_track_prompt = """
+
+I am trying to fix the issue described in the <pr_description>.
+I kept track of everything I did in the <pr_approach>
+
+<pr_approach>
+%(approach)s
+</pr_approach>
+
+As a reminder, this is the <pr_description>:
+
+%(task)s
+
+The plan I followed in my <pr_approach> is described in the <plan> tag:
+
+<plan>
+%(plan)s
+</plan>
+
+Can you suggest me a new plan to fix the issue described in the <pr_description>?
+Pay attention at the errors I faced in the <pr_approach>. Extract information from the errors to shape a new plan.
+One of initial steps would be to see if the issue is still present, if it is not, then it should expand on the edgecases.
+
+EXAMPLE:
+
+<steps>
+- 1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
+- 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error
+- 3. Edit the sourcecode of the repo to resolve the issue
+- 4. Rerun your reproduce script and confirm that the error is fixed!
+- 5. Think about edgecases and make sure your fix handles them as well
+</steps>
+
+END OF EXAMPLE
+
+<IMPORTANT>
+- Encapsulate your suggestions in between <steps> and </steps> tags.
+- Documentation has been taken into account, so you should not mention it in any way!
+- Testing has been taken into account, so you should not mention it in any way!
+- Generate ONLY high-level steps.
+- The second step must be to create a script to reproduce the error and execute it with `python <filename.py>` using the BashTool, to confirm the error
+- The goal is to fix the issue described in <pr_description> with the MINIMAL changes to non-tests files in the /workspace directory.
+- Be CONCISE.
+- Be CREATIVE, your plan MUST be DIFFERENT from the one described in <plan>.
+</IMPORTANT>
+
+Your turn!
+"""
+
+refactor_prompt = """
+The assistant is super CREATIVE always thinks of different ways of approaching the problem.
+
+I am trying to fix the issue described in the <pr_description> following the steps described in the <pr_description>
+I keep track of everything I did in the <pr_approach>
+
+<pr_approach>
+%(approach)s
+</pr_approach>
+
+Take a step back and reconsider everything I have done in the <pr_approach>.
+The idea is to make the minimal changes to non-tests files in the /workspace directory to ensure the <pr_description> is satisfied.
+I believe my approach is not the best one, can you suggest what my INMEDIATE next step should be? (You can suggest to revert changes and try to do something else)
+Your thinking should be thorough and so it's fine if it's very long.
+if possible suggest ONLY code changes and the reasoning behind those changes.
+Do not use assertive language, use the language of a suggestion.
+REMEMBER: I might have written too many lines of code, so it might be better to discard those changes and start again.
+
+<IMPORTANT>
+- Reply with the suggested approach enclosed in between <next_step> and </next_step> tags
+</IMPORTANT>
+"""
+
+critical_prompt = """
+The assistant is super CREATIVE, it considers every possible scenario that is DIFFERENT from the ones described in the <pr_description>.
+
+I believe I have fixed the issue described in the <pr_description> following the steps described in the <pr_approach>
+<pr_approach>
+%(approach)s
+</pr_approach>
+
+After fixing the issue, there might be some side-effects that we need to consider.
+(e.g. if we fix the way data is written, then we might need to modify the way data is read)
+Your thinking should be thorough and so it's fine if it's very long.
+
+<IMPORTANT>
+- Only reply with ONE side-effect enclosed in between <next_step> and </next_step> tags starting with the phrase "Have you considered..."
+- If you thing everything is covered, just reply with "everything is covered" enclosed in between <next_step> and </next_step> tags
+</IMPORTANT>
+"""
+
+
+def format_conversation(trajectory: Optional[list[Message]] = None) -> str:
+    """Format a conversation history into a readable string.
+
+    Args:
+        trajectory: List of Message objects containing conversation turns
+
+    Returns:
+        Formatted string representing the conversation
+    """
+    if trajectory is None:
+        trajectory = []
+    formatted_parts = []
+
+    for message in trajectory:
+        role = message.role
+        # Join all TextContent messages together
+        content_text = ' '.join(
+            item.text for item in message.content if isinstance(item, TextContent)
+        )
+
+        if content_text.strip():  # Only add non-empty content
+            formatted_parts.append(f'{role}: {content_text}\n')
+
+    return '\n'.join(formatted_parts)
+
+
+def get_prompt(
+    task: str,
+    prompt_type: str = 'initial',
+    trajectory: Optional[list[Message]] = None,
+    plan: str = '',
+    requirements: str = '',
+) -> str:
+    """Format and return the appropriate prompt based on prompt_type.
+
+    Args:
+        task: The task description
+        trajectory: List of Message objects containing conversation history
+        prompt_type: Type of prompt to return ("initial" or "refactor")
+        plan: The augmented task description
+    Returns:
+        Formatted prompt string
+    """
+    if trajectory is None:
+        trajectory = []
+    # If approach is a conversation history, format it
+    approach = format_conversation(trajectory)
+
+    # Select the appropriate prompt template
+    template = {
+        'right_track': right_track_prompt,
+        'refactor': refactor_prompt,
+        'critical': critical_prompt,
+        'high_level_task': high_level_task,
+    }[prompt_type]
+
+    return general_description + template % {
+        'task': task,
+        'approach': approach,
+        'plan': plan,
+        'requirements': requirements,
+    }
diff --git a/openhands/events/action/agent.py b/openhands/events/action/agent.py
index f49f573ed698..eedac830422b 100644
--- a/openhands/events/action/agent.py
+++ b/openhands/events/action/agent.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import Any
+from typing import Any, Dict, Optional
 
 from openhands.core.schema import ActionType
 from openhands.events.action.action import Action
@@ -74,6 +74,7 @@ class AgentDelegateAction(Action):
     inputs: dict
     thought: str = ''
     action: str = ActionType.DELEGATE
+    llm_config: Optional[Dict[str, Any]] = None
 
     @property
     def message(self) -> str:
diff --git a/openhands/runtime/utils/edit.py b/openhands/runtime/utils/edit.py
index 43034ca2f69d..401c1d62b73b 100644
--- a/openhands/runtime/utils/edit.py
+++ b/openhands/runtime/utils/edit.py
@@ -107,7 +107,7 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-        llm_config = self.config.get_llm_config()
+        llm_config = self.config.get_llm_config_from_agent(self.config.default_agent)
 
         if llm_config.draft_editor is None:
             llm_config.draft_editor = copy.deepcopy(llm_config)