Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat] Exploring AgentDelegation through Supervisor Agent #4449

Draft
wants to merge 61 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
76cdcd1
Updated tests
AlexCuadron Oct 15, 2024
3beaf5c
chore(deps): bump litellm from 1.49.3 to 1.49.4 (#4406)
dependabot[bot] Oct 15, 2024
c8db8aa
chore(deps-dev): bump llama-index from 0.11.17 to 0.11.18 (#4408)
dependabot[bot] Oct 15, 2024
308dc62
chore(deps): bump modal from 0.64.181 to 0.64.182 (#4407)
dependabot[bot] Oct 15, 2024
158a923
refactor: move get_pairs from memory to shared utils (#4411)
xingyaoww Oct 15, 2024
b6a9163
Fix eval output path in case of @ char (#4416)
mamoodi Oct 15, 2024
8ba531a
Fix for lockup - create the runtime in a background thread (#4412)
tofarr Oct 15, 2024
87f6870
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 16, 2024
6037e20
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 16, 2024
0c5de4c
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 17, 2024
79cb41a
Initial Commit for the Supervisor Agent
AlexCuadron Oct 17, 2024
12798fd
Added support to specify the platform on which the image should be bu…
AlexCuadron Oct 19, 2024
ef3646f
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 20, 2024
7a9975d
Merge branch 'main' into supervisoragent
AlexCuadron Oct 20, 2024
cd4ff77
Merge branch 'main' into supervisoragent
AlexCuadron Oct 21, 2024
18bdb56
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 22, 2024
b2db9e1
Merge branch 'main' into supervisoragent
AlexCuadron Oct 22, 2024
5ddcd60
Merge remote-tracking branch 'upstream/main' into supervisoragent
AlexCuadron Oct 23, 2024
7ca0de6
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 23, 2024
31a18ba
Merge branch 'main' into supervisoragent
AlexCuadron Oct 23, 2024
5a76cc8
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 24, 2024
cd7cac1
Merge branch 'main' into supervisoragent
AlexCuadron Oct 24, 2024
640f769
enables codeactagent delegation
AlexCuadron Oct 24, 2024
d5d44e2
hacky way to enable different LLMs
AlexCuadron Oct 24, 2024
f1d317c
Some progress
AlexCuadron Oct 25, 2024
4a7ef31
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 25, 2024
74b7d04
Merge branch 'main' into supervisoragent
AlexCuadron Oct 25, 2024
32c69af
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 28, 2024
047846e
Merge branch 'main' into supervisoragent
AlexCuadron Oct 28, 2024
bf8b4c0
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 28, 2024
6741b81
Merge branch 'main' into supervisoragent
AlexCuadron Oct 28, 2024
e284c95
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 28, 2024
a749a4c
Merge branch 'main' into supervisoragent
AlexCuadron Oct 28, 2024
04c56c6
fix
AlexCuadron Oct 28, 2024
619bbf1
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 30, 2024
10c8f0c
Merge branch 'main' into supervisoragent
AlexCuadron Oct 30, 2024
65ec945
Merge remote-tracking branch 'upstream/main'
AlexCuadron Oct 31, 2024
d1151b8
Merge branch 'main' into supervisoragent
AlexCuadron Oct 31, 2024
399f19e
MAS
AlexCuadron Oct 31, 2024
d644f45
Merge remote-tracking branch 'upstream/main'
AlexCuadron Nov 5, 2024
500112a
merge
AlexCuadron Nov 5, 2024
ec94128
Merge remote-tracking branch 'upstream/main'
AlexCuadron Nov 5, 2024
f5e8820
merge
AlexCuadron Nov 5, 2024
7ad4bc0
Merge remote-tracking branch 'upstream/main'
AlexCuadron Nov 8, 2024
dbd7ad4
Merge branch 'main' into SupervisorAgent
AlexCuadron Nov 8, 2024
a9e346a
first try
AlexCuadron Nov 11, 2024
413caa6
attempt
AlexCuadron Nov 13, 2024
6a61134
o1 -> sonnet -> o1 -> sonnet
AlexCuadron Nov 16, 2024
cf1321f
thinking claude
AlexCuadron Nov 17, 2024
fa822b0
Merge remote-tracking branch 'upstream/main'
AlexCuadron Dec 31, 2024
7c0f566
merge main
AlexCuadron Jan 1, 2025
098013c
Merge branch 'main' into supervisoragent
AlexCuadron Jan 1, 2025
c8080a0
Merge remote-tracking branch 'upstream/main'
AlexCuadron Jan 1, 2025
cc295fb
Merge branch 'main' into supervisoragent
AlexCuadron Jan 1, 2025
9041e23
Merge branch 'main' into supervisoragent
AlexCuadron Jan 1, 2025
17a6708
Merge remote-tracking branch 'upstream/main'
AlexCuadron Jan 1, 2025
9b7d840
Merge branch 'main' into supervisoragent
AlexCuadron Jan 1, 2025
e43a93e
Merge remote-tracking branch 'upstream/main'
AlexCuadron Jan 8, 2025
cabb7cb
merge main
AlexCuadron Jan 8, 2025
9efc87c
:Merge remote-tracking branch 'upstream/main' into supervisoragent
AlexCuadron Jan 13, 2025
cdb053b
:Merge branch 'main' into supervisoragent
AlexCuadron Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openhands/agenthub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
'delegator_agent',
'dummy_agent',
'browsing_agent',
'supervisor_agent',
]

for agent in all_microagents.values():
Expand Down
34 changes: 33 additions & 1 deletion openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
from collections import deque

Expand All @@ -9,8 +8,10 @@
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import ImageContent, Message, TextContent
from openhands.core.utils import json
from openhands.events.action import (
Action,
AgentDelegateAction,
Expand Down Expand Up @@ -85,6 +86,17 @@ def __init__(
Parameters:
- llm (LLM): The llm to be used by this agent
"""

# import pdb; pdb.set_trace()
llm_config = LLMConfig(
model='litellm_proxy/claude-3-5-sonnet-20241022',
api_key='REDACTED',
temperature=0.0,
base_url='https://llm-proxy.app.all-hands.dev',
)
llm = LLM(llm_config)
# TODO: Remove this once we have a real AgentConfig
config = AgentConfig()
super().__init__(llm, config)
self.pending_actions: deque[Action] = deque()
self.reset()
Expand Down Expand Up @@ -369,6 +381,11 @@ def step(self, state: State) -> Action:
- MessageAction(content) - Message action to run (e.g. ask for clarification)
- AgentFinishAction() - end the interaction
"""

# If this agent has a supervisor, we need to get the time to stop from the supervisor
if self.when_to_stop < 0 and state.inputs.get('when_to_stop', None):
self.when_to_stop: bool = state.inputs['when_to_stop']

# Continue with pending actions if any
if self.pending_actions:
return self.pending_actions.popleft()
Expand Down Expand Up @@ -469,6 +486,14 @@ def _get_messages(self, state: State) -> list[Message]:
else:
raise ValueError(f'Unknown event type: {type(event)}')

if state.inputs.get('next_step', ''):
messages_to_add = [
Message(
role='user',
content=[TextContent(text=state.inputs['next_step'])],
)
]

# Check pending tool call action messages and see if they are complete
_response_ids_to_remove = []
for (
Expand Down Expand Up @@ -501,6 +526,13 @@ def _get_messages(self, state: State) -> list[Message]:
self.prompt_manager.enhance_message(message)
messages.append(message)

if state.inputs.get('next_step', ''):
messages.append(
Message(
role='user', content=[TextContent(text=state.inputs['next_step'])]
)
)

if self.llm.is_caching_prompt_active():
# NOTE: this is only needed for anthropic
# following logic here:
Expand Down
22 changes: 19 additions & 3 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,9 @@ def __init__(self):
),
)

_FINISH_DESCRIPTION = """Finish the interaction when the task is complete OR if the assistant cannot proceed further with the task."""
_FINISH_DESCRIPTION = (
"""Finish the interaction when the task is successfully complete."""
)

FinishTool = ChatCompletionToolParam(
type='function',
Expand All @@ -455,6 +457,18 @@ def __init__(self):
),
)

# Text shown to the model describing when the `help` tool should be called.
_HELP_DESCRIPTION = 'Request assistance when the assistant cannot proceed further with the task.'

# Parameter-less function tool named `help`; the counterpart of the `finish`
# tool for the case where the assistant is stuck rather than done.
HelpTool = ChatCompletionToolParam(
    function=ChatCompletionToolParamFunctionChunk(
        description=_HELP_DESCRIPTION,
        name='help',
    ),
    type='function',
)


def combine_thought(action: Action, thought: str) -> Action:
if not hasattr(action, 'thought'):
Expand Down Expand Up @@ -497,7 +511,9 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
inputs=arguments,
)
elif tool_call.function.name == 'finish':
action = AgentFinishAction()
action = AgentFinishAction(outputs={'fixed': True})
elif tool_call.function.name == 'help':
action = AgentFinishAction(outputs={'fixed': False})
elif tool_call.function.name == 'edit_file':
action = FileEditAction(**arguments)
elif tool_call.function.name == 'str_replace_editor':
Expand Down Expand Up @@ -555,7 +571,7 @@ def get_tools(
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
) -> list[ChatCompletionToolParam]:
tools = [CmdRunTool, FinishTool]
tools = [CmdRunTool, FinishTool, HelpTool]
if codeact_enable_browsing:
tools.append(WebReadTool)
tools.append(BrowserTool)
Expand Down
1 change: 0 additions & 1 deletion openhands/agenthub/delegator_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def step(self, state: State) -> Action:

if not isinstance(last_observation, AgentDelegateObservation):
raise Exception('Last observation is not an AgentDelegateObservation')

goal, _ = state.get_current_user_intent()
if self.current_delegate == 'study':
self.current_delegate = 'coder'
Expand Down
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/coder/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ Do NOT finish until you have completed the tasks.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/study_repo_for_task/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ implement the solution. If the codebase is empty, you should call the `finish` a

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
Expand Down
8 changes: 7 additions & 1 deletion openhands/agenthub/micro/verifier/prompt.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ explaining what the problem is.

## History
{{ instructions.history_truncated }}
{{ history_to_json(state.history, max_events=20) }}
{% for event in state.history[-20:] %}
{% if event.source == "agent" %}
Agent: {{ event.action }} - {{ event.content if event.content else event.observation }}
{% else %}
User: {{ event.content if event.content else event.observation }}
{% endif %}
{% endfor %}

## Format
{{ instructions.format.action }}
4 changes: 4 additions & 0 deletions openhands/agenthub/supervisor_agent/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from openhands.agenthub.supervisor_agent.agent import SupervisorAgent
from openhands.controller.agent import Agent

# Register the SupervisorAgent class with Agent under the name 'SupervisorAgent'
# as an import-time side effect of loading this package.
Agent.register('SupervisorAgent', SupervisorAgent)
154 changes: 154 additions & 0 deletions openhands/agenthub/supervisor_agent/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import json
import logging
import re
from typing import Any, Dict, List

from openhands.agenthub.supervisor_agent.prompt import get_prompt
from openhands.controller.agent import Agent
from openhands.controller.state.state import State
from openhands.core.config import AgentConfig
from openhands.core.config.llm_config import LLMConfig
from openhands.core.message import Message, TextContent
from openhands.events.action import Action, AgentDelegateAction, AgentFinishAction
from openhands.events.observation.delegate import AgentDelegateObservation
from openhands.events.observation.observation import Observation
from openhands.llm.llm import LLM
from openhands.runtime.plugins.agent_skills import AgentSkillsRequirement
from openhands.runtime.plugins.jupyter import JupyterRequirement
from openhands.runtime.plugins.requirement import PluginRequirement


class SupervisorAgent(Agent):
    """Agent that plans a task with an LLM and delegates execution to CodeActAgent.

    On its first step the supervisor asks the LLM for a high-level plan and
    delegates the task (plus the plan) to ``CodeActAgent``. When the delegate
    comes back reporting ``fixed == False`` the supervisor reviews the
    delegate's trajectory, asks the LLM for a revised plan and delegates
    again. In every other case it finishes.
    """

    VERSION = '1.0'

    # Matches the plan the LLM is asked to wrap in <steps>...</steps>.
    _STEPS_PATTERN = re.compile(r'<steps>(.*?)</steps>', re.DOTALL)

    current_delegate: str = ''
    # NOTE: the two mutable defaults below are re-created per instance in
    # __init__ so that state never leaks between agents.
    suggested_approaches: List[Dict[str, List[str]]] = []
    suggested_approach_index: int = -1  # -1 Because we increment it before using it
    results: Dict[str, List[Any]] = {'search': [], 'code': []}
    condensed_information: str = ''
    does_it_needs_a_test: bool = False
    task: str = ''
    test_command: str = ''
    time_to_stop: int = 60  # Every 60 iterations, we stop and evaluate the approach
    phase: int = 0
    steps: str = ''

    sandbox_plugins: list[PluginRequirement] = [
        # NOTE: AgentSkillsRequirement need to go before JupyterRequirement, since
        # AgentSkillsRequirement provides a lot of Python functions,
        # and it needs to be initialized before Jupyter for Jupyter to use those functions.
        AgentSkillsRequirement(),
        JupyterRequirement(),
    ]

    # Add class attribute for tried_direct_code
    tried_direct_code: bool = False

    # Add class attribute for augmented_task
    augmented_task: str = ''

    def __init__(self, llm: LLM, config: AgentConfig):
        """Initialize the Supervisor Agent with an LLM.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        - config (AgentConfig): The agent configuration

        NOTE(review): both arguments are currently discarded and replaced by
        the hard-coded values below; confirm this is still intended before
        merging.
        """
        llm_config = LLMConfig(
            model='openai/o1-preview', api_key='REDACTED', temperature=1.0
        )
        llm = LLM(llm_config)
        # TODO: Remove this once we have a real AgentConfig
        config = AgentConfig(llm_config='o1-mini')
        super().__init__(llm, config)

        # Per-instance copies of the mutable class-level defaults.
        self.suggested_approaches = []
        self.results = {'search': [], 'code': []}

        # Set up logger
        self.logger = logging.getLogger(__name__)
        # NOTE(review): this configures the *root* logger from inside a
        # constructor; kept for parity with the original behavior.
        logging.basicConfig(level=logging.DEBUG)  # Set the logging level
        self.llm_config = llm.config

    def step(self, state: State) -> Action:
        """Decide the supervisor's next action.

        Parameters:
        - state (State): The current controller state; its history and the
          current user intent are read.

        Returns:
        - AgentDelegateAction to ``CodeActAgent`` on the first step, or when
          the last delegate reported ``fixed == False``.
        - AgentFinishAction otherwise.
        """
        self.logger.debug('Starting step with state: %s', state)
        self.logger.debug('LLM config: %s', self.llm_config)

        # Most recent observation in the history, if any.
        last_observation: Observation | None = next(
            (event for event in reversed(state.history) if isinstance(event, Observation)),
            None,
        )

        task, _ = state.get_current_user_intent()
        self.task = task or ''

        if self.phase == 0:
            # First call: draft a plan and hand the task to the delegate.
            self.phase += 1
            prompt = get_prompt(self.task, prompt_type='high_level_task')
            self.steps = self._extract_steps(self.get_response(prompt))
            return self._delegate_to_coder()

        if not isinstance(last_observation, AgentDelegateObservation):
            return AgentFinishAction()

        # A missing 'fixed' flag is treated as success.
        if last_observation.outputs.get('fixed', True):
            return AgentFinishAction()

        # The delegate gave up: review what it did and re-plan.
        trajectory = self._load_trajectory(last_observation.outputs)
        prompt = get_prompt(
            self.task,
            'right_track',
            trajectory=trajectory,
            plan=self.steps,
        )
        self.steps = self._extract_steps(self.get_response(prompt))
        return self._delegate_to_coder()

    def get_response(self, prompt: str) -> str:
        """Send ``prompt`` to the LLM as a single user message and return the reply text.

        Returns an empty string when the completion carries no text content.
        """
        message = Message(role='user', content=[TextContent(text=prompt)])
        response = self.llm.completion(
            messages=self.llm.format_messages_for_llm(message)
        )
        return response['choices'][0]['message']['content'] or ''

    def _extract_steps(self, raw_response: str) -> str:
        """Return the text inside ``<steps>...</steps>``, or the task if the tag is absent."""
        match = self._STEPS_PATTERN.search(raw_response)
        return match.group(1).strip('"') if match else self.task

    def _delegate_to_coder(self) -> AgentDelegateAction:
        """Build the delegation to CodeActAgent carrying the task, plan and stop budget."""
        return AgentDelegateAction(
            agent='CodeActAgent',
            inputs={
                'task': self.task,
                'plan': self.steps,
                'when_to_stop': self.time_to_stop,
            },
        )

    def _load_trajectory(self, outputs: Dict[str, Any]) -> List[Message]:
        """Rebuild the delegate's message trajectory from its outputs.

        The trajectory is read from the ``'trayectory'`` key (spelling kept for
        compatibility with the producer), falling back to ``'trajectory'``.
        A missing or undecodable trajectory yields an empty list instead of
        raising, so the supervisor can still re-plan.
        """
        raw = outputs.get('trayectory')
        if raw is None:
            raw = outputs.get('trajectory')
        if not raw:
            self.logger.warning('Delegate returned no trajectory; re-planning without it')
            return []

        try:
            trajectory_data = json.loads(raw) if isinstance(raw, str) else raw
        except json.JSONDecodeError:
            self.logger.warning('Delegate trajectory is not valid JSON; ignoring it')
            return []

        messages: List[Message] = []
        for msg_dict in trajectory_data:
            content = msg_dict.get('content')
            if isinstance(content, list):
                # Only the first content part is kept, as before; an empty
                # list now maps to empty text instead of raising IndexError.
                first_part = content[0] if content else {}
                text = (
                    first_part.get('text', '')
                    if isinstance(first_part, dict)
                    else str(first_part)
                )
            else:
                text = content
            messages.append(
                Message(
                    role=msg_dict.get('role'),
                    content=[TextContent(text=text or '')],
                    tool_call_id=msg_dict.get('tool_call_id'),
                    name=msg_dict.get('name'),
                )
            )
        return messages
Loading
Loading