Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PRO-952] Fix up our Replay x OH State Machine #18

Merged
merged 17 commits into from
Jan 24, 2025
68 changes: 19 additions & 49 deletions openhands/agenthub/codeact_agent/codeact_agent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
from collections import deque

Expand All @@ -9,7 +10,7 @@
from openhands.core.config import AgentConfig
from openhands.core.logger import openhands_logger as logger
from openhands.core.message import ImageContent, Message, TextContent
from openhands.core.schema.replay import ReplayDebuggingPhase
from openhands.core.schema.replay import ReplayPhase
from openhands.events.action import (
Action,
AgentDelegateAction,
Expand All @@ -36,12 +37,14 @@
from openhands.events.observation.error import ErrorObservation
from openhands.events.observation.observation import Observation
from openhands.events.observation.replay import (
ReplayPhaseUpdateObservation,
ReplayToolCmdOutputObservation,
ReplayObservation,
)
from openhands.events.replay import replay_enhance_action
from openhands.events.serialization.event import truncate_content
from openhands.llm.llm import LLM
from openhands.replay.replay_initial_analysis import replay_enhance_action
from openhands.replay.replay_phases import (
get_replay_observation_prompt,
)
from openhands.runtime.plugins import (
AgentSkillsRequirement,
JupyterRequirement,
Expand Down Expand Up @@ -102,7 +105,7 @@ def __init__(

# We're in normal mode by default (even if replay is not enabled).
# This will initialize the set of tools the agent has access to.
self.replay_phase_changed(ReplayDebuggingPhase.Normal)
self.update_tools(ReplayPhase.Normal)

self.prompt_manager = PromptManager(
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro')
Expand Down Expand Up @@ -253,38 +256,8 @@ def get_observation_message(
)
text += f'\n[Command finished with exit code {obs.exit_code}]'
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, ReplayToolCmdOutputObservation):
# if it doesn't have tool call metadata, it was triggered by a user action
if obs.tool_call_metadata is None:
text = truncate_content(
f'\nObserved result of replay command executed by user:\n{obs.content}',
max_message_chars,
)
else:
text = obs.content
message = Message(role='user', content=[TextContent(text=text)])
elif isinstance(obs, ReplayPhaseUpdateObservation):
# NOTE: The phase change itself is handled in AgentController.
new_phase = obs.new_phase
if new_phase == ReplayDebuggingPhase.Edit:
# Tell the agent to stop analyzing and start editing:
text = """
You have concluded the analysis.

IMPORTANT: NOW review, then implement the hypothesized changes using tools. The code is available in the workspace. Start by answering these questions:
1. What is the goal of the investigation according to the initial prompt and initial analysis? IMPORTANT. PAY ATTENTION TO THIS. THIS IS THE ENTRY POINT OF EVERYTHING.
2. Given (1), is the hypothesis's `problem` description correct? Does it match the goal of the investigation?
3. Do the `editSuggestions` actually address the issue?
4. Rephrase the hypothesis so that it is consistent and correct.

IMPORTANT: Don't stop. Keep working.
IMPORTANT: Don't stop. Keep working.
"""
message = Message(role='user', content=[TextContent(text=text)])
else:
raise NotImplementedError(
f'Unhandled ReplayPhaseUpdateAction: {new_phase}'
)
elif isinstance(obs, ReplayObservation):
message = get_replay_observation_prompt(obs, max_message_chars)
elif isinstance(obs, IPythonRunCellObservation):
text = obs.content
# replace base64 images with a placeholder
Expand Down Expand Up @@ -344,22 +317,16 @@ def reset(self) -> None:
"""Resets the CodeAct Agent."""
super().reset()

def replay_phase_changed(self, phase: ReplayDebuggingPhase) -> None:
"""Called whenever the phase of the replay debugging process changes.

We currently use this to give the agent access to different tools for the
different phases.
"""
def update_tools(self, phase: ReplayPhase) -> None:
    """Rebuild ``self.tools`` for the given replay phase and log the result.

    Calls ``codeact_function_calling.get_tools`` with the agent's config
    flags plus ``phase``, stores the returned tool list on ``self.tools``,
    and logs the names of the selected tools at INFO level.

    Args:
        phase: The replay phase to select tools for.
    """
    self.tools = codeact_function_calling.get_tools(
        codeact_enable_browsing=self.config.codeact_enable_browsing,
        codeact_enable_jupyter=self.config.codeact_enable_jupyter,
        codeact_enable_llm_editor=self.config.codeact_enable_llm_editor,
        codeact_enable_replay=self.config.codeact_enable_replay,
        replay_phase=phase,
    )
    # Collect the names outside the f-string: reusing the f-string's own quote
    # character inside a replacement field is a SyntaxError before Python 3.12.
    tool_names = [tool['function']['name'] for tool in self.tools]
    logger.info(
        f'[REPLAY] update_tools for phase {phase}: {json.dumps(tool_names, indent=2)}'
    )

def step(self, state: State) -> Action:
Expand Down Expand Up @@ -388,7 +355,7 @@ def step(self, state: State) -> Action:
return AgentFinishAction()

if self.config.codeact_enable_replay:
# Replay enhancement.
# Check whether we should enhance the prompt.
enhance_action = replay_enhance_action(state, self.config.is_workspace_repo)
if enhance_action:
logger.info('[REPLAY] Enhancing prompt for Replay recording...')
Expand All @@ -402,7 +369,10 @@ def step(self, state: State) -> Action:
params['tools'] = self.tools
if self.mock_function_calling:
params['mock_function_calling'] = True
# logger.debug(f'#######\nCodeActAgent.step: messages:\n{json.dumps(params)}\n\n#######\n')

# # Debug log the raw input to the LLM:
# logger.debug(f'#######\nCodeActAgent.step: RAW LLM INPUT:\n{repr(params)}\n\n#######\n')

response = self.llm.completion(**params)
actions = codeact_function_calling.response_to_actions(response, state)
for action in actions:
Expand Down
226 changes: 13 additions & 213 deletions openhands/agenthub/codeact_agent/function_calling.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from openhands.controller.state.state import State
from openhands.core.exceptions import FunctionCallNotExistsError
from openhands.core.logger import openhands_logger as logger
from openhands.core.schema import ReplayDebuggingPhase
from openhands.core.schema import ReplayPhase
from openhands.events.action import (
Action,
AgentDelegateAction,
Expand All @@ -26,171 +26,13 @@
IPythonRunCellAction,
MessageAction,
)
from openhands.events.action.replay import (
ReplayPhaseUpdateAction,
ReplayToolCmdRunAction,
)
from openhands.events.tool import ToolCallMetadata

# ---------------------------------------------------------
# Tool: inspect-data
# ---------------------------------------------------------
# Human-readable description handed to the LLM for the `inspect-data` tool.
# Surrounding newlines are kept here and stripped at the point of use.
_REPLAY_INSPECT_DATA_DESCRIPTION = (
    '\n'
    'Explains value, data flow and origin information for `expression` at `point`.\n'
    'IMPORTANT: Prefer using inspect-data over inspect-point.\n'
)

# JSON-schema style description of the arguments accepted by `inspect-data`.
# All four properties are strings and all four are required.
_REPLAY_INSPECT_DATA_PARAMETERS = {
    'type': 'object',
    'properties': {
        'expression': {
            'type': 'string',
            'description': 'A valid JS expression. IMPORTANT: First pick the best expression. If the expression is an object: Prefer "array[0]" over "array" and "o.x" over "o" to get closer to the origin and creation site of important data points. Prefer nested object over primitive expressions.',
        },
        'point': {
            'type': 'string',
            'description': 'The point at which to inspect the runtime. The first point comes from the `thisPoint` in the Initial analysis.',
        },
        'explanation': {
            'type': 'string',
            'description': 'Give a concise explanation as to why you take this investigative step.',
        },
        'explanation_source': {
            'type': 'string',
            'description': 'Explain which data you saw in the previous analysis results that informs this step.',
        },
    },
    'required': ['expression', 'point', 'explanation', 'explanation_source'],
}

# Tool definition for `inspect-data`, assembled from the pieces above.
ReplayInspectDataTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='inspect-data',
        description=_REPLAY_INSPECT_DATA_DESCRIPTION.strip(),
        parameters=_REPLAY_INSPECT_DATA_PARAMETERS,
    ),
)

# ---------------------------------------------------------
# Tool: inspect-point
# ---------------------------------------------------------
_REPLAY_INSPECT_POINT_DESCRIPTION = """
Explains dynamic control flow and data flow dependencies of the code at `point`.
Use this tool instead of `inspect-data` only when you don't have a specific data point to investigate.
"""

ReplayInspectPointTool = ChatCompletionToolParam(
type='function',
function=ChatCompletionToolParamFunctionChunk(
name='inspect-point',
description=_REPLAY_INSPECT_POINT_DESCRIPTION.strip(),
parameters={
'type': 'object',
'properties': {
'point': {'type': 'string'},
},
'required': ['point'],
},
),
from openhands.replay.replay_tools import (
get_replay_tools,
handle_replay_tool_call,
is_replay_tool,
)

# ---------------------------------------------------------
# Tool: SubmitHypothesis
# TODO: Divide this into multiple steps -
# 1. The first submission must be as simple as possible to take little computational effort from the analysis steps.
# 2. The second submission, after analysis has already concluded, must be as complete as possible.
# ---------------------------------------------------------
# _REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
# Your investigation has yielded a complete thin slice from symptom to root cause,
# enough proof to let the `CodeEdit` agent take over to fix the bug.
# DO NOT GUESS. You must provide exact code in the exact right location to fix this bug,
# based on evidence you have gathered.
# """

# ReplaySubmitHypothesisTool = ChatCompletionToolParam(
# type='function',
# function=ChatCompletionToolParamFunctionChunk(
# name='submit-hypothesis',
# description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
# parameters={
# 'type': 'object',
# 'properties': {
# 'rootCauseHypothesis': {'type': 'string'},
# 'thinSlice': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'point': {'type': 'string'},
# 'code': {'type': 'string'},
# 'role': {'type': 'string'},
# },
# 'required': ['point', 'code', 'role'],
# },
# },
# 'modifications': {
# 'type': 'array',
# 'items': {
# 'type': 'object',
# 'properties': {
# 'kind': {
# 'type': 'string',
# 'enum': ['add', 'remove', 'modify'],
# },
# 'newCode': {'type': 'string'},
# 'oldCode': {'type': 'string'},
# 'location': {'type': 'string'},
# 'point': {'type': 'string'},
# # NOTE: Even though, we really want the `line` here, it will lead to much worse performance because the agent has a hard time computing correct line numbers from its point-based investigation.
# # Instead of requiring a line number, the final fix will be more involved, as explained in the issue.
# # see: https://linear.app/replay/issue/PRO-939/use-tools-data-flow-analysis-for-10608#comment-3b7ae176
# # 'line': {'type': 'number'},
# 'briefExplanation': {'type': 'string'},
# 'verificationProof': {'type': 'string'},
# },
# 'required': [
# 'kind',
# 'location',
# 'briefExplanation',
# # 'line',
# 'verificationProof',
# ],
# },
# },
# },
# 'required': ['rootCauseHypothesis', 'thinSlice', 'modifications'],
# },
# ),
# )
_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION = """
# Use this tool to conclude your analysis and move on to code editing.
# """

# JSON-schema style description of the arguments accepted by
# `submit-hypothesis`. Only `rootCauseHypothesis` is mandatory; `problem` and
# `editSuggestions` are optional free-text fields.
_REPLAY_SUBMIT_HYPOTHESIS_PARAMETERS = {
    'type': 'object',
    'properties': {
        'problem': {
            'type': 'string',
            'description': 'One-sentence explanation of the core problem that this will solve.',
        },
        'rootCauseHypothesis': {'type': 'string'},
        'editSuggestions': {
            'type': 'string',
            'description': 'Provide suggestions to fix the bug, if you know enough about the code that requires modification.',
        },
    },
    'required': ['rootCauseHypothesis'],
}

# Tool definition for `submit-hypothesis`.
ReplaySubmitHypothesisTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='submit-hypothesis',
        description=_REPLAY_SUBMIT_HYPOTHESIS_DESCRIPTION.strip(),
        parameters=_REPLAY_SUBMIT_HYPOTHESIS_PARAMETERS,
    ),
)

REPLAY_TOOLS = ['inspect-data', 'inspect-point', 'submit-hypothesis']


# ---------------------------------------------------------
# OH default tools.
# ---------------------------------------------------------
Expand Down Expand Up @@ -631,36 +473,8 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
) from e
if tool_call.function.name == 'execute_bash':
action = CmdRunAction(**arguments)
elif tool_call.function.name in REPLAY_TOOLS:
logger.info(
f'[REPLAY] TOOL_CALL {tool_call.function.name} - arguments: {json.dumps(arguments, indent=2)}'
)
if tool_call.function.name == 'inspect-data':
# Remove explanation props.
arguments = {
k: v for k, v in arguments.items() if 'explanation' not in k
}
action = ReplayToolCmdRunAction(
command_name='inspect-data',
command_args=arguments
| {'recordingId': state.replay_recording_id},
)
elif tool_call.function.name == 'inspect-point':
# if arguments['expression'] == 'wiredRules': # hackfix for 10608 experiment
# raise FunctionCallValidationError(f'wiredRules is irrelevant to the problem. Try something else.')
action = ReplayToolCmdRunAction(
command_name='inspect-point',
command_args=arguments
| {'recordingId': state.replay_recording_id},
)
elif tool_call.function.name == 'submit-hypothesis':
action = ReplayPhaseUpdateAction(
new_phase=ReplayDebuggingPhase.Edit, info=json.dumps(arguments)
)
else:
raise ValueError(
f'Unknown Replay tool. Make sure to add them all to REPLAY_TOOLS: {tool_call.function.name}'
)
elif is_replay_tool(tool_call.function.name):
action = handle_replay_tool_call(tool_call, arguments, state)
elif tool_call.function.name == 'execute_ipython_cell':
action = IPythonRunCellAction(**arguments)
elif tool_call.function.name == 'delegate_to_browsing_agent':
Expand All @@ -686,6 +500,7 @@ def response_to_actions(response: ModelResponse, state: State) -> list[Action]:
raise FunctionCallNotExistsError(
f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
)
assert action

# We only add thought to the first action
if i == 0:
Expand Down Expand Up @@ -727,31 +542,16 @@ def get_tools(
codeact_enable_llm_editor: bool = False,
codeact_enable_jupyter: bool = False,
codeact_enable_replay: bool = False,
codeact_replay_phase: ReplayDebuggingPhase = ReplayDebuggingPhase.Normal,
replay_phase: ReplayPhase = ReplayPhase.Normal,
) -> list[ChatCompletionToolParam]:
default_tools = get_default_tools(
codeact_enable_browsing,
codeact_enable_llm_editor,
codeact_enable_jupyter,
)
if not codeact_enable_replay or codeact_replay_phase == ReplayDebuggingPhase.Normal:
# Use the default tools when not in a Replay-specific phase.
return default_tools

if codeact_enable_replay:
analysis_tools = [
ReplayInspectDataTool,
ReplayInspectPointTool,
]
if codeact_replay_phase == ReplayDebuggingPhase.Analysis:
# Analysis tools only. This phase is concluded upon submit-hypothesis.
tools = analysis_tools + [ReplaySubmitHypothesisTool]
elif codeact_replay_phase == ReplayDebuggingPhase.Edit:
# Combine default and analysis tools.
tools = default_tools + analysis_tools
else:
raise ValueError(
f'Unhandled ReplayDebuggingPhase in get_tools: {codeact_replay_phase}'
)
# Handle Replay tool updates.
return get_replay_tools(replay_phase, default_tools)

return tools
# Just the default tools.
return default_tools
Loading
Loading