
Commit

fix tests
xingyaoww committed Nov 13, 2024
1 parent 3687d7b commit 686cb7e
Showing 9 changed files with 44 additions and 209 deletions.
openhands/agenthub/codeact_agent/codeact_agent.py (3 changes: 1 addition & 2 deletions)
@@ -103,7 +103,7 @@ def __init__(
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro')
if self.config.use_microagents
else None,
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'),
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
disabled_microagents=self.config.disabled_microagents,
)

@@ -315,7 +315,6 @@ def step(self, state: State) -> Action:
if self.mock_function_calling:
params['mock_function_calling'] = True
response = self.llm.completion(**params)

actions = codeact_function_calling.response_to_actions(response)
for action in actions:
self.pending_actions.append(action)
openhands/agenthub/codeact_agent/function_calling.py (3 changes: 0 additions & 3 deletions)
@@ -53,9 +53,6 @@
* The assistant should define variables and import packages before using them.
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
"""
# We are not using agentskills's file_ops for viewing files now because StrReplaceEditorTool already supports viewing files
# """* Apart from the standard Python library, the assistant can also use the following functions (already imported):
# {AgentSkillsRequirement.documentation}"""

IPythonTool = ChatCompletionToolParam(
type='function',
@@ -4,4 +4,3 @@ You are OpenHands agent, a helpful AI assistant that can interact with a compute
* When configuring git credentials, use "openhands" as the user.name and "[email protected]" as the user.email by default, unless explicitly instructed otherwise.
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
</IMPORTANT>

openhands/llm/llm.py (4 changes: 2 additions & 2 deletions)
@@ -153,7 +153,7 @@ def __init__(
if self.is_function_calling_active():
logger.debug('LLM: model supports function calling')

completion_unwrapped = self._completion
self._completion_unwrapped = self._completion

@self.retry_decorator(
num_retries=self.config.num_retries,
@@ -218,7 +218,7 @@ def wrapper(*args, **kwargs):

try:
# we don't support streaming here, thus we get a ModelResponse
resp: ModelResponse = completion_unwrapped(*args, **kwargs)
resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)

non_fncall_response = copy.deepcopy(resp)
if mock_function_calling:
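
Storing the unwrapped callable on the instance, rather than in a closure-local variable, is what lets the rewritten test_prompt_caching_headers further down stub the raw completion call directly. A minimal sketch of that pattern, using a hypothetical TinyLLM class rather than the real openhands.llm.llm.LLM:

from typing import Any, Callable

class TinyLLM:
    # Hypothetical stand-in for the pattern above, not the real LLM class.
    def __init__(self, completion_fn: Callable[..., Any]) -> None:
        # Keep the raw callable on the instance so tests can swap it out.
        self._completion_unwrapped = completion_fn

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Retry logic wraps this call in the real class; the wrapper
            # always dispatches through the instance attribute.
            return self._completion_unwrapped(*args, **kwargs)

        self._completion = wrapper

# A test can replace the raw call without touching the wrapper:
llm = TinyLLM(lambda **kw: {'content': 'real'})
llm._completion_unwrapped = lambda **kw: {'content': 'stubbed'}
assert llm._completion()['content'] == 'stubbed'
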
openhands/utils/prompt.py (8 changes: 1 addition & 7 deletions)
@@ -18,7 +18,6 @@ class PromptManager:
Attributes:
prompt_dir (str): Directory containing prompt templates.
agent_skills_docs (str): Documentation of agent skills.
microagent_dir (str): Directory containing microagent specifications.
disabled_microagents (list[str] | None): List of microagents to disable. If None, all microagents are enabled.
"""
@@ -27,11 +26,9 @@ def __init__(
self,
prompt_dir: str,
microagent_dir: str | None = None,
agent_skills_docs: str = '',
disabled_microagents: list[str] | None = None,
):
self.prompt_dir: str = prompt_dir
self.agent_skills_docs: str = agent_skills_docs

self.system_template: Template = self._load_template('system_prompt')
self.user_template: Template = self._load_template('user_prompt')
@@ -62,10 +59,7 @@ def _load_template(self, template_name: str) -> Template:
return Template(file.read())

def get_system_message(self) -> str:
rendered = self.system_template.render(
agent_skills_docs=self.agent_skills_docs,
).strip()
return rendered
return self.system_template.render().strip()

def get_example_user_message(self) -> str:
"""This is the initial user message provided to the agent
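
With agent_skills_docs gone, get_system_message renders the system template with no variables at all. A hedged sketch of that reduced path using Jinja2 directly (the 'system_prompt.j2' file name is an assumption; the diff only shows the 'system_prompt' template key):

import os

from jinja2 import Template

def load_system_message(prompt_dir: str) -> str:
    # Read the system prompt template and render it with no template
    # variables, mirroring the simplified get_system_message() above.
    path = os.path.join(prompt_dir, 'system_prompt.j2')  # assumed file name
    with open(path, 'r') as f:
        return Template(f.read()).render().strip()
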
tests/unit/test_codeact_agent.py (4 changes: 0 additions & 4 deletions)
@@ -37,7 +37,6 @@ def test_cmd_output_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Command output' in result.content[0].text
assert 'Command finished with exit code 0' in result.content[0].text

@@ -57,7 +56,6 @@ def test_ipython_run_cell_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'IPython output' in result.content[0].text
assert (
'![image](data:image/png;base64, ...) already displayed to user'
@@ -80,7 +78,6 @@ def test_agent_delegate_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Delegated agent output' in result.content[0].text


@@ -96,7 +93,6 @@ def test_error_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Error message' in result.content[0].text
assert 'Error occurred in processing last action' in result.content[0].text

tests/unit/test_prompt_caching.py (175 changes: 31 additions & 144 deletions)
@@ -1,39 +1,43 @@
from unittest.mock import Mock, patch
from unittest.mock import Mock

import pytest
from litellm import ModelResponse

from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig, LLMConfig
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.events.action import MessageAction
from openhands.llm.llm import LLM


@pytest.fixture
def mock_llm():
llm = Mock(spec=LLM)
llm.config = LLMConfig(model='claude-3-5-sonnet-20241022', caching_prompt=True)
llm.is_caching_prompt_active.return_value = True
llm = LLM(
LLMConfig(
model='claude-3-5-sonnet-20241022',
api_key='fake',
caching_prompt=True,
)
)
return llm


@pytest.fixture(params=[False, True])
def codeact_agent(mock_llm, request):
@pytest.fixture
def codeact_agent(mock_llm):
config = AgentConfig()
config.function_calling = request.param
return CodeActAgent(mock_llm, config)


def response_mock(content: str):
def response_mock(content: str, tool_call_id: str):
class MockModelResponse:
def __init__(self, content):
def __init__(self, content, tool_call_id):
self.choices = [
{
'message': {
'content': content,
'tool_calls': [
{
'function': {
'id': tool_call_id,
'name': 'execute_bash',
'arguments': '{}',
}
@@ -46,10 +50,10 @@ def __init__(self, content):
def model_dump(self):
return {'choices': self.choices}

return MockModelResponse(content)
return ModelResponse(**MockModelResponse(content, tool_call_id).model_dump())


def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
def test_get_messages(codeact_agent: CodeActAgent):
# Add some events to history
history = list()
message_action_1 = MessageAction('Initial user message')
@@ -78,13 +82,9 @@ def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
) # System, initial user + user message, agent message, last user message
assert messages[0].content[0].cache_prompt # system message
assert messages[1].role == 'user'
if not codeact_agent.config.function_calling:
assert messages[1].content[0].text.endswith("LET'S START!")
assert messages[1].content[1].text.endswith('Initial user message')
else:
assert messages[1].content[0].text.endswith('Initial user message')
assert messages[1].content[0].text.endswith('Initial user message')
# we add cache breakpoint to the last 3 user messages
assert messages[1].content[-1].cache_prompt
assert messages[1].content[0].cache_prompt

assert messages[3].role == 'user'
assert messages[3].content[0].text == ('Hello, agent!')
@@ -95,14 +95,6 @@ def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
assert messages[5].role == 'user'
assert messages[5].content[0].text.startswith('Laaaaaaaast!')
assert messages[5].content[0].cache_prompt
if not codeact_agent.config.function_calling:
assert (
messages[5]
.content[1]
.text.endswith(
'ENVIRONMENT REMINDER: You have 5 turns left to complete the task. When finished reply with <finish></finish>.'
)
)


def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
@@ -132,114 +124,20 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
) # Including the initial system+user + 2 last user message

# Verify that these are indeed the last two user messages (from start)
if not codeact_agent.config.function_calling:
assert (
cached_user_messages[0].content[0].text.startswith('A chat between')
) # system message
assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent')
assert cached_user_messages[2].content[0].text.startswith('User message 1')
assert cached_user_messages[3].content[0].text.startswith('User message 1')


def test_get_messages_with_cmd_action(codeact_agent: CodeActAgent):
if codeact_agent.config.function_calling:
pytest.skip('Skipping this test for function calling')

history = list()

# Add a mix of actions and observations
message_action_1 = MessageAction(
"Let's list the contents of the current directory."
)
message_action_1._source = 'user'
history.append(message_action_1)

cmd_action_1 = CmdRunAction('ls -l', thought='List files in current directory')
cmd_action_1._source = 'agent'
cmd_action_1._id = 'cmd_1'
history.append(cmd_action_1)

cmd_observation_1 = CmdOutputObservation(
content='total 0\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file1.txt\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file2.txt',
command_id=cmd_action_1._id,
command='ls -l',
exit_code=0,
)
cmd_observation_1._source = 'user'
history.append(cmd_observation_1)

message_action_2 = MessageAction("Now, let's create a new directory.")
message_action_2._source = 'agent'
history.append(message_action_2)

cmd_action_2 = CmdRunAction('mkdir new_directory', thought='Create a new directory')
cmd_action_2._source = 'agent'
cmd_action_2._id = 'cmd_2'
history.append(cmd_action_2)

cmd_observation_2 = CmdOutputObservation(
content='',
command_id=cmd_action_2._id,
command='mkdir new_directory',
exit_code=0,
)
cmd_observation_2._source = 'user'
history.append(cmd_observation_2)

codeact_agent.reset()
messages = codeact_agent._get_messages(
Mock(history=history, max_iterations=5, iteration=0)
)

# Assert the presence of key elements in the messages
assert (
messages[1]
.content[-1]
.text.startswith("Let's list the contents of the current directory.")
) # user, included in the initial message
if not codeact_agent.config.function_calling:
assert any(
'List files in current directory\n<execute_bash>\nls -l\n</execute_bash>'
in msg.content[0].text
for msg in messages
) # agent
assert any(
'total 0\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file1.txt\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file2.txt'
in msg.content[0].text
for msg in messages
) # user, observation
assert any(
"Now, let's create a new directory." in msg.content[0].text for msg in messages
) # agent
if not codeact_agent.config.function_calling:
assert messages[4].content[1].text.startswith('Create a new directory') # agent
assert any(
'finished with exit code 0' in msg.content[0].text for msg in messages
) # user, observation
assert (
messages[5].content[0].text.startswith('OBSERVATION:\n\n')
) # user, observation

# prompt cache is added to the system message
assert messages[0].content[0].cache_prompt
# and the first initial user message
assert messages[1].content[-1].cache_prompt
# and to the last two user messages
assert messages[3].content[0].cache_prompt
assert messages[5].content[0].cache_prompt

# reminder is added to the last user message
if not codeact_agent.config.function_calling:
assert 'ENVIRONMENT REMINDER: You have 5 turns' in messages[5].content[1].text


def test_prompt_caching_headers(codeact_agent: CodeActAgent):
history = list()
if codeact_agent.config.function_calling:
pytest.skip('Skipping this test for function calling')

# Setup
history.append(MessageAction('Hello, agent!'))
history.append(MessageAction('Hello, user!'))
msg1 = MessageAction('Hello, agent!')
msg1._source = 'user'
history.append(msg1)
msg2 = MessageAction('Hello, user!')
msg2._source = 'agent'
history.append(msg2)

mock_state = Mock()
mock_state.history = history
@@ -253,23 +151,12 @@ def check_headers(**kwargs):
assert 'extra_headers' in kwargs
assert 'anthropic-beta' in kwargs['extra_headers']
assert kwargs['extra_headers']['anthropic-beta'] == 'prompt-caching-2024-07-31'
# Create a mock response with the expected structure
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message = Mock()
mock_response.choices[0].message.content = 'Hello! How can I assist you today?'
return mock_response
return ModelResponse(
choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
)

# Use patch to replace litellm_completion with our check_headers function
with patch('openhands.llm.llm.litellm_completion', side_effect=check_headers):
# Also patch the action parser to return a MessageAction
with patch.object(
codeact_agent.action_parser,
'parse',
return_value=MessageAction('Hello! How can I assist you today?'),
):
# Act
result = codeact_agent.step(mock_state)
codeact_agent.llm._completion_unwrapped = check_headers
result = codeact_agent.step(mock_state)

# Assert
assert isinstance(result, MessageAction)
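
One detail worth noting in the rewritten tests: both response_mock and check_headers now return a real litellm.ModelResponse built from plain dicts instead of a bare Mock, so the agent code under test exercises the same attribute access it uses in production. A small sketch of that construction (the content string is illustrative):

from litellm import ModelResponse

# litellm normalizes plain dicts into its typed choice/message objects, so a
# realistic response can be built without hand-rolled Mock classes.
resp = ModelResponse(
    choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
)
assert resp.choices[0].message.content == 'Hello! How can I assist you today?'
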