
Commit

fix tests
xingyaoww committed Nov 13, 2024
1 parent 3687d7b commit 686cb7e
Showing 9 changed files with 44 additions and 209 deletions.
openhands/agenthub/codeact_agent/codeact_agent.py (3 changes: 1 addition & 2 deletions)
@@ -103,7 +103,7 @@ def __init__(
microagent_dir=os.path.join(os.path.dirname(__file__), 'micro')
if self.config.use_microagents
else None,
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts', 'tools'),
prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'),
disabled_microagents=self.config.disabled_microagents,
)

@@ -315,7 +315,6 @@ def step(self, state: State) -> Action:
if self.mock_function_calling:
params['mock_function_calling'] = True
response = self.llm.completion(**params)

actions = codeact_function_calling.response_to_actions(response)
for action in actions:
self.pending_actions.append(action)
openhands/agenthub/codeact_agent/function_calling.py (3 changes: 0 additions & 3 deletions)
@@ -53,9 +53,6 @@
* The assistant should define variables and import packages before using them.
* The variable defined in the IPython environment will not be available outside the IPython environment (e.g., in terminal).
"""
# We are not using agentskills's file_ops for viewing files now because StrReplaceEditorTool already supports viewing files
# """* Apart from the standard Python library, the assistant can also use the following functions (already imported):
# {AgentSkillsRequirement.documentation}"""

IPythonTool = ChatCompletionToolParam(
type='function',
@@ -4,4 +4,3 @@ You are OpenHands agent, a helpful AI assistant that can interact with a compute
* When configuring git credentials, use "openhands" as the user.name and "[email protected]" as the user.email by default, unless explicitly instructed otherwise.
* The assistant MUST NOT include comments in the code unless they are necessary to describe non-obvious behavior.
</IMPORTANT>

openhands/llm/llm.py (4 changes: 2 additions & 2 deletions)
@@ -153,7 +153,7 @@ def __init__(
if self.is_function_calling_active():
logger.debug('LLM: model supports function calling')

completion_unwrapped = self._completion
self._completion_unwrapped = self._completion

@self.retry_decorator(
num_retries=self.config.num_retries,
@@ -218,7 +218,7 @@ def wrapper(*args, **kwargs):

try:
# we don't support streaming here, thus we get a ModelResponse
resp: ModelResponse = completion_unwrapped(*args, **kwargs)
resp: ModelResponse = self._completion_unwrapped(*args, **kwargs)

non_fncall_response = copy.deepcopy(resp)
if mock_function_calling:
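
Storing the unwrapped callable on the instance, rather than in a closure-local variable, is what lets the rewritten test_prompt_caching_headers further down stub the raw completion call directly. A minimal sketch of that pattern, using a hypothetical TinyLLM class rather than the real openhands.llm.llm.LLM:

from typing import Any, Callable

class TinyLLM:
    # Hypothetical stand-in for the pattern above, not the real LLM class.
    def __init__(self, completion_fn: Callable[..., Any]) -> None:
        # Keep the raw callable on the instance so tests can swap it out.
        self._completion_unwrapped = completion_fn

        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Retry logic wraps this call in the real class; the wrapper
            # always dispatches through the instance attribute.
            return self._completion_unwrapped(*args, **kwargs)

        self._completion = wrapper

# A test can replace the raw call without touching the wrapper:
llm = TinyLLM(lambda **kw: {'content': 'real'})
llm._completion_unwrapped = lambda **kw: {'content': 'stubbed'}
assert llm._completion()['content'] == 'stubbed'
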
openhands/utils/prompt.py (8 changes: 1 addition & 7 deletions)
@@ -18,7 +18,6 @@ class PromptManager:
Attributes:
prompt_dir (str): Directory containing prompt templates.
agent_skills_docs (str): Documentation of agent skills.
microagent_dir (str): Directory containing microagent specifications.
disabled_microagents (list[str] | None): List of microagents to disable. If None, all microagents are enabled.
"""
@@ -27,11 +26,9 @@ def __init__(
self,
prompt_dir: str,
microagent_dir: str | None = None,
agent_skills_docs: str = '',
disabled_microagents: list[str] | None = None,
):
self.prompt_dir: str = prompt_dir
self.agent_skills_docs: str = agent_skills_docs

self.system_template: Template = self._load_template('system_prompt')
self.user_template: Template = self._load_template('user_prompt')
@@ -62,10 +59,7 @@ def _load_template(self, template_name: str) -> Template:
return Template(file.read())

def get_system_message(self) -> str:
rendered = self.system_template.render(
agent_skills_docs=self.agent_skills_docs,
).strip()
return rendered
return self.system_template.render().strip()

def get_example_user_message(self) -> str:
"""This is the initial user message provided to the agent
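
With agent_skills_docs gone, get_system_message renders the system template with no variables at all. A hedged sketch of that reduced path using Jinja2 directly (the 'system_prompt.j2' file name is an assumption; the diff only shows the 'system_prompt' template key):

import os

from jinja2 import Template

def load_system_message(prompt_dir: str) -> str:
    # Read the system prompt template and render it with no template
    # variables, mirroring the simplified get_system_message() above.
    path = os.path.join(prompt_dir, 'system_prompt.j2')  # assumed file name
    with open(path, 'r') as f:
        return Template(f.read()).render().strip()
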
tests/unit/test_codeact_agent.py (4 changes: 0 additions & 4 deletions)
@@ -37,7 +37,6 @@ def test_cmd_output_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Command output' in result.content[0].text
assert 'Command finished with exit code 0' in result.content[0].text

@@ -57,7 +56,6 @@ def test_ipython_run_cell_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'IPython output' in result.content[0].text
assert (
'![image](data:image/png;base64, ...) already displayed to user'
@@ -80,7 +78,6 @@ def test_agent_delegate_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Delegated agent output' in result.content[0].text


@@ -96,7 +93,6 @@ def test_error_observation_message(agent: CodeActAgent):
assert result.role == 'user'
assert len(result.content) == 1
assert isinstance(result.content[0], TextContent)
assert 'OBSERVATION:' in result.content[0].text
assert 'Error message' in result.content[0].text
assert 'Error occurred in processing last action' in result.content[0].text

tests/unit/test_prompt_caching.py (175 changes: 31 additions & 144 deletions)
@@ -1,39 +1,43 @@
from unittest.mock import Mock, patch
from unittest.mock import Mock

import pytest
from litellm import ModelResponse

from openhands.agenthub.codeact_agent.codeact_agent import CodeActAgent
from openhands.core.config import AgentConfig, LLMConfig
from openhands.events.action import CmdRunAction, MessageAction
from openhands.events.observation import CmdOutputObservation
from openhands.events.action import MessageAction
from openhands.llm.llm import LLM


@pytest.fixture
def mock_llm():
llm = Mock(spec=LLM)
llm.config = LLMConfig(model='claude-3-5-sonnet-20241022', caching_prompt=True)
llm.is_caching_prompt_active.return_value = True
llm = LLM(
LLMConfig(
model='claude-3-5-sonnet-20241022',
api_key='fake',
caching_prompt=True,
)
)
return llm


@pytest.fixture(params=[False, True])
def codeact_agent(mock_llm, request):
@pytest.fixture
def codeact_agent(mock_llm):
config = AgentConfig()
config.function_calling = request.param
return CodeActAgent(mock_llm, config)


def response_mock(content: str):
def response_mock(content: str, tool_call_id: str):
class MockModelResponse:
def __init__(self, content):
def __init__(self, content, tool_call_id):
self.choices = [
{
'message': {
'content': content,
'tool_calls': [
{
'function': {
'id': tool_call_id,
'name': 'execute_bash',
'arguments': '{}',
}
@@ -46,10 +50,10 @@ def __init__(self, content):
def model_dump(self):
return {'choices': self.choices}

return MockModelResponse(content)
return ModelResponse(**MockModelResponse(content, tool_call_id).model_dump())


def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
def test_get_messages(codeact_agent: CodeActAgent):
# Add some events to history
history = list()
message_action_1 = MessageAction('Initial user message')
@@ -78,13 +82,9 @@ def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
) # System, initial user + user message, agent message, last user message
assert messages[0].content[0].cache_prompt # system message
assert messages[1].role == 'user'
if not codeact_agent.config.function_calling:
assert messages[1].content[0].text.endswith("LET'S START!")
assert messages[1].content[1].text.endswith('Initial user message')
else:
assert messages[1].content[0].text.endswith('Initial user message')
assert messages[1].content[0].text.endswith('Initial user message')
# we add cache breakpoint to the last 3 user messages
assert messages[1].content[-1].cache_prompt
assert messages[1].content[0].cache_prompt

assert messages[3].role == 'user'
assert messages[3].content[0].text == ('Hello, agent!')
@@ -95,14 +95,6 @@ def test_get_messages_with_reminder(codeact_agent: CodeActAgent):
assert messages[5].role == 'user'
assert messages[5].content[0].text.startswith('Laaaaaaaast!')
assert messages[5].content[0].cache_prompt
if not codeact_agent.config.function_calling:
assert (
messages[5]
.content[1]
.text.endswith(
'ENVIRONMENT REMINDER: You have 5 turns left to complete the task. When finished reply with <finish></finish>.'
)
)


def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
@@ -132,114 +124,20 @@ def test_get_messages_prompt_caching(codeact_agent: CodeActAgent):
) # Including the initial system+user + 2 last user message

# Verify that these are indeed the last two user messages (from start)
if not codeact_agent.config.function_calling:
assert (
cached_user_messages[0].content[0].text.startswith('A chat between')
) # system message
assert cached_user_messages[0].content[0].text.startswith('You are OpenHands agent')
assert cached_user_messages[2].content[0].text.startswith('User message 1')
assert cached_user_messages[3].content[0].text.startswith('User message 1')


def test_get_messages_with_cmd_action(codeact_agent: CodeActAgent):
if codeact_agent.config.function_calling:
pytest.skip('Skipping this test for function calling')

history = list()

# Add a mix of actions and observations
message_action_1 = MessageAction(
"Let's list the contents of the current directory."
)
message_action_1._source = 'user'
history.append(message_action_1)

cmd_action_1 = CmdRunAction('ls -l', thought='List files in current directory')
cmd_action_1._source = 'agent'
cmd_action_1._id = 'cmd_1'
history.append(cmd_action_1)

cmd_observation_1 = CmdOutputObservation(
content='total 0\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file1.txt\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file2.txt',
command_id=cmd_action_1._id,
command='ls -l',
exit_code=0,
)
cmd_observation_1._source = 'user'
history.append(cmd_observation_1)

message_action_2 = MessageAction("Now, let's create a new directory.")
message_action_2._source = 'agent'
history.append(message_action_2)

cmd_action_2 = CmdRunAction('mkdir new_directory', thought='Create a new directory')
cmd_action_2._source = 'agent'
cmd_action_2._id = 'cmd_2'
history.append(cmd_action_2)

cmd_observation_2 = CmdOutputObservation(
content='',
command_id=cmd_action_2._id,
command='mkdir new_directory',
exit_code=0,
)
cmd_observation_2._source = 'user'
history.append(cmd_observation_2)

codeact_agent.reset()
messages = codeact_agent._get_messages(
Mock(history=history, max_iterations=5, iteration=0)
)

# Assert the presence of key elements in the messages
assert (
messages[1]
.content[-1]
.text.startswith("Let's list the contents of the current directory.")
) # user, included in the initial message
if not codeact_agent.config.function_calling:
assert any(
'List files in current directory\n<execute_bash>\nls -l\n</execute_bash>'
in msg.content[0].text
for msg in messages
) # agent
assert any(
'total 0\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file1.txt\n-rw-r--r-- 1 user group 0 Jan 1 00:00 file2.txt'
in msg.content[0].text
for msg in messages
) # user, observation
assert any(
"Now, let's create a new directory." in msg.content[0].text for msg in messages
) # agent
if not codeact_agent.config.function_calling:
assert messages[4].content[1].text.startswith('Create a new directory') # agent
assert any(
'finished with exit code 0' in msg.content[0].text for msg in messages
) # user, observation
assert (
messages[5].content[0].text.startswith('OBSERVATION:\n\n')
) # user, observation

# prompt cache is added to the system message
assert messages[0].content[0].cache_prompt
# and the first initial user message
assert messages[1].content[-1].cache_prompt
# and to the last two user messages
assert messages[3].content[0].cache_prompt
assert messages[5].content[0].cache_prompt

# reminder is added to the last user message
if not codeact_agent.config.function_calling:
assert 'ENVIRONMENT REMINDER: You have 5 turns' in messages[5].content[1].text


def test_prompt_caching_headers(codeact_agent: CodeActAgent):
history = list()
if codeact_agent.config.function_calling:
pytest.skip('Skipping this test for function calling')

# Setup
history.append(MessageAction('Hello, agent!'))
history.append(MessageAction('Hello, user!'))
msg1 = MessageAction('Hello, agent!')
msg1._source = 'user'
history.append(msg1)
msg2 = MessageAction('Hello, user!')
msg2._source = 'agent'
history.append(msg2)

mock_state = Mock()
mock_state.history = history
@@ -253,23 +151,12 @@ def check_headers(**kwargs):
assert 'extra_headers' in kwargs
assert 'anthropic-beta' in kwargs['extra_headers']
assert kwargs['extra_headers']['anthropic-beta'] == 'prompt-caching-2024-07-31'
# Create a mock response with the expected structure
mock_response = Mock()
mock_response.choices = [Mock()]
mock_response.choices[0].message = Mock()
mock_response.choices[0].message.content = 'Hello! How can I assist you today?'
return mock_response
return ModelResponse(
choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
)

# Use patch to replace litellm_completion with our check_headers function
with patch('openhands.llm.llm.litellm_completion', side_effect=check_headers):
# Also patch the action parser to return a MessageAction
with patch.object(
codeact_agent.action_parser,
'parse',
return_value=MessageAction('Hello! How can I assist you today?'),
):
# Act
result = codeact_agent.step(mock_state)
codeact_agent.llm._completion_unwrapped = check_headers
result = codeact_agent.step(mock_state)

# Assert
assert isinstance(result, MessageAction)
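
One detail worth noting in the rewritten tests: both response_mock and check_headers now return a real litellm.ModelResponse built from plain dicts instead of a bare Mock, so the agent code under test exercises the same attribute access it uses in production. A small sketch of that construction (the content string is illustrative):

from litellm import ModelResponse

# litellm normalizes plain dicts into its typed choice/message objects, so a
# realistic response can be built without hand-rolled Mock classes.
resp = ModelResponse(
    choices=[{'message': {'content': 'Hello! How can I assist you today?'}}]
)
assert resp.choices[0].message.content == 'Hello! How can I assist you today?'
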