App diagnose chat (#253)
* Added workload_health_chat endpoint
itisallgood authored Jan 30, 2025
1 parent 99ac32c commit 13feef1
Showing 4 changed files with 324 additions and 1 deletion.
254 changes: 253 additions & 1 deletion holmes/core/conversations.py
@@ -6,6 +6,7 @@
    ConversationInvestigationResult,
    ToolCallConversationResult,
    IssueChatRequest,
    WorkloadHealthChatRequest,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.core.tool_calling_llm import ToolCallingLLM
@@ -14,6 +15,7 @@

DEFAULT_TOOL_SIZE = 10000


def calculate_tool_size(
    ai: ToolCallingLLM, messages_without_tools: list[dict], number_of_tools: int
) -> int:
@@ -136,14 +138,28 @@ def handle_issue_conversation(
    return system_prompt


def build_issue_chat_messages(issue_chat_request: IssueChatRequest,
                              ai: ToolCallingLLM,
                              global_instructions: Optional[Instructions] = None):
    """
    Generates the list of messages for an issue conversation, truncating tool outputs as needed so that
    the message sequence fits the model's context window before it is sent to the LLM.

    We always expect conversation_history in the OpenAI format, which litellm supports and which we pass
    back to the caller. That is why we assume the first message in the conversation is the system message
    and truncate its tool outputs.

    System prompt handling:
    1. For new conversations (empty conversation_history):
       - Creates a new system prompt from the generic_ask_for_issue_conversation.jinja2 template
       - Includes the investigation analysis, tools (if any), and issue type information
       - If there are tools, calculates an appropriate tool size and truncates tool outputs
    2. For existing conversations:
       - Preserves the conversation history
       - Updates the first message (system prompt) with recalculated content
       - Truncates tool outputs as necessary to fit the context window
       - Maintains the original conversation flow while enforcing context limits

    Example structure of conversation history:
    conversation_history = [
        # System prompt
@@ -313,6 +329,53 @@ def build_chat_messages(
    ask: str, conversation_history: Optional[List[Dict[str, str]]], ai: ToolCallingLLM,
    global_instructions: Optional[Instructions] = None
) -> List[dict]:
"""
This function generates a list of messages for general chat conversation and ensures that the message sequence adheres to the model's context window limitations
by truncating tool outputs as necessary before sending to llm.
We always expect conversation_history to be passed in the openAI format which is supported by litellm and passed back by us.
That's why we assume that first message in the conversation is system message and truncate tools for it.
System prompt handling:
1. For new conversations (empty conversation_history):
- Creates a new system prompt using generic_ask_conversation.jinja2 template
- Uses an empty template context (no specific analysis or tools required)
- Adds global instructions to the user prompt if provided
2. For existing conversations:
- Preserves the conversation history as is
- No need to update system prompt as it doesn't contain tool-specific content
- Only truncates tool messages if they exist in the conversation
- Maintains the original conversation flow while ensuring context limits
Example structure of conversation history:
conversation_history = [
# System prompt for general chat
{"role": "system", "content": "...."},
# User message with a general question
{"role": "user", "content": "Can you analyze the logs from my application?"},
# Assistant initiates a tool call
{
"role": "assistant",
"content": None,
"tool_call": {
"name": "fetch_application_logs",
"arguments": "{\"service\": \"backend\", \"time_range\": \"last_hour\"}"
}
},
# Tool/Function response
{
"role": "tool",
"name": "fetch_application_logs",
"content": "{\"log_entries\": [\"Error in processing request\", \"Connection timeout\"]}"
},
# Assistant's final response to the user
{
"role": "assistant",
"content": "I've analyzed your application logs and found some issues: there are error messages related to request processing and connection timeouts."
},
]
"""
template_path = "builtin://generic_ask_conversation.jinja2"

if not conversation_history or len(conversation_history) == 0:
Expand Down Expand Up @@ -354,3 +417,192 @@ def build_chat_messages(
    )
    truncate_tool_messages(conversation_history, tool_size)
    return conversation_history


def build_workload_health_chat_messages(workload_health_chat_request: WorkloadHealthChatRequest,
                                        ai: ToolCallingLLM,
                                        global_instructions: Optional[Instructions] = None
                                        ):
    """
    Generates the list of messages for a workload health conversation, truncating tool outputs as needed so
    that the message sequence fits the model's context window before it is sent to the LLM.

    We always expect conversation_history in the OpenAI format, which litellm supports and which we pass
    back to the caller. That is why we assume the first message in the conversation is the system message
    and truncate its tool outputs.

    System prompt handling:
    1. For new conversations (empty conversation_history):
       - Creates a new system prompt from the kubernetes_workload_chat.jinja2 template
       - Includes the workload analysis, tools (if any), and resource information
       - If there are tools, calculates an appropriate tool size and truncates tool outputs
    2. For existing conversations:
       - Preserves the conversation history
       - Updates the first message (system prompt) with recalculated content
       - Truncates tool outputs as necessary to fit the context window
       - Maintains the original conversation flow while enforcing context limits

    Example structure of conversation history:
    conversation_history = [
        # System prompt with workload analysis
        {"role": "system", "content": "...."},
        # User message asking about workload health
        {"role": "user", "content": "What's the current health status of my deployment?"},
        # Assistant initiates a tool call
        {
            "role": "assistant",
            "content": None,
            "tool_call": {
                "name": "check_workload_metrics",
                "arguments": "{\"namespace\": \"default\", \"workload\": \"my-deployment\"}"
            }
        },
        # Tool/Function response
        {
            "role": "tool",
            "name": "check_workload_metrics",
            "content": "{\"cpu_usage\": \"45%\", \"memory_usage\": \"60%\", \"status\": \"Running\"}"
        },
        # Assistant's final response to the user
        {
            "role": "assistant",
            "content": "Your deployment is running normally with CPU usage at 45% and memory usage at 60%."
        },
    ]
    """

template_path = "builtin://kubernetes_workload_chat.jinja2"

conversation_history = workload_health_chat_request.conversation_history
user_prompt = workload_health_chat_request.ask
workload_analysis = workload_health_chat_request.workload_health_result.analysis
tools_for_workload = workload_health_chat_request.workload_health_result.tools
resource = workload_health_chat_request.resource

if not conversation_history or len(conversation_history) == 0:
user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions)

number_of_tools_for_workload = len(tools_for_workload)
if number_of_tools_for_workload == 0:
system_prompt = load_and_render_prompt(
template_path,
{
"workload_analysis": workload_analysis,
"tools_called_for_workload": tools_for_workload,
"resource": resource,
},
)
messages = [
{
"role": "system",
"content": system_prompt,
},
{
"role": "user",
"content": user_prompt,
},
]
return messages

template_context_without_tools = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": None,
"resource": resource,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
)
messages_without_tools = [
{
"role": "system",
"content": system_prompt_without_tools,
},
{
"role": "user",
"content": user_prompt,
},
]
tool_size = calculate_tool_size(
ai, messages_without_tools, number_of_tools_for_workload
)

truncated_workload_result_tool_calls = [
ToolCallConversationResult(
name=tool.name,
description=tool.description,
output=tool.output[:tool_size],
)
for tool in tools_for_workload
]

truncated_template_context = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": truncated_workload_result_tool_calls,
"resource": resource,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, truncated_template_context
)
return [
{
"role": "system",
"content": system_prompt_with_truncated_tools,
},
{
"role": "user",
"content": user_prompt,
},
]

user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions)

conversation_history.append(
{
"role": "user",
"content": user_prompt,
}
)
number_of_tools = len(tools_for_workload) + len(
[message for message in conversation_history if message.get("role") == "tool"]
)

if number_of_tools == 0:
return conversation_history

conversation_history_without_tools = [
message for message in conversation_history if message.get("role") != "tool"
]
template_context_without_tools = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": None,
"resource": resource,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
)
conversation_history_without_tools[0]["content"] = system_prompt_without_tools

tool_size = calculate_tool_size(
ai, conversation_history_without_tools, number_of_tools
)

truncated_workload_result_tool_calls = [
ToolCallConversationResult(
name=tool.name, description=tool.description, output=tool.output[:tool_size]
)
for tool in tools_for_workload
]

template_context = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": truncated_workload_result_tool_calls,
"resource": resource,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, template_context
)
conversation_history[0]["content"] = system_prompt_with_truncated_tools

truncate_tool_messages(conversation_history, tool_size)

return conversation_history
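
To make the first-turn branch concrete, here is a minimal, hypothetical invocation (all field values are invented; `ai` stands in for a configured ToolCallingLLM, and conversation_history is assumed to default to empty on the base request model):

request = WorkloadHealthChatRequest(
    ask="Why is my deployment restarting?",
    workload_health_result=WorkloadHealthInvestigationResult(
        analysis="2/3 pods in CrashLoopBackOff.",
        tools=[],
    ),
    resource={"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
)

# Empty history and zero tools, so the function takes the first branch and
# returns exactly two messages: the rendered system prompt and the user prompt.
messages = build_workload_health_chat_messages(request, ai)
assert [m["role"] for m in messages] == ["system", "user"]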
11 changes: 11 additions & 0 deletions holmes/core/models.py
@@ -127,3 +127,14 @@ class ChatResponse(BaseModel):
    analysis: str
    conversation_history: list[dict]
    tool_calls: Optional[List[ToolCallResult]] = []


class WorkloadHealthInvestigationResult(BaseModel):
    analysis: Optional[str] = None
    tools: Optional[List[ToolCallConversationResult]] = []


class WorkloadHealthChatRequest(ChatRequestBaseModel):
    ask: str
    workload_health_result: WorkloadHealthInvestigationResult
    resource: dict
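
For illustration, a request matching these models might be built as follows (a sketch with invented values; the conversation_history field read by build_workload_health_chat_messages is assumed to come from ChatRequestBaseModel):

payload = {
    "ask": "Is my deployment healthy?",
    "workload_health_result": {
        "analysis": "CPU 45%, memory 60%; all pods Ready.",
        "tools": [],
    },
    "resource": {"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
}
chat_request = WorkloadHealthChatRequest(**payload)  # pydantic validates the nested result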
38 changes: 38 additions & 0 deletions holmes/plugins/prompts/kubernetes_workload_chat.jinja2
@@ -0,0 +1,38 @@
You are a tool-calling AI assistant provided with common DevOps and IT tools that you can use to troubleshoot problems or answer questions.
Whenever possible, you MUST first use tools to investigate, then answer the question.
Do not say 'based on the tool output' or explicitly refer to tools at all.
If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.

### Context Awareness:
Be aware that this conversation consists of follow-up questions to a prior investigation conducted for the {{resource}}.
However, not all questions may be directly related to that investigation.
Use the results of the investigation and the conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.

#### Results of Workload Health Check Analysis:
{{workload_analysis}}

{% if tools_called_for_workload %}
Tools used for the workload analysis:
{% for tool in tools_called_for_workload %}
{{ tool }}
{% endfor %}
{% endif %}


{% include '_global_instructions.jinja2' %}
{% include '_general_instructions.jinja2' %}

Style guide:
* Reply with terse output.
* Be painfully concise.
* Leave out "the" and filler words when possible.
* Be terse, but not at the expense of leaving out important data like the root cause and how to fix it.

Examples:

User: Why did the workload-example app crash?
(Call tool kubectl_find_resource kind=pod keyword=workload)
(Call tool kubectl_previous_logs namespace=demos pod=workload-example-1299492-d9g9d # this pod name was found from the previous tool call)

AI: `workload-example-1299492-d9g9d` crashed due to email validation error during HTTP request for /api/create_user
Relevant logs:
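
For reference, this template is rendered by build_workload_health_chat_messages roughly as follows (a sketch mirroring the no-tools branch in conversations.py; context values are invented):

from holmes.plugins.prompts import load_and_render_prompt

system_prompt = load_and_render_prompt(
    "builtin://kubernetes_workload_chat.jinja2",
    {
        "workload_analysis": "CPU 45%, memory 60%; all pods Ready.",
        "tools_called_for_workload": None,  # the {% if %} block is skipped
        "resource": {"kind": "Deployment", "name": "my-deployment"},
    },
)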
22 changes: 22 additions & 0 deletions server.py
@@ -34,6 +34,7 @@
    build_chat_messages,
    build_issue_chat_messages,
    handle_issue_conversation,
    build_workload_health_chat_messages,
)
from holmes.core.issue import Issue
from holmes.core.models import (
@@ -45,6 +46,7 @@
    ChatRequest,
    ChatResponse,
    IssueChatRequest,
    WorkloadHealthChatRequest,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.utils.holmes_sync_toolsets import holmes_sync_toolsets_status
@@ -106,6 +108,7 @@ async def log_requests(request: Request, call_next):
    status_code = response.status_code
    logging.info(f"Request completed {request.method} {request.url.path} status={status_code} latency={process_time}ms")


@app.post("/api/investigate")
def investigate_issues(investigate_request: InvestigateRequest):
    try:
@@ -164,6 +167,25 @@ def workload_health_check(request: WorkloadHealthRequest):
        raise HTTPException(status_code=401, detail=e.message)


@app.post("/api/workload_health_chat")
def workload_health_conversation(workload_health_chat_request: WorkloadHealthChatRequest):
try:
load_robusta_api_key(dal=dal, config=config)
ai = config.create_toolcalling_llm(dal=dal)
global_instructions = dal.get_global_instructions_for_account()

messages = build_workload_health_chat_messages(workload_health_chat_request, ai, global_instructions)
llm_call = ai.messages_call(messages=messages)

return ChatResponse(
analysis=llm_call.result,
tool_calls=llm_call.tool_calls,
conversation_history=llm_call.messages,
)
except AuthenticationError as e:
raise HTTPException(status_code=401, detail=e.message)


# older api that does not support conversation history
@app.post("/api/conversation")
def issue_conversation_deprecated(conversation_request: ConversationRequest):
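
End to end, the new endpoint can be exercised roughly like this (a sketch: host, port, and payload values are invented; the response shape follows ChatResponse above):

import requests

resp = requests.post(
    "http://localhost:8000/api/workload_health_chat",  # assumed local dev address
    json={
        "ask": "Is my deployment healthy?",
        "workload_health_result": {"analysis": "CPU 45%, memory 60%.", "tools": []},
        "resource": {"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
    },
)
data = resp.json()  # keys: analysis, conversation_history, tool_calls
print(data["analysis"])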
