App diagnose chat (#253)
* Added workload_health_chat endpoint
itisallgood authored Jan 30, 2025
1 parent 99ac32c commit 13feef1
Showing 4 changed files with 324 additions and 1 deletion.
254 changes: 253 additions & 1 deletion holmes/core/conversations.py
@@ -6,6 +6,7 @@
    ConversationInvestigationResult,
    ToolCallConversationResult,
    IssueChatRequest,
    WorkloadHealthChatRequest,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.core.tool_calling_llm import ToolCallingLLM
@@ -14,6 +15,7 @@

DEFAULT_TOOL_SIZE = 10000


def calculate_tool_size(
    ai: ToolCallingLLM, messages_without_tools: list[dict], number_of_tools: int
) -> int:
@@ -136,14 +138,28 @@ def handle_issue_conversation(
    return system_prompt


def build_issue_chat_messages(issue_chat_request: IssueChatRequest,
                              ai: ToolCallingLLM,
                              global_instructions: Optional[Instructions] = None):
    """
    Generates the list of messages for an issue conversation, truncating tool outputs as needed so that
    the message sequence fits the model's context window before it is sent to the LLM.

    We always expect conversation_history in the OpenAI format, which litellm supports and which we pass
    back to the caller. That is why we assume the first message in the conversation is the system message
    and truncate its tool outputs.

    System prompt handling:
    1. For new conversations (empty conversation_history):
       - Creates a new system prompt from the generic_ask_for_issue_conversation.jinja2 template
       - Includes the investigation analysis, tools (if any), and issue type information
       - If there are tools, calculates an appropriate tool size and truncates tool outputs
    2. For existing conversations:
       - Preserves the conversation history
       - Updates the first message (system prompt) with recalculated content
       - Truncates tool outputs as necessary to fit the context window
       - Maintains the original conversation flow while enforcing context limits

    Example structure of conversation history:
    conversation_history = [
        # System prompt
@@ -313,6 +329,53 @@ def build_chat_messages(
    ask: str, conversation_history: Optional[List[Dict[str, str]]], ai: ToolCallingLLM,
    global_instructions: Optional[Instructions] = None
) -> List[dict]:
"""
This function generates a list of messages for general chat conversation and ensures that the message sequence adheres to the model's context window limitations
by truncating tool outputs as necessary before sending to llm.
We always expect conversation_history to be passed in the openAI format which is supported by litellm and passed back by us.
That's why we assume that first message in the conversation is system message and truncate tools for it.
System prompt handling:
1. For new conversations (empty conversation_history):
- Creates a new system prompt using generic_ask_conversation.jinja2 template
- Uses an empty template context (no specific analysis or tools required)
- Adds global instructions to the user prompt if provided
2. For existing conversations:
- Preserves the conversation history as is
- No need to update system prompt as it doesn't contain tool-specific content
- Only truncates tool messages if they exist in the conversation
- Maintains the original conversation flow while ensuring context limits
Example structure of conversation history:
conversation_history = [
# System prompt for general chat
{"role": "system", "content": "...."},
# User message with a general question
{"role": "user", "content": "Can you analyze the logs from my application?"},
# Assistant initiates a tool call
{
"role": "assistant",
"content": None,
"tool_call": {
"name": "fetch_application_logs",
"arguments": "{\"service\": \"backend\", \"time_range\": \"last_hour\"}"
}
},
# Tool/Function response
{
"role": "tool",
"name": "fetch_application_logs",
"content": "{\"log_entries\": [\"Error in processing request\", \"Connection timeout\"]}"
},
# Assistant's final response to the user
{
"role": "assistant",
"content": "I've analyzed your application logs and found some issues: there are error messages related to request processing and connection timeouts."
},
]
"""
template_path = "builtin://generic_ask_conversation.jinja2"

if not conversation_history or len(conversation_history) == 0:
Expand Down Expand Up @@ -354,3 +417,192 @@ def build_chat_messages(
    )
    truncate_tool_messages(conversation_history, tool_size)
    return conversation_history


def build_workload_health_chat_messages(workload_health_chat_request: WorkloadHealthChatRequest,
                                        ai: ToolCallingLLM,
                                        global_instructions: Optional[Instructions] = None
                                        ):
    """
    Generates the list of messages for a workload health conversation, truncating tool outputs as needed so
    that the message sequence fits the model's context window before it is sent to the LLM.

    We always expect conversation_history in the OpenAI format, which litellm supports and which we pass
    back to the caller. That is why we assume the first message in the conversation is the system message
    and truncate its tool outputs.

    System prompt handling:
    1. For new conversations (empty conversation_history):
       - Creates a new system prompt from the kubernetes_workload_chat.jinja2 template
       - Includes the workload analysis, tools (if any), and resource information
       - If there are tools, calculates an appropriate tool size and truncates tool outputs
    2. For existing conversations:
       - Preserves the conversation history
       - Updates the first message (system prompt) with recalculated content
       - Truncates tool outputs as necessary to fit the context window
       - Maintains the original conversation flow while enforcing context limits

    Example structure of conversation history:
    conversation_history = [
        # System prompt with workload analysis
        {"role": "system", "content": "...."},
        # User message asking about workload health
        {"role": "user", "content": "What's the current health status of my deployment?"},
        # Assistant initiates a tool call
        {
            "role": "assistant",
            "content": None,
            "tool_call": {
                "name": "check_workload_metrics",
                "arguments": "{\"namespace\": \"default\", \"workload\": \"my-deployment\"}"
            }
        },
        # Tool/Function response
        {
            "role": "tool",
            "name": "check_workload_metrics",
            "content": "{\"cpu_usage\": \"45%\", \"memory_usage\": \"60%\", \"status\": \"Running\"}"
        },
        # Assistant's final response to the user
        {
            "role": "assistant",
            "content": "Your deployment is running normally with CPU usage at 45% and memory usage at 60%."
        },
    ]
    """

template_path = "builtin://kubernetes_workload_chat.jinja2"

conversation_history = workload_health_chat_request.conversation_history
user_prompt = workload_health_chat_request.ask
workload_analysis = workload_health_chat_request.workload_health_result.analysis
tools_for_workload = workload_health_chat_request.workload_health_result.tools
resource = workload_health_chat_request.resource

if not conversation_history or len(conversation_history) == 0:
user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions)

number_of_tools_for_workload = len(tools_for_workload)
if number_of_tools_for_workload == 0:
system_prompt = load_and_render_prompt(
template_path,
{
"workload_analysis": workload_analysis,
"tools_called_for_workload": tools_for_workload,
"resource": resource,
},
)
messages = [
{
"role": "system",
"content": system_prompt,
},
{
"role": "user",
"content": user_prompt,
},
]
return messages

template_context_without_tools = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": None,
"resource": resource,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
)
messages_without_tools = [
{
"role": "system",
"content": system_prompt_without_tools,
},
{
"role": "user",
"content": user_prompt,
},
]
tool_size = calculate_tool_size(
ai, messages_without_tools, number_of_tools_for_workload
)

truncated_workload_result_tool_calls = [
ToolCallConversationResult(
name=tool.name,
description=tool.description,
output=tool.output[:tool_size],
)
for tool in tools_for_workload
]

truncated_template_context = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": truncated_workload_result_tool_calls,
"resource": resource,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, truncated_template_context
)
return [
{
"role": "system",
"content": system_prompt_with_truncated_tools,
},
{
"role": "user",
"content": user_prompt,
},
]

user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions)

conversation_history.append(
{
"role": "user",
"content": user_prompt,
}
)
number_of_tools = len(tools_for_workload) + len(
[message for message in conversation_history if message.get("role") == "tool"]
)

if number_of_tools == 0:
return conversation_history

conversation_history_without_tools = [
message for message in conversation_history if message.get("role") != "tool"
]
template_context_without_tools = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": None,
"resource": resource,
}
system_prompt_without_tools = load_and_render_prompt(
template_path, template_context_without_tools
)
conversation_history_without_tools[0]["content"] = system_prompt_without_tools

tool_size = calculate_tool_size(
ai, conversation_history_without_tools, number_of_tools
)

truncated_workload_result_tool_calls = [
ToolCallConversationResult(
name=tool.name, description=tool.description, output=tool.output[:tool_size]
)
for tool in tools_for_workload
]

template_context = {
"workload_analysis": workload_analysis,
"tools_called_for_workload": truncated_workload_result_tool_calls,
"resource": resource,
}
system_prompt_with_truncated_tools = load_and_render_prompt(
template_path, template_context
)
conversation_history[0]["content"] = system_prompt_with_truncated_tools

truncate_tool_messages(conversation_history, tool_size)

return conversation_history
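
To make the first-turn branch concrete, here is a minimal, hypothetical invocation (all field values are invented; `ai` stands in for a configured ToolCallingLLM, and conversation_history is assumed to default to empty on the base request model):

request = WorkloadHealthChatRequest(
    ask="Why is my deployment restarting?",
    workload_health_result=WorkloadHealthInvestigationResult(
        analysis="2/3 pods in CrashLoopBackOff.",
        tools=[],
    ),
    resource={"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
)

# Empty history and zero tools, so the function takes the first branch and
# returns exactly two messages: the rendered system prompt and the user prompt.
messages = build_workload_health_chat_messages(request, ai)
assert [m["role"] for m in messages] == ["system", "user"]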
11 changes: 11 additions & 0 deletions holmes/core/models.py
@@ -127,3 +127,14 @@ class ChatResponse(BaseModel):
    analysis: str
    conversation_history: list[dict]
    tool_calls: Optional[List[ToolCallResult]] = []


class WorkloadHealthInvestigationResult(BaseModel):
    analysis: Optional[str] = None
    tools: Optional[List[ToolCallConversationResult]] = []


class WorkloadHealthChatRequest(ChatRequestBaseModel):
    ask: str
    workload_health_result: WorkloadHealthInvestigationResult
    resource: dict
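
For illustration, a request matching these models might be built as follows (a sketch with invented values; the conversation_history field read by build_workload_health_chat_messages is assumed to come from ChatRequestBaseModel):

payload = {
    "ask": "Is my deployment healthy?",
    "workload_health_result": {
        "analysis": "CPU 45%, memory 60%; all pods Ready.",
        "tools": [],
    },
    "resource": {"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
}
chat_request = WorkloadHealthChatRequest(**payload)  # pydantic validates the nested result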
38 changes: 38 additions & 0 deletions holmes/plugins/prompts/kubernetes_workload_chat.jinja2
@@ -0,0 +1,38 @@
You are a tool-calling AI assistant provided with common DevOps and IT tools that you can use to troubleshoot problems or answer questions.
Whenever possible, you MUST first use tools to investigate, then answer the question.
Do not say 'based on the tool output' or explicitly refer to tools at all.
If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.

### Context Awareness:
Be aware that this conversation consists of follow-up questions to a prior investigation conducted for the {{resource}}.
However, not all questions may be directly related to that investigation.
Use the results of the investigation and the conversation history to maintain continuity when appropriate, ensuring efficiency in your responses.

#### Results of Workload Health Check Analysis:
{{workload_analysis}}

{% if tools_called_for_workload %}
Tools used for the workload analysis:
{% for tool in tools_called_for_workload %}
{{ tool }}
{% endfor %}
{% endif %}


{% include '_global_instructions.jinja2' %}
{% include '_general_instructions.jinja2' %}

Style guide:
* Reply with terse output.
* Be painfully concise.
* Leave out "the" and filler words when possible.
* Be terse, but not at the expense of leaving out important data like the root cause and how to fix it.

Examples:

User: Why did the workload-example app crash?
(Call tool kubectl_find_resource kind=pod keyword=workload)
(Call tool kubectl_previous_logs namespace=demos pod=workload-example-1299492-d9g9d # this pod name was found from the previous tool call)

AI: `workload-example-1299492-d9g9d` crashed due to email validation error during HTTP request for /api/create_user
Relevant logs:
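
For reference, this template is rendered by build_workload_health_chat_messages roughly as follows (a sketch mirroring the no-tools branch in conversations.py; context values are invented):

from holmes.plugins.prompts import load_and_render_prompt

system_prompt = load_and_render_prompt(
    "builtin://kubernetes_workload_chat.jinja2",
    {
        "workload_analysis": "CPU 45%, memory 60%; all pods Ready.",
        "tools_called_for_workload": None,  # the {% if %} block is skipped
        "resource": {"kind": "Deployment", "name": "my-deployment"},
    },
)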
22 changes: 22 additions & 0 deletions server.py
@@ -34,6 +34,7 @@
    build_chat_messages,
    build_issue_chat_messages,
    handle_issue_conversation,
    build_workload_health_chat_messages,
)
from holmes.core.issue import Issue
from holmes.core.models import (
@@ -45,6 +46,7 @@
    ChatRequest,
    ChatResponse,
    IssueChatRequest,
    WorkloadHealthChatRequest,
)
from holmes.plugins.prompts import load_and_render_prompt
from holmes.utils.holmes_sync_toolsets import holmes_sync_toolsets_status
@@ -106,6 +108,7 @@ async def log_requests(request: Request, call_next):
    status_code = response.status_code
    logging.info(f"Request completed {request.method} {request.url.path} status={status_code} latency={process_time}ms")


@app.post("/api/investigate")
def investigate_issues(investigate_request: InvestigateRequest):
    try:
@@ -164,6 +167,25 @@ def workload_health_check(request: WorkloadHealthRequest):
        raise HTTPException(status_code=401, detail=e.message)


@app.post("/api/workload_health_chat")
def workload_health_conversation(workload_health_chat_request: WorkloadHealthChatRequest):
try:
load_robusta_api_key(dal=dal, config=config)
ai = config.create_toolcalling_llm(dal=dal)
global_instructions = dal.get_global_instructions_for_account()

messages = build_workload_health_chat_messages(workload_health_chat_request, ai, global_instructions)
llm_call = ai.messages_call(messages=messages)

return ChatResponse(
analysis=llm_call.result,
tool_calls=llm_call.tool_calls,
conversation_history=llm_call.messages,
)
except AuthenticationError as e:
raise HTTPException(status_code=401, detail=e.message)


# older api that does not support conversation history
@app.post("/api/conversation")
def issue_conversation_deprecated(conversation_request: ConversationRequest):
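
End to end, the new endpoint can be exercised roughly like this (a sketch: host, port, and payload values are invented; the response shape follows ChatResponse above):

import requests

resp = requests.post(
    "http://localhost:8000/api/workload_health_chat",  # assumed local dev address
    json={
        "ask": "Is my deployment healthy?",
        "workload_health_result": {"analysis": "CPU 45%, memory 60%.", "tools": []},
        "resource": {"kind": "Deployment", "name": "my-deployment", "namespace": "default"},
    },
)
data = resp.json()  # keys: analysis, conversation_history, tool_calls
print(data["analysis"])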
