diff --git a/.gitattributes b/.gitattributes index 3d3699c..c0e9441 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -holmes/.git_archival.json export-subst \ No newline at end of file +holmes/.git_archival.json export-subst diff --git a/.github/workflows/build-binaries-and-brew.yaml b/.github/workflows/build-binaries-and-brew.yaml index 653f581..f238f2e 100644 --- a/.github/workflows/build-binaries-and-brew.yaml +++ b/.github/workflows/build-binaries-and-brew.yaml @@ -22,7 +22,7 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.11' - + - name: Install dependencies if: matrix.os != 'windows-latest' run: | @@ -43,7 +43,7 @@ jobs: if: matrix.os == 'ubuntu-20.04' run: | sudo apt-get install -y binutils - + - name: Update package version (Linux) if: matrix.os == 'ubuntu-20.04' run: sed -i 's/__version__ = .*/__version__ = "${{ github.ref_name }}"/g' holmes/__init__.py @@ -67,7 +67,7 @@ jobs: # regarding the tiktoken part of the command, see https://github.com/openai/tiktoken/issues/80 # regarding the litellm part of the command, see https://github.com/pyinstaller/pyinstaller/issues/8620#issuecomment-2186540504 run: | - pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --hiddenimport litellm.litellm_core_utils.tokenizers --collect-data litellm + pyinstaller holmes.py --add-data 'holmes/plugins/runbooks/*:holmes/plugins/runbooks' --add-data 'holmes/plugins/prompts/*:holmes/plugins/prompts' --add-data 'holmes/plugins/toolsets/*:holmes/plugins/toolsets' --hidden-import=tiktoken_ext.openai_public --hidden-import=tiktoken_ext --hiddenimport litellm.llms.tokenizers --hiddenimport litellm.litellm_core_utils.tokenizers --collect-data litellm ls dist - name: Zip the application (Unix) @@ -91,7 +91,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: - upload_url: ${{ github.event.release.upload_url }} + upload_url: ${{ github.event.release.upload_url }} asset_path: ./holmes-${{ matrix.os }}-${{ github.ref_name }}.zip asset_name: holmes-${{ matrix.os }}-${{ github.ref_name }}.zip asset_content_type: application/octet-stream @@ -105,7 +105,7 @@ jobs: check-latest: needs: build runs-on: ubuntu-20.04 - outputs: + outputs: IS_LATEST: ${{ steps.check-latest.outputs.release == github.ref_name }} steps: - id: check-latest diff --git a/.github/workflows/build-docker-images.yaml b/.github/workflows/build-docker-images.yaml index ba85866..0bfcbf2 100644 --- a/.github/workflows/build-docker-images.yaml +++ b/.github/workflows/build-docker-images.yaml @@ -77,7 +77,7 @@ jobs: # Note: this ignores the "Set as latest release" checkbox in the GitHub UI # it isn't possible to check whether that was set or not # so if you do not want to override the "latest" tag, you should mark the release as a prerelease or a draft - # for prereleases and drafts we don't tag latest + # for prereleases and drafts we don't tag latest - name: Tag and push Docker image as latest if applicable if: ${{ github.event.release.prerelease == false && github.event.release.draft == false }} run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2eaf552..68b44a6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,4 +6,16 @@ repos: - id: poetry-lock pass_filenames: false args: - - --no-update \ No newline at 
end of file + - --no-update + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.7.2 + hooks: + - id: ruff + entry: ruff check --fix + - id: ruff-format + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: detect-private-key + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d95164..2b36acb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,7 +15,7 @@ Please make sure to read and observe our [Code of Conduct](https://github.com/ro ## Reporting bugs -We encourage those interested to contribute code and also appreciate when issues are reported. +We encourage those interested to contribute code and also appreciate when issues are reported. - Create a new issue and label is as `bug` - Clearly state how to reproduce the bug: @@ -23,7 +23,7 @@ We encourage those interested to contribute code and also appreciate when issues - Which steps are required to reproduce - As LLMs answers may differ between runs - Does it always reproduce, or occasionally? - + ## Contributing Code - Fork the repository and clone it locally. diff --git a/Dockerfile b/Dockerfile index f33f84b..ff138d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,22 +26,34 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH" RUN curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key -o Release.key # Set the architecture-specific kube lineage URLs -ARG ARM_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.1/kube-lineage-macos-latest-v2.2.1 -ARG AMD_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.1/kube-lineage-ubuntu-latest-v2.2.1 +ARG KUBE_LINEAGE_ARM_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.1/kube-lineage-macos-latest-v2.2.1 +ARG KUBE_LINEAGE_AMD_URL=https://github.com/Avi-Robusta/kube-lineage/releases/download/v2.2.1/kube-lineage-ubuntu-latest-v2.2.1 # Define a build argument to identify the platform ARG TARGETPLATFORM # Conditional download based on the platform RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - curl -L -o kube-lineage $ARM_URL; \ + curl -L -o kube-lineage $KUBE_LINEAGE_ARM_URL; \ elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ - curl -L -o kube-lineage $AMD_URL; \ + curl -L -o kube-lineage $KUBE_LINEAGE_AMD_URL; \ else \ echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \ fi RUN chmod 777 kube-lineage RUN ./kube-lineage --version -RUN curl -sSL -o argocd-linux-amd64 https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-amd64 +# Set the architecture-specific argocd URLs +ARG ARGOCD_ARM_URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-arm64 +ARG ARGOCD_AMD_URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-amd64 +# Conditional download based on the platform +RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + curl -L -o argocd $ARGOCD_ARM_URL; \ + elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + curl -L -o argocd $ARGOCD_AMD_URL; \ + else \ + echo "Unsupported platform: $TARGETPLATFORM"; exit 1; \ + fi +RUN chmod 777 argocd +RUN ./argocd --help # Install Helm RUN curl https://baltocdn.com/helm/signing.asc | gpg --dearmor -o /usr/share/keyrings/helm.gpg \ @@ -100,8 +112,7 @@ COPY --from=builder /app/kube-lineage /usr/local/bin RUN kube-lineage --version # Set up ArgoCD -COPY --from=builder /app/argocd-linux-amd64 /usr/local/bin/argocd -RUN chmod 555 /usr/local/bin/argocd +COPY --from=builder /app/argocd /usr/local/bin/argocd RUN argocd --help # Set 
up Helm diff --git a/Dockerfile.dev b/Dockerfile.dev index 438e275..be62433 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -59,7 +59,7 @@ ARG PRIVATE_PACKAGE_REGISTRY="none" RUN if [ "${PRIVATE_PACKAGE_REGISTRY}" != "none" ]; then \ pip config set global.index-url "${PRIVATE_PACKAGE_REGISTRY}"; \ fi \ - && pip install poetry + && pip install poetry ARG POETRY_REQUESTS_TIMEOUT RUN poetry config virtualenvs.create false COPY pyproject.toml poetry.lock /app/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3525541 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ + + +check: + poetry run pre-commit run -a diff --git a/README.md b/README.md index 0f3618a..edb1cde 100644 --- a/README.md +++ b/README.md @@ -631,31 +631,31 @@ Using Grafana Loki HolmesGPT can consult logs from [Loki](https://grafana.com/oss/loki/) by proxying through a [Grafana](https://grafana.com/oss/grafana/) instance. -There are 2 parts to configuring access to Grafana Loki: Access/Authentication and search terms. +To configure loki toolset: -For access and authentication, add the following environment variables: - -* `GRAFANA_URL` - e.g. https://my-org.grafana.net -* `GRAFANA_API_KEY` - e.g. glsa_bsm6ZS_sdfs25f +```yaml +toolsets: + grafana/loki: + enabled: true + config: + api_key: "{{ env.GRAFANA_API_KEY }}" + url: "http://loki-url" +``` For search terms, you can optionally tweak the search terms used by the toolset. -This is done by appending the following to your Holmes configuration file: +This is done by appending the following to your Holmes grafana/loki configuration: ```yaml -grafana: - url: https://my-org.grafana.net # - api_key: glsa_bsm6ZS_sdfs25f - loki: - pod_name_search_key: "pod" - namespace_search_key: "namespace" - node_name_search_key: "node" +pod_name_search_key: "pod" +namespace_search_key: "namespace" +node_name_search_key: "node" ``` > You only need to tweak the configuration file if your Loki logs settings for pod, namespace and node differ from the above defaults. -The Loki toolset is configured the using the same Grafana settings as the Grafana Tempo toolset. +
Using Grafana Tempo @@ -664,8 +664,6 @@ HolmesGPT can fetch trace information from Grafana Tempo to debug performance re Tempo is configured the using the same Grafana settings as the Grafana Loki toolset. -grafana: - url: https://my-org.grafana.net #
@@ -875,9 +873,9 @@ Configure Slack to send notifications to specific channels. Provide your Slack t OpenSearch Integration The OpenSearch toolset (`opensearch`) allows Holmes to consult an opensearch cluster for its health, settings and shards information. -The toolset supports multiple opensearch or elasticsearch clusters that are configured by editing Holmes' configuration file (or in cluster to the configuration secret): +The toolset supports multiple opensearch or elasticsearch clusters that are configured by editing Holmes' configuration file: -``` +``` opensearch_clusters: - hosts: - https://my_elasticsearch.us-central1.gcp.cloud.es.io:443 diff --git a/examples/custom_llm.py b/examples/custom_llm.py index f3ec0c0..61d04fd 100644 --- a/examples/custom_llm.py +++ b/examples/custom_llm.py @@ -1,17 +1,14 @@ - from typing import Any, Dict, List, Optional, Type, Union -from holmes.config import Config from holmes.core.llm import LLM from litellm.types.utils import ModelResponse from holmes.core.tool_calling_llm import ToolCallingLLM from holmes.core.tools import Tool, ToolExecutor from holmes.plugins.toolsets import load_builtin_toolsets -from rich.console import Console from pydantic import BaseModel from holmes.plugins.prompts import load_and_render_prompt -import sys -class MyCustomLLM(LLM): + +class MyCustomLLM(LLM): def get_context_window_size(self) -> int: return 128000 @@ -21,36 +18,41 @@ def get_maximum_output_token(self) -> int: def count_tokens_for_message(self, messages: list[dict]) -> int: return 1 - def completion(self, messages: List[Dict[str, Any]], tools: Optional[List[Tool]] = [], tool_choice: Optional[Union[str, dict]] = None, response_format: Optional[Union[dict, Type[BaseModel]]] = None, temperature:Optional[float] = None, drop_params: Optional[bool] = None) -> ModelResponse: - return ModelResponse(choices=[{ - "finish_reason": "stop", - "index": 0, - "message": { - "role": "assistant", - "content": "There are no issues with your cluster" - } - }], - usage={ - "prompt_tokens": 0, # Integer - "completion_tokens": 0, - "total_tokens": 0 - } - ) + def completion( + self, + messages: List[Dict[str, Any]], + tools: Optional[List[Tool]] = [], + tool_choice: Optional[Union[str, dict]] = None, + response_format: Optional[Union[dict, Type[BaseModel]]] = None, + temperature: Optional[float] = None, + drop_params: Optional[bool] = None, + ) -> ModelResponse: + return ModelResponse( + choices=[ + { + "finish_reason": "stop", + "index": 0, + "message": { + "role": "assistant", + "content": "There are no issues with your cluster", + }, + } + ], + usage={ + "prompt_tokens": 0, # Integer + "completion_tokens": 0, + "total_tokens": 0, + }, + ) def ask_holmes(): - console = Console() - prompt = "what issues do I have in my cluster" system_prompt = load_and_render_prompt("builtin://generic_ask.jinja2") tool_executor = ToolExecutor(load_builtin_toolsets()) - ai = ToolCallingLLM( - tool_executor, - max_steps=10, - llm=MyCustomLLM() - ) + ai = ToolCallingLLM(tool_executor, max_steps=10, llm=MyCustomLLM()) response = ai.prompt_call(system_prompt, prompt) diff --git a/examples/custom_runbooks.yaml b/examples/custom_runbooks.yaml index b5d76e6..57e2e39 100644 --- a/examples/custom_runbooks.yaml +++ b/examples/custom_runbooks.yaml @@ -4,4 +4,4 @@ runbooks: instructions: > Analyze pod logs for errors and also read the monogodb logs Correlate between the two logs and try to find the root cause of the issue. 
- Based on the logs, report the session ids of impacted transactions \ No newline at end of file + Based on the logs, report the session ids of impacted transactions diff --git a/examples/custom_toolset.yaml b/examples/custom_toolset.yaml index 1f21411..a5b516f 100644 --- a/examples/custom_toolset.yaml +++ b/examples/custom_toolset.yaml @@ -11,7 +11,7 @@ toolsets: docs_url: "https://kubernetes.io/docs/home/" # Icon URL. Used for display in the UI icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s" - # Tags for categorizing toolsets, 'core' will be used for all Holmes features (both cli's commands and chats in UI). + # Tags for categorizing toolsets, 'core' will be used for all Holmes features (both cli's commands and chats in UI). # The 'cluster' tag is used for UI functionality, while 'cli' is for for command-line specific tools tags: - core @@ -24,7 +24,7 @@ toolsets: - name: "switch_cluster" # The LLM looks at this description when deciding what tools are relevant for each task description: "Used to switch between multiple kubernetes contexts(clusters)" - + # A templated bash command using Jinja2 templates # The LLM can only control parameters that you expose as template variables like {{ this_variable }} command: "kubectl config use-context {{ cluster_name }}" diff --git a/helm/holmes/Chart.yaml b/helm/holmes/Chart.yaml index 96bdcef..cb3e8e1 100644 --- a/helm/holmes/Chart.yaml +++ b/helm/holmes/Chart.yaml @@ -7,4 +7,4 @@ type: application # we use 0.0.1 as a placeholder for the version` because Helm wont allow `0.0.0` and we want to be able to run # `helm install` on development checkouts without updating this file. the version doesn't matter in that case anyway version: 0.0.1 -appVersion: 0.0.0 \ No newline at end of file +appVersion: 0.0.0 diff --git a/helm/holmes/templates/holmesgpt-service-account.yaml b/helm/holmes/templates/holmesgpt-service-account.yaml index baee4e9..64c3f55 100644 --- a/helm/holmes/templates/holmesgpt-service-account.yaml +++ b/helm/holmes/templates/holmesgpt-service-account.yaml @@ -229,4 +229,4 @@ subjects: - kind: ServiceAccount name: {{ .Release.Name }}-holmes-service-account namespace: {{ .Release.Namespace }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/holmes/.git_archival.json b/holmes/.git_archival.json index 8da9c6a..30e70ca 100644 --- a/holmes/.git_archival.json +++ b/holmes/.git_archival.json @@ -5,4 +5,3 @@ "refs": "$Format:%D$", "describe": "$Format:%(describe:tags=true,match=v[0-9]*)$" } - diff --git a/holmes/__init__.py b/holmes/__init__.py index 7ce3586..dc81558 100644 --- a/holmes/__init__.py +++ b/holmes/__init__.py @@ -4,7 +4,7 @@ import sys # For relative imports to work in Python 3.6 - see https://stackoverflow.com/a/49375740 -this_path = os.path.dirname(os.path.realpath(__file__)) +this_path = os.path.dirname(os.path.realpath(__file__)) sys.path.append(this_path) # This is patched by github actions during release @@ -19,28 +19,50 @@ def get_version() -> str: # we are running from an unreleased dev version try: # Get the latest git tag - tag = subprocess.check_output(["git", "describe", "--tags"], stderr=subprocess.STDOUT, cwd=this_path).decode().strip() + tag = ( + subprocess.check_output( + ["git", "describe", "--tags"], stderr=subprocess.STDOUT, cwd=this_path + ) + .decode() + .strip() + ) # Get the current branch name - branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.STDOUT, cwd=this_path).decode().strip() + 
branch = ( + subprocess.check_output( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + stderr=subprocess.STDOUT, + cwd=this_path, + ) + .decode() + .strip() + ) # Check if there are uncommitted changes - status = subprocess.check_output(["git", "status", "--porcelain"], stderr=subprocess.STDOUT, cwd=this_path).decode().strip() + status = ( + subprocess.check_output( + ["git", "status", "--porcelain"], + stderr=subprocess.STDOUT, + cwd=this_path, + ) + .decode() + .strip() + ) dirty = "-dirty" if status else "" return f"{tag}-{branch}{dirty}" - + except Exception: pass # we are running without git history, but we still might have git archival data (e.g. if we were pip installed) - archival_file_path = os.path.join(this_path, '.git_archival.json') + archival_file_path = os.path.join(this_path, ".git_archival.json") if os.path.exists(archival_file_path): try: - with open(archival_file_path, 'r') as f: + with open(archival_file_path, "r") as f: archival_data = json.load(f) return f"{archival_data['refs']}-{archival_data['hash-short']}" except Exception: pass - return f"dev-version" + return "dev-version" diff --git a/holmes/common/env_vars.py b/holmes/common/env_vars.py index 4414b79..a8e00d6 100644 --- a/holmes/common/env_vars.py +++ b/holmes/common/env_vars.py @@ -7,10 +7,14 @@ def load_bool(env_var, default: bool): return json.loads(s.lower()) -ENABLED_BY_DEFAULT_TOOLSETS = os.environ.get('ENABLED_BY_DEFAULT_TOOLSETS', 'kubernetes/core,kubernetes/logs,robusta,internet') -HOLMES_HOST = os.environ.get('HOLMES_HOST', '0.0.0.0') -HOLMES_PORT = int(os.environ.get('HOLMES_PORT', 5050)) -ROBUSTA_CONFIG_PATH = os.environ.get('ROBUSTA_CONFIG_PATH', "/etc/robusta/config/active_playbooks.yaml") +ENABLED_BY_DEFAULT_TOOLSETS = os.environ.get( + "ENABLED_BY_DEFAULT_TOOLSETS", "kubernetes/core,kubernetes/logs,robusta,internet" +) +HOLMES_HOST = os.environ.get("HOLMES_HOST", "0.0.0.0") +HOLMES_PORT = int(os.environ.get("HOLMES_PORT", 5050)) +ROBUSTA_CONFIG_PATH = os.environ.get( + "ROBUSTA_CONFIG_PATH", "/etc/robusta/config/active_playbooks.yaml" +) ROBUSTA_ACCOUNT_ID = os.environ.get("ROBUSTA_ACCOUNT_ID", "") STORE_URL = os.environ.get("STORE_URL", "") diff --git a/holmes/config.py b/holmes/config.py index ca6edd2..3fbc444 100644 --- a/holmes/config.py +++ b/holmes/config.py @@ -1,4 +1,3 @@ -from functools import lru_cache import logging import os import yaml @@ -6,10 +5,9 @@ from holmes.core.llm import LLM, DefaultLLM from typing import Any, Dict, List, Optional -from typing import List, Optional -from pydantic import FilePath, SecretStr, Field +from pydantic import FilePath, SecretStr from pydash.arrays import concat @@ -496,7 +494,7 @@ def merge_and_override_bultin_toolsets_with_toolsets_config( @classmethod def load_from_file(cls, config_file: Optional[str], **kwargs) -> "Config": if config_file is not None: - logging.debug(f"Loading config from file %s", config_file) + logging.debug("Loading config from file %s", config_file) config_from_file = load_model_from_file(cls, config_file) elif os.path.exists(DEFAULT_CONFIG_LOCATION): logging.debug( diff --git a/holmes/core/conversations.py b/holmes/core/conversations.py index 2481b4c..6ba5bb5 100644 --- a/holmes/core/conversations.py +++ b/holmes/core/conversations.py @@ -6,6 +6,7 @@ ConversationInvestigationResult, ToolCallConversationResult, IssueChatRequest, + WorkloadHealthChatRequest, ) from holmes.plugins.prompts import load_and_render_prompt from holmes.core.tool_calling_llm import ToolCallingLLM @@ -14,6 +15,7 @@ DEFAULT_TOOL_SIZE = 10000 + def 
calculate_tool_size( ai: ToolCallingLLM, messages_without_tools: list[dict], number_of_tools: int ) -> int: @@ -136,14 +138,30 @@ def handle_issue_conversation( return system_prompt -def build_issue_chat_messages(issue_chat_request: IssueChatRequest, ai: ToolCallingLLM, - global_instructions: Optional[Instructions] = None): +def build_issue_chat_messages( + issue_chat_request: IssueChatRequest, + ai: ToolCallingLLM, + global_instructions: Optional[Instructions] = None, +): """ This function generates a list of messages for issue conversation and ensures that the message sequence adheres to the model's context window limitations by truncating tool outputs as necessary before sending to llm. We always expect conversation_history to be passed in the openAI format which is supported by litellm and passed back by us. That's why we assume that first message in the conversation is system message and truncate tools for it. + + System prompt handling: + 1. For new conversations (empty conversation_history): + - Creates a new system prompt using generic_ask_for_issue_conversation.jinja2 template + - Includes investigation analysis, tools (if any), and issue type information + - If there are tools, calculates appropriate tool size and truncates tool outputs + + 2. For existing conversations: + - Preserves the conversation history + - Updates the first message (system prompt) with recalculated content + - Truncates tool outputs if necessary to fit context window + - Maintains the original conversation flow while ensuring context limits + Example structure of conversation history: conversation_history = [ # System prompt @@ -180,11 +198,12 @@ def build_issue_chat_messages(issue_chat_request: IssueChatRequest, ai: ToolCall tools_for_investigation = issue_chat_request.investigation_result.tools if not conversation_history or len(conversation_history) == 0: - user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions) + user_prompt = add_global_instructions_to_user_prompt( + user_prompt, global_instructions + ) number_of_tools_for_investigation = len(tools_for_investigation) if number_of_tools_for_investigation == 0: - system_prompt = load_and_render_prompt( template_path, { @@ -255,7 +274,9 @@ def build_issue_chat_messages(issue_chat_request: IssueChatRequest, ai: ToolCall }, ] - user_prompt = add_global_instructions_to_user_prompt(user_prompt, global_instructions) + user_prompt = add_global_instructions_to_user_prompt( + user_prompt, global_instructions + ) conversation_history.append( { @@ -310,9 +331,58 @@ def build_issue_chat_messages(issue_chat_request: IssueChatRequest, ai: ToolCall def build_chat_messages( - ask: str, conversation_history: Optional[List[Dict[str, str]]], ai: ToolCallingLLM, - global_instructions: Optional[Instructions] = None + ask: str, + conversation_history: Optional[List[Dict[str, str]]], + ai: ToolCallingLLM, + global_instructions: Optional[Instructions] = None, ) -> List[dict]: + """ + This function generates a list of messages for general chat conversation and ensures that the message sequence adheres to the model's context window limitations + by truncating tool outputs as necessary before sending to llm. + + We always expect conversation_history to be passed in the openAI format which is supported by litellm and passed back by us. + That's why we assume that first message in the conversation is system message and truncate tools for it. + + System prompt handling: + 1. 
For new conversations (empty conversation_history): + - Creates a new system prompt using generic_ask_conversation.jinja2 template + - Uses an empty template context (no specific analysis or tools required) + - Adds global instructions to the user prompt if provided + + 2. For existing conversations: + - Preserves the conversation history as is + - No need to update system prompt as it doesn't contain tool-specific content + - Only truncates tool messages if they exist in the conversation + - Maintains the original conversation flow while ensuring context limits + + Example structure of conversation history: + conversation_history = [ + # System prompt for general chat + {"role": "system", "content": "...."}, + # User message with a general question + {"role": "user", "content": "Can you analyze the logs from my application?"}, + # Assistant initiates a tool call + { + "role": "assistant", + "content": None, + "tool_call": { + "name": "fetch_application_logs", + "arguments": "{\"service\": \"backend\", \"time_range\": \"last_hour\"}" + } + }, + # Tool/Function response + { + "role": "tool", + "name": "fetch_application_logs", + "content": "{\"log_entries\": [\"Error in processing request\", \"Connection timeout\"]}" + }, + # Assistant's final response to the user + { + "role": "assistant", + "content": "I've analyzed your application logs and found some issues: there are error messages related to request processing and connection timeouts." + }, + ] + """ template_path = "builtin://generic_ask_conversation.jinja2" if not conversation_history or len(conversation_history) == 0: @@ -330,9 +400,9 @@ def build_chat_messages( }, ] return messages - + ask = add_global_instructions_to_user_prompt(ask, global_instructions) - + conversation_history.append( { "role": "user", @@ -354,3 +424,197 @@ def build_chat_messages( ) truncate_tool_messages(conversation_history, tool_size) return conversation_history + + +def build_workload_health_chat_messages( + workload_health_chat_request: WorkloadHealthChatRequest, + ai: ToolCallingLLM, + global_instructions: Optional[Instructions] = None, +): + """ + This function generates a list of messages for workload health conversation and ensures that the message sequence adheres to the model's context window limitations + by truncating tool outputs as necessary before sending to llm. + + We always expect conversation_history to be passed in the openAI format which is supported by litellm and passed back by us. + That's why we assume that first message in the conversation is system message and truncate tools for it. + + System prompt handling: + 1. For new conversations (empty conversation_history): + - Creates a new system prompt using kubernetes_workload_chat.jinja2 template + - Includes workload analysis, tools (if any), and resource information + - If there are tools, calculates appropriate tool size and truncates tool outputs + + 2. 
For existing conversations: + - Preserves the conversation history + - Updates the first message (system prompt) with recalculated content + - Truncates tool outputs if necessary to fit context window + - Maintains the original conversation flow while ensuring context limits + + Example structure of conversation history: + conversation_history = [ + # System prompt with workload analysis + {"role": "system", "content": "...."}, + # User message asking about workload health + {"role": "user", "content": "What's the current health status of my deployment?"}, + # Assistant initiates a tool call + { + "role": "assistant", + "content": None, + "tool_call": { + "name": "check_workload_metrics", + "arguments": "{\"namespace\": \"default\", \"workload\": \"my-deployment\"}" + } + }, + # Tool/Function response + { + "role": "tool", + "name": "check_workload_metrics", + "content": "{\"cpu_usage\": \"45%\", \"memory_usage\": \"60%\", \"status\": \"Running\"}" + }, + # Assistant's final response to the user + { + "role": "assistant", + "content": "Your deployment is running normally with CPU usage at 45% and memory usage at 60%." + }, + ] + """ + + template_path = "builtin://kubernetes_workload_chat.jinja2" + + conversation_history = workload_health_chat_request.conversation_history + user_prompt = workload_health_chat_request.ask + workload_analysis = workload_health_chat_request.workload_health_result.analysis + tools_for_workload = workload_health_chat_request.workload_health_result.tools + resource = workload_health_chat_request.resource + + if not conversation_history or len(conversation_history) == 0: + user_prompt = add_global_instructions_to_user_prompt( + user_prompt, global_instructions + ) + + number_of_tools_for_workload = len(tools_for_workload) + if number_of_tools_for_workload == 0: + system_prompt = load_and_render_prompt( + template_path, + { + "workload_analysis": workload_analysis, + "tools_called_for_workload": tools_for_workload, + "resource": resource, + }, + ) + messages = [ + { + "role": "system", + "content": system_prompt, + }, + { + "role": "user", + "content": user_prompt, + }, + ] + return messages + + template_context_without_tools = { + "workload_analysis": workload_analysis, + "tools_called_for_workload": None, + "resource": resource, + } + system_prompt_without_tools = load_and_render_prompt( + template_path, template_context_without_tools + ) + messages_without_tools = [ + { + "role": "system", + "content": system_prompt_without_tools, + }, + { + "role": "user", + "content": user_prompt, + }, + ] + tool_size = calculate_tool_size( + ai, messages_without_tools, number_of_tools_for_workload + ) + + truncated_workload_result_tool_calls = [ + ToolCallConversationResult( + name=tool.name, + description=tool.description, + output=tool.output[:tool_size], + ) + for tool in tools_for_workload + ] + + truncated_template_context = { + "workload_analysis": workload_analysis, + "tools_called_for_workload": truncated_workload_result_tool_calls, + "resource": resource, + } + system_prompt_with_truncated_tools = load_and_render_prompt( + template_path, truncated_template_context + ) + return [ + { + "role": "system", + "content": system_prompt_with_truncated_tools, + }, + { + "role": "user", + "content": user_prompt, + }, + ] + + user_prompt = add_global_instructions_to_user_prompt( + user_prompt, global_instructions + ) + + conversation_history.append( + { + "role": "user", + "content": user_prompt, + } + ) + number_of_tools = len(tools_for_workload) + len( + [message for message 
in conversation_history if message.get("role") == "tool"] + ) + + if number_of_tools == 0: + return conversation_history + + conversation_history_without_tools = [ + message for message in conversation_history if message.get("role") != "tool" + ] + template_context_without_tools = { + "workload_analysis": workload_analysis, + "tools_called_for_workload": None, + "resource": resource, + } + system_prompt_without_tools = load_and_render_prompt( + template_path, template_context_without_tools + ) + conversation_history_without_tools[0]["content"] = system_prompt_without_tools + + tool_size = calculate_tool_size( + ai, conversation_history_without_tools, number_of_tools + ) + + truncated_workload_result_tool_calls = [ + ToolCallConversationResult( + name=tool.name, description=tool.description, output=tool.output[:tool_size] + ) + for tool in tools_for_workload + ] + + template_context = { + "workload_analysis": workload_analysis, + "tools_called_for_workload": truncated_workload_result_tool_calls, + "resource": resource, + } + system_prompt_with_truncated_tools = load_and_render_prompt( + template_path, template_context + ) + conversation_history[0]["content"] = system_prompt_with_truncated_tools + + truncate_tool_messages(conversation_history, tool_size) + + return conversation_history diff --git a/holmes/core/investigation.py b/holmes/core/investigation.py index c7d7879..7dcb4aa 100644 --- a/holmes/core/investigation.py +++ b/holmes/core/investigation.py @@ -1,18 +1,17 @@ - -from typing import Optional -from rich.console import Console from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT from holmes.config import Config +from holmes.core.investigation_structured_output import process_response_into_sections from holmes.core.issue import Issue from holmes.core.models import InvestigateRequest, InvestigationResult from holmes.core.supabase_dal import SupabaseDal from holmes.utils.robusta import load_robusta_api_key -def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config): + +def investigate_issues( + investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config +): load_robusta_api_key(dal=dal, config=config) - context = dal.get_issue_data( - investigate_request.context.get("robusta_issue_id") - ) + context = dal.get_issue_data(investigate_request.context.get("robusta_issue_id")) resource_instructions = dal.get_resource_instructions( "alert", investigate_request.context.get("issue_type") @@ -36,13 +35,15 @@ def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal issue, prompt=investigate_request.prompt_template, post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT, - sections=investigate_request.sections, instructions=resource_instructions, - global_instructions=global_instructions + global_instructions=global_instructions, ) + + (text_response, sections) = process_response_into_sections(investigation.result) + return InvestigationResult( - analysis=investigation.result, - sections=investigation.sections, + analysis=text_response, + sections=sections, tool_calls=investigation.tool_calls or [], instructions=investigation.instructions, ) diff --git a/holmes/core/investigation_structured_output.py b/holmes/core/investigation_structured_output.py index 4038443..9c24465 100644 --- a/holmes/core/investigation_structured_output.py +++ b/holmes/core/investigation_structured_output.py @@ -1,24 +1,33 @@ -from typing import Any, Dict +from typing import Any, Dict, Optional, Tuple, Union +import json 
-DEFAULT_SECTIONS = { - "Alert Explanation": "1-2 sentences explaining the alert itself - note don't say \"The alert indicates a warning event related to a Kubernetes pod doing blah\" rather just say \"The pod XYZ did blah\" because that is what the user actually cares about", - "Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains", - "Next Steps": "what you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)", +from pydantic import RootModel + +InputSectionsDataType = Dict[str, str] + +OutputSectionsDataType = Optional[Dict[str, Union[str, None]]] + +SectionsData = RootModel[OutputSectionsDataType] + +DEFAULT_SECTIONS: InputSectionsDataType = { + "Alert Explanation": '1-2 sentences explaining the alert itself - note don\'t say "The alert indicates a warning event related to a Kubernetes pod doing blah" rather just say "The pod XYZ did blah" because that is what the user actually cares about', + "Investigation": "What you checked and found", + "Conclusions and Possible Root causes": "What conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains. Don't say root cause but 'possible root causes'. Be clear to distinguish between what you know for certain and what is a possible explanation", + "Next Steps": "What you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)", "Related logs": "Truncate and share the most relevant logs, especially if these explain the root cause. For example: \nLogs from pod robusta-holmes:\n```\n```\n. Always embed the surroundding +/- 5 log lines to any relevant logs. ", "App or Infra?": "Explain whether the issue is more likely an infrastructure or an application level issue and why you think that.", - "External links": "Provide links to external sources. Where to look when investigating this issue. For example provide links to relevant runbooks, etc. Add a short sentence describing each link." + "External links": "Provide links to external sources. Where to look when investigating this issue. For example provide links to relevant runbooks, etc. 
Add a short sentence describing each link.", } -def get_output_format_for_investigation(sections: Dict[str, str]) -> Dict[str, Any]: +def get_output_format_for_investigation( + sections: InputSectionsDataType, +) -> Dict[str, Any]: properties = {} required_fields = [] for title, description in sections.items(): - properties[title] = { - "type": ["string", "null"], - "description": description - } + properties[title] = {"type": ["string", "null"], "description": description} required_fields.append(title) schema = { @@ -26,19 +35,46 @@ def get_output_format_for_investigation(sections: Dict[str, str]) -> Dict[str, A "type": "object", "required": required_fields, "properties": properties, - "additionalProperties": False + "additionalProperties": False, } - output_format = { "type": "json_schema", "json_schema": { "name": "InvestigationResult", "schema": schema, "strict": False} } + output_format = { + "type": "json_schema", + "json_schema": { + "name": "InvestigationResult", + "schema": schema, + "strict": False, + }, + } return output_format -def combine_sections(sections: Any) -> str: - if isinstance(sections, dict): - content = '' - for section_title, section_content in sections.items(): - if section_content: - # content = content + f'\n# {" ".join(section_title.split("_")).title()}\n{section_content}' - content = content + f'\n# {section_title}\n{section_content}\n' - return content - return f"{sections}" + +def combine_sections(sections: Dict) -> str: + content = "" + for section_title, section_content in sections.items(): + if section_content: + content = content + f"\n# {section_title}\n{section_content}\n" + return content + + +def process_response_into_sections(response: Any) -> Tuple[str, OutputSectionsDataType]: + if isinstance(response, dict): + # No matter if the result is already structured, we want to go through the code below to validate the JSON + response = json.dumps(response) + + if not isinstance(response, str): + # if it's not a string, we make it so as it'll be parsed later + response = str(response) + + try: + parsed_json = json.loads(response) + # TODO: force dict values into a string would make this more resilient as SectionsData only accept none/str as values + sections = SectionsData(root=parsed_json).root + if sections: + combined = combine_sections(sections) + return (combined, sections) + except Exception: + pass + + return (response, None) diff --git a/holmes/core/issue.py b/holmes/core/issue.py index f811cd0..71dae44 100644 --- a/holmes/core/issue.py +++ b/holmes/core/issue.py @@ -1,29 +1,28 @@ -from datetime import datetime from strenum import StrEnum from typing import Optional from pydantic import BaseModel, ConfigDict -class IssueStatus (StrEnum): +class IssueStatus(StrEnum): OPEN = "open" CLOSED = "closed" # TODO: look at finding in Robusta class Issue(BaseModel): - model_config = ConfigDict(extra='forbid', validate_default=True) + model_config = ConfigDict(extra="forbid", validate_default=True) # Identifier for the issue - source + issue_id should be unique - id: str + id: str - # Name of the issue - not necessarily unique - name: str + # Name of the issue - not necessarily unique + name: str # Source of the issue - e.g. jira source_type: str - # Identifier for the instance of the source - e.g. Jira project key + # Identifier for the instance of the source - e.g. Jira project key source_instance_id: str # Link to the issue, when available @@ -31,12 +30,12 @@ class Issue(BaseModel): # Raw object from the source - e.g. 
a dict from the source's API raw: dict = None - + # these fields are all optional and used for visual presentation of the issue # there may not be a 1:1 mapping between source fields and these fields, which is OK # e.g. jira issues can have arbitrary statuses like 'closed' and 'resolved' whereas for presentation sake # we want to classify as open/closed so we can color the issue red/green - # if these fields are not present, an LLM may be used to guess them + # if these fields are not present, an LLM may be used to guess them presentation_status: Optional[IssueStatus] = None # Markdown with key metadata about the issue. Suggested format is several lines each styled as "*X*: Y" and separated by \n @@ -53,4 +52,3 @@ class Issue(BaseModel): # created_at: Optional[datetime] = None # Timestamp of when the issue was created # updated_at: Optional[datetime] = None # Timestamp of when the issue was last updated # metadata: Optional[dict] = None # All additional metadata from the source (can be hierchical - e.g. dicts in dicts - \ No newline at end of file diff --git a/holmes/core/llm.py b/holmes/core/llm.py index a790222..e0f9f4d 100644 --- a/holmes/core/llm.py +++ b/holmes/core/llm.py @@ -1,10 +1,8 @@ - import logging from abc import abstractmethod from typing import Any, Dict, List, Optional, Type, Union from litellm.types.utils import ModelResponse -from pydantic.types import SecretStr from holmes.core.tools import Tool from pydantic import BaseModel @@ -19,11 +17,12 @@ def environ_get_safe_int(env_var, default="0"): except ValueError: return int(default) + OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN") OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE") -class LLM: +class LLM: @abstractmethod def get_context_window_size(self) -> int: pass @@ -37,21 +36,24 @@ def count_tokens_for_message(self, messages: list[dict]) -> int: pass @abstractmethod - def completion(self, messages: List[Dict[str, Any]], tools: Optional[List[Tool]] = [], tool_choice: Optional[Union[str, dict]] = None, response_format: Optional[Union[dict, Type[BaseModel]]] = None, temperature:Optional[float] = None, drop_params: Optional[bool] = None) -> ModelResponse: + def completion( + self, + messages: List[Dict[str, Any]], + tools: Optional[List[Tool]] = [], + tool_choice: Optional[Union[str, dict]] = None, + response_format: Optional[Union[dict, Type[BaseModel]]] = None, + temperature: Optional[float] = None, + drop_params: Optional[bool] = None, + ) -> ModelResponse: pass class DefaultLLM(LLM): - model: str api_key: Optional[str] base_url: Optional[str] - def __init__( - self, - model: str, - api_key: Optional[str] = None - ): + def __init__(self, model: str, api_key: Optional[str] = None): self.model = model self.api_key = api_key self.base_url = None @@ -61,7 +63,7 @@ def __init__( self.check_llm(self.model, self.api_key) - def check_llm(self, model:str, api_key:Optional[str]): + def check_llm(self, model: str, api_key: Optional[str]): logging.debug(f"Checking LiteLLM model {model}") # TODO: this WAS a hack to get around the fact that we can't pass in an api key to litellm.validate_environment # so without this hack it always complains that the environment variable for the api key is missing @@ -78,26 +80,28 @@ def check_llm(self, model:str, api_key:Optional[str]): # Required variables for WatsonX: # - WATSONX_URL: Base URL of your WatsonX instance (required) # - WATSONX_APIKEY or WATSONX_TOKEN: IBM Cloud API key or IAM auth token (one is required) - model_requirements 
= {'missing_keys': [], 'keys_in_environment': True} + model_requirements = {"missing_keys": [], "keys_in_environment": True} if api_key: os.environ["WATSONX_APIKEY"] = api_key - if not "WATSONX_URL" in os.environ: - model_requirements['missing_keys'].append("WATSONX_URL") - model_requirements['keys_in_environment'] = False - if not "WATSONX_APIKEY" in os.environ and not "WATSONX_TOKEN" in os.environ: - model_requirements['missing_keys'].extend(["WATSONX_APIKEY", "WATSONX_TOKEN"]) - model_requirements['keys_in_environment'] = False + if "WATSONX_URL" not in os.environ: + model_requirements["missing_keys"].append("WATSONX_URL") + model_requirements["keys_in_environment"] = False + if "WATSONX_APIKEY" not in os.environ and "WATSONX_TOKEN" not in os.environ: + model_requirements["missing_keys"].extend( + ["WATSONX_APIKEY", "WATSONX_TOKEN"] + ) + model_requirements["keys_in_environment"] = False # WATSONX_PROJECT_ID is required because we don't let user pass it to completion call directly - if not "WATSONX_PROJECT_ID" in os.environ: - model_requirements['missing_keys'].append("WATSONX_PROJECT_ID") - model_requirements['keys_in_environment'] = False + if "WATSONX_PROJECT_ID" not in os.environ: + model_requirements["missing_keys"].append("WATSONX_PROJECT_ID") + model_requirements["keys_in_environment"] = False # https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces # using custom watsonx deployments might require to set WATSONX_DEPLOYMENT_SPACE_ID env if "watsonx/deployment/" in self.model: logging.warning( - "Custom WatsonX deployment detected. You may need to set the WATSONX_DEPLOYMENT_SPACE_ID " - "environment variable for proper functionality. For more information, refer to the documentation: " - "https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces" + "Custom WatsonX deployment detected. You may need to set the WATSONX_DEPLOYMENT_SPACE_ID " + "environment variable for proper functionality. 
For more information, refer to the documentation: " + "https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces" ) else: # @@ -107,8 +111,9 @@ def check_llm(self, model:str, api_key:Optional[str]): model_requirements = litellm.validate_environment(model=model) if not model_requirements["keys_in_environment"]: - raise Exception(f"model {model} requires the following environment variables: {model_requirements['missing_keys']}") - + raise Exception( + f"model {model} requires the following environment variables: {model_requirements['missing_keys']}" + ) def _strip_model_prefix(self) -> str: """ @@ -117,36 +122,49 @@ def _strip_model_prefix(self) -> str: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json """ model_name = self.model - if model_name.startswith('openai/'): - model_name = model_name[len('openai/'):] # Strip the 'openai/' prefix - elif model_name.startswith('bedrock/'): - model_name = model_name[len('bedrock/'):] # Strip the 'bedrock/' prefix - elif model_name.startswith('vertex_ai/'): - model_name = model_name[len('vertex_ai/'):] # Strip the 'vertex_ai/' prefix + if model_name.startswith("openai/"): + model_name = model_name[len("openai/") :] # Strip the 'openai/' prefix + elif model_name.startswith("bedrock/"): + model_name = model_name[len("bedrock/") :] # Strip the 'bedrock/' prefix + elif model_name.startswith("vertex_ai/"): + model_name = model_name[ + len("vertex_ai/") : + ] # Strip the 'vertex_ai/' prefix return model_name - # this unfortunately does not seem to work for azure if the deployment name is not a well-known model name - #if not litellm.supports_function_calling(model=model): + # if not litellm.supports_function_calling(model=model): # raise Exception(f"model {model} does not support function calling. 
You must use HolmesGPT with a model that supports function calling.") + def get_context_window_size(self) -> int: if OVERRIDE_MAX_CONTENT_SIZE: - logging.debug(f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}") + logging.debug( + f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}" + ) return OVERRIDE_MAX_CONTENT_SIZE model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix()) try: - return litellm.model_cost[model_name]['max_input_tokens'] + return litellm.model_cost[model_name]["max_input_tokens"] except Exception: - logging.warning(f"Couldn't find model's name {model_name} in litellm's model list, fallback to 128k tokens for max_input_tokens") + logging.warning( + f"Couldn't find model's name {model_name} in litellm's model list, fallback to 128k tokens for max_input_tokens" + ) return 128000 def count_tokens_for_message(self, messages: list[dict]) -> int: - return litellm.token_counter(model=self.model, - messages=messages) + return litellm.token_counter(model=self.model, messages=messages) - def completion(self, messages: List[Dict[str, Any]], tools: Optional[List[Tool]] = [], tool_choice: Optional[Union[str, dict]] = None, response_format: Optional[Union[dict, Type[BaseModel]]] = None, temperature:Optional[float] = None, drop_params: Optional[bool] = None) -> ModelResponse: + def completion( + self, + messages: List[Dict[str, Any]], + tools: Optional[List[Tool]] = [], + tool_choice: Optional[Union[str, dict]] = None, + response_format: Optional[Union[dict, Type[BaseModel]]] = None, + temperature: Optional[float] = None, + drop_params: Optional[bool] = None, + ) -> ModelResponse: result = litellm.completion( model=self.model, api_key=self.api_key, @@ -156,7 +174,7 @@ def completion(self, messages: List[Dict[str, Any]], tools: Optional[List[Tool]] base_url=self.base_url, temperature=temperature, response_format=response_format, - drop_params=drop_params + drop_params=drop_params, ) if isinstance(result, ModelResponse): @@ -166,12 +184,16 @@ def completion(self, messages: List[Dict[str, Any]], tools: Optional[List[Tool]] def get_maximum_output_token(self) -> int: if OVERRIDE_MAX_OUTPUT_TOKEN: - logging.debug(f"Using OVERRIDE_MAX_OUTPUT_TOKEN {OVERRIDE_MAX_OUTPUT_TOKEN}") + logging.debug( + f"Using OVERRIDE_MAX_OUTPUT_TOKEN {OVERRIDE_MAX_OUTPUT_TOKEN}" + ) return OVERRIDE_MAX_OUTPUT_TOKEN model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix()) try: - return litellm.model_cost[model_name]['max_output_tokens'] + return litellm.model_cost[model_name]["max_output_tokens"] except Exception: - logging.warning(f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens") + logging.warning( + f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens" + ) return 4096 diff --git a/holmes/core/models.py b/holmes/core/models.py index 71683cf..0520647 100644 --- a/holmes/core/models.py +++ b/holmes/core/models.py @@ -1,3 +1,4 @@ +from holmes.core.investigation_structured_output import InputSectionsDataType from holmes.core.tool_calling_llm import ToolCallResult from typing import Optional, List, Dict, Any, Union from pydantic import BaseModel, model_validator @@ -21,7 +22,7 @@ class InvestigateRequest(BaseModel): include_tool_calls: bool = False include_tool_call_results: bool = False prompt_template: str = "builtin://generic_investigation.jinja2" - sections: Optional[Dict[str, str]] = None + sections: 
Optional[InputSectionsDataType] = None # TODO in the future # response_handler: ... @@ -93,10 +94,16 @@ class ChatRequestBaseModel(BaseModel): @model_validator(mode="before") def check_first_item_role(cls, values): conversation_history = values.get("conversation_history") - if conversation_history and isinstance(conversation_history, list) and len(conversation_history)>0: + if ( + conversation_history + and isinstance(conversation_history, list) + and len(conversation_history) > 0 + ): first_item = conversation_history[0] if not first_item.get("role") == "system": - raise ValueError("The first item in conversation_history must contain 'role': 'system'") + raise ValueError( + "The first item in conversation_history must contain 'role': 'system'" + ) return values @@ -126,3 +133,14 @@ class ChatResponse(BaseModel): analysis: str conversation_history: list[dict] tool_calls: Optional[List[ToolCallResult]] = [] + + +class WorkloadHealthInvestigationResult(BaseModel): + analysis: Optional[str] = None + tools: Optional[List[ToolCallConversationResult]] = [] + + +class WorkloadHealthChatRequest(ChatRequestBaseModel): + ask: str + workload_health_result: WorkloadHealthInvestigationResult + resource: dict diff --git a/holmes/core/performance_timing.py b/holmes/core/performance_timing.py index 115c5e6..3c5b07b 100644 --- a/holmes/core/performance_timing.py +++ b/holmes/core/performance_timing.py @@ -7,6 +7,7 @@ LOG_PERFORMANCE, ) + class PerformanceTiming: def __init__(self, name): if not LOG_PERFORMANCE: @@ -40,10 +41,13 @@ def end(self): self.ended = True current_time = time.time() time_since_start = int((current_time - self.start_time) * 1000) - message = f'{self.name}(TOTAL) {time_since_start}ms' + message = f"{self.name}(TOTAL) {time_since_start}ms" logging.info(message) for label, time_since_last, time_since_start in self.timings: - logging.info(f'\t{self.name}({label}) +{time_since_last}ms {time_since_start}ms') + logging.info( + f"\t{self.name}({label}) +{time_since_last}ms {time_since_start}ms" + ) + def log_function_timing(func): @wraps(func) @@ -54,4 +58,5 @@ def function_timing_wrapper(*args, **kwargs): total_time = int((end_time - start_time) * 1000) logging.info(f'Function "{func.__name__}()" took {total_time}ms') return result + return function_timing_wrapper diff --git a/holmes/core/runbooks.py b/holmes/core/runbooks.py index ea277e7..0f2b60d 100644 --- a/holmes/core/runbooks.py +++ b/holmes/core/runbooks.py @@ -13,10 +13,14 @@ def get_instructions_for_issue(self, issue: Issue) -> List[str]: for runbook in self.runbooks: if runbook.match.issue_id and not runbook.match.issue_id.match(issue.id): continue - if runbook.match.issue_name and not runbook.match.issue_name.match(issue.name): + if runbook.match.issue_name and not runbook.match.issue_name.match( + issue.name + ): continue - if runbook.match.source and not runbook.match.source.match(issue.source_type): + if runbook.match.source and not runbook.match.source.match( + issue.source_type + ): continue instructions.append(runbook.instructions) - + return instructions diff --git a/holmes/core/supabase_dal.py b/holmes/core/supabase_dal.py index e62a0e2..323421e 100644 --- a/holmes/core/supabase_dal.py +++ b/holmes/core/supabase_dal.py @@ -52,13 +52,16 @@ class RobustaToken(BaseModel): class SupabaseDal: - def __init__(self): self.enabled = self.__init_config() if not self.enabled: - logging.info("Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible") + logging.info( + "Not connecting to 
Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible" + ) return - logging.info(f"Initializing Robusta platform connection for account {self.account_id}") + logging.info( + f"Initializing Robusta platform connection for account {self.account_id}" + ) options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS) self.client = create_client(self.url, self.api_key, options) self.user_id = self.sign_in() @@ -69,6 +72,7 @@ def __init__(self): def patch_postgrest_execute(self): logging.info("Patching postgres execute") + # This is somewhat hacky. def execute_with_retry(_self): try: @@ -77,7 +81,9 @@ def execute_with_retry(_self): message = exc.message or "" if exc.code == "PGRST301" or "expired" in message.lower(): # JWT expired. Sign in again and retry the query - logging.error("JWT token expired/invalid, signing in to Supabase again") + logging.error( + "JWT token expired/invalid, signing in to Supabase again" + ) self.sign_in() # update the session to the new one, after re-sign in _self.session = self.client.postgrest.session @@ -99,11 +105,11 @@ def __load_robusta_config() -> Optional[RobustaToken]: return RobustaToken(**json.loads(decoded)) except binascii.Error: raise Exception( - f"binascii.Error encountered. The Robusta UI token is not a valid base64." + "binascii.Error encountered. The Robusta UI token is not a valid base64." ) except json.JSONDecodeError: raise Exception( - f"json.JSONDecodeError encountered. The Robusta UI token could not be parsed as JSON after being base64 decoded." + "json.JSONDecodeError encountered. The Robusta UI token could not be parsed as JSON after being base64 decoded." ) if not os.path.exists(config_file_path): @@ -119,28 +125,28 @@ def __load_robusta_config() -> Optional[RobustaToken]: token = conf["robusta_sink"].get("token") if not token: raise Exception( - f"No robusta token provided to Holmes. " - f"Please set a valid Robusta UI token. " - f"See https://docs.robusta.dev/master/configuration/ai-analysis.html#choosing-and-configuring-an-ai-provider for instructions." + "No robusta token provided to Holmes. " + "Please set a valid Robusta UI token. " + "See https://docs.robusta.dev/master/configuration/ai-analysis.html#choosing-and-configuring-an-ai-provider for instructions." ) if "{{" in token: raise ValueError( - f"The robusta token configured for Holmes appears to be a templating placeholder (e.g. `{{ env.UI_SINK_TOKEN }}`). " - f"Ensure your Helm chart or environment variables are set correctly. " - f"If you store the token in a secret, you must also pass " - f"the environment variable ROBUSTA_UI_TOKEN to Holmes. " - f"See https://docs.robusta.dev/master/configuration/ai-analysis.html#configuring-holmesgpt-access-to-saas-data for instructions." + "The robusta token configured for Holmes appears to be a templating placeholder (e.g. `{ env.UI_SINK_TOKEN }`). " + "Ensure your Helm chart or environment variables are set correctly. " + "If you store the token in a secret, you must also pass " + "the environment variable ROBUSTA_UI_TOKEN to Holmes. " + "See https://docs.robusta.dev/master/configuration/ai-analysis.html#configuring-holmesgpt-access-to-saas-data for instructions." ) try: decoded = base64.b64decode(token) return RobustaToken(**json.loads(decoded)) except binascii.Error: raise Exception( - f"binascii.Error encountered. The robusta token provided to Holmes is not a valid base64." + "binascii.Error encountered. The robusta token provided to Holmes is not a valid base64." 
) except json.JSONDecodeError: raise Exception( - f"json.JSONDecodeError encountered. The Robusta token provided to Holmes could not be parsed as JSON after being base64 decoded." + "json.JSONDecodeError encountered. The Robusta token provided to Holmes could not be parsed as JSON after being base64 decoded." ) return None @@ -166,8 +172,12 @@ def __init_config(self) -> bool: def sign_in(self) -> str: logging.info("Supabase DAL login") - res = self.client.auth.sign_in_with_password({"email": self.email, "password": self.password}) - self.client.auth.set_session(res.session.access_token, res.session.refresh_token) + res = self.client.auth.sign_in_with_password( + {"email": self.email, "password": self.password} + ) + self.client.auth.set_session( + res.session.access_token, res.session.refresh_token + ) self.client.postgrest.auth(res.session.access_token) return res.user.id @@ -181,11 +191,10 @@ def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]: issue_data = None try: issue_response = ( - self.client - .table(ISSUES_TABLE) - .select("*") - .filter("id", "eq", issue_id) - .execute() + self.client.table(ISSUES_TABLE) + .select("*") + .filter("id", "eq", issue_id) + .execute() ) if len(issue_response.data): issue_data = issue_response.data[0] @@ -196,25 +205,29 @@ def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]: if not issue_data: return None evidence = ( - self.client - .table(EVIDENCE_TABLE) + self.client.table(EVIDENCE_TABLE) .select("*") .filter("issue_id", "eq", issue_id) .execute() ) enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"} - data = [enrich for enrich in evidence.data if enrich.get("enrichment_type") not in enrichment_blacklist] + data = [ + enrich + for enrich in evidence.data + if enrich.get("enrichment_type") not in enrichment_blacklist + ] issue_data["evidence"] = data return issue_data - def get_resource_instructions(self, type: str, name: Optional[str]) -> Optional[ResourceInstructions]: + def get_resource_instructions( + self, type: str, name: Optional[str] + ) -> Optional[ResourceInstructions]: if not self.enabled or not name: return None res = ( - self.client - .table(RUNBOOKS_TABLE) + self.client.table(RUNBOOKS_TABLE) .select("runbook") .eq("account_id", self.account_id) .eq("subject_type", type) @@ -232,7 +245,9 @@ def get_resource_instructions(self, type: str, name: Optional[str]) -> Optional[ if url: documents.append(ResourceInstructionDocument(url=url)) else: - logging.warning(f"Unsupported runbook for subject_type={type} / subject_name={name}: {document_data}") + logging.warning( + f"Unsupported runbook for subject_type={type} / subject_name={name}: {document_data}" + ) return ResourceInstructions(instructions=instructions, documents=documents) @@ -241,20 +256,19 @@ def get_resource_instructions(self, type: str, name: Optional[str]) -> Optional[ def get_global_instructions_for_account(self) -> Optional[Instructions]: try: res = ( - self.client - .table(RUNBOOKS_TABLE) - .select("runbook") - .eq("account_id", self.account_id) - .eq("subject_type", "Account") - .execute() - ) + self.client.table(RUNBOOKS_TABLE) + .select("runbook") + .eq("account_id", self.account_id) + .eq("subject_type", "Account") + .execute() + ) if res.data: instructions = res.data[0].get("runbook").get("instructions") return Instructions(instructions=instructions) except Exception: logging.exception("Failed to fetch global instructions", exc_info=True) - + return None def create_session_token(self) -> str: @@ -265,14 +279,17 @@ def 
create_session_token(self) -> str: "user_id": self.user_id, "token": token, "type": "HOLMES", - }, returning=ReturnMethod.minimal # must use this, because the user cannot read this table + }, + returning=ReturnMethod.minimal, # must use this, because the user cannot read this table ).execute() return token def get_ai_credentials(self) -> Tuple[str, str]: if not self.enabled: - raise Exception("You're trying to use ROBUSTA_AI, but Cannot get credentials for ROBUSTA_AI. Store not initialized.") - + raise Exception( + "You're trying to use ROBUSTA_AI, but Cannot get credentials for ROBUSTA_AI. Store not initialized." + ) + with self.lock: session_token = self.token_cache.get("session_token") if not session_token: @@ -296,8 +313,7 @@ def get_workload_issues(self, resource: dict, since_hours: float) -> List[str]: logging.debug(f"getting issues for workload {svc_key}") try: res = ( - self.client - .table(ISSUES_TABLE) + self.client.table(ISSUES_TABLE) .select("id, creation_date, aggregation_key") .eq("account_id", self.account_id) .eq("cluster", cluster) @@ -317,73 +333,81 @@ def get_workload_issues(self, resource: dict, since_hours: float) -> List[str]: unique_issues: list[str] = list(issue_dict.values()) res = ( - self.client - .table(EVIDENCE_TABLE) + self.client.table(EVIDENCE_TABLE) .select("data, enrichment_type") .in_("issue_id", unique_issues) .execute() ) enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"} - data = [evidence.get("data") for evidence in res.data if evidence.get("enrichment_type") not in enrichment_blacklist] + data = [ + evidence.get("data") + for evidence in res.data + if evidence.get("enrichment_type") not in enrichment_blacklist + ] return data - except: + except Exception: logging.exception("failed to fetch workload issues data", exc_info=True) return [] def upsert_holmes_status(self, holmes_status_data: dict) -> None: if not self.enabled: - logging.info("Robusta store not initialized. Skipping upserting holmes status.") + logging.info( + "Robusta store not initialized. Skipping upserting holmes status." + ) return - + updated_at = datetime.now().isoformat() try: - res = ( - self.client - .table(HOLMES_STATUS_TABLE) - .upsert({ - "account_id": self.account_id, - "updated_at": updated_at, - **holmes_status_data, - }, - on_conflict='account_id, cluster_id') + ( + self.client.table(HOLMES_STATUS_TABLE) + .upsert( + { + "account_id": self.account_id, + "updated_at": updated_at, + **holmes_status_data, + }, + on_conflict="account_id, cluster_id", + ) .execute() ) except Exception as error: - logging.error(f"Error happened during upserting holmes status: {error}", - exc_info=True) + logging.error( + f"Error happened during upserting holmes status: {error}", exc_info=True + ) return None - + def sync_toolsets(self, toolsets: list[dict], cluster_name: str) -> None: if not toolsets: logging.warning("No toolsets were provided for synchronization.") return - + if not self.enabled: - logging.info("Robusta store not initialized. Skipping sync holmes toolsets.") + logging.info( + "Robusta store not initialized. Skipping sync holmes toolsets." 
+ ) return - - provided_toolset_names = [toolset['toolset_name'] for toolset in toolsets] - + + provided_toolset_names = [toolset["toolset_name"] for toolset in toolsets] + try: self.client.table(HOLMES_TOOLSET).upsert( - toolsets, - on_conflict='account_id, cluster_id, toolset_name' + toolsets, on_conflict="account_id, cluster_id, toolset_name" ).execute() logging.info("Toolsets upserted successfully.") - - self.client.table(HOLMES_TOOLSET).delete().eq("account_id", - self.account_id).eq( - 'cluster_id', cluster_name).not_.in_( - 'toolset_name', provided_toolset_names + self.client.table(HOLMES_TOOLSET).delete().eq( + "account_id", self.account_id + ).eq("cluster_id", cluster_name).not_.in_( + "toolset_name", provided_toolset_names ).execute() logging.info("Toolsets synchronized successfully.") except Exception as e: - logging.exception(f"An error occurred during toolset synchronization: {e}", - exc_info=True) + logging.exception( + f"An error occurred during toolset synchronization: {e}", exc_info=True + ) diff --git a/holmes/core/tool_calling_llm.py b/holmes/core/tool_calling_llm.py index b0c1a44..75353ae 100644 --- a/holmes/core/tool_calling_llm.py +++ b/holmes/core/tool_calling_llm.py @@ -3,7 +3,11 @@ import logging import textwrap from typing import List, Optional, Dict, Type, Union -from holmes.core.investigation_structured_output import DEFAULT_SECTIONS, get_output_format_for_investigation, combine_sections +from holmes.core.investigation_structured_output import ( + DEFAULT_SECTIONS, + InputSectionsDataType, + get_output_format_for_investigation, +) from holmes.core.performance_timing import PerformanceTiming from holmes.utils.tags import format_tags_in_string, parse_messages_tags from holmes.plugins.prompts import load_and_render_prompt @@ -30,11 +34,9 @@ class ToolCallResult(BaseModel): class LLMResult(BaseModel): tool_calls: Optional[List[ToolCallResult]] = None - sections: Optional[Dict[str, Union[str, None]]] = None result: Optional[str] = None unprocessed_result: Optional[str] = None instructions: List[str] = [] - # TODO: clean up these two prompt: Optional[str] = None messages: Optional[List[dict]] = None @@ -57,12 +59,13 @@ class ResourceInstructionDocument(BaseModel): class Instructions(BaseModel): instructions: List[str] = [] + class ResourceInstructions(BaseModel): instructions: List[str] = [] documents: List[ResourceInstructionDocument] = [] -class ToolCallingLLM: +class ToolCallingLLM: llm: LLM def __init__(self, tool_executor: ToolExecutor, max_steps: int, llm: LLM): @@ -91,7 +94,6 @@ def messages_call( post_process_prompt: Optional[str] = None, response_format: Optional[Union[dict, Type[BaseModel]]] = None, ) -> LLMResult: - return self.call(messages, post_process_prompt, response_format) def call( @@ -124,7 +126,6 @@ def call( ) perf_timing.measure("truncate_messages_to_fit_context") - logging.debug(f"sending messages={messages}\n\ntools={tools}") try: full_response = self.llm.completion( @@ -140,9 +141,8 @@ def call( perf_timing.measure("llm.completion") # catch a known error that occurs with Azure and replace the error message with something more obvious to the user except BadRequestError as e: - if ( - "Unrecognized request arguments supplied: tool_choice, tools" - in str(e) + if "Unrecognized request arguments supplied: tool_choice, tools" in str( + e ): raise Exception( "The Azure model you chose is not supported. Model version 1106 and higher required." 
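The hunks on either side of this point remove the ad-hoc JSON/section parsing from ToolCallingLLM.call() and instead hand the LLM a response_format built from the requested sections. A minimal caller-side sketch of that flow, using only names visible in this diff (prompt_call, DEFAULT_SECTIONS, get_output_format_for_investigation); the wrapper function itself is illustrative, not part of the change:

# Sketch only, assuming `ai` is a ToolCallingLLM-like object exposing the
# prompt_call(system_prompt, user_prompt, post_processing_prompt, response_format=...)
# call used later in this diff.
from holmes.core.investigation_structured_output import (
    DEFAULT_SECTIONS,
    get_output_format_for_investigation,
)

def run_structured_investigation(ai, system_prompt: str, user_prompt: str):
    # Build a structured-output format for the default sections and let the
    # LLM fill it in, rather than parsing sections out of free text afterwards.
    response_format = get_output_format_for_investigation(DEFAULT_SECTIONS)
    return ai.prompt_call(
        system_prompt,
        user_prompt,
        None,  # no post-processing prompt in this sketch
        response_format=response_format,
    )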
@@ -159,22 +159,12 @@ def call( tools_to_call = getattr(response_message, "tool_calls", None) text_response = response_message.content - sections:Optional[Dict[str, str]] = None - if isinstance(text_response, str): - try: - parsed_json = json.loads(text_response) - text_response = parsed_json - except json.JSONDecodeError: - pass - if not isinstance(text_response, str): - sections = text_response - text_response = combine_sections(sections) if not tools_to_call: # For chatty models post process and summarize the result # this only works for calls where user prompt is explicitly passed through if post_process_prompt and user_prompt: - logging.info(f"Running post processing on investigation.") + logging.info("Running post processing on investigation.") raw_response = text_response post_processed_response = self._post_processing_call( prompt=user_prompt, @@ -185,7 +175,6 @@ def call( perf_timing.end() return LLMResult( result=post_processed_response, - sections=sections, unprocessed_result=raw_response, tool_calls=tool_calls, prompt=json.dumps(messages, indent=2), @@ -195,7 +184,6 @@ def call( perf_timing.end() return LLMResult( result=text_response, - sections=sections, tool_calls=tool_calls, prompt=json.dumps(messages, indent=2), messages=messages, @@ -231,7 +219,6 @@ def _invoke_tool( logging.warning( f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}" ) - tool_call_id = tool_to_call.id tool = self.tool_executor.get_tool_by_name(tool_name) @@ -358,7 +345,7 @@ def investigate( console: Optional[Console] = None, global_instructions: Optional[Instructions] = None, post_processing_prompt: Optional[str] = None, - sections: Optional[Dict[str, str]] = None + sections: Optional[InputSectionsDataType] = None, ) -> LLMResult: runbooks = self.runbook_manager.get_instructions_for_issue(issue) @@ -376,9 +363,11 @@ def investigate( console.print( "[bold]No runbooks found for this issue. Using default behaviour. 
(Add runbooks to guide the investigation.)[/bold]" ) - system_prompt = load_and_render_prompt(prompt, {"issue": issue, "sections": sections}) + system_prompt = load_and_render_prompt( + prompt, {"issue": issue, "sections": sections} + ) - if instructions != None and len(instructions.documents) > 0: + if instructions is not None and len(instructions.documents) > 0: docPrompts = [] for document in instructions.documents: docPrompts.append( @@ -393,7 +382,11 @@ def investigate( user_prompt = f'My instructions to check \n"""{user_prompt}"""' - if global_instructions and global_instructions.instructions and len(global_instructions.instructions[0]) > 0: + if ( + global_instructions + and global_instructions.instructions + and len(global_instructions.instructions[0]) > 0 + ): user_prompt += f"\n\nGlobal Instructions (use only if relevant): {global_instructions.instructions[0]}\n" user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}" @@ -403,6 +396,11 @@ def investigate( ) logging.debug("Rendered user prompt:\n%s", textwrap.indent(user_prompt, " ")) - res = self.prompt_call(system_prompt, user_prompt, post_processing_prompt, response_format=get_output_format_for_investigation(sections)) + res = self.prompt_call( + system_prompt, + user_prompt, + post_processing_prompt, + response_format=get_output_format_for_investigation(sections), + ) res.instructions = runbooks return res diff --git a/holmes/core/tools.py b/holmes/core/tools.py index f0515d6..636d397 100644 --- a/holmes/core/tools.py +++ b/holmes/core/tools.py @@ -326,7 +326,7 @@ def check_prerequisites(self): and prereq.expected_output not in result.stdout ): self._status = ToolsetStatusEnum.FAILED - self._error = f"Prerequisites check gave wrong output" + self._error = "Prerequisites check gave wrong output" return except subprocess.CalledProcessError as e: self._status = ToolsetStatusEnum.FAILED @@ -356,6 +356,9 @@ def check_prerequisites(self): self._status = ToolsetStatusEnum.ENABLED + def get_example_config(self) -> Dict[str, Any]: + return {} + class YAMLToolset(Toolset): tools: List[YAMLTool] diff --git a/holmes/main.py b/holmes/main.py index 6f9637c..0e524c0 100644 --- a/holmes/main.py +++ b/holmes/main.py @@ -1,3 +1,4 @@ +# ruff: noqa: E402 import os from holmes.utils.cert_utils import add_custom_certificate @@ -55,6 +56,7 @@ class Verbosity(Enum): VERBOSE = 2 VERY_VERBOSE = 3 + def cli_flags_to_verbosity(verbose_flags: List[bool]) -> Verbosity: if verbose_flags is None or len(verbose_flags) == 0: return Verbosity.NORMAL @@ -65,6 +67,7 @@ def cli_flags_to_verbosity(verbose_flags: List[bool]) -> Verbosity: else: return Verbosity.VERY_VERBOSE + def suppress_noisy_logs(): # disable INFO logs from OpenAI logging.getLogger("httpx").setLevel(logging.WARNING) @@ -80,33 +83,44 @@ def suppress_noisy_logs(): # suppress UserWarnings from the slack_sdk module warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*") + def init_logging(verbose_flags: List[bool] = None): verbosity = cli_flags_to_verbosity(verbose_flags) if verbosity == Verbosity.VERY_VERBOSE: - logging.basicConfig(level=logging.DEBUG, format="%(message)s", handlers=[RichHandler(show_level=False, show_time=False)]) + logging.basicConfig( + level=logging.DEBUG, + format="%(message)s", + handlers=[RichHandler(show_level=False, show_time=False)], + ) elif verbosity == Verbosity.VERBOSE: - logging.basicConfig(level=logging.INFO, format="%(message)s", handlers=[RichHandler(show_level=False, show_time=False)]) + logging.basicConfig( + 
level=logging.INFO, + format="%(message)s", + handlers=[RichHandler(show_level=False, show_time=False)], + ) logging.getLogger().setLevel(logging.DEBUG) suppress_noisy_logs() else: - logging.basicConfig(level=logging.INFO, format="%(message)s", handlers=[RichHandler(show_level=False, show_time=False)]) + logging.basicConfig( + level=logging.INFO, + format="%(message)s", + handlers=[RichHandler(show_level=False, show_time=False)], + ) suppress_noisy_logs() logging.debug(f"verbosity is {verbosity}") return Console() + # Common cli options # The defaults for options that are also in the config file MUST be None or else the cli defaults will override settings in the config file opt_api_key: Optional[str] = typer.Option( None, help="API key to use for the LLM (if not given, uses environment variables OPENAI_API_KEY or AZURE_API_KEY)", ) -opt_model: Optional[str] = typer.Option( - None, - help="Model to use for the LLM" -) +opt_model: Optional[str] = typer.Option(None, help="Model to use for the LLM") opt_config_file: Optional[Path] = typer.Option( None, "--config", @@ -182,7 +196,8 @@ def init_logging(verbose_flags: List[bool] = None): # Common help texts system_prompt_help = "Advanced. System prompt for LLM. Values starting with builtin:// are loaded from holmes/plugins/prompts, values starting with file:// are loaded from the given path, other values are interpreted as a prompt string" -def parse_documents(documents:Optional[str]) -> List[ResourceInstructionDocument]: + +def parse_documents(documents: Optional[str]) -> List[ResourceInstructionDocument]: resource_documents = [] if documents is not None: @@ -193,6 +208,7 @@ def parse_documents(documents:Optional[str]) -> List[ResourceInstructionDocument return resource_documents + def handle_result( result: LLMResult, console: Console, @@ -205,11 +221,14 @@ def handle_result( if destination == DestinationType.CLI: if show_tool_output and result.tool_calls: for tool_call in result.tool_calls: - console.print(f"[bold magenta]Used Tool:[/bold magenta]", end="") + console.print("[bold magenta]Used Tool:[/bold magenta]", end="") # we need to print this separately with markup=False because it contains arbitrary text and we don't want console.print to interpret it - console.print(f"{tool_call.description}. Output=\n{tool_call.result}", markup=False) + console.print( + f"{tool_call.description}. 
Output=\n{tool_call.result}", + markup=False, + ) - console.print(f"[bold green]AI:[/bold green]", end=" ") + console.print("[bold green]AI:[/bold green]", end=" ") console.print(Markdown(result.result)) if add_separator: console.print(Rule()) @@ -236,7 +255,6 @@ def ask( destination: Optional[DestinationType] = opt_destination, slack_token: Optional[str] = opt_slack_token, slack_channel: Optional[str] = opt_slack_channel, - # advanced options for this command system_prompt: Optional[str] = typer.Option( "builtin://generic_ask.jinja2", help=system_prompt_help @@ -254,7 +272,7 @@ def ask( ), json_output_file: Optional[str] = opt_json_output_file, echo_request: bool = opt_echo_request, - post_processing_prompt: Optional[str] = opt_post_processing_prompt + post_processing_prompt: Optional[str] = opt_post_processing_prompt, ): """ Ask any question and answer using available tools @@ -270,7 +288,9 @@ def ask( slack_channel=slack_channel, ) system_prompt = load_and_render_prompt(system_prompt) - ai = config.create_console_toolcalling_llm(allowed_toolsets=allowed_toolsets, dal=None) + ai = config.create_console_toolcalling_llm( + allowed_toolsets=allowed_toolsets, dal=None + ) if echo_request: console.print("[bold yellow]User:[/bold yellow] " + prompt) for path in include_file: @@ -290,7 +310,9 @@ def ask( raw={"prompt": prompt}, source_instance_id=socket.gethostname(), ) - handle_result(response, console, destination, config, issue, show_tool_output, False) + handle_result( + response, console, destination, config, issue, show_tool_output, False + ) @investigate_app.command() @@ -302,7 +324,7 @@ def alertmanager( ), alertmanager_label: Optional[List[str]] = typer.Option( [], - help="For filtering alerts with a specific label. Must be of format key=value. If --alertmanager-label is passed multiple times, alerts must match ALL labels" + help="For filtering alerts with a specific label. Must be of format key=value. 
If --alertmanager-label is passed multiple times, alerts must match ALL labels", ), alertmanager_username: Optional[str] = typer.Option( None, help="Username to use for basic auth" @@ -314,9 +336,7 @@ def alertmanager( None, help="Load alertmanager alerts from a file (used by the test framework)" ), alertmanager_limit: Optional[int] = typer.Option( - None, - "-n", - help="Limit the number of alerts to process" + None, "-n", help="Limit the number of alerts to process" ), # common options api_key: Optional[str] = opt_api_key, @@ -335,7 +355,7 @@ def alertmanager( system_prompt: Optional[str] = typer.Option( "builtin://generic_investigation.jinja2", help=system_prompt_help ), - post_processing_prompt: Optional[str] = opt_post_processing_prompt + post_processing_prompt: Optional[str] = opt_post_processing_prompt, ): """ Investigate a Prometheus/Alertmanager alert @@ -355,21 +375,25 @@ def alertmanager( slack_token=slack_token, slack_channel=slack_channel, custom_toolsets=custom_toolsets, - custom_runbooks=custom_runbooks + custom_runbooks=custom_runbooks, ) - ai = config.create_console_issue_investigator(console, allowed_toolsets=allowed_toolsets) + ai = config.create_console_issue_investigator( + console, allowed_toolsets=allowed_toolsets + ) source = config.create_alertmanager_source() try: issues = source.fetch_issues() except Exception as e: - logging.error(f"Failed to fetch issues from alertmanager", exc_info=e) + logging.error("Failed to fetch issues from alertmanager", exc_info=e) return if alertmanager_limit is not None: - console.print(f"[bold yellow]Limiting to {alertmanager_limit}/{len(issues)} issues.[/bold yellow]") + console.print( + f"[bold yellow]Limiting to {alertmanager_limit}/{len(issues)} issues.[/bold yellow]" + ) issues = issues[:alertmanager_limit] if alertmanager_alertname is not None: @@ -390,11 +414,11 @@ def alertmanager( prompt=system_prompt, console=console, instructions=None, - post_processing_prompt=post_processing_prompt) + post_processing_prompt=post_processing_prompt, + ) results.append({"issue": issue.model_dump(), "result": result.model_dump()}) handle_result(result, console, destination, config, issue, False, True) - if json_output_file: write_json_file(json_output_file, results) @@ -409,7 +433,8 @@ def generate_alertmanager_tests( None, help="Password to use for basic auth" ), output: Optional[Path] = typer.Option( - None, help="Path to dump alertmanager alerts as json (if not given, output curl commands instead)" + None, + help="Path to dump alertmanager alerts as json (if not given, output curl commands instead)", ), config_file: Optional[str] = opt_config_file, verbose: Optional[List[bool]] = opt_verbose, @@ -437,12 +462,12 @@ def jira( jira_url: Optional[str] = typer.Option( None, help="Jira url - e.g. https://your-company.atlassian.net", - envvar="JIRA_URL" + envvar="JIRA_URL", ), jira_username: Optional[str] = typer.Option( None, help="The email address with which you log into Jira", - envvar="JIRA_USERNAME" + envvar="JIRA_USERNAME", ), jira_api_key: str = typer.Option( None, @@ -452,9 +477,7 @@ def jira( None, help="Investigate tickets matching a JQL query (e.g. 
'project=DEFAULT_PROJECT')", ), - update: Optional[bool] = typer.Option( - False, help="Update Jira with AI results" - ), + update: Optional[bool] = typer.Option(False, help="Update Jira with AI results"), # common options api_key: Optional[str] = opt_api_key, model: Optional[str] = opt_model, @@ -469,7 +492,7 @@ def jira( system_prompt: Optional[str] = typer.Option( "builtin://generic_investigation.jinja2", help=system_prompt_help ), - post_processing_prompt: Optional[str] = opt_post_processing_prompt + post_processing_prompt: Optional[str] = opt_post_processing_prompt, ): """ Investigate a Jira ticket @@ -485,14 +508,16 @@ def jira( jira_api_key=jira_api_key, jira_query=jira_query, custom_toolsets=custom_toolsets, - custom_runbooks=custom_runbooks + custom_runbooks=custom_runbooks, + ) + ai = config.create_console_issue_investigator( + console, allowed_toolsets=allowed_toolsets ) - ai = config.create_console_issue_investigator(console, allowed_toolsets=allowed_toolsets) source = config.create_jira_source() try: issues = source.fetch_issues() except Exception as e: - logging.error(f"Failed to fetch issues from Jira", exc_info=e) + logging.error("Failed to fetch issues from Jira", exc_info=e) return console.print( @@ -509,7 +534,8 @@ def jira( prompt=system_prompt, console=console, instructions=None, - post_processing_prompt=post_processing_prompt) + post_processing_prompt=post_processing_prompt, + ) console.print(Rule()) console.print(f"[bold green]AI analysis of {issue.url}[/bold green]") @@ -532,10 +558,12 @@ def jira( @investigate_app.command() def github( github_url: str = typer.Option( - "https://api.github.com", help="The GitHub api base url (e.g: https://api.github.com)" + "https://api.github.com", + help="The GitHub api base url (e.g: https://api.github.com)", ), github_owner: Optional[str] = typer.Option( - None, help="The GitHub repository Owner, eg: if the repository url is https://github.com/robusta-dev/holmesgpt, the owner is robusta-dev" + None, + help="The GitHub repository Owner, eg: if the repository url is https://github.com/robusta-dev/holmesgpt, the owner is robusta-dev", ), github_pat: str = typer.Option( None, @@ -544,9 +572,7 @@ def github( None, help="The GitHub repository name, eg: if the repository url is https://github.com/robusta-dev/holmesgpt, the repository name is holmesgpt", ), - update: Optional[bool] = typer.Option( - False, help="Update GitHub with AI results" - ), + update: Optional[bool] = typer.Option(False, help="Update GitHub with AI results"), github_query: Optional[str] = typer.Option( "is:issue is:open", help="Investigate tickets matching a GitHub query (e.g. 
'is:issue is:open')", @@ -564,7 +590,7 @@ def github( system_prompt: Optional[str] = typer.Option( "builtin://generic_investigation.jinja2", help=system_prompt_help ), - post_processing_prompt: Optional[str] = opt_post_processing_prompt + post_processing_prompt: Optional[str] = opt_post_processing_prompt, ): """ Investigate a GitHub issue @@ -581,33 +607,37 @@ def github( github_repository=github_repository, github_query=github_query, custom_toolsets=custom_toolsets, - custom_runbooks=custom_runbooks + custom_runbooks=custom_runbooks, + ) + ai = config.create_issue_invcreate_console_issue_investigatorestigator( + console, allowed_toolsets ) - ai = config.create_issue_invcreate_console_issue_investigatorestigator(console, allowed_toolsets) source = config.create_github_source() try: issues = source.fetch_issues() except Exception as e: - logging.error(f"Failed to fetch issues from GitHub", exc_info=e) + logging.error("Failed to fetch issues from GitHub", exc_info=e) return console.print( f"[bold yellow]Analyzing {len(issues)} GitHub Issues.[/bold yellow] [red]Press Ctrl+C to stop.[/red]" ) for i, issue in enumerate(issues): - console.print(f"[bold yellow]Analyzing GitHub issue {i+1}/{len(issues)}: {issue.name}...[/bold yellow]") + console.print( + f"[bold yellow]Analyzing GitHub issue {i+1}/{len(issues)}: {issue.name}...[/bold yellow]" + ) result = ai.investigate( issue=issue, prompt=system_prompt, console=console, instructions=None, - post_processing_prompt=post_processing_prompt) + post_processing_prompt=post_processing_prompt, + ) console.print(Rule()) console.print(f"[bold green]AI analysis of {issue.url}[/bold green]") - console.print(Markdown(result.result.replace( - "\n", "\n\n")), style="bold green") + console.print(Markdown(result.result.replace("\n", "\n\n")), style="bold green") console.print(Rule()) if update: source.write_back_result(issue.id, result) @@ -617,16 +647,20 @@ def github( f"[bold]Not updating issue {issue.url}. Use the --update option to do so.[/bold]" ) + @investigate_app.command() def pagerduty( pagerduty_api_key: str = typer.Option( - None, help="The PagerDuty API key. This can be found in the PagerDuty UI under Integrations > API Access Keys." + None, + help="The PagerDuty API key. This can be found in the PagerDuty UI under Integrations > API Access Keys.", ), pagerduty_user_email: Optional[str] = typer.Option( - None, help="When --update is set, which user will be listed as the user who updated the ticket. (Must be the email of a valid user in your PagerDuty account.)" + None, + help="When --update is set, which user will be listed as the user who updated the ticket. 
(Must be the email of a valid user in your PagerDuty account.)", ), pagerduty_incident_key: Optional[str] = typer.Option( - None, help="If provided, only analyze a single PagerDuty incident matching this key" + None, + help="If provided, only analyze a single PagerDuty incident matching this key", ), update: Optional[bool] = typer.Option( False, help="Update PagerDuty with AI results" @@ -645,7 +679,7 @@ def pagerduty( system_prompt: Optional[str] = typer.Option( "builtin://generic_investigation.jinja2", help=system_prompt_help ), - post_processing_prompt: Optional[str] = opt_post_processing_prompt + post_processing_prompt: Optional[str] = opt_post_processing_prompt, ): """ Investigate a PagerDuty incident @@ -660,14 +694,14 @@ def pagerduty( pagerduty_user_email=pagerduty_user_email, pagerduty_incident_key=pagerduty_incident_key, custom_toolsets=custom_toolsets, - custom_runbooks=custom_runbooks + custom_runbooks=custom_runbooks, ) ai = config.create_console_issue_investigator(console, allowed_toolsets) source = config.create_pagerduty_source() try: issues = source.fetch_issues() except Exception as e: - logging.error(f"Failed to fetch issues from PagerDuty", exc_info=e) + logging.error("Failed to fetch issues from PagerDuty", exc_info=e) return console.print( @@ -676,19 +710,21 @@ def pagerduty( results = [] for i, issue in enumerate(issues): - console.print(f"[bold yellow]Analyzing PagerDuty incident {i+1}/{len(issues)}: {issue.name}...[/bold yellow]") + console.print( + f"[bold yellow]Analyzing PagerDuty incident {i+1}/{len(issues)}: {issue.name}...[/bold yellow]" + ) result = ai.investigate( issue=issue, prompt=system_prompt, console=console, instructions=None, - post_processing_prompt=post_processing_prompt) + post_processing_prompt=post_processing_prompt, + ) console.print(Rule()) console.print(f"[bold green]AI analysis of {issue.url}[/bold green]") - console.print(Markdown(result.result.replace( - "\n", "\n\n")), style="bold green") + console.print(Markdown(result.result.replace("\n", "\n\n")), style="bold green") console.print(Rule()) if update: source.write_back_result(issue.id, result) @@ -702,16 +738,16 @@ def pagerduty( if json_output_file: write_json_file(json_output_file, results) + @investigate_app.command() def opsgenie( - opsgenie_api_key: str = typer.Option( - None, help="The OpsGenie API key" - ), + opsgenie_api_key: str = typer.Option(None, help="The OpsGenie API key"), opsgenie_team_integration_key: str = typer.Option( None, help=OPSGENIE_TEAM_INTEGRATION_KEY_HELP ), opsgenie_query: Optional[str] = typer.Option( - None, help="E.g. 'message: Foo' (see https://support.atlassian.com/opsgenie/docs/search-queries-for-alerts/)" + None, + help="E.g. 
'message: Foo' (see https://support.atlassian.com/opsgenie/docs/search-queries-for-alerts/)", ), update: Optional[bool] = typer.Option( False, help="Update OpsGenie with AI results" @@ -730,7 +766,7 @@ def opsgenie( "builtin://generic_investigation.jinja2", help=system_prompt_help ), post_processing_prompt: Optional[str] = opt_post_processing_prompt, - documents: Optional[str] = opt_documents + documents: Optional[str] = opt_documents, ): """ Investigate an OpsGenie alert @@ -745,32 +781,34 @@ def opsgenie( opsgenie_team_integration_key=opsgenie_team_integration_key, opsgenie_query=opsgenie_query, custom_toolsets=custom_toolsets, - custom_runbooks=custom_runbooks + custom_runbooks=custom_runbooks, ) ai = config.create_console_issue_investigator(console, allowed_toolsets) source = config.create_opsgenie_source() try: issues = source.fetch_issues() except Exception as e: - logging.error(f"Failed to fetch issues from OpsGenie", exc_info=e) + logging.error("Failed to fetch issues from OpsGenie", exc_info=e) return console.print( f"[bold yellow]Analyzing {len(issues)} OpsGenie alerts.[/bold yellow] [red]Press Ctrl+C to stop.[/red]" ) for i, issue in enumerate(issues): - console.print(f"[bold yellow]Analyzing OpsGenie alert {i+1}/{len(issues)}: {issue.name}...[/bold yellow]") + console.print( + f"[bold yellow]Analyzing OpsGenie alert {i+1}/{len(issues)}: {issue.name}...[/bold yellow]" + ) result = ai.investigate( issue=issue, prompt=system_prompt, console=console, instructions=None, - post_processing_prompt=post_processing_prompt) + post_processing_prompt=post_processing_prompt, + ) console.print(Rule()) console.print(f"[bold green]AI analysis of {issue.url}[/bold green]") - console.print(Markdown(result.result.replace( - "\n", "\n\n")), style="bold green") + console.print(Markdown(result.result.replace("\n", "\n\n")), style="bold green") console.print(Rule()) if update: source.write_back_result(issue.id, result) diff --git a/holmes/plugins/destinations/__init__.py b/holmes/plugins/destinations/__init__.py index 0c97d44..41fa2a2 100644 --- a/holmes/plugins/destinations/__init__.py +++ b/holmes/plugins/destinations/__init__.py @@ -1,5 +1,6 @@ from strenum import StrEnum + class DestinationType(StrEnum): SLACK = "slack" CLI = "cli" diff --git a/holmes/plugins/destinations/slack/__init__.py b/holmes/plugins/destinations/slack/__init__.py index a12b115..1d8b71a 100644 --- a/holmes/plugins/destinations/slack/__init__.py +++ b/holmes/plugins/destinations/slack/__init__.py @@ -1 +1,2 @@ +# ruff: noqa: F401 from .plugin import SlackDestination diff --git a/holmes/plugins/destinations/slack/plugin.py b/holmes/plugins/destinations/slack/plugin.py index 357c2e4..2d40ba7 100644 --- a/holmes/plugins/destinations/slack/plugin.py +++ b/holmes/plugins/destinations/slack/plugin.py @@ -73,7 +73,7 @@ def send_issue(self, issue: Issue, result: LLMResult) -> None: ) elif e.response.data["error"] == "invalid_auth": logging.error( - f"Unable to authenticate using the provided Slack token. Please verify the setting of --slack-token" + "Unable to authenticate using the provided Slack token. Please verify the setting of --slack-token" ) else: logging.error(f"Error sending message: {e}. 
message={text}") @@ -108,7 +108,7 @@ def __send_prompt_for_debugging(self, parent_thread, result: LLMResult) -> None: text = "*🐞 DEBUG: messages with OpenAI*" file_response = self.client.files_upload_v2( - content=result.prompt, title=f"ai-prompt" + content=result.prompt, title="ai-prompt" ) permalink = file_response["file"]["permalink"] text += f"\n`<{permalink}|ai-prompt>`" diff --git a/holmes/plugins/interfaces.py b/holmes/plugins/interfaces.py index 5f265b7..461ee88 100644 --- a/holmes/plugins/interfaces.py +++ b/holmes/plugins/interfaces.py @@ -1,9 +1,8 @@ -from typing import List, Iterable, Pattern -from pydantic import BaseModel -from openai import AzureOpenAI, OpenAI +from typing import List, Iterable from holmes.core.issue import Issue from holmes.core.tool_calling_llm import LLMResult + # Sources must implement this class SourcePlugin: def fetch_issues(self) -> List[Issue]: @@ -17,9 +16,9 @@ def stream_issues(self) -> Iterable[Issue]: def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: raise NotImplementedError() + # Destinations must implement this class DestinationPlugin: - def send_issue(self, issue: Issue, result: LLMResult): raise NotImplementedError() diff --git a/holmes/plugins/prompts/__init__.py b/holmes/plugins/prompts/__init__.py index 5dde457..000d671 100644 --- a/holmes/plugins/prompts/__init__.py +++ b/holmes/plugins/prompts/__init__.py @@ -4,6 +4,7 @@ THIS_DIR = os.path.abspath(os.path.dirname(__file__)) + def load_prompt(prompt: str) -> str: """ prompt is either in the format 'builtin://' or 'file://' or a regular string @@ -12,14 +13,15 @@ def load_prompt(prompt: str) -> str: regular strings are returned as is (as literal strings) """ if prompt.startswith("builtin://"): - path = os.path.join(THIS_DIR, prompt[len("builtin://"):]) + path = os.path.join(THIS_DIR, prompt[len("builtin://") :]) elif prompt.startswith("file://"): - path = prompt[len("file://"):] + path = prompt[len("file://") :] else: return prompt - + return open(path).read() + def load_and_render_prompt(prompt: str, context: dict = None) -> str: """ prompt is in the format 'builtin://' or 'file://' or a regular string diff --git a/holmes/plugins/prompts/_general_instructions.jinja2 b/holmes/plugins/prompts/_general_instructions.jinja2 index 8126075..996cbfe 100644 --- a/holmes/plugins/prompts/_general_instructions.jinja2 +++ b/holmes/plugins/prompts/_general_instructions.jinja2 @@ -8,7 +8,6 @@ In general: * in this case, try to find substrings or search for the correct spellings * always provide detailed information like exact resource names, versions, labels, etc * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names -* when giving an answer don't say root cause but "possible root causes" and be clear to distinguish between what you know for certain and what is a possible explanation * if a runbook url is present as well as tool that can fetch it, you MUST fetch the runbook before beginning your investigation. * if you don't know, say that the analysis was inconclusive. * if there are multiple possible causes list them in a numbered list. 
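The prompt-loader hunk above keeps the three accepted prompt forms. A short usage sketch against load_and_render_prompt as it appears in this diff; the file path below is illustrative only:

# builtin:// resolves inside holmes/plugins/prompts, file:// reads from disk,
# and any other value is treated as a literal prompt string.
from holmes.plugins.prompts import load_and_render_prompt

system_prompt = load_and_render_prompt("builtin://generic_ask.jinja2")
custom_prompt = load_and_render_prompt("file:///tmp/my_prompt.jinja2")  # illustrative path
literal_prompt = load_and_render_prompt("You are a concise SRE assistant.")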
diff --git a/holmes/plugins/prompts/generic_ask.jinja2 b/holmes/plugins/prompts/generic_ask.jinja2 index 2932690..d78ef32 100644 --- a/holmes/plugins/prompts/generic_ask.jinja2 +++ b/holmes/plugins/prompts/generic_ask.jinja2 @@ -27,4 +27,4 @@ Relevant logs: 2021-01-01T00:00:00.000Z [ERROR] Missing required field 'email' in request body ``` -Validation error led to unhandled Java exception causing a crash. \ No newline at end of file +Validation error led to unhandled Java exception causing a crash. diff --git a/holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 b/holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 index f718279..8a67401 100644 --- a/holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +++ b/holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 @@ -4,8 +4,8 @@ Do not say 'based on the tool output' or explicitly refer to tools at all. If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time. ### Context Awareness: -Be aware that this conversation is follow-up questions to a prior investigation conducted for the {{issue}}. -However, not all questions may be directly related to that investigation. +Be aware that this conversation is follow-up questions to a prior investigation conducted for the {{issue}}. +However, not all questions may be directly related to that investigation. Use results of the investigation and conversation history to maintain continuity when appropriate, ensuring efficiency in your responses. #### Results of issue Investigation: @@ -46,4 +46,4 @@ Relevant logs: 2021-01-01T00:00:00.000Z [ERROR] Missing required field 'email' in request body ``` -Validation error led to unhandled Java exception causing a crash. \ No newline at end of file +Validation error led to unhandled Java exception causing a crash. diff --git a/holmes/plugins/prompts/generic_post_processing.jinja2 b/holmes/plugins/prompts/generic_post_processing.jinja2 index 3016581..3f2d07b 100644 --- a/holmes/plugins/prompts/generic_post_processing.jinja2 +++ b/holmes/plugins/prompts/generic_post_processing.jinja2 @@ -10,4 +10,4 @@ This is the original prompt: {{ prompt }} This is the investigation to summarize: -{{ investigation }} \ No newline at end of file +{{ investigation }} diff --git a/holmes/plugins/prompts/kubernetes_workload_ask.jinja2 b/holmes/plugins/prompts/kubernetes_workload_ask.jinja2 index a1237eb..7948228 100644 --- a/holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +++ b/holmes/plugins/prompts/kubernetes_workload_ask.jinja2 @@ -19,9 +19,9 @@ Use these rules when deciding how to apply them: * Before finalizing your answer double-check if any Global Instructions apply. If so, ensure you have correctly followed those instructions. In general: -* when it can provide extra information, first run as many tools as you need to gather more information, then respond. +* when it can provide extra information, first run as many tools as you need to gather more information, then respond. * if possible, do so repeatedly with different tool calls each time to gather more information. -* do not stop investigating until you are at the final root cause you are able to find. +* do not stop investigating until you are at the final root cause you are able to find. * use the "five whys" methodology to find the root cause. 
* for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that. * if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and. @@ -40,7 +40,7 @@ In general: * do not give an answer like "The pod is pending" as that doesn't state why the pod is pending and how to fix it. * do not give an answer like "Pod's node affinity/selector doesn't match any available nodes" because that doesn't include data on WHICH label doesn't match * if investigating an issue on many pods, there is no need to check more than 3 individual pods in the same deployment. pick up to a representative 3 from each deployment if relevant -* if you find errors and warning in a pods logs and you believe they indicate a real issue. consider the pod as not healthy. +* if you find errors and warning in a pods logs and you believe they indicate a real issue. consider the pod as not healthy. * if the user says something isn't working, ALWAYS: ** use kubectl_describe on the owner workload + individual pods and look for any transient issues they might have been referring to ** check the application aspects with kubectl_logs + kubectl_previous_logs and other relevant tools @@ -76,4 +76,4 @@ Here are issues and configuration changes that happend to this kubernetes worklo {% for a in alerts %} {{ a }} {% endfor %} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/holmes/plugins/prompts/kubernetes_workload_chat.jinja2 b/holmes/plugins/prompts/kubernetes_workload_chat.jinja2 new file mode 100644 index 0000000..cc63b18 --- /dev/null +++ b/holmes/plugins/prompts/kubernetes_workload_chat.jinja2 @@ -0,0 +1,38 @@ +You are a tool-calling AI assist provided with common DevOps and IT tools that you can use to troubleshoot problems or answer questions. +Whenever possible, you MUST first use tools to investigate, then answer the question. +Do not say 'based on the tool output' or explicitly refer to tools at all. +If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time. + +### Context Awareness: +Be aware that this conversation is follow-up questions to a prior investigation conducted for the {{resource}}. +However, not all questions may be directly related to that investigation. +Use results of the investigation and conversation history to maintain continuity when appropriate, ensuring efficiency in your responses. + +#### Results of Workload Health Check Analysis: +{{workload_analysis}} + +{% if tools_called_for_workload %} +Tools used for the workload analysis: +{% for tool in tools_called_for_workload %} + {{ tool }} +{% endfor %} +{% endif %} + + +{% include '_global_instructions.jinja2' %} +{% include '_general_instructions.jinja2' %} + +Style guide: +* Reply with terse output. +* Be painfully concise. +* Leave out "the" and filler words when possible. +* Be terse but not at the expense of leaving out important data like the root cause and how to fix. + +Examples: + +User: Why did the workload-example app crash? 
+(Call tool kubectl_find_resource kind=pod keyword=workload`) +(Call tool kubectl_previous_logs namespace=demos pod=workload-example-1299492-d9g9d # this pod name was found from the previous tool call) + +AI: `workload-example-1299492-d9g9d` crashed due to email validation error during HTTP request for /api/create_user +Relevant logs: diff --git a/holmes/plugins/runbooks/__init__.py b/holmes/plugins/runbooks/__init__.py index 948a634..0259299 100644 --- a/holmes/plugins/runbooks/__init__.py +++ b/holmes/plugins/runbooks/__init__.py @@ -1,20 +1,19 @@ import os import os.path -from typing import List, Literal, Optional, Pattern, Union +from typing import List, Optional, Pattern -from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +from pydantic import BaseModel, PrivateAttr from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file THIS_DIR = os.path.abspath(os.path.dirname(__file__)) -class IssueMatcher (RobustaBaseConfig): - issue_id: Optional[Pattern] = None # unique id - issue_name: Optional[Pattern] = None # not necessary unique + +class IssueMatcher(RobustaBaseConfig): + issue_id: Optional[Pattern] = None # unique id + issue_name: Optional[Pattern] = None # not necessary unique source: Optional[Pattern] = None -class RunbookContext(RobustaBaseConfig): - type: "URL" class Runbook(RobustaBaseConfig): match: IssueMatcher @@ -28,15 +27,18 @@ def set_path(self, path: str): def get_path(self) -> str: return self._path + class ListOfRunbooks(BaseModel): runbooks: List[Runbook] + def load_runbooks_from_file(path: str) -> List[Runbook]: data: ListOfRunbooks = load_model_from_file(ListOfRunbooks, file_path=path) for runbook in data.runbooks: runbook.set_path(path) return data.runbooks + def load_builtin_runbooks() -> List[Runbook]: all_runbooks = [] for filename in os.listdir(THIS_DIR): diff --git a/holmes/plugins/runbooks/jira.yaml b/holmes/plugins/runbooks/jira.yaml index 546fc1c..448bfb7 100644 --- a/holmes/plugins/runbooks/jira.yaml +++ b/holmes/plugins/runbooks/jira.yaml @@ -9,4 +9,4 @@ runbooks: Ignore issues related to jira itself, like plugin or licensing problems. Never give an answer like "XYZ is experiencing an issue, as indicated by the Jira issue. Further investigation is needed to determine the exact cause." You are the agent that is supposed to investigate so do so! - If you have references to a service or a component, start by searching for related infrastructure or resources using tools that take keywords \ No newline at end of file + If you have references to a service or a component, start by searching for related infrastructure or resources using tools that take keywords diff --git a/holmes/plugins/runbooks/kube-prometheus-stack.yaml b/holmes/plugins/runbooks/kube-prometheus-stack.yaml index a2c59a2..b785411 100644 --- a/holmes/plugins/runbooks/kube-prometheus-stack.yaml +++ b/holmes/plugins/runbooks/kube-prometheus-stack.yaml @@ -7,4 +7,4 @@ runbooks: instructions: > Check if the cluster is a managed cluster like EKS by fetching nodes and looking at their labels. If so, tell the user this is likely a known false positive in the kube-prometheus-stack alert because Prometheus can't scrape the scheduler which is managed by the cloud provider. - On the other hand, if this is a self-managed Kubernetes, either the scheduler is really down (unlikely) or it is running but Prometheus can't scrape it. 
\ No newline at end of file + On the other hand, if this is a self-managed Kubernetes, either the scheduler is really down (unlikely) or it is running but Prometheus can't scrape it. diff --git a/holmes/plugins/sources/github/__init__.py b/holmes/plugins/sources/github/__init__.py index 719c146..39d48e5 100644 --- a/holmes/plugins/sources/github/__init__.py +++ b/holmes/plugins/sources/github/__init__.py @@ -1,9 +1,8 @@ import logging +from typing import List from holmes.core.tool_calling_llm import LLMResult from holmes.plugins.interfaces import SourcePlugin from holmes.core.issue import Issue -from typing import List, Pattern -from holmes.core.tool_calling_llm import LLMResult import requests @@ -16,25 +15,26 @@ def __init__(self, url: str, owner: str, repository: str, pat: str, query: str): self.query = query def fetch_issues(self) -> List[Issue]: - logging.info(f"Fetching All issues from {self.url} for repository {self.owner}/{self.repository}") + logging.info( + f"Fetching All issues from {self.url} for repository {self.owner}/{self.repository}" + ) try: data = [] url = f"{self.url}/search/issues" headers = { "Authorization": f"token {self.pat}", "Accept": "application/vnd.github.v3+json", - "X-GitHub-Api-Version": "2022-11-28" - } - params = { - "per_page": "100" + "X-GitHub-Api-Version": "2022-11-28", } + params = {"per_page": "100"} default_q = f"repo:{self.owner}/{self.repository}" params["q"] = f"{default_q} {self.query}" while url: - response = requests.get( - url=url, headers=headers, params=params) + response = requests.get(url=url, headers=headers, params=params) if response.status_code != 200: - raise Exception(f"Failed to get issues:{response.status_code} {response.text}") + raise Exception( + f"Failed to get issues:{response.status_code} {response.text}" + ) logging.info(f"Got {response}") response.raise_for_status() data.extend(response.json().get("items", [])) @@ -45,7 +45,7 @@ def fetch_issues(self) -> List[Issue]: url = link.split(";")[0].strip()[1:-1] return [self.convert_to_issue(issue) for issue in data] except requests.RequestException as e: - raise ConnectionError(f"Failed to fetch data from GitHub.") from e + raise ConnectionError("Failed to fetch data from GitHub.") from e def convert_to_issue(self, github_issue): return Issue( @@ -62,12 +62,14 @@ def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: headers = { "Authorization": f"token {self.pat}", "Accept": "application/vnd.github.v3+json", - "X-GitHub-Api-Version": "2022-11-28" + "X-GitHub-Api-Version": "2022-11-28", } response = requests.post( url=url, - json={"body": f"Automatic AI Investigation by Robusta:\n\n{result_data.result}\n"}, - headers=headers + json={ + "body": f"Automatic AI Investigation by Robusta:\n\n{result_data.result}\n" + }, + headers=headers, ) response.raise_for_status() diff --git a/holmes/plugins/sources/jira/__init__.py b/holmes/plugins/sources/jira/__init__.py index 764354c..72c1b80 100644 --- a/holmes/plugins/sources/jira/__init__.py +++ b/holmes/plugins/sources/jira/__init__.py @@ -1,15 +1,12 @@ import logging -from typing import List, Literal, Optional, Pattern +from typing import List -import humanize import requests -from pydantic import BaseModel, SecretStr, ValidationError, parse_obj_as, validator from requests.auth import HTTPBasicAuth from holmes.core.issue import Issue -from holmes.core.tool_calling_llm import LLMResult, ToolCallingLLM, ToolCallResult +from holmes.core.tool_calling_llm import LLMResult from holmes.plugins.interfaces import 
SourcePlugin -from holmes.plugins.utils import dict_to_markdown class JiraSource(SourcePlugin): @@ -25,9 +22,7 @@ def fetch_issues(self) -> List[Issue]: response = requests.get( f"{self.url}/rest/api/2/search", params={"jql": self.jql_query}, - auth=HTTPBasicAuth( - self.username, self.api_key - ), + auth=HTTPBasicAuth(self.username, self.api_key), headers={"Accept": "application/json"}, ) if response.status_code != 200: @@ -39,7 +34,7 @@ def fetch_issues(self) -> List[Issue]: data = response.json() return [self.convert_to_issue(issue) for issue in data.get("issues", [])] except requests.RequestException as e: - raise ConnectionError(f"Failed to fetch data from Jira.") from e + raise ConnectionError("Failed to fetch data from Jira.") from e def convert_to_issue(self, jira_issue): return Issue( @@ -63,9 +58,7 @@ def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: response = requests.post( comment_url, json=comment_data, - auth=HTTPBasicAuth( - self.username, self.api_key - ), + auth=HTTPBasicAuth(self.username, self.api_key), headers={"Accept": "application/json"}, ) response.raise_for_status() diff --git a/holmes/plugins/sources/opsgenie/__init__.py b/holmes/plugins/sources/opsgenie/__init__.py index b773d7e..3407f8e 100644 --- a/holmes/plugins/sources/opsgenie/__init__.py +++ b/holmes/plugins/sources/opsgenie/__init__.py @@ -2,14 +2,17 @@ from holmes.core.tool_calling_llm import LLMResult from holmes.plugins.interfaces import SourcePlugin from holmes.core.issue import Issue -from typing import List, Pattern, Optional +from typing import List, Optional import requests import markdown OPSGENIE_TEAM_INTEGRATION_KEY_HELP = "OpsGenie Team Integration key for writing back results. (NOT a normal API Key.) Get it from Teams > YourTeamName > Integrations > Add Integration > API Key. Don't forget to turn on the integration!" 
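The GitHub, Jira, OpsGenie and PagerDuty plugins touched in this diff all follow the same SourcePlugin contract (fetch_issues / convert_to_issue / write_back_result). A minimal, hypothetical source written against that contract; the endpoint, JSON fields and Issue keyword arguments mirror the constructors visible elsewhere in this diff but are illustrative, not part of the change:

from typing import List
import requests

from holmes.core.issue import Issue
from holmes.core.tool_calling_llm import LLMResult
from holmes.plugins.interfaces import SourcePlugin


class ExampleHTTPSource(SourcePlugin):
    """Illustrative sketch only - not part of this diff."""

    def __init__(self, base_url: str, token: str):
        self.base_url = base_url
        self.token = token

    def fetch_issues(self) -> List[Issue]:
        # Hypothetical endpoint; real sources in this diff use their own APIs.
        response = requests.get(
            f"{self.base_url}/alerts",
            headers={"Authorization": f"Bearer {self.token}"},
        )
        response.raise_for_status()
        return [self.convert_to_issue(a) for a in response.json().get("alerts", [])]

    def convert_to_issue(self, alert: dict) -> Issue:
        # Keyword arguments follow the Issue(...) calls used by the other sources here.
        return Issue(
            id=str(alert["id"]),
            name=alert.get("title", "unknown"),
            source_type="example",
            source_instance_id=self.base_url,
            raw=alert,
        )

    def write_back_result(self, issue_id: str, result_data: LLMResult) -> None:
        # Post the AI analysis back as a note on the originating alert.
        requests.post(
            f"{self.base_url}/alerts/{issue_id}/notes",  # hypothetical endpoint
            json={"note": result_data.result},
            headers={"Authorization": f"Bearer {self.token}"},
        ).raise_for_status()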
+ class OpsGenieSource(SourcePlugin): - def __init__(self, api_key: str, query: str, team_integration_key: Optional[str] = None): + def __init__( + self, api_key: str, query: str, team_integration_key: Optional[str] = None + ): self.api_key = api_key self.query = query self.team_integration_key = team_integration_key @@ -21,25 +24,24 @@ def fetch_issues(self) -> List[Issue]: url = "https://api.opsgenie.com/v2/alerts" headers = { "Authorization": f"GenieKey {self.api_key}", - "Content-Type": "application/json" - } - params = { - "query": self.query, - "limit": 100 + "Content-Type": "application/json", } + params = {"query": self.query, "limit": 100} while url: # TODO: also fetch notes and description response = requests.get(url, headers=headers, params=params) logging.debug(f"Got {response.json()}") if response.status_code != 200: - raise Exception(f"Failed to get alerts: {response.status_code} {response.text}") + raise Exception( + f"Failed to get alerts: {response.status_code} {response.text}" + ) response.raise_for_status() data.extend(response.json().get("data", [])) next_url = response.json().get("paging", {}).get("next", None) url = next_url if next_url else None return [self.convert_to_issue(alert) for alert in data] except requests.RequestException as e: - raise ConnectionError(f"Failed to fetch data from OpsGenie.") from e + raise ConnectionError("Failed to fetch data from OpsGenie.") from e def convert_to_issue(self, opsgenie_alert): return Issue( @@ -50,11 +52,13 @@ def convert_to_issue(self, opsgenie_alert): url=opsgenie_alert["tinyId"], raw=opsgenie_alert, ) - + def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: if self.team_integration_key is None: - raise Exception(f"Please set '--opsgenie-team-integration-key' to write back results. This is an {OPSGENIE_TEAM_INTEGRATION_KEY_HELP}") - + raise Exception( + f"Please set '--opsgenie-team-integration-key' to write back results. 
This is an {OPSGENIE_TEAM_INTEGRATION_KEY_HELP}" + ) + # TODO: update description to make this more visible (right now we add a comment) html_output = markdown.markdown(result_data.result) logging.debug(f"HTML output: {html_output}") @@ -62,12 +66,12 @@ def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: url = f"https://api.opsgenie.com/v2/alerts/{issue_id}/notes?identifierType=id" headers = { "Authorization": f"GenieKey {self.team_integration_key}", - "Content-Type": "application/json" + "Content-Type": "application/json", } response = requests.post( url=url, json={"note": f"Automatic AI Investigation by Robusta:\n\n{html_output}\n"}, - headers=headers + headers=headers, ) logging.debug(f"Response: {response.json()}") response.raise_for_status() @@ -82,4 +86,6 @@ def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: response.raise_for_status() json_response = response.json() if not json_response["data"]["success"]: - raise Exception(f"Failed to write back result to OpsGenie: {json_response['data']['status']}") + raise Exception( + f"Failed to write back result to OpsGenie: {json_response['data']['status']}" + ) diff --git a/holmes/plugins/sources/pagerduty/__init__.py b/holmes/plugins/sources/pagerduty/__init__.py index b72f867..36f9e9e 100644 --- a/holmes/plugins/sources/pagerduty/__init__.py +++ b/holmes/plugins/sources/pagerduty/__init__.py @@ -1,18 +1,21 @@ import logging -from typing import List, Pattern, Optional +from typing import List, Optional import requests -from pydantic import BaseModel from holmes.core.issue import Issue from holmes.core.tool_calling_llm import LLMResult from holmes.plugins.interfaces import SourcePlugin from holmes.utils.markdown_utils import markdown_to_plain_text -class PagerDutySource(SourcePlugin): - def __init__(self, api_key: str, user_email: str, incident_key: Optional[str] = None): - self.api_url = "https://api.pagerduty.com" # currently hard-coded, can expose it if useful +class PagerDutySource(SourcePlugin): + def __init__( + self, api_key: str, user_email: str, incident_key: Optional[str] = None + ): + self.api_url = ( + "https://api.pagerduty.com" # currently hard-coded, can expose it if useful + ) self.api_key = api_key self.user_email = user_email self.incident_key = incident_key @@ -22,18 +25,17 @@ def fetch_issues(self) -> List[Issue]: try: headers = { "Authorization": f"Token token={self.api_key}", - "Accept": "application/vnd.pagerduty+json;version=2" + "Accept": "application/vnd.pagerduty+json;version=2", } # excludes resolved query_params = "?statuses[]=triggered&statuses[]=acknowledged" if self.incident_key: - query_params =f"{query_params}&incident_key={self.incident_key}" + query_params = f"{query_params}&incident_key={self.incident_key}" response = requests.get( - f"{self.api_url}/incidents{query_params}", - headers=headers + f"{self.api_url}/incidents{query_params}", headers=headers ) if response.status_code != 200: print(f"Got response: {response}") @@ -45,7 +47,7 @@ def fetch_issues(self) -> List[Issue]: data = response.json() return [self.convert_to_issue(issue) for issue in data.get("incidents", [])] except requests.RequestException as e: - raise ConnectionError(f"Failed to fetch data from PagerDuty.") from e + raise ConnectionError("Failed to fetch data from PagerDuty.") from e def convert_to_issue(self, source_issue): return Issue( @@ -60,7 +62,9 @@ def convert_to_issue(self, source_issue): def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: 
logging.info(f"Writing back result to issue {issue_id}") if not self.user_email: - raise Exception(f"When using --update mode, --pagerduty-user-email must be provided") + raise Exception( + "When using --update mode, --pagerduty-user-email must be provided" + ) try: url = f"{self.api_url}/incidents/{issue_id}/notes" @@ -75,22 +79,22 @@ def write_back_result(self, issue_id: str, result_data: LLMResult) -> None: "content": f"Automatic AI Investigation by HolmesGPT:\n\n{comment}" } } - response = requests.post( - url, - json=comment_data, - headers=headers - ) + response = requests.post(url, json=comment_data, headers=headers) response.raise_for_status() data = response.json() logging.debug(f"Comment added to issue {issue_id}: {data}") except requests.RequestException as e: - logging.error(f"Failed to write back result to PagerDuty: {e}; {e.response.text}") + logging.error( + f"Failed to write back result to PagerDuty: {e}; {e.response.text}" + ) raise + # Run with: # poetry run python3 -m holmes.plugins.sources.pagerduty if __name__ == "__main__": import sys + pd_source = PagerDutySource(api_key=sys.argv[1], user_email=sys.argv[2]) issues = pd_source.fetch_issues() for issue in issues: diff --git a/holmes/plugins/sources/prometheus/__init__.py b/holmes/plugins/sources/prometheus/__init__.py index b61916d..e69de29 100644 --- a/holmes/plugins/sources/prometheus/__init__.py +++ b/holmes/plugins/sources/prometheus/__init__.py @@ -1 +0,0 @@ -from .plugin import AlertManagerSource \ No newline at end of file diff --git a/holmes/plugins/sources/prometheus/models.py b/holmes/plugins/sources/prometheus/models.py index 5ea0eb2..ca66e2a 100644 --- a/holmes/plugins/sources/prometheus/models.py +++ b/holmes/plugins/sources/prometheus/models.py @@ -3,7 +3,6 @@ from typing import Dict, List, Optional from urllib.parse import parse_qs, unquote, urlparse from pydantic import BaseModel, computed_field -import humanize # these models are used by AlertManager's push API (when alertmanager pushes alerts to us by webhook) diff --git a/holmes/plugins/sources/prometheus/plugin.py b/holmes/plugins/sources/prometheus/plugin.py index 31e56ac..64cb671 100644 --- a/holmes/plugins/sources/prometheus/plugin.py +++ b/holmes/plugins/sources/prometheus/plugin.py @@ -2,12 +2,12 @@ import logging import re from pathlib import Path -from typing import List, Literal, Optional, Pattern +from typing import List, Optional, Pattern import humanize import requests import rich -from pydantic import BaseModel, ValidationError, parse_obj_as, validator +from pydantic import parse_obj_as from pydantic.json import pydantic_encoder from requests.auth import HTTPBasicAuth import rich.segment @@ -46,9 +46,13 @@ def __init__( # we don't mention --alertmanager-file to avoid confusing users - most users wont care about it raise ValueError("--alertmanager-url must be specified") if self.url is not None and self.filepath is not None: - logging.warning(f"Ignoring --alertmanager-url because --alertmanager-file is specified") + logging.warning( + "Ignoring --alertmanager-url because --alertmanager-file is specified" + ) if self.label_filter and self.filepath is not None: - logging.warning(f"Ignoring --label-filter because --alertmanager-file is specified") + logging.warning( + "Ignoring --label-filter because --alertmanager-file is specified" + ) if self.url and not ( self.url.startswith("http://") or self.url.startswith("https://") ): @@ -126,7 +130,9 @@ def output_curl_commands(self, console: rich.console.Console) -> None: """ alerts = 
self.__fetch_issues_from_api() for alert in alerts: - alert_json = json.dumps([alert.model_dump()], default=pydantic_encoder) # Wrap in a list + alert_json = json.dumps( + [alert.model_dump()], default=pydantic_encoder + ) # Wrap in a list curl_command = ( f"curl -X POST -H 'Content-Type: application/json' " f"-d '{alert_json}' {self.url}/api/v2/alerts" diff --git a/holmes/plugins/toolsets/__init__.py b/holmes/plugins/toolsets/__init__.py index a9fb057..2fd779b 100644 --- a/holmes/plugins/toolsets/__init__.py +++ b/holmes/plugins/toolsets/__init__.py @@ -6,7 +6,6 @@ from holmes.core.supabase_dal import SupabaseDal from holmes.plugins.toolsets.datetime import DatetimeToolset from holmes.plugins.toolsets.findings import FindingsToolset -from holmes.plugins.toolsets.grafana.common import GrafanaConfig from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset from holmes.plugins.toolsets.internet import InternetToolset @@ -14,8 +13,7 @@ from holmes.plugins.toolsets.prometheus import PrometheusToolset from holmes.core.tools import Toolset, YAMLToolset -from typing import Dict -from typing import Optional +from holmes.plugins.toolsets.opensearch import OpenSearchToolset import yaml THIS_DIR = os.path.abspath(os.path.dirname(__file__)) diff --git a/holmes/plugins/toolsets/docker.yaml b/holmes/plugins/toolsets/docker.yaml index 6e4c482..b0e72ae 100644 --- a/holmes/plugins/toolsets/docker.yaml +++ b/holmes/plugins/toolsets/docker.yaml @@ -44,4 +44,3 @@ toolsets: - name: "docker_diff" description: "Inspect changes to files or directories on a container's filesystem" command: "docker diff {{ container_id }}" - diff --git a/holmes/plugins/toolsets/findings.py b/holmes/plugins/toolsets/findings.py index 1451bea..3f619e6 100644 --- a/holmes/plugins/toolsets/findings.py +++ b/holmes/plugins/toolsets/findings.py @@ -4,13 +4,18 @@ from typing import Optional from typing_extensions import Dict from holmes.core.supabase_dal import SupabaseDal -from holmes.core.tools import StaticPrerequisite, Tool, ToolParameter, Toolset, ToolsetTag +from holmes.core.tools import ( + StaticPrerequisite, + Tool, + ToolParameter, + Toolset, + ToolsetTag, +) PARAM_FINDING_ID = "id" class FetchRobustaFinding(Tool): - _dal: Optional[SupabaseDal] def __init__(self, dal: Optional[SupabaseDal]): @@ -51,8 +56,8 @@ def invoke(self, params: Dict) -> str: return f"There was an internal error while fetching finding {finding_id}" - def get_parameterized_one_liner(self, params:Dict) -> str: - return f"Fetch metadata and history" + def get_parameterized_one_liner(self, params: Dict) -> str: + return "Fetch metadata and history" class FindingsToolset(Toolset): @@ -72,6 +77,8 @@ def __init__(self, dal: Optional[SupabaseDal]): name="robusta", prerequisites=[dal_prereq], tools=[FetchRobustaFinding(dal)], - tags=[ToolsetTag.CORE,], - is_default=True + tags=[ + ToolsetTag.CORE, + ], + is_default=True, ) diff --git a/holmes/plugins/toolsets/grafana/base_grafana_toolset.py b/holmes/plugins/toolsets/grafana/base_grafana_toolset.py index 9f29efc..8ec784d 100644 --- a/holmes/plugins/toolsets/grafana/base_grafana_toolset.py +++ b/holmes/plugins/toolsets/grafana/base_grafana_toolset.py @@ -1,5 +1,5 @@ import logging -from typing import Any +from typing import Any, ClassVar, Type from holmes.core.tools import ( Tool, Toolset, @@ -11,17 +11,28 @@ class BaseGrafanaToolset(Toolset): - def __init__(self, name: str, description: str, icon_url: str, 
tools: list[Tool]): + config_class: ClassVar[Type[GrafanaConfig]] = GrafanaConfig + + def __init__( + self, + name: str, + description: str, + icon_url: str, + tools: list[Tool], + doc_url: str, + ): super().__init__( name=name, description=description, icon_url=icon_url, + docs_url=doc_url, prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)], tools=tools, tags=[ ToolsetTag.CORE, ], - enabled=False + enabled=False, + is_default=True, ) def prerequisites_callable(self, config: dict[str, Any]) -> bool: @@ -30,10 +41,16 @@ def prerequisites_callable(self, config: dict[str, Any]) -> bool: return False try: - self._grafana_config = GrafanaConfig(**config) - is_healthy = get_health(self._grafana_config.url, self._grafana_config.api_key) + self._grafana_config = BaseGrafanaToolset.config_class(**config) + is_healthy = get_health( + self._grafana_config.url, self._grafana_config.api_key + ) return is_healthy except Exception: logging.exception("Failed to set up grafana toolset") return False + + def get_example_config(self): + example_config = GrafanaConfig(api_key="YOUR API KEY", url="YOUR GRAFANA URL") + return example_config.model_dump() diff --git a/holmes/plugins/toolsets/grafana/common.py b/holmes/plugins/toolsets/grafana/common.py index e5c5e1a..bc7f8bd 100644 --- a/holmes/plugins/toolsets/grafana/common.py +++ b/holmes/plugins/toolsets/grafana/common.py @@ -1,23 +1,12 @@ from typing import Dict, Optional, Union import uuid import time -import os from pydantic import BaseModel - -GRAFANA_URL_ENV_NAME = "GRAFANA_URL" -GRAFANA_API_KEY_ENV_NAME = "GRAFANA_API_KEY" ONE_HOUR_IN_SECONDS = 3600 -class GrafanaLokiConfig(BaseModel): - pod_name_search_key: str = "pod" - namespace_search_key: str = "namespace" - node_name_search_key: str = "node" - - class GrafanaConfig(BaseModel): - loki: GrafanaLokiConfig = GrafanaLokiConfig() api_key: str url: str @@ -59,7 +48,7 @@ def get_datasource_id(dict: Dict, param: str) -> str: try: if uuid.UUID(datasource_id, version=4): return f"uid/{datasource_id}" - except: + except Exception: pass - + return datasource_id diff --git a/holmes/plugins/toolsets/grafana/grafana_api.py b/holmes/plugins/toolsets/grafana/grafana_api.py index a7ccb4c..7e5e1de 100644 --- a/holmes/plugins/toolsets/grafana/grafana_api.py +++ b/holmes/plugins/toolsets/grafana/grafana_api.py @@ -1,4 +1,3 @@ - import logging import requests from typing import Any, Dict, List, Optional @@ -6,13 +5,17 @@ from holmes.plugins.toolsets.grafana.common import headers + @backoff.on_exception( backoff.expo, # Exponential backoff requests.exceptions.RequestException, # Retry on request exceptions max_tries=5, # Maximum retries - giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) and e.response.status_code < 500, + giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) + and e.response.status_code < 500, ) -def list_grafana_datasources(grafana_url:str, api_key: str, source_name: Optional[str] = None) -> List[Dict[str, Any]]: +def list_grafana_datasources( + grafana_url: str, api_key: str, source_name: Optional[str] = None +) -> List[Dict[str, Any]]: """ List all configured datasources from a Grafana instance with retry and backoff. @@ -23,7 +26,7 @@ def list_grafana_datasources(grafana_url:str, api_key: str, source_name: Optiona List of datasource configurations. 
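[Editor's note] `BaseGrafanaToolset` above introduces an overridable `config_class` class attribute so each Grafana toolset can validate its own Pydantic config inside `prerequisites_callable`. A minimal sketch of that pattern follows; the class names (`BaseToolset`, `LokiToolset`) and fields are illustrative, and note that the sketch resolves the attribute through `type(self)` so a subclass override takes effect.

```python
# Sketch of the overridable config_class pattern: a base class validates its
# config dict with a Pydantic model that subclasses may replace.
from typing import Any, ClassVar, Type

from pydantic import BaseModel


class GrafanaConfig(BaseModel):
    api_key: str
    url: str


class LokiConfig(GrafanaConfig):
    pod_name_search_key: str = "pod"


class BaseToolset:
    config_class: ClassVar[Type[GrafanaConfig]] = GrafanaConfig

    def configure(self, config: dict[str, Any]) -> GrafanaConfig:
        # type(self).config_class resolves to the subclass override, if any;
        # naming the base class explicitly would always pick GrafanaConfig.
        return type(self).config_class(**config)


class LokiToolset(BaseToolset):
    config_class = LokiConfig


if __name__ == "__main__":
    cfg = LokiToolset().configure({"api_key": "k", "url": "http://grafana:3000"})
    print(type(cfg).__name__, cfg.pod_name_search_key)  # LokiConfig pod
```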
""" try: - url = f'{grafana_url}/api/datasources' + url = f"{grafana_url}/api/datasources" headers_ = headers(api_key=api_key) logging.info(f"Fetching datasources from: {url}") @@ -35,12 +38,13 @@ def list_grafana_datasources(grafana_url:str, api_key: str, source_name: Optiona return datasources relevant_datasources = [ - ds for ds in datasources - if ds['type'].lower() == source_name.lower() + ds for ds in datasources if ds["type"].lower() == source_name.lower() ] for ds in relevant_datasources: - logging.info(f"Found datasource: {ds['name']} (type: {ds['type']}, id: {ds['id']})") + logging.info( + f"Found datasource: {ds['name']} (type: {ds['type']}, id: {ds['id']})" + ) return relevant_datasources except requests.exceptions.RequestException as e: @@ -51,15 +55,16 @@ def list_grafana_datasources(grafana_url:str, api_key: str, source_name: Optiona backoff.expo, # Exponential backoff requests.exceptions.RequestException, # Retry on request exceptions max_tries=5, # Maximum retries - giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) and e.response.status_code < 500, + giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) + and e.response.status_code < 500, ) -def get_health(grafana_url:str, api_key: str) -> bool: +def get_health(grafana_url: str, api_key: str) -> bool: try: - url = f'{grafana_url}/api/health' + url = f"{grafana_url}/api/health" headers_ = headers(api_key=api_key) response = requests.get(url, headers=headers_, timeout=10) # Added timeout response.raise_for_status() return True - except: + except Exception: return False diff --git a/holmes/plugins/toolsets/grafana/loki_api.py b/holmes/plugins/toolsets/grafana/loki_api.py index a911fe5..2a8d15f 100644 --- a/holmes/plugins/toolsets/grafana/loki_api.py +++ b/holmes/plugins/toolsets/grafana/loki_api.py @@ -1,6 +1,5 @@ -import logging import requests -from typing import Dict, List, Optional +from typing import Dict, List import backoff from holmes.plugins.toolsets.grafana.common import headers diff --git a/holmes/plugins/toolsets/grafana/tempo_api.py b/holmes/plugins/toolsets/grafana/tempo_api.py index a6d855a..6e75deb 100644 --- a/holmes/plugins/toolsets/grafana/tempo_api.py +++ b/holmes/plugins/toolsets/grafana/tempo_api.py @@ -1,15 +1,16 @@ - import requests from typing import Dict, List import backoff + def execute_tempo_query_with_retry( - grafana_url:str, + grafana_url: str, api_key: str, tempo_datasource_id: str, query_params: dict, retries: int = 3, - timeout: int = 5): + timeout: int = 5, +): """ Execute a Tempo API query through Grafana with retries and timeout. @@ -22,21 +23,22 @@ def execute_tempo_query_with_retry( Returns: List of trace results. 
""" - url = f'{grafana_url}/api/datasources/proxy/{tempo_datasource_id}/api/search' + url = f"{grafana_url}/api/datasources/proxy/{tempo_datasource_id}/api/search" @backoff.on_exception( backoff.expo, # Exponential backoff requests.exceptions.RequestException, # Retry on request exceptions max_tries=retries, # Maximum retries - giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) and e.response.status_code < 500, + giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) + and e.response.status_code < 500, ) def make_request(): response = requests.post( url, headers={ - 'Authorization': f'Bearer {api_key}', - 'Accept': 'application/json', - 'Content-Type': 'application/json', + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", + "Content-Type": "application/json", }, json=query_params, timeout=timeout, # Set timeout for the request @@ -51,7 +53,7 @@ def make_request(): def query_tempo_traces_by_duration( - grafana_url:str, + grafana_url: str, api_key: str, tempo_datasource_id: str, min_duration: str, @@ -78,11 +80,13 @@ def query_tempo_traces_by_duration( "end": str(end), "limit": str(limit), } - return execute_tempo_query_with_retry(grafana_url, api_key, tempo_datasource_id, query_params) + return execute_tempo_query_with_retry( + grafana_url, api_key, tempo_datasource_id, query_params + ) def query_tempo_trace_by_id( - grafana_url:str, + grafana_url: str, api_key: str, tempo_datasource_id: str, trace_id: str, @@ -101,20 +105,21 @@ def query_tempo_trace_by_id( Returns: Trace details. """ - url = f'{grafana_url}/api/datasources/proxy/{tempo_datasource_id}/api/traces/{trace_id}' + url = f"{grafana_url}/api/datasources/proxy/{tempo_datasource_id}/api/traces/{trace_id}" @backoff.on_exception( backoff.expo, # Exponential backoff requests.exceptions.RequestException, # Retry on request exceptions max_tries=retries, # Maximum retries - giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) and e.response.status_code < 500, + giveup=lambda e: isinstance(e, requests.exceptions.HTTPError) + and e.response.status_code < 500, ) def make_request(): response = requests.get( url, headers={ - 'Authorization': f'Bearer {api_key}', - 'Accept': 'application/json', + "Authorization": f"Bearer {api_key}", + "Accept": "application/json", }, timeout=timeout, # Set timeout for the request ) @@ -124,13 +129,13 @@ def make_request(): try: return make_request() except requests.exceptions.RequestException as e: - raise Exception(f"Failed to retrieve trace by ID after retries: {e} \n for URL: {url}") + raise Exception( + f"Failed to retrieve trace by ID after retries: {e} \n for URL: {url}" + ) + def process_trace_json(trace_json): - result = { - "total_elapsed_time_ms": 0, - "applications": [] - } + result = {"total_elapsed_time_ms": 0, "applications": []} # First pass: Collect basic details about spans spans_info = {} @@ -162,7 +167,7 @@ def process_trace_json(trace_json): "exclusive_time_ms": elapsed_time_ns / 1_000_000, "start_time": start_time, "end_time": end_time, - "loki_labels": {"app": app_name} + "loki_labels": {"app": app_name}, } # Second pass: Subtract child span times from parent spans @@ -177,18 +182,20 @@ def process_trace_json(trace_json): app_info = { "app_name": span_data["app_name"], "service_name": span_data["service_name"], - #"elapsed_time_ms": span_data["elapsed_time_ms"], # this confuses the llm + # "elapsed_time_ms": span_data["elapsed_time_ms"], # this confuses the llm "elapsed_service_time_ms": span_data["exclusive_time_ms"], "start_time": 
span_data["start_time"], "end_time": span_data["end_time"], - "loki_labels": span_data["loki_labels"] + "loki_labels": span_data["loki_labels"], } if app_info["app_name"]: result["applications"].append(app_info) # Set the total elapsed time to the root span's time (if available) - root_span = max(spans_info.values(), key=lambda x: x["elapsed_time_ms"], default=None) + root_span = max( + spans_info.values(), key=lambda x: x["elapsed_time_ms"], default=None + ) if root_span: result["total_elapsed_time_ms"] = root_span["elapsed_time_ms"] diff --git a/holmes/plugins/toolsets/grafana/toolset_grafana_loki.py b/holmes/plugins/toolsets/grafana/toolset_grafana_loki.py index 0023359..1cc0cf2 100644 --- a/holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +++ b/holmes/plugins/toolsets/grafana/toolset_grafana_loki.py @@ -5,6 +5,7 @@ from holmes.core.tools import Tool, ToolParameter from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset from holmes.plugins.toolsets.grafana.common import ( + GrafanaConfig, get_datasource_id, get_param_or_raise, process_timestamps, @@ -17,8 +18,13 @@ ) -class ListLokiDatasources(Tool): +class GrafanaLokiConfig(GrafanaConfig): + pod_name_search_key: str = "pod" + namespace_search_key: str = "namespace" + node_name_search_key: str = "node" + +class ListLokiDatasources(Tool): def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="list_loki_datasources", @@ -40,7 +46,6 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GetLokiLogsByNode(Tool): - def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="fetch_loki_logs_by_node", @@ -84,7 +89,7 @@ def invoke(self, params: Dict) -> str: api_key=self._toolset._grafana_config.api_key, loki_datasource_id=get_datasource_id(params, "loki_datasource_id"), node_name=get_param_or_raise(params, "node_name"), - node_name_search_key=self._toolset._grafana_config.loki.node_name_search_key, + node_name_search_key=self._toolset._grafana_config.node_name_search_key, start=start, end=end, limit=int(get_param_or_raise(params, "limit")), @@ -158,7 +163,6 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GetLokiLogsByPod(Tool): - def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="fetch_loki_logs_by_pod", @@ -208,8 +212,8 @@ def invoke(self, params: Dict) -> str: loki_datasource_id=get_datasource_id(params, "loki_datasource_id"), pod_regex=get_param_or_raise(params, "pod_regex"), namespace=get_param_or_raise(params, "namespace"), - namespace_search_key=self._toolset._grafana_config.loki.namespace_search_key, - pod_name_search_key=self._toolset._grafana_config.loki.pod_name_search_key, + namespace_search_key=self._toolset._grafana_config.namespace_search_key, + pod_name_search_key=self._toolset._grafana_config.pod_name_search_key, start=start, end=end, limit=int(get_param_or_raise(params, "limit")), @@ -221,11 +225,14 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GrafanaLokiToolset(BaseGrafanaToolset): + config_class = GrafanaLokiConfig + def __init__(self): super().__init__( name="grafana/loki", description="Fetchs kubernetes pods and node logs from Loki", icon_url="https://grafana.com/media/docs/loki/logo-grafana-loki.png", + doc_url="https://grafana.com/oss/loki/", tools=[ ListLokiDatasources(self), GetLokiLogsByNode(self), @@ -233,3 +240,9 @@ def __init__(self): GetLokiLogsByLabel(self), ], ) + + def get_example_config(self): + example_config = GrafanaLokiConfig( + api_key="YOUR API KEY", url="YOUR 
GRAFANA URL" + ) + return example_config.model_dump() diff --git a/holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py b/holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py index 815ee27..ef9fa2a 100644 --- a/holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +++ b/holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py @@ -11,7 +11,6 @@ ) from holmes.plugins.toolsets.grafana.grafana_api import list_grafana_datasources from holmes.plugins.toolsets.grafana.common import ( - GrafanaConfig, get_datasource_id, get_param_or_raise, process_timestamps, @@ -19,7 +18,6 @@ class ListAllDatasources(Tool): - def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="list_all_datasources", @@ -40,7 +38,6 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GetTempoTracesByMinDuration(Tool): - def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="fetch_tempo_traces_by_min_duration", @@ -97,7 +94,6 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GetTempoTraceById(Tool): - def __init__(self, toolset: BaseGrafanaToolset): super().__init__( name="fetch_tempo_trace_by_id", @@ -131,12 +127,12 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GrafanaTempoToolset(BaseGrafanaToolset): - def __init__(self): super().__init__( name="grafana/tempo", description="Fetchs kubernetes traces from Tempo", icon_url="https://grafana.com/static/assets/img/blog/tempo.png", + doc_url="https://grafana.com/oss/tempo/", tools=[ ListAllDatasources(self), GetTempoTracesByMinDuration(self), diff --git a/holmes/plugins/toolsets/helm.yaml b/holmes/plugins/toolsets/helm.yaml index f1ec5e6..bd3c82e 100644 --- a/holmes/plugins/toolsets/helm.yaml +++ b/holmes/plugins/toolsets/helm.yaml @@ -12,11 +12,11 @@ toolsets: - name: "helm_list" description: "Use to get all the current helm releases" command: "helm list" - + - name: "helm_values" description: "Use to gather Helm values or any released helm chart" command: "helm get values -a {{ release_name }} -n {{ namespace }} -o json" - + - name: "helm_status" description: "Check the status of a Helm release" command: "helm status {{ release_name }} -n {{ namespace }}" @@ -39,4 +39,4 @@ toolsets: - name: "helm_notes" description: "Show the notes provided by the Helm chart" - command: "helm get notes {{ release_name }} -n {{ namespace }}" \ No newline at end of file + command: "helm get notes {{ release_name }} -n {{ namespace }}" diff --git a/holmes/plugins/toolsets/internet.py b/holmes/plugins/toolsets/internet.py index 50cb745..421e305 100644 --- a/holmes/plugins/toolsets/internet.py +++ b/holmes/plugins/toolsets/internet.py @@ -12,49 +12,61 @@ import requests # TODO: change and make it holmes -INTERNET_TOOLSET_USER_AGENT = os.environ.get("INTERNET_TOOLSET_USER_AGENT", "Mozilla/5.0 (X11; Linux x86_64; rv:128.0; holmesgpt;) Gecko/20100101 Firefox/128.0") -INTERNET_TOOLSET_TIMEOUT_SECONDS = int(os.environ.get("INTERNET_TOOLSET_TIMEOUT_SECONDS", "60")) +INTERNET_TOOLSET_USER_AGENT = os.environ.get( + "INTERNET_TOOLSET_USER_AGENT", + "Mozilla/5.0 (X11; Linux x86_64; rv:128.0; holmesgpt;) Gecko/20100101 Firefox/128.0", +) +INTERNET_TOOLSET_TIMEOUT_SECONDS = int( + os.environ.get("INTERNET_TOOLSET_TIMEOUT_SECONDS", "60") +) SELECTORS_TO_REMOVE = [ - 'script', 'style', 'meta', 'link', 'noscript', - 'header', 'footer', 'nav', - 'iframe', 'svg', 'img', - 'button', - 'menu', 'sidebar', 'aside', - '.header' - '.footer' - '.navigation', - '.nav', - '.menu', - '.sidebar', - '.ad', - '.advertisement', - 
'.social', - '.popup', - '.modal', - '.banner', - '.cookie-notice', - '.social-share', - '.related-articles', - '.recommended', - '#header' - '#footer' - '#navigation', - '#nav', - '#menu', - '#sidebar', - '#ad', - '#advertisement', - '#social', - '#popup', - '#modal', - '#banner', - '#cookie-notice', - '#social-share', - '#related-articles', - '#recommended' + "script", + "style", + "meta", + "link", + "noscript", + "header", + "footer", + "nav", + "iframe", + "svg", + "img", + "button", + "menu", + "sidebar", + "aside", + ".header" ".footer" ".navigation", + ".nav", + ".menu", + ".sidebar", + ".ad", + ".advertisement", + ".social", + ".popup", + ".modal", + ".banner", + ".cookie-notice", + ".social-share", + ".related-articles", + ".recommended", + "#header" "#footer" "#navigation", + "#nav", + "#menu", + "#sidebar", + "#ad", + "#advertisement", + "#social", + "#popup", + "#modal", + "#banner", + "#cookie-notice", + "#social-share", + "#related-articles", + "#recommended", ] + def scrape(url) -> Tuple[Optional[str], Optional[str]]: response = None content = None @@ -62,16 +74,14 @@ def scrape(url) -> Tuple[Optional[str], Optional[str]]: try: response = requests.get( url, - headers={ - 'User-Agent': INTERNET_TOOLSET_USER_AGENT - }, - timeout=INTERNET_TOOLSET_TIMEOUT_SECONDS + headers={"User-Agent": INTERNET_TOOLSET_USER_AGENT}, + timeout=INTERNET_TOOLSET_TIMEOUT_SECONDS, ) response.raise_for_status() except Timeout: logging.error( f"Failed to load {url}. Timeout after {INTERNET_TOOLSET_TIMEOUT_SECONDS} seconds", - exc_info=True + exc_info=True, ) except RequestException as e: logging.error(f"Failed to load {url}: {str(e)}", exc_info=True) @@ -80,15 +90,18 @@ def scrape(url) -> Tuple[Optional[str], Optional[str]]: if response: content = response.text try: - content_type = response.headers['content-type'] + content_type = response.headers["content-type"] if content_type: mime_type = content_type.split(";")[0] except Exception: - logging.info(f"Failed to parse content type from headers {response.headers}") + logging.info( + f"Failed to parse content type from headers {response.headers}" + ) return (content, mime_type) -def cleanup(soup:BeautifulSoup): + +def cleanup(soup: BeautifulSoup): """Remove all elements that are irrelevant to the textual representation of a web page. This includes images, extra data, even links as there is no intention to navigate from that page. 
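[Editor's note] The `cleanup` step above strips boilerplate elements from a page before it is converted to text. A minimal sketch of that idea follows, assuming removal is done via CSS selectors and `Tag.decompose()` (the exact implementation is not visible in this hunk); the selector list here is a short illustrative subset. Also worth remembering when reading the list above: adjacent string literals with no comma, such as `".header" ".footer"`, concatenate into a single selector in Python.

```python
# Remove non-content elements (scripts, navigation, ads, ...) in place.
from bs4 import BeautifulSoup

SELECTORS_TO_REMOVE = ["script", "style", "nav", "header", "footer", ".ad", "#sidebar"]


def cleanup(soup: BeautifulSoup) -> BeautifulSoup:
    for selector in SELECTORS_TO_REMOVE:
        for element in soup.select(selector):
            element.decompose()  # drop the tag and everything inside it
    return soup


if __name__ == "__main__":
    html = "<html><body><nav>menu</nav><p>Actual content</p><script>x=1</script></body></html>"
    print(cleanup(BeautifulSoup(html, "html.parser")).get_text(strip=True))  # Actual content
```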
""" @@ -105,9 +118,7 @@ def cleanup(soup:BeautifulSoup): return soup - -def html_to_markdown(page_source:str): - +def html_to_markdown(page_source: str): soup = BeautifulSoup(page_source, "html.parser") soup = cleanup(soup) page_source = str(soup) @@ -156,7 +167,6 @@ def __init__(self): ) def invoke(self, params: Any) -> str: - url: str = params["url"] content, mime_type = scrape(url) @@ -185,6 +195,8 @@ def __init__(self): icon_url="https://platform.robusta.dev/demos/internet-access.svg", prerequisites=[], tools=[FetchWebpage()], - tags=[ToolsetTag.CORE,], - is_default=True + tags=[ + ToolsetTag.CORE, + ], + is_default=True, ) diff --git a/holmes/plugins/toolsets/kubernetes.yaml b/holmes/plugins/toolsets/kubernetes.yaml index 3194805..33b8190 100644 --- a/holmes/plugins/toolsets/kubernetes.yaml +++ b/holmes/plugins/toolsets/kubernetes.yaml @@ -10,11 +10,11 @@ toolsets: tools: - name: "kubectl_describe" - description: > - Run kubectl describe -n , - call this when users ask for description, + description: > + Run kubectl describe -n , + call this when users ask for description, for example when a user asks - - 'describe pod xyz-123' + - 'describe pod xyz-123' - 'show service xyz-123 in namespace my-ns' command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}" @@ -29,7 +29,7 @@ toolsets: - name: "kubectl_get_by_kind_in_cluster" description: "Run `kubectl get -A --show-labels` to get all resources of a given type in the cluster" command: "kubectl get -A --show-labels -o wide {{ kind }}" - + - name: "kubectl_find_resource" description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels" command: "kubectl get -A --show-labels -o wide {{ kind }} | grep {{ keyword }}" @@ -41,7 +41,7 @@ toolsets: - name: "kubectl_events" description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment, 'job'', 'node', etc." command: "kubectl events --for {{resource_type}}/{{ pod_name }} -n {{ namespace }}" - + - name: "kubectl_memory_requests_all_namespaces" description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly." command: | @@ -125,12 +125,12 @@ toolsets: } print namespace, name, sum_memory(requests) " Mi"; }' | sort -k3 -nr - + - name: "kubernetes_jq_query" - description: > + description: > Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. 
give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not) command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }} - + # NOTE: this is only possible for probes with a healthz endpoint - we do this to avoid giving the LLM generic # http GET capabilities which are more powerful than we want to expose diff --git a/holmes/plugins/toolsets/opensearch.py b/holmes/plugins/toolsets/opensearch.py index 84d4c73..5e824c5 100644 --- a/holmes/plugins/toolsets/opensearch.py +++ b/holmes/plugins/toolsets/opensearch.py @@ -1,7 +1,7 @@ import logging from typing import Any, Dict, List, Optional -from pydantic import ConfigDict +from pydantic import BaseModel, ConfigDict from holmes.core.tools import ( CallablePrerequisite, Tool, @@ -12,18 +12,45 @@ from opensearchpy import OpenSearch +class OpenSearchHttpAuth(BaseModel): + username: str + password: str + + +class OpenSearchHost(BaseModel): + host: str + port: int = 9200 + + +class OpenSearchCluster(BaseModel): + hosts: list[OpenSearchHost] + headers: Optional[dict[str, Any]] = None + use_ssl: bool = True + ssl_assert_hostname: bool = False + verify_certs: bool = False + ssl_show_warn: bool = False + http_auth: Optional[OpenSearchHttpAuth] = None + + +class OpenSearchConfig(BaseModel): + opensearch_clusters: list[OpenSearchCluster] + + class OpenSearchClient: def __init__(self, **kwargs): - # Handle http_auth explicitly if "http_auth" in kwargs: http_auth = kwargs.pop("http_auth") if isinstance(http_auth, dict): - kwargs["http_auth"] = (http_auth.get("username"), http_auth.get("password")) + kwargs["http_auth"] = ( + http_auth.get("username"), + http_auth.get("password"), + ) # Initialize OpenSearch client self.client = OpenSearch(**kwargs) -def get_client(clients:List[OpenSearchClient], host:Optional[str]): + +def get_client(clients: List[OpenSearchClient], host: Optional[str]): if len(clients) == 1: return clients[0] @@ -46,7 +73,6 @@ class BaseOpenSearchTool(Tool): class ListShards(BaseOpenSearchTool): - def __init__(self, toolset: "OpenSearchToolset"): super().__init__( name="opensearch_list_shards", @@ -71,7 +97,6 @@ def get_parameterized_one_liner(self, params: Dict) -> str: class GetClusterSettings(BaseOpenSearchTool): - def __init__(self, toolset: "OpenSearchToolset"): super().__init__( name="opensearch_get_cluster_settings", @@ -98,7 +123,6 @@ def get_parameterized_one_liner(self, params) -> str: class GetClusterHealth(BaseOpenSearchTool): - def __init__(self, toolset: "OpenSearchToolset"): super().__init__( name="opensearch_get_cluster_health", @@ -127,13 +151,12 @@ class OpenSearchToolset(Toolset): clients: List[OpenSearchClient] = [] def __init__(self): - super().__init__( name="opensearch", enabled=False, description="Provide cluster metadata information like health, shards, settings.", docs_url="https://opensearch.org/docs/latest/clients/python-low-level/", - icon_url="https://upload.wikimedia.org/wikipedia/commons/9/91/Opensearch_Logo.svg", + icon_url="https://opensearch.org/assets/brand/PNG/Mark/opensearch_mark_default.png", prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)], tools=[ ListShards(self), @@ -143,21 +166,40 @@ def __init__(self): tags=[ ToolsetTag.CORE, ], - is_default=False, + is_default=True, ) def prerequisites_callable(self, config: dict[str, Any]) -> bool: if not config: return False - clusters_configs: list[dict[str, Any]] = config.get("opensearch_clusters", []) - for cluster in clusters_configs: - try: - logging.info(f"Setting up OpenSearch 
client") - client = OpenSearchClient(**cluster) - if client.client.cluster.health(params={"timeout": 5}): - self.clients.append(client) - except Exception: - logging.exception("Failed to set up opensearch client") - - return len(self.clients) > 0 + try: + os_config = OpenSearchConfig(**config) + + for cluster in os_config.opensearch_clusters: + try: + logging.info("Setting up OpenSearch client") + cluster_kwargs = cluster.model_dump() + client = OpenSearchClient(**cluster_kwargs) + if client.client.cluster.health(params={"timeout": 5}): + self.clients.append(client) + except Exception: + logging.exception("Failed to set up opensearch client") + + return len(self.clients) > 0 + except Exception: + logging.exception("Failed to set up grafana toolset") + return False + + def get_example_config(self) -> Dict[str, Any]: + example_config = OpenSearchConfig( + opensearch_clusters=[ + OpenSearchCluster( + hosts=[OpenSearchHost(host="YOUR OPENSEACH HOST")], + headers={"Authorization": "{{ env.OPENSEARCH_BEARER_TOKEN }}"}, + use_ssl=True, + ssl_assert_hostname=False, + ) + ] + ) + return example_config.model_dump() diff --git a/holmes/plugins/utils.py b/holmes/plugins/utils.py index d381be3..4a56bcf 100644 --- a/holmes/plugins/utils.py +++ b/holmes/plugins/utils.py @@ -1,6 +1,7 @@ # this file contains utilities that plugin writers are likely to use - not utilities that are only relevant for core from typing import Dict + def dict_to_markdown(items: Dict[str, str]) -> str: if not items: return "" @@ -10,4 +11,4 @@ def dict_to_markdown(items: Dict[str, str]) -> str: # TODO: if v is a url, linkify it text += f"• *{k}*: {v}\n" - return text \ No newline at end of file + return text diff --git a/holmes/utils/default_toolset_installation_guide.jinja2 b/holmes/utils/default_toolset_installation_guide.jinja2 index 1cb32dd..d8c8775 100644 --- a/holmes/utils/default_toolset_installation_guide.jinja2 +++ b/holmes/utils/default_toolset_installation_guide.jinja2 @@ -1,5 +1,5 @@ {% if enabled %} -This integration is enabled by default. +This integration is enabled by default. If you would like to disable this toolset (not recommended), you need to update the `generated_values.yaml` configuration. 
@@ -25,6 +25,10 @@ holmes: toolsets: {{toolset_name}}: enabled: true + {% if example_config %} + config: + {{ example_config | indent(8) }} + {% endif %} ``` {% endif %} @@ -33,4 +37,4 @@ And deploy the updated configuration using Helm: ```bash helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName= -``` \ No newline at end of file +``` diff --git a/holmes/utils/definitions.py b/holmes/utils/definitions.py index b316eb3..61403f2 100644 --- a/holmes/utils/definitions.py +++ b/holmes/utils/definitions.py @@ -7,4 +7,4 @@ class RobustaConfig(BaseModel): sinks_config: List[Dict[str, Dict]] - global_config: dict \ No newline at end of file + global_config: dict diff --git a/holmes/utils/file_utils.py b/holmes/utils/file_utils.py index 5847942..ea4b9e6 100644 --- a/holmes/utils/file_utils.py +++ b/holmes/utils/file_utils.py @@ -8,8 +8,8 @@ def write_json_file(json_output_file: str, json_ob_to_dump): dirname = os.path.dirname(json_output_file) if dirname: os.makedirs(dirname, exist_ok=True) - with open(json_output_file , 'w' , encoding='utf-8') as f: + with open(json_output_file, "w", encoding="utf-8") as f: json.dump(json_ob_to_dump, f, ensure_ascii=False, indent=4, default=str) - except Exception as e: - logging.exception(f"Failed to create the json file.") + except Exception: + logging.exception("Failed to create the json file.") return diff --git a/holmes/utils/global_instructions.py b/holmes/utils/global_instructions.py index 226a060..d4d8187 100644 --- a/holmes/utils/global_instructions.py +++ b/holmes/utils/global_instructions.py @@ -2,7 +2,13 @@ from holmes.core.tool_calling_llm import Instructions -def add_global_instructions_to_user_prompt(user_prompt: str, global_instructions: Optional[Instructions]) -> str: - if global_instructions and global_instructions.instructions and len(global_instructions.instructions[0]) > 0: +def add_global_instructions_to_user_prompt( + user_prompt: str, global_instructions: Optional[Instructions] +) -> str: + if ( + global_instructions + and global_instructions.instructions + and len(global_instructions.instructions[0]) > 0 + ): user_prompt += f"\n\nGlobal Instructions (use only if relevant): {global_instructions.instructions[0]}\n" return user_prompt diff --git a/holmes/utils/holmes_status.py b/holmes/utils/holmes_status.py index 60733a9..ad17b3f 100644 --- a/holmes/utils/holmes_status.py +++ b/holmes/utils/holmes_status.py @@ -6,11 +6,13 @@ def update_holmes_status_in_db(dal: SupabaseDal, config: Config): logging.info("Updating status of holmes") - + if not config.cluster_name: - raise Exception("Cluster name is missing in the configuration. Please ensure 'CLUSTER_NAME' is defined in the environment variables, " - "or verify that a cluster name is provided in the Robusta configuration file.") - + raise Exception( + "Cluster name is missing in the configuration. Please ensure 'CLUSTER_NAME' is defined in the environment variables, " + "or verify that a cluster name is provided in the Robusta configuration file." 
+ ) + dal.upsert_holmes_status( { "cluster_id": config.cluster_name, diff --git a/holmes/utils/holmes_sync_toolsets.py b/holmes/utils/holmes_sync_toolsets.py index a485b03..340f199 100644 --- a/holmes/utils/holmes_sync_toolsets.py +++ b/holmes/utils/holmes_sync_toolsets.py @@ -1,4 +1,7 @@ from datetime import datetime +from typing import Any + +import yaml from holmes.config import Config @@ -39,7 +42,7 @@ def holmes_sync_toolsets_status(dal: SupabaseDal, config: Config) -> None: account_id=dal.account_id, status=toolset.get_status(), error=toolset.get_error(), - updated_at=updated_at + updated_at=updated_at, ).model_dump(exclude_none=True) ) dal.sync_toolsets(db_toolsets, config.cluster_name) @@ -47,19 +50,17 @@ def holmes_sync_toolsets_status(dal: SupabaseDal, config: Config) -> None: def render_default_installation_instructions_for_toolset(toolset: Toolset) -> str: env_vars = toolset.get_environment_variables() - context = { + context: dict[str, Any] = { "env_vars": env_vars if env_vars else [], "toolset_name": toolset.name, "enabled": toolset.enabled, - "default_toolset": toolset.is_default, + "example_config": yaml.dump(toolset.get_example_config()), } - if toolset.is_default: - installation_instructions = load_and_render_prompt( - "file://holmes/utils/default_toolset_installation_guide.jinja2", context - ) - return installation_instructions - installation_instructions = load_and_render_prompt( - "file://holmes/utils/installation_guide.jinja2", context + template = ( + "file://holmes/utils/default_toolset_installation_guide.jinja2" + if toolset.is_default + else "file://holmes/utils/installation_guide.jinja2" ) + installation_instructions = load_and_render_prompt(template, context) return installation_instructions diff --git a/holmes/utils/installation_guide.jinja2 b/holmes/utils/installation_guide.jinja2 index bbb8c0e..c1e4393 100644 --- a/holmes/utils/installation_guide.jinja2 +++ b/holmes/utils/installation_guide.jinja2 @@ -22,4 +22,4 @@ holmes: # Add other configurations as needed tools: # Define the tools included in this toolset -``` \ No newline at end of file +``` diff --git a/holmes/utils/markdown_utils.py b/holmes/utils/markdown_utils.py index d29aa16..3f43eaa 100644 --- a/holmes/utils/markdown_utils.py +++ b/holmes/utils/markdown_utils.py @@ -42,13 +42,14 @@ def to_plain_text(element): class PlainTextExtension(Extension): - def extendMarkdown(self, md): - md.serializer = to_plain_text + def extendMarkdown(self, md): + md.serializer = to_plain_text md.stripTopLevelTags = False # Extention register actually runs before the format is set and it ends up rewriting serializer that we have just changed md.set_output_format = lambda x: x + def markdown_to_plain_text(text): md = Markdown(extensions=[PlainTextExtension()]) - return md.convert(text) \ No newline at end of file + return md.convert(text) diff --git a/holmes/utils/pydantic_utils.py b/holmes/utils/pydantic_utils.py index 2d3c806..620985f 100644 --- a/holmes/utils/pydantic_utils.py +++ b/holmes/utils/pydantic_utils.py @@ -2,7 +2,6 @@ from typing import Any, Dict, List, Tuple, Type, Union, Annotated import typer -import yaml from benedict import benedict from pydantic import BaseModel, ValidationError, BeforeValidator, ConfigDict @@ -10,8 +9,10 @@ PromptField = Annotated[str, BeforeValidator(lambda v: load_prompt(v))] + class RobustaBaseConfig(BaseModel): - model_config = ConfigDict(extra='forbid', validate_default=True) + model_config = ConfigDict(extra="forbid", validate_default=True) + def loc_to_dot_sep(loc: 
Tuple[Union[str, int], ...]) -> str: path = "" @@ -34,9 +35,7 @@ def convert_errors(e: ValidationError) -> List[Dict[str, Any]]: return new_errors -def load_model_from_file( - model: Type[BaseModel], file_path: str, yaml_path: str = None -): +def load_model_from_file(model: Type[BaseModel], file_path: str, yaml_path: str = None): try: contents = benedict(file_path, format="yaml") if yaml_path is not None: diff --git a/holmes/utils/robusta.py b/holmes/utils/robusta.py index 619d5d7..cc201bd 100644 --- a/holmes/utils/robusta.py +++ b/holmes/utils/robusta.py @@ -1,4 +1,3 @@ - import os from holmes.config import Config @@ -6,7 +5,7 @@ from pydantic import SecretStr -def load_robusta_api_key(dal:SupabaseDal, config:Config): +def load_robusta_api_key(dal: SupabaseDal, config: Config): if os.environ.get("ROBUSTA_AI"): account_id, token = dal.get_ai_credentials() config.api_key = SecretStr(f"{account_id} {token}") diff --git a/holmes/utils/tags.py b/holmes/utils/tags.py index 0242dcd..e71e6b8 100644 --- a/holmes/utils/tags.py +++ b/holmes/utils/tags.py @@ -1,5 +1,3 @@ - - import logging from typing import Optional from typing_extensions import Dict, List @@ -7,7 +5,8 @@ import json from copy import deepcopy -def stringify_tag(tag:Dict[str, str]) -> Optional[str]: + +def stringify_tag(tag: Dict[str, str]) -> Optional[str]: """ This serializes a dictionary into something more readable to the LLM. Although I have not seen much difference in quality of output, in theory this can help the LLM @@ -42,7 +41,8 @@ def stringify_tag(tag:Dict[str, str]) -> Optional[str]: return formatted_string -def format_tags_in_string(user_prompt:str) -> str: + +def format_tags_in_string(user_prompt: str) -> str: """ Formats the tags included in a user's message. E.g. @@ -50,7 +50,7 @@ def format_tags_in_string(user_prompt:str) -> str: -> 'how many pods are running on node my-node?' """ try: - pattern = r'<<(.*?)>>' + pattern = r"<<(.*?)>>" def replace_match(match): try: @@ -68,13 +68,13 @@ def replace_match(match): return user_prompt -def parse_messages_tags(messages:List[Dict[str, str]]) -> List[Dict[str, str]]: +def parse_messages_tags(messages: List[Dict[str, str]]) -> List[Dict[str, str]]: """ - Parses the user messages for tags and format these. - System messages and llm responses are ignored and left as-is + Parses the user messages for tags and format these. + System messages and llm responses are ignored and left as-is - This method returns a shallow copy of the messages list with the exception - of the messages that have been parsed. + This method returns a shallow copy of the messages list with the exception + of the messages that have been parsed. 
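[Editor's note] The tag utilities above rewrite embedded `<<{...}>>` JSON tags in user messages into plain text before they reach the LLM. The sketch below shows only the regex-and-callback mechanics; `render_tag` is an illustrative stand-in, not the real `stringify_tag`, and malformed tags are left untouched, matching the defensive behavior above.

```python
# Expand <<{"type": ..., "name": ...}>> tags inside a prompt string.
import json
import re

TAG_PATTERN = r"<<(.*?)>>"


def render_tag(tag: dict) -> str:
    # Illustrative rendering only, e.g. {"type": "node", "name": "my-node"} -> "node my-node"
    return f'{tag.get("type", "")} {tag.get("name", "")}'.strip()


def format_tags_in_string(user_prompt: str) -> str:
    def replace_match(match: re.Match) -> str:
        try:
            return render_tag(json.loads(match.group(1)))
        except json.JSONDecodeError:
            return match.group(0)  # leave malformed tags as-is

    return re.sub(TAG_PATTERN, replace_match, user_prompt)


if __name__ == "__main__":
    prompt = 'how many pods are running on <<{"type": "node", "name": "my-node"}>>?'
    print(format_tags_in_string(prompt))  # how many pods are running on node my-node?
```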
""" formatted_messages = [] for message in messages: @@ -85,7 +85,9 @@ def parse_messages_tags(messages:List[Dict[str, str]]) -> List[Dict[str, str]]: formatted_message = deepcopy(message) formatted_message["content"] = formatted_str formatted_messages.append(formatted_message) - logging.debug(f"Message with tags '{original_message}' formatted to '{formatted_message}'") + logging.debug( + f"Message with tags '{original_message}' formatted to '{formatted_message}'" + ) else: formatted_messages.append(message) diff --git a/loki/docker-compose.yaml b/loki/docker-compose.yaml index b8a4a75..1201e20 100644 --- a/loki/docker-compose.yaml +++ b/loki/docker-compose.yaml @@ -33,7 +33,7 @@ services: datasources: - name: Loki type: loki - access: proxy + access: proxy orgId: 1 url: http://loki:3100 basicAuth: false diff --git a/server.py b/server.py index 8df2261..a9f2a0f 100644 --- a/server.py +++ b/server.py @@ -1,3 +1,4 @@ +# ruff: noqa: E402 import os from holmes.utils.cert_utils import add_custom_certificate @@ -10,16 +11,13 @@ from holmes.core import investigation from contextlib import asynccontextmanager from holmes.utils.holmes_status import update_holmes_status_in_db -import jinja2 import logging import uvicorn import colorlog -import uuid import time from litellm.exceptions import AuthenticationError from fastapi import FastAPI, HTTPException, Request -from rich.console import Console from holmes.utils.robusta import load_robusta_api_key from holmes.common.env_vars import ( @@ -34,8 +32,8 @@ build_chat_messages, build_issue_chat_messages, handle_issue_conversation, + build_workload_health_chat_messages, ) -from holmes.core.issue import Issue from holmes.core.models import ( InvestigationResult, ConversationRequest, @@ -45,6 +43,7 @@ ChatRequest, ChatResponse, IssueChatRequest, + WorkloadHealthChatRequest, ) from holmes.plugins.prompts import load_and_render_prompt from holmes.utils.holmes_sync_toolsets import holmes_sync_toolsets_status @@ -91,6 +90,7 @@ async def lifespan(app: FastAPI): if LOG_PERFORMANCE: + @app.middleware("http") async def log_requests(request: Request, call_next): start_time = time.time() @@ -101,18 +101,19 @@ async def log_requests(request: Request, call_next): finally: process_time = int((time.time() - start_time) * 1000) - status_code = 'unknown' + status_code = "unknown" if response: status_code = response.status_code - logging.info(f"Request completed {request.method} {request.url.path} status={status_code} latency={process_time}ms") + logging.info( + f"Request completed {request.method} {request.url.path} status={status_code} latency={process_time}ms" + ) + @app.post("/api/investigate") def investigate_issues(investigate_request: InvestigateRequest): try: result = investigation.investigate_issues( - investigate_request=investigate_request, - dal=dal, - config=config + investigate_request=investigate_request, dal=dal, config=config ) return result @@ -144,10 +145,13 @@ def workload_health_check(request: WorkloadHealthRequest): request.ask = f"{request.ask}\n My instructions for the investigation '''{nl.join(instructions)}'''" global_instructions = dal.get_global_instructions_for_account() - request.ask = add_global_instructions_to_user_prompt(request.ask, global_instructions) - - system_prompt = load_and_render_prompt(request.prompt_template, context={'alerts': workload_alerts}) + request.ask = add_global_instructions_to_user_prompt( + request.ask, global_instructions + ) + system_prompt = load_and_render_prompt( + request.prompt_template, context={"alerts": 
workload_alerts} + ) ai = config.create_toolcalling_llm(dal=dal) @@ -165,6 +169,29 @@ def workload_health_check(request: WorkloadHealthRequest): raise HTTPException(status_code=401, detail=e.message) +@app.post("/api/workload_health_chat") +def workload_health_conversation( + workload_health_chat_request: WorkloadHealthChatRequest, +): + try: + load_robusta_api_key(dal=dal, config=config) + ai = config.create_toolcalling_llm(dal=dal) + global_instructions = dal.get_global_instructions_for_account() + + messages = build_workload_health_chat_messages( + workload_health_chat_request, ai, global_instructions + ) + llm_call = ai.messages_call(messages=messages) + + return ChatResponse( + analysis=llm_call.result, + tool_calls=llm_call.tool_calls, + conversation_history=llm_call.messages, + ) + except AuthenticationError as e: + raise HTTPException(status_code=401, detail=e.message) + + # older api that does not support conversation history @app.post("/api/conversation") def issue_conversation_deprecated(conversation_request: ConversationRequest): @@ -191,7 +218,9 @@ def issue_conversation(issue_chat_request: IssueChatRequest): ai = config.create_toolcalling_llm(dal=dal) global_instructions = dal.get_global_instructions_for_account() - messages = build_issue_chat_messages(issue_chat_request, ai, global_instructions) + messages = build_issue_chat_messages( + issue_chat_request, ai, global_instructions + ) llm_call = ai.messages_call(messages=messages) return ChatResponse( @@ -212,7 +241,10 @@ def chat(chat_request: ChatRequest): global_instructions = dal.get_global_instructions_for_account() messages = build_chat_messages( - chat_request.ask, chat_request.conversation_history, ai=ai, global_instructions=global_instructions + chat_request.ask, + chat_request.conversation_history, + ai=ai, + global_instructions=global_instructions, ) llm_call = ai.messages_call(messages=messages) @@ -232,6 +264,10 @@ def get_model(): if __name__ == "__main__": log_config = uvicorn.config.LOGGING_CONFIG - log_config["formatters"]["access"]["fmt"] = "%(asctime)s %(levelname)-8s %(message)s" - log_config["formatters"]["default"]["fmt"] = "%(asctime)s %(levelname)-8s %(message)s" + log_config["formatters"]["access"]["fmt"] = ( + "%(asctime)s %(levelname)-8s %(message)s" + ) + log_config["formatters"]["default"]["fmt"] = ( + "%(asctime)s %(levelname)-8s %(message)s" + ) uvicorn.run(app, host=HOLMES_HOST, port=HOLMES_PORT, log_config=log_config) diff --git a/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/Dockerfile b/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/Dockerfile index 754163c..f932d01 100644 --- a/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/Dockerfile +++ b/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/Dockerfile @@ -17,4 +17,3 @@ EXPOSE 8000 8001 # Run the FastAPI app CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] - diff --git a/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/app.py b/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/app.py index da618db..4a88c70 100644 --- a/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/app.py +++ b/tests/llm/fixtures/test_ask_holmes/07_high_latency/helm/app.py @@ -1,15 +1,15 @@ +# ruff: noqa: F821 import os import logging import time -from fastapi import FastAPI, Request +from fastapi import FastAPI from fastapi.responses import HTMLResponse -from sqlalchemy import create_engine, text from prometheus_fastapi_instrumentator import Instrumentator - -app = FastAPI() from random import randint 
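[Editor's note] For reference, a minimal client sketch against the chat endpoints added above. The host and port are placeholders, and treating `conversation_history` as optional on the first call is an assumption; per the handlers above, requests carry `ask` plus `conversation_history`, and responses expose `analysis`, `tool_calls`, and `conversation_history`, which can be fed back for follow-up questions.

```python
# Hypothetical client for the /api/chat endpoint; adjust the URL to your deployment.
import requests

HOLMES_URL = "http://localhost:5050"  # placeholder


def chat(ask: str, conversation_history: list | None = None) -> dict:
    response = requests.post(
        f"{HOLMES_URL}/api/chat",
        json={"ask": ask, "conversation_history": conversation_history},
        timeout=120,
    )
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    first = chat("why is my checkout page slow?")
    print(first["analysis"])
    # Follow-up question reusing the returned conversation history.
    follow_up = chat("which service should I look at first?", first["conversation_history"])
    print(follow_up["analysis"])
```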
from time import sleep +app = FastAPI() + # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -27,26 +27,21 @@ # Add Prometheus middleware Instrumentator().instrument(app).expose(app) + def check_promotional_notifications(): - logger.info("Connecting to promotions database to see if we should try to upsell user") + logger.info( + "Connecting to promotions database to see if we should try to upsell user" + ) try: logger.info(f"Connecting to database at {DB_HOST}") start_time = time.time() logger.info(f"Fetching data using stored procedure: {STORED_PROCEDURE}") # Execute the stored procedure # - sleep(randint(5,10)) + sleep(randint(5, 10)) # Fetch the result - result = [ - ( - True, - { - "type": "notification", - "discount": "$15" - } - ) - ] + result = [(True, {"type": "notification", "discount": "$15"})] end_time = time.time() logger.info(f"Database call completed in {end_time - start_time:.2f} seconds.") for row in result: @@ -57,6 +52,7 @@ def check_promotional_notifications(): logger.error(f"Error checking for promotions: {e}") return False + @app.get("/", response_class=HTMLResponse) def read_root(): logger.info("Received request for checkout page.") @@ -76,6 +72,7 @@ def read_root(): """ + if __name__ == "__main__": # Start Prometheus metrics server start_http_server(8001) diff --git a/tests/llm/fixtures/test_ask_holmes/07_high_latency/kubectl_describe_pod.txt b/tests/llm/fixtures/test_ask_holmes/07_high_latency/kubectl_describe_pod.txt index 46bda31..d1350a2 100644 --- a/tests/llm/fixtures/test_ask_holmes/07_high_latency/kubectl_describe_pod.txt +++ b/tests/llm/fixtures/test_ask_holmes/07_high_latency/kubectl_describe_pod.txt @@ -47,11 +47,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-r6cdm (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready True - ContainersReady True - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready True + ContainersReady True + PodScheduled True Volumes: kube-api-access-r6cdm: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/helm/sock-shop.yaml b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/helm/sock-shop.yaml index 368f55a..ec21f74 100644 --- a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/helm/sock-shop.yaml +++ b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/helm/sock-shop.yaml @@ -900,4 +900,3 @@ spec: targetPort: 27017 selector: name: user-db - diff --git a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_describe.txt index 6941f07..e67a06a 100644 --- a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_describe.txt @@ -44,11 +44,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-7fvz6 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-7fvz6: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_previous_logs.txt 
b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_previous_logs.txt index 2c8269a..9510d5e 100644 --- a/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_previous_logs.txt +++ b/tests/llm/fixtures/test_ask_holmes/08_sock_shop_frontend/kubectl_previous_logs.txt @@ -45,7 +45,7 @@ npm ERR! npm v2.15.11 npm ERR! code ELIFECYCLE npm ERR! microservices-demo-front-end@0.0.1 start: `node server.js` npm ERR! Exit status 1 -npm ERR! +npm ERR! npm ERR! Failed at the microservices-demo-front-end@0.0.1 start script 'node server.js'. npm ERR! This is most likely a problem with the microservices-demo-front-end package, npm ERR! not with npm itself. @@ -54,7 +54,7 @@ npm ERR! node server.js npm ERR! You can get information on how to open an issue for this project with: npm ERR! npm bugs microservices-demo-front-end npm ERR! Or if that isn't available, you can get their info via: -npm ERR! +npm ERR! npm ERR! npm owner ls microservices-demo-front-end npm ERR! There is likely additional logging output above. npm ERR! Linux 6.1.0-30-amd64 @@ -69,7 +69,7 @@ npm ERR! syscall open npm ERR! rofs EROFS: read-only file system, open 'npm-debug.log.896050243' npm ERR! rofs This is most likely not a problem with npm itself npm ERR! rofs and is related to the file system being read-only. -npm ERR! rofs +npm ERR! rofs npm ERR! rofs Often virtualized file systems, or other file systems npm ERR! rofs that don't support symlinks, give this error. diff --git a/tests/llm/fixtures/test_ask_holmes/09_crashpod/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/09_crashpod/kubectl_describe.txt index fef4813..9aa9184 100644 --- a/tests/llm/fixtures/test_ask_holmes/09_crashpod/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/09_crashpod/kubectl_describe.txt @@ -40,11 +40,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pplcf (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-pplcf: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/kubectl_describe.txt index ce04118..056a9bd 100644 --- a/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/10_image_pull_backoff/kubectl_describe.txt @@ -17,9 +17,9 @@ IPs: Controlled By: ReplicaSet/customer-relations-webapp-7c67c65579 Containers: crw-main-container: - Container ID: + Container ID: Image: yourcompany/crw:latest - Image ID: + Image ID: Port: Host Port: State: Waiting @@ -31,11 +31,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-c6ztg (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-c6ztg: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/11_init_containers/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/11_init_containers/kubectl_describe.txt index a207813..b458d6a 100644 --- 
a/tests/llm/fixtures/test_ask_holmes/11_init_containers/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/11_init_containers/kubectl_describe.txt @@ -40,9 +40,9 @@ Init Containers: /work-dir from workdir (rw) Containers: exporter: - Container ID: + Container ID: Image: nginx - Image ID: + Image ID: Port: 80/TCP Host Port: 0/TCP State: Waiting @@ -55,15 +55,15 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-k7gpf (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized False - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized False + Ready False + ContainersReady False + PodScheduled True Volumes: workdir: Type: EmptyDir (a temporary directory that shares a pod's lifetime) - Medium: + Medium: SizeLimit: kube-api-access-k7gpf: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/kubectl_describe.txt index 1e04b8d..7d7c024 100644 --- a/tests/llm/fixtures/test_ask_holmes/12_job_crashing/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/12_job_crashing/kubectl_describe.txt @@ -29,10 +29,10 @@ Pod Template: /bin/sh -c Args: - echo 'Java Network Exception: - All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 - All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 - All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 + echo 'Java Network Exception: + All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 + All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 + All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256 All host(s) tried for db query failed (tried: prod-db:3333) - no available connection and the queue has reached its max size 256'; sleep 60; exit 1 Environment: Mounts: diff --git a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/kubectl_describe.txt index 29daf8b..16c5ef3 100644 --- a/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/13_pending_node_selector/kubectl_describe.txt @@ -8,7 +8,7 @@ Node: Labels: Annotations: Status: Pending -IP: +IP: IPs: Containers: nginx: @@ -20,7 +20,7 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pk5lt (ro) Conditions: Type Status - PodScheduled False + PodScheduled False Volumes: kube-api-access-pk5lt: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/kubectl_describe_pod.txt b/tests/llm/fixtures/test_ask_holmes/14_pending_resources/kubectl_describe_pod.txt index 77f4d55..d431385 100644 --- a/tests/llm/fixtures/test_ask_holmes/14_pending_resources/kubectl_describe_pod.txt +++ b/tests/llm/fixtures/test_ask_holmes/14_pending_resources/kubectl_describe_pod.txt @@ -9,7 +9,7 @@ Labels: app=user-profile-resources 
pod-template-hash=659d4dd659 Annotations: Status: Pending -IP: +IP: IPs: Controlled By: ReplicaSet/user-profile-resources-659d4dd659 Containers: @@ -30,7 +30,7 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-7hdk7 (ro) Conditions: Type Status - PodScheduled False + PodScheduled False Volumes: kube-api-access-7hdk7: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/kubectl_describe.txt index f4dfd91..b28c00e 100644 --- a/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/15_failed_readiness_probe/kubectl_describe.txt @@ -33,11 +33,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-zwx78 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-zwx78: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/fast_oom_deployment.yaml b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/fast_oom_deployment.yaml index f805781..b47cbd8 100644 --- a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/fast_oom_deployment.yaml +++ b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/fast_oom_deployment.yaml @@ -29,4 +29,4 @@ spec: memory: 100Mi restartPolicy: Always nodeSelector: - kubernetes.io/arch: amd64 \ No newline at end of file + kubernetes.io/arch: amd64 diff --git a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/kubectl_describe.txt index 7de4f2a..1f26f01 100644 --- a/tests/llm/fixtures/test_ask_holmes/17_oom_kill/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/17_oom_kill/kubectl_describe.txt @@ -45,11 +45,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-2dkh7 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-2dkh7: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/kubectl_describe.txt index 22cc01e..79db5fe 100644 --- a/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/18_crash_looping_v2/kubectl_describe.txt @@ -33,11 +33,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-d879h (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready True - ContainersReady True - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready True + ContainersReady True + PodScheduled True Volumes: cert-volume: Type: ConfigMap (a volume populated by a ConfigMap) @@ -49,7 +49,7 @@ Volumes: Optional: false writable-certs: Type: EmptyDir (a temporary directory that shares a pod's lifetime) - Medium: + Medium: SizeLimit: kube-api-access-d879h: Type: Projected 
(a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/job-service-account-event/get-data-service-account.yaml b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/job-service-account-event/get-data-service-account.yaml index bcd920f..f34deab 100644 --- a/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/job-service-account-event/get-data-service-account.yaml +++ b/tests/llm/fixtures/test_ask_holmes/21_job_fail_curl_no_svc_account/job-service-account-event/get-data-service-account.yaml @@ -2,4 +2,4 @@ apiVersion: v1 kind: ServiceAccount metadata: name: get-data-service-account - namespace: default \ No newline at end of file + namespace: default diff --git a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/kubectl_describe_pod.txt b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/kubectl_describe_pod.txt index b4ada63..ee719f9 100644 --- a/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/kubectl_describe_pod.txt +++ b/tests/llm/fixtures/test_ask_holmes/23_app_error_in_current_logs/kubectl_describe_pod.txt @@ -32,11 +32,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-df7m2 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready True - ContainersReady True - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready True + ContainersReady True + PodScheduled True Volumes: kube-api-access-df7m2: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/kubectl_describe.txt index 835bb88..b67a8e0 100644 --- a/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/24_misconfigured_pvc/kubectl_describe.txt @@ -9,7 +9,7 @@ Labels: app=redis pod-template-hash=747ffc844f Annotations: Status: Pending -IP: +IP: IPs: Controlled By: ReplicaSet/redis-747ffc844f Containers: @@ -38,7 +38,7 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-pnp44 (ro) Conditions: Type Status - PodScheduled False + PodScheduled False Volumes: redis-storage: Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace) diff --git a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/kubectl_describe.txt b/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/kubectl_describe.txt index 1c807d1..d9d0c51 100644 --- a/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/kubectl_describe.txt +++ b/tests/llm/fixtures/test_ask_holmes/25_misconfigured_ingress_class/kubectl_describe.txt @@ -3,13 +3,13 @@ stdout: Name: my-http-ingress Labels: Namespace: default -Address: +Address: Ingress Class: example-ingress-class Default backend: Rules: Host Path Backends ---- ---- -------- - app.example.com + app.example.com / my-http-service:80 (10.244.0.194:8080) Annotations: Events: @@ -35,11 +35,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-68j26 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready True - ContainersReady True - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready True + ContainersReady True + PodScheduled True Volumes: kube-api-access-68j26: Type: 
Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_ask_holmes/27_permissions_error_no_helm_tools/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/27_permissions_error_no_helm_tools/test_case.yaml index fbcbd32..a98d289 100644 --- a/tests/llm/fixtures/test_ask_holmes/27_permissions_error_no_helm_tools/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/27_permissions_error_no_helm_tools/test_case.yaml @@ -1,6 +1,6 @@ user_prompt: "is there any kind of these resources on cluster kind: MyResource metadata: name: my-resource-instance" -expected_output: - - Modify the generated_values.yaml file to include the missing permissions +expected_output: + - Modify the generated_values.yaml file to include the missing permissions - helm upgrade robusta/robusta --values=generated_values.yaml --set clusterName= before_test: kubectl apply -f./custom_resources.yaml after_test: kubectl delete -f./custom_resources.yaml diff --git a/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/helm_list.txt b/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/helm_list.txt index eb552f3..4f246fe 100644 --- a/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/helm_list.txt +++ b/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/helm_list.txt @@ -1,6 +1,6 @@ {"toolset_name":"helm/core","tool_name":"helm_list"} stdout: NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION -robusta default 135 2025-01-06 15:47:11.45987258 +0100 +0100 deployed robusta-0.0.1 0.0.0 +robusta default 135 2025-01-06 15:47:11.45987258 +0100 +0100 deployed robusta-0.0.1 0.0.0 -stderr: \ No newline at end of file +stderr: diff --git a/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/test_case.yaml b/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/test_case.yaml index 2658f92..344b054 100644 --- a/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/test_case.yaml +++ b/tests/llm/fixtures/test_ask_holmes/28_permissions_error_helm_tools_enabled/test_case.yaml @@ -1,5 +1,5 @@ user_prompt: "is there any kind of these resources on cluster kind: MyResource metadata: name: my-resource-instance" -expected_output: +expected_output: - To resolve this, update your configuration by adding the necessary permissions - helm upgrade robusta robusta/robusta --values=generated_values.yaml --set clusterName=test-cluster before_test: kubectl apply -f./custom_resources.yaml diff --git a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/kubectl_describe.txt index 06ba10e..e9dbb24 100644 --- a/tests/llm/fixtures/test_investigate/02_crashloop_backoff/kubectl_describe.txt +++ b/tests/llm/fixtures/test_investigate/02_crashloop_backoff/kubectl_describe.txt @@ -40,9 +40,9 @@ Init Containers: /work-dir from workdir (rw) Containers: exporter: - Container ID: + Container ID: Image: nginx - Image ID: + Image ID: Port: 80/TCP Host Port: 0/TCP State: Waiting @@ -55,15 +55,15 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-79tv5 (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized False - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers True + Initialized False + Ready False + ContainersReady False + PodScheduled True Volumes: workdir: Type: EmptyDir (a temporary directory 
that shares a pod's lifetime) - Medium: + Medium: SizeLimit: kube-api-access-79tv5: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_investigate/03_cpu_throttling/issue_data.json b/tests/llm/fixtures/test_investigate/03_cpu_throttling/issue_data.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/03_cpu_throttling/issue_data.json +++ b/tests/llm/fixtures/test_investigate/03_cpu_throttling/issue_data.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/03_cpu_throttling/resource_instructions.json b/tests/llm/fixtures/test_investigate/03_cpu_throttling/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/03_cpu_throttling/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/03_cpu_throttling/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/resource_instructions.json b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/04_image_pull_backoff/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/04_image_pull_backoff/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/05_crashpod/issue_data.json b/tests/llm/fixtures/test_investigate/05_crashpod/issue_data.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/05_crashpod/issue_data.json +++ b/tests/llm/fixtures/test_investigate/05_crashpod/issue_data.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/05_crashpod/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/05_crashpod/kubectl_describe.txt index d2a5c75..41cd21c 100644 --- a/tests/llm/fixtures/test_investigate/05_crashpod/kubectl_describe.txt +++ b/tests/llm/fixtures/test_investigate/05_crashpod/kubectl_describe.txt @@ -38,11 +38,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-d879h (ro) Conditions: Type Status - PodReadyToStartContainers True - Initialized True - Ready True - ContainersReady True - PodScheduled True + PodReadyToStartContainers True + Initialized True + Ready True + ContainersReady True + PodScheduled True Volumes: cert-volume: Type: ConfigMap (a volume populated by a ConfigMap) @@ -54,7 +54,7 @@ Volumes: Optional: false writable-certs: Type: EmptyDir (a temporary directory that shares a pod's lifetime) - Medium: + Medium: SizeLimit: kube-api-access-d879h: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_investigate/05_crashpod/resource_instructions.json b/tests/llm/fixtures/test_investigate/05_crashpod/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/05_crashpod/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/05_crashpod/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/06_job_failure/resource_instructions.json b/tests/llm/fixtures/test_investigate/06_job_failure/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/06_job_failure/resource_instructions.json +++ 
b/tests/llm/fixtures/test_investigate/06_job_failure/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/07_job_syntax_error/issue_data.json b/tests/llm/fixtures/test_investigate/07_job_syntax_error/issue_data.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/07_job_syntax_error/issue_data.json +++ b/tests/llm/fixtures/test_investigate/07_job_syntax_error/issue_data.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/07_job_syntax_error/kubectl_describe_pod.txt b/tests/llm/fixtures/test_investigate/07_job_syntax_error/kubectl_describe_pod.txt index 8e4e446..e0bc4e4 100644 --- a/tests/llm/fixtures/test_investigate/07_job_syntax_error/kubectl_describe_pod.txt +++ b/tests/llm/fixtures/test_investigate/07_job_syntax_error/kubectl_describe_pod.txt @@ -40,11 +40,11 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-tjb7n (ro) Conditions: Type Status - PodReadyToStartContainers False - Initialized True - Ready False - ContainersReady False - PodScheduled True + PodReadyToStartContainers False + Initialized True + Ready False + ContainersReady False + PodScheduled True Volumes: kube-api-access-tjb7n: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_investigate/07_job_syntax_error/resource_instructions.json b/tests/llm/fixtures/test_investigate/07_job_syntax_error/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/07_job_syntax_error/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/07_job_syntax_error/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/08_memory_pressure/issue_data.json b/tests/llm/fixtures/test_investigate/08_memory_pressure/issue_data.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/08_memory_pressure/issue_data.json +++ b/tests/llm/fixtures/test_investigate/08_memory_pressure/issue_data.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/08_memory_pressure/resource_instructions.json b/tests/llm/fixtures/test_investigate/08_memory_pressure/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/08_memory_pressure/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/08_memory_pressure/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/helm/Dockerfile b/tests/llm/fixtures/test_investigate/09_high_latency/helm/Dockerfile index 754163c..f932d01 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/helm/Dockerfile +++ b/tests/llm/fixtures/test_investigate/09_high_latency/helm/Dockerfile @@ -17,4 +17,3 @@ EXPOSE 8000 8001 # Run the FastAPI app CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"] - diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/helm/app.py b/tests/llm/fixtures/test_investigate/09_high_latency/helm/app.py index 834709f..ef742d7 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/helm/app.py +++ b/tests/llm/fixtures/test_investigate/09_high_latency/helm/app.py @@ -1,12 +1,14 @@ +# ruff: noqa: F821 import logging import time +from random import randint +from time import sleep + from fastapi import FastAPI from 
fastapi.responses import HTMLResponse from prometheus_fastapi_instrumentator import Instrumentator app = FastAPI() -from random import randint -from time import sleep # Configure logging logging.basicConfig(level=logging.INFO) @@ -18,24 +20,19 @@ # Add Prometheus middleware Instrumentator().instrument(app).expose(app) + def check_promotional_notifications(): - logger.info("Connecting to promotions database to see if we should try to upsell user") + logger.info( + "Connecting to promotions database to see if we should try to upsell user" + ) try: logger.info("Successfully connected to database") start_time = time.time() logger.info(f"Fetching data using stored procedure: {STORED_PROCEDURE}") - sleep(randint(5,10)) + sleep(randint(5, 10)) - result = [ - ( - True, - { - "type": "notification", - "discount": f"${randint(6,50)}" - } - ) - ] + result = [(True, {"type": "notification", "discount": f"${randint(6,50)}"})] end_time = time.time() logger.info(f"Database call completed in {end_time - start_time:.2f} seconds.") for row in result: @@ -46,6 +43,7 @@ def check_promotional_notifications(): logger.error(f"Error checking for promotions: {e}") return False + @app.get("/", response_class=HTMLResponse) def read_root(): logger.info("Received request for checkout page.") @@ -65,6 +63,7 @@ def read_root(): """ + if __name__ == "__main__": # Start Prometheus metrics server start_http_server(8001) diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/kubectl_logs_all_containers.txt b/tests/llm/fixtures/test_investigate/09_high_latency/kubectl_logs_all_containers.txt index ac9f6c3..4ef059d 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/kubectl_logs_all_containers.txt +++ b/tests/llm/fixtures/test_investigate/09_high_latency/kubectl_logs_all_containers.txt @@ -95,7 +95,7 @@ INFO: 10.244.0.8:52554 - "GET /metrics HTTP/1.1" 200 OK

Promotions: True

- + Checkout Status @@ -105,7 +105,7 @@ INFO: 10.244.0.8:52554 - "GET /metrics HTTP/1.1" 200 OK

Promotions: True

- + Checkout Status @@ -115,7 +115,7 @@ INFO: 10.244.0.8:52554 - "GET /metrics HTTP/1.1" 200 OK

Promotions: True

- + Checkout Status @@ -125,7 +125,7 @@ INFO: 10.244.0.8:52554 - "GET /metrics HTTP/1.1" 200 OK

Promotions: True

- + Checkout Status diff --git a/tests/llm/fixtures/test_investigate/09_high_latency/resource_instructions.json b/tests/llm/fixtures/test_investigate/09_high_latency/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/09_high_latency/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/09_high_latency/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt index 4ab888a..2d6c1c5 100644 --- a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/kubectl_describe_pod.txt @@ -9,7 +9,7 @@ Labels: app=user-profile-resources pod-template-hash=659d4dd659 Annotations: Status: Pending -IP: +IP: IPs: Controlled By: ReplicaSet/user-profile-resources-659d4dd659 Containers: @@ -30,7 +30,7 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-7hdk7 (ro) Conditions: Type Status - PodScheduled False + PodScheduled False Volumes: kube-api-access-7hdk7: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json index 9e26dfe..0967ef4 100644 --- a/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json +++ b/tests/llm/fixtures/test_investigate/11_KubeDeploymentReplicasMismatch/resource_instructions.json @@ -1 +1 @@ -{} \ No newline at end of file +{} diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/fetch_webpage.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/fetch_webpage.txt index ba63953..1a6e4a5 100644 --- a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/fetch_webpage.txt +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/fetch_webpage.txt @@ -61,4 +61,3 @@ Talk with developers or read documentation about the app, ensure to define sane default values to start the app. 
See [Debugging Pods](https://kubernetes.io/docs/tasks/debug-application-cluster/debug-application/#debugging-pods) - diff --git a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt index d91c244..80b5d60 100644 --- a/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt +++ b/tests/llm/fixtures/test_investigate/13_KubePodNotReady/kubectl_describe.txt @@ -9,7 +9,7 @@ Labels: app=user-profile-resources pod-template-hash=659d4dd659 Annotations: Status: Pending -IP: +IP: IPs: Controlled By: ReplicaSet/user-profile-resources-659d4dd659 Containers: @@ -30,7 +30,7 @@ Containers: /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-7hdk7 (ro) Conditions: Type Status - PodScheduled False + PodScheduled False Volumes: kube-api-access-7hdk7: Type: Projected (a volume that contains injected data from multiple sources) diff --git a/tests/llm/test_ask_holmes.py b/tests/llm/test_ask_holmes.py index 5ac0776..6acdaf1 100644 --- a/tests/llm/test_ask_holmes.py +++ b/tests/llm/test_ask_holmes.py @@ -8,7 +8,7 @@ from holmes.core.tool_calling_llm import LLMResult, ToolCallingLLM from holmes.core.tools import ToolExecutor import tests.llm.utils.braintrust as braintrust_util -from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality +from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness from tests.llm.utils.commands import after_test, before_test from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import readable_timestamp @@ -18,42 +18,49 @@ from tests.llm.utils.system import get_machine_state_tags from os import path -TEST_CASES_FOLDER = Path(path.abspath(path.join( - path.dirname(__file__), - "fixtures", "test_ask_holmes" -))) +TEST_CASES_FOLDER = Path( + path.abspath(path.join(path.dirname(__file__), "fixtures", "test_ask_holmes")) +) system_metadata = get_machine_state_tags() DATASET_NAME = f"ask_holmes:{system_metadata.get('branch', 'unknown_branch')}" -def get_test_cases(): +def get_test_cases(): unique_test_id = os.environ.get("PYTEST_XDIST_TESTRUNUID", readable_timestamp()) - experiment_name = f'ask_holmes:{unique_test_id}' + experiment_name = f"ask_holmes:{unique_test_id}" if os.environ.get("EXPERIMENT_ID"): experiment_name = f'ask_holmes:{os.environ.get("EXPERIMENT_ID")}' mh = MockHelper(TEST_CASES_FOLDER) - if os.environ.get('UPLOAD_DATASET') and os.environ.get('BRAINTRUST_API_KEY'): - bt_helper = braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) + if os.environ.get("UPLOAD_DATASET") and os.environ.get("BRAINTRUST_API_KEY"): + bt_helper = braintrust_util.BraintrustEvalHelper( + project_name=PROJECT, dataset_name=DATASET_NAME + ) bt_helper.upload_test_cases(mh.load_test_cases()) test_cases = mh.load_ask_holmes_test_cases() return [(experiment_name, test_case) for test_case in test_cases] + def idfn(val): if isinstance(val, AskHolmesTestCase): return val.id else: return str(val) + @pytest.mark.llm -@pytest.mark.skipif(not os.environ.get('BRAINTRUST_API_KEY'), reason="BRAINTRUST_API_KEY must be set to run LLM evaluations") +@pytest.mark.skipif( + not os.environ.get("BRAINTRUST_API_KEY"), + reason="BRAINTRUST_API_KEY must be set to run LLM evaluations", +) @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) def test_ask_holmes(experiment_name, test_case): - - bt_helper = 
braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) + bt_helper = braintrust_util.BraintrustEvalHelper( + project_name=PROJECT, dataset_name=DATASET_NAME + ) eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) @@ -77,14 +84,18 @@ def test_ask_holmes(experiment_name, test_case): if not isinstance(expected, list): expected = [expected] - debug_expected = '\n- '.join(expected) + debug_expected = "\n- ".join(expected) print(f"** EXPECTED **\n- {debug_expected}") correctness_eval = evaluate_correctness(output=output, expected_elements=expected) - print(f"\n** CORRECTNESS **\nscore = {correctness_eval.score}\nrationale = {correctness_eval.metadata.get('rationale', '')}") + print( + f"\n** CORRECTNESS **\nscore = {correctness_eval.score}\nrationale = {correctness_eval.metadata.get('rationale', '')}" + ) scores["correctness"] = correctness_eval.score if len(test_case.retrieval_context) > 0: - scores["context"] = evaluate_context_usage(output=output, context_items=test_case.retrieval_context, input=input).score + scores["context"] = evaluate_context_usage( + output=output, context_items=test_case.retrieval_context, input=input + ).score bt_helper.end_evaluation( eval=eval, @@ -92,7 +103,7 @@ def test_ask_holmes(experiment_name, test_case): output=output or "", expected=str(expected), id=test_case.id, - scores=scores + scores=scores, ) print(f"\n** OUTPUT **\n{output}") print(f"\n** SCORES **\n{scores}") @@ -101,9 +112,10 @@ def test_ask_holmes(experiment_name, test_case): assert scores.get("correctness", 0) >= test_case.evaluation.correctness -def ask_holmes(test_case:AskHolmesTestCase) -> LLMResult: - - mock = MockToolsets(generate_mocks=test_case.generate_mocks, test_case_folder=test_case.folder) +def ask_holmes(test_case: AskHolmesTestCase) -> LLMResult: + mock = MockToolsets( + generate_mocks=test_case.generate_mocks, test_case_folder=test_case.folder + ) expected_tools = [] if not os.environ.get("RUN_LIVE"): @@ -115,12 +127,10 @@ def ask_holmes(test_case:AskHolmesTestCase) -> LLMResult: ai = ToolCallingLLM( tool_executor=tool_executor, max_steps=10, - llm=DefaultLLM(os.environ.get("MODEL", "gpt-4o")) + llm=DefaultLLM(os.environ.get("MODEL", "gpt-4o")), ) chat_request = ChatRequest(ask=test_case.user_prompt) - messages = build_chat_messages( - chat_request.ask, [], ai=ai - ) + messages = build_chat_messages(chat_request.ask, [], ai=ai) return ai.messages_call(messages=messages) diff --git a/tests/llm/test_investigate.py b/tests/llm/test_investigate.py index ec923c2..9e61c23 100644 --- a/tests/llm/test_investigate.py +++ b/tests/llm/test_investigate.py @@ -1,4 +1,3 @@ - import os from pathlib import Path from typing import Optional @@ -11,7 +10,11 @@ from holmes.core.investigation import investigate_issues from holmes.core.supabase_dal import SupabaseDal from holmes.core.tools import ToolExecutor -from tests.llm.utils.classifiers import evaluate_context_usage, evaluate_correctness, evaluate_factuality, evaluate_previous_logs_mention +from tests.llm.utils.classifiers import ( + evaluate_context_usage, + evaluate_correctness, + evaluate_previous_logs_mention, +) from tests.llm.utils.constants import PROJECT from tests.llm.utils.system import get_machine_state_tags, readable_timestamp from tests.llm.utils.mock_dal import MockSupabaseDal @@ -20,23 +23,23 @@ from os import path system_metadata = get_machine_state_tags() -TEST_CASES_FOLDER = Path(path.abspath(path.join( - path.dirname(__file__), - "fixtures", "test_investigate" -))) 
+TEST_CASES_FOLDER = Path( + path.abspath(path.join(path.dirname(__file__), "fixtures", "test_investigate")) +) DATASET_NAME = f"investigate:{system_metadata.get('branch', 'unknown_branch')}" + class MockConfig(Config): - def __init__(self, test_case:InvestigateTestCase): + def __init__(self, test_case: InvestigateTestCase): super().__init__() self._test_case = test_case - def create_tool_executor( - self, dal:Optional[SupabaseDal] - ) -> ToolExecutor: - - mock = MockToolsets(generate_mocks=self._test_case.generate_mocks, test_case_folder=self._test_case.folder) + def create_tool_executor(self, dal: Optional[SupabaseDal]) -> ToolExecutor: + mock = MockToolsets( + generate_mocks=self._test_case.generate_mocks, + test_case_folder=self._test_case.folder, + ) expected_tools = [] for tool_mock in self._test_case.tool_mocks: @@ -45,22 +48,25 @@ def create_tool_executor( return ToolExecutor(mock.mocked_toolsets) -def get_test_cases(): +def get_test_cases(): unique_test_id = os.environ.get("PYTEST_XDIST_TESTRUNUID", readable_timestamp()) - experiment_name = f'investigate:{unique_test_id}' + experiment_name = f"investigate:{unique_test_id}" if os.environ.get("EXPERIMENT_ID"): experiment_name = f'investigate:{os.environ.get("EXPERIMENT_ID")}' mh = MockHelper(TEST_CASES_FOLDER) - if os.environ.get('UPLOAD_DATASET') and os.environ.get('BRAINTRUST_API_KEY'): - bt_helper = braintrust_util.BraintrustEvalHelper(project_name=PROJECT, dataset_name=DATASET_NAME) + if os.environ.get("UPLOAD_DATASET") and os.environ.get("BRAINTRUST_API_KEY"): + bt_helper = braintrust_util.BraintrustEvalHelper( + project_name=PROJECT, dataset_name=DATASET_NAME + ) bt_helper.upload_test_cases(mh.load_test_cases()) test_cases = mh.load_investigate_test_cases() return [(experiment_name, test_case) for test_case in test_cases] + def idfn(val): if isinstance(val, InvestigateTestCase): return val.id @@ -69,10 +75,12 @@ def idfn(val): @pytest.mark.llm -@pytest.mark.skipif(not os.environ.get('BRAINTRUST_API_KEY'), reason="BRAINTRUST_API_KEY must be set to run LLM evaluations") +@pytest.mark.skipif( + not os.environ.get("BRAINTRUST_API_KEY"), + reason="BRAINTRUST_API_KEY must be set to run LLM evaluations", +) @pytest.mark.parametrize("experiment_name, test_case", get_test_cases(), ids=idfn) def test_investigate(experiment_name, test_case): - config = MockConfig(test_case) config.model = os.environ.get("MODEL", "gpt-4o") @@ -80,7 +88,7 @@ def test_investigate(experiment_name, test_case): test_case_folder=Path(test_case.folder), generate_mocks=test_case.generate_mocks, issue_data=test_case.issue_data, - resource_instructions=test_case.resource_instructions + resource_instructions=test_case.resource_instructions, ) input = test_case.investigate_request @@ -95,9 +103,7 @@ def test_investigate(experiment_name, test_case): # eval = bt_helper.start_evaluation(experiment_name, name=test_case.id) result = investigate_issues( - investigate_request=test_case.investigate_request, - config=config, - dal=mock_dal + investigate_request=test_case.investigate_request, config=config, dal=mock_dal ) assert result @@ -105,18 +111,21 @@ def test_investigate(experiment_name, test_case): scores = {} - - debug_expected = '\n- '.join(expected) + debug_expected = "\n- ".join(expected) print(f"** EXPECTED **\n- {debug_expected}") correctness_eval = evaluate_correctness(output=output, expected_elements=expected) - print(f"\n** CORRECTNESS **\nscore = {correctness_eval.score}\nrationale = {correctness_eval.metadata.get('rationale', '')}") + print( + f"\n** 
CORRECTNESS **\nscore = {correctness_eval.score}\nrationale = {correctness_eval.metadata.get('rationale', '')}" + ) scores["correctness"] = correctness_eval.score scores["previous_logs"] = evaluate_previous_logs_mention(output=output).score if len(test_case.retrieval_context) > 0: - scores["context"] = evaluate_context_usage(input=input, output=output, context_items=test_case.retrieval_context).score + scores["context"] = evaluate_context_usage( + input=input, output=output, context_items=test_case.retrieval_context + ).score # bt_helper.end_evaluation( # eval=eval, @@ -135,10 +144,17 @@ def test_investigate(experiment_name, test_case): assert expected_section_title in result.sections if test_case.expected_sections: - for expected_section_title, expected_section_array_content in test_case.expected_sections.items(): - assert expected_section_title in result.sections, f"Expected to see section [{expected_section_title}] in result but that section is missing" + for ( + expected_section_title, + expected_section_array_content, + ) in test_case.expected_sections.items(): + assert ( + expected_section_title in result.sections + ), f"Expected to see section [{expected_section_title}] in result but that section is missing" for expected_content in expected_section_array_content: - assert expected_content in result.sections.get(expected_section_title, ""), f"Expected to see content [{expected_content}] in section [{expected_section_title}] but could not find such content" + assert ( + expected_content in result.sections.get(expected_section_title, "") + ), f"Expected to see content [{expected_content}] in section [{expected_section_title}] but could not find such content" if test_case.evaluation.correctness: assert scores.get("correctness", 0) >= test_case.evaluation.correctness diff --git a/tests/llm/test_mocks.py b/tests/llm/test_mocks.py index aafac7e..7098203 100644 --- a/tests/llm/test_mocks.py +++ b/tests/llm/test_mocks.py @@ -3,47 +3,57 @@ import pytest import tempfile -@pytest.mark.parametrize("params", [ - ({"field1": "1", "field2": "2"}), - ({"field1": "1", "field2": "2", "field3": "3"}) -]) + +@pytest.mark.parametrize( + "params", + [({"field1": "1", "field2": "2"}), ({"field1": "1", "field2": "2", "field3": "3"})], +) def test_mock_tools_match(params): mock = MockToolsets(test_case_folder=tempfile.gettempdir(), generate_mocks=False) - mock.mock_tool(ToolMock( - source_file="test", - toolset_name="kubernetes/core", - tool_name="kubectl_describe", - match_params={"field1": "1", "field2": "2"}, - return_value="this tool is mocked" - )) + mock.mock_tool( + ToolMock( + source_file="test", + toolset_name="kubernetes/core", + tool_name="kubectl_describe", + match_params={"field1": "1", "field2": "2"}, + return_value="this tool is mocked", + ) + ) tool_executor = ToolExecutor(mock.mocked_toolsets) result = tool_executor.invoke("kubectl_describe", params) assert result == "this tool is mocked" -@pytest.mark.parametrize("params", [ - ({}), - ({"field1": "1"}), - ({"field2": "2"}), - ({"field1": "1", "field2": "XXX"}), - ({"field1": "XXX", "field2": "2"}), - ({"field3": "3"}) -]) + +@pytest.mark.parametrize( + "params", + [ + ({}), + ({"field1": "1"}), + ({"field2": "2"}), + ({"field1": "1", "field2": "XXX"}), + ({"field1": "XXX", "field2": "2"}), + ({"field3": "3"}), + ], +) def test_mock_tools_do_not_match(params): mock = MockToolsets(test_case_folder=tempfile.gettempdir(), generate_mocks=True) - mock.mock_tool(ToolMock( - source_file="test", - toolset_name="kubernetes/core", - 
tool_name="kubectl_describe", - match_params={"field1": "1", "field2": "2"}, - return_value="this tool is mocked" - )) + mock.mock_tool( + ToolMock( + source_file="test", + toolset_name="kubernetes/core", + tool_name="kubectl_describe", + match_params={"field1": "1", "field2": "2"}, + return_value="this tool is mocked", + ) + ) tool_executor = ToolExecutor(mock.mocked_toolsets) result = tool_executor.invoke("kubectl_describe", params) assert result != "this tool is mocked" + def test_mock_tools_does_not_throws_if_no_match(): mock = MockToolsets(test_case_folder=tempfile.gettempdir(), generate_mocks=True) tool_executor = ToolExecutor(mock.mocked_toolsets) - tool_executor.invoke("kubectl_describe", {"foo":"bar"}) + tool_executor.invoke("kubectl_describe", {"foo": "bar"}) diff --git a/tests/llm/utils/braintrust.py b/tests/llm/utils/braintrust.py index 6cca231..ddca270 100644 --- a/tests/llm/utils/braintrust.py +++ b/tests/llm/utils/braintrust.py @@ -1,4 +1,3 @@ - import braintrust from braintrust import Dataset, Experiment, ReadonlyExperiment, Span import logging @@ -7,19 +6,26 @@ from tests.llm.utils.mock_utils import HolmesTestCase from tests.llm.utils.system import get_machine_state_tags -def find_dataset_row_by_test_case(dataset:Dataset, test_case:HolmesTestCase): + +def find_dataset_row_by_test_case(dataset: Dataset, test_case: HolmesTestCase): for row in dataset: if row.get("id") == test_case.id: return row return None -def pop_test_case(test_cases:List[HolmesTestCase], id:str) -> Optional[HolmesTestCase]: + +def pop_test_case( + test_cases: List[HolmesTestCase], id: str +) -> Optional[HolmesTestCase]: for test_case in test_cases: if test_case.id == id: test_cases.remove(test_case) return test_case -def pop_matching_test_case_if_exists(test_cases:List[HolmesTestCase], item:Any) -> Optional[HolmesTestCase]: + +def pop_matching_test_case_if_exists( + test_cases: List[HolmesTestCase], item: Any +) -> Optional[HolmesTestCase]: """ This function is expected to mutate the test_cases list then remove the matching test case from the list and return it @@ -29,15 +35,14 @@ def pop_matching_test_case_if_exists(test_cases:List[HolmesTestCase], item:Any) return pop_test_case(test_cases, test_case_id) -class BraintrustEvalHelper(): - def __init__(self, project_name:str, dataset_name:str) -> None: +class BraintrustEvalHelper: + def __init__(self, project_name: str, dataset_name: str) -> None: self.project_name = project_name self.dataset_name = dataset_name self.dataset = braintrust.init_dataset(project=project_name, name=dataset_name) self.experiment = None - - def upload_test_cases(self, test_cases:List[HolmesTestCase]): + def upload_test_cases(self, test_cases: List[HolmesTestCase]): logging.info(f"Uploading f{len(test_cases)} test cases to braintrust") logging.info(f"Found dataset: {self.dataset.summarize()}") @@ -54,9 +59,7 @@ def upload_test_cases(self, test_cases:List[HolmesTestCase]): id=test_case.id, input=input, expected=test_case.expected_output, - metadata={ - "test_case": test_case.model_dump() - }, + metadata={"test_case": test_case.model_dump()}, tags=[], ) @@ -66,34 +69,44 @@ def upload_test_cases(self, test_cases:List[HolmesTestCase]): id=test_case.id, input=input, expected=test_case.expected_output, - metadata={ - "test_case": test_case.model_dump() - }, + metadata={"test_case": test_case.model_dump()}, tags=[], ) logging.info(self.dataset.summarize()) - - def resolve_dataset_item(self, test_case:HolmesTestCase) -> Optional[Any]: + def resolve_dataset_item(self, test_case: 
HolmesTestCase) -> Optional[Any]: return find_dataset_row_by_test_case(self.dataset, test_case) - def start_evaluation(self, experiment_name:str, name:str) -> Span: + def start_evaluation(self, experiment_name: str, name: str) -> Span: if not self.experiment: - experiment:Experiment|ReadonlyExperiment = braintrust.init( + experiment: Experiment | ReadonlyExperiment = braintrust.init( project=self.project_name, experiment=experiment_name, dataset=self.dataset, open=False, update=True, - metadata=get_machine_state_tags()) + metadata=get_machine_state_tags(), + ) - if isinstance(experiment, ReadonlyExperiment): # Ensures type checker knows this is a writable experiment - raise Exception("Experiment must be writable. The above options open=False and update=True ensure this is the case so this exception should never be raised") + if isinstance( + experiment, ReadonlyExperiment + ): # Ensures type checker knows this is a writable experiment + raise Exception( + "Experiment must be writable. The above options open=False and update=True ensure this is the case so this exception should never be raised" + ) self.experiment = experiment return self.experiment.start_span(name=name) - def end_evaluation(self, eval:Span, input:str, output:str, expected:str, id:str, scores:dict[str, Any]): + def end_evaluation( + self, + eval: Span, + input: str, + output: str, + expected: str, + id: str, + scores: dict[str, Any], + ): if not self.experiment: raise Exception("start_evaluation() must be called before end_evaluation()") @@ -102,6 +115,6 @@ def end_evaluation(self, eval:Span, input:str, output:str, expected:str, id:str, output=output, expected=expected, dataset_record_id=id, - scores=scores + scores=scores, ) self.experiment.flush() diff --git a/tests/llm/utils/classifiers.py b/tests/llm/utils/classifiers.py index 168390a..906c50a 100644 --- a/tests/llm/utils/classifiers.py +++ b/tests/llm/utils/classifiers.py @@ -5,8 +5,11 @@ classifier_model = os.environ.get("CLASSIFIER_MODEL", "gpt-4o") -def evaluate_app_or_infra(app_or_infra:Union[Literal["infra"], Literal["app"]], output:Optional[str], input:Optional[str]): - +def evaluate_app_or_infra( + app_or_infra: Union[Literal["infra"], Literal["app"]], + output: Optional[str], + input: Optional[str], +): expected = None if app_or_infra == "app": expected = "The output should mention the issue is likely to be an application issue (as opposed to an infrastructure issue)" @@ -31,12 +34,14 @@ def evaluate_app_or_infra(app_or_infra:Union[Literal["infra"], Literal["app"]], prompt_template=prompt_prefix, choice_scores={"A": 0, "B": 0.33, "C": 0.67, "D": 1}, use_cot=True, - model=classifier_model + model=classifier_model, ) return classifier(input=None, output=output, expected=expected) -def evaluate_context_usage(context_items:List[str], output:Optional[str], input:Optional[str]): +def evaluate_context_usage( + context_items: List[str], output: Optional[str], input: Optional[str] +): context = "\n- ".join(context_items) prompt_prefix = """ # CONTEXT @@ -66,12 +71,12 @@ def evaluate_context_usage(context_items:List[str], output:Optional[str], input: prompt_template=prompt_prefix, choice_scores={"A": 0, "B": 0.33, "C": 0.67, "D": 1}, use_cot=True, - model=classifier_model + model=classifier_model, ) return classifier(input=input, output=output, expected=context) -def evaluate_previous_logs_mention(output:Optional[str]): +def evaluate_previous_logs_mention(output: Optional[str]): prompt_prefix = """ OUTPUT @@ -93,13 +98,12 @@ def 
evaluate_previous_logs_mention(output:Optional[str]): prompt_template=prompt_prefix, choice_scores={"A": 1, "B": 1, "C": 0, "D": 1}, use_cot=True, - model=classifier_model + model=classifier_model, ) return classifier(input=None, output=output, expected=None) -def evaluate_correctness(expected_elements:List[str], output:Optional[str]): - +def evaluate_correctness(expected_elements: List[str], output: Optional[str]): expected_elements_str = "\n- ".join(expected_elements) prompt_prefix = """ @@ -131,11 +135,13 @@ def evaluate_correctness(expected_elements:List[str], output:Optional[str]): prompt_template=prompt_prefix, choice_scores={"A": 1, "B": 0}, use_cot=True, - model=classifier_model + model=classifier_model, ) return classifier(input=input, output=output, expected=expected_elements_str) -def evaluate_factuality(input:Optional[str], output:Optional[str], expected:Optional[str]): +def evaluate_factuality( + input: Optional[str], output: Optional[str], expected: Optional[str] +): eval_factuality = Factuality() return eval_factuality(input=input, output=output, expected=expected) diff --git a/tests/llm/utils/commands.py b/tests/llm/utils/commands.py index b7a7c47..96c166b 100644 --- a/tests/llm/utils/commands.py +++ b/tests/llm/utils/commands.py @@ -1,17 +1,20 @@ - import logging import os import subprocess from tests.llm.utils.mock_utils import HolmesTestCase -def invoke_command( - command: str, - cwd:str - ) -> str: + +def invoke_command(command: str, cwd: str) -> str: try: logging.debug(f"Running `{command}` in {cwd}") result = subprocess.run( - command, shell=True, capture_output=True, text=True, check=True, stdin=subprocess.DEVNULL, cwd=cwd + command, + shell=True, + capture_output=True, + text=True, + check=True, + stdin=subprocess.DEVNULL, + cwd=cwd, ) output = f"{result.stdout}\n{result.stderr}" @@ -23,13 +26,15 @@ def invoke_command( logging.error(message) raise e -def before_test(test_case:HolmesTestCase): + +def before_test(test_case: HolmesTestCase): if test_case.before_test and os.environ.get("RUN_LIVE"): commands = test_case.before_test.split("\n") for command in commands: invoke_command(command=command, cwd=test_case.folder) -def after_test(test_case:HolmesTestCase): + +def after_test(test_case: HolmesTestCase): if test_case.after_test and os.environ.get("RUN_LIVE"): commands = test_case.after_test.split("\n") for command in commands: diff --git a/tests/llm/utils/constants.py b/tests/llm/utils/constants.py index af28ed6..36ebc94 100644 --- a/tests/llm/utils/constants.py +++ b/tests/llm/utils/constants.py @@ -1,4 +1,3 @@ - -PROJECT="HolmesGPT" +PROJECT = "HolmesGPT" AUTO_GENERATED_FILE_SUFFIX = ".AUTOGENERATED" diff --git a/tests/llm/utils/langfuse.py b/tests/llm/utils/langfuse.py index 077d5ce..ee7bff3 100644 --- a/tests/llm/utils/langfuse.py +++ b/tests/llm/utils/langfuse.py @@ -1,21 +1,30 @@ - import logging from typing import Any, Dict, List, Optional, Union from langfuse import Langfuse from langfuse.client import DatasetItemClient -from langfuse.model import Dataset, DatasetItem, DatasetStatus -from tests.llm.utils.mock_utils import AskHolmesTestCase, HolmesTestCase, InvestigateTestCase +from langfuse.model import DatasetItem +from tests.llm.utils.mock_utils import ( + AskHolmesTestCase, + HolmesTestCase, + InvestigateTestCase, +) # init langfuse = Langfuse() -def pop_test_case(test_cases:List[HolmesTestCase], id:str) -> Optional[HolmesTestCase]: + +def pop_test_case( + test_cases: List[HolmesTestCase], id: str +) -> Optional[HolmesTestCase]: for test_case in 
test_cases: if test_case.id == id: test_cases.remove(test_case) return test_case -def pop_matching_test_case_if_exists(test_cases:List[HolmesTestCase], item:Union[DatasetItem, DatasetItemClient]) -> Optional[HolmesTestCase]: + +def pop_matching_test_case_if_exists( + test_cases: List[HolmesTestCase], item: Union[DatasetItem, DatasetItemClient] +) -> Optional[HolmesTestCase]: """ This function is expected to mutate the test_cases list then remove the matching test case from the list and return it @@ -26,33 +35,32 @@ def pop_matching_test_case_if_exists(test_cases:List[HolmesTestCase], item:Union test_case_id = item.metadata.get("test_case", {}).get("id") return pop_test_case(test_cases, test_case_id) -def archive_dataset_item(dataset_name:str, item:Union[DatasetItem, DatasetItemClient]): + +def archive_dataset_item( + dataset_name: str, item: Union[DatasetItem, DatasetItemClient] +): langfuse.create_dataset_item( - id=item.id, - dataset_name=dataset_name, - status="ARCHIVED" + id=item.id, dataset_name=dataset_name, status="ARCHIVED" ) -def get_input(test_case:HolmesTestCase) -> Dict[str, Any]: + +def get_input(test_case: HolmesTestCase) -> Dict[str, Any]: input = {} if isinstance(test_case, AskHolmesTestCase): - input = { - "user_prompt": test_case.user_prompt - } + input = {"user_prompt": test_case.user_prompt} elif isinstance(test_case, InvestigateTestCase): input = test_case.investigate_request.model_dump() input["id"] = test_case.id return input -def upload_test_cases(test_cases:List[HolmesTestCase], dataset_name:str): + +def upload_test_cases(test_cases: List[HolmesTestCase], dataset_name: str): logging.info(f"Uploading f{len(test_cases)} test cases to langfuse") try: dataset = langfuse.get_dataset(dataset_name) except Exception: - langfuse.create_dataset( - name=dataset_name - ) + langfuse.create_dataset(name=dataset_name) dataset = langfuse.get_dataset(dataset_name) logging.info(f"Found f{len(dataset.items)} existing dataset items") @@ -71,28 +79,28 @@ def upload_test_cases(test_cases:List[HolmesTestCase], dataset_name:str): dataset_name=dataset_name, input=get_input(test_case), expected_output=test_case.expected_output, - metadata={ - "test_case": test_case.model_dump() - } + metadata={"test_case": test_case.model_dump()}, ) - for test_case in test_cases: logging.info(f"Creating f{test_case.id}") langfuse.create_dataset_item( dataset_name=dataset_name, input=get_input(test_case), - expected_output={ - "answer": test_case.expected_output - }, - metadata={ - "test_case": test_case.model_dump() - } + expected_output={"answer": test_case.expected_output}, + metadata={"test_case": test_case.model_dump()}, ) -def resolve_dataset_item(test_case:HolmesTestCase, dataset_name:str) -> Optional[DatasetItemClient]: + +def resolve_dataset_item( + test_case: HolmesTestCase, dataset_name: str +) -> Optional[DatasetItemClient]: dataset = langfuse.get_dataset(dataset_name) for item in dataset.items: - if item.metadata and item.metadata.get("test_case") and item.metadata.get("test_case").get("id") == test_case.id: + if ( + item.metadata + and item.metadata.get("test_case") + and item.metadata.get("test_case").get("id") == test_case.id + ): return item return diff --git a/tests/llm/utils/mock_dal.py b/tests/llm/utils/mock_dal.py index 7813278..08077c5 100644 --- a/tests/llm/utils/mock_dal.py +++ b/tests/llm/utils/mock_dal.py @@ -10,9 +10,15 @@ from tests.llm.utils.constants import AUTO_GENERATED_FILE_SUFFIX from tests.llm.utils.mock_utils import read_file -class MockSupabaseDal(SupabaseDal): - def 
__init__(self, test_case_folder:Path, issue_data:Optional[Dict], resource_instructions:Optional[ResourceInstructions], generate_mocks:bool): +class MockSupabaseDal(SupabaseDal): + def __init__( + self, + test_case_folder: Path, + issue_data: Optional[Dict], + resource_instructions: Optional[ResourceInstructions], + generate_mocks: bool, + ): super().__init__() self._issue_data = issue_data self._resource_instructions = resource_instructions @@ -27,15 +33,19 @@ def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]: if self._generate_mocks: file_path = self._get_mock_file_path("issue_data") - with open(file_path, 'w') as f: + with open(file_path, "w") as f: f.write(json.dumps(data or {}, indent=2)) f.close() - logging.warning(f"A mock file was generated for you at {file_path} with the contentof dal.get_issue_data({issue_id})") + logging.warning( + f"A mock file was generated for you at {file_path} with the contentof dal.get_issue_data({issue_id})" + ) return data - def get_resource_instructions(self, type: str, name: Optional[str]) -> Optional[ResourceInstructions]: + def get_resource_instructions( + self, type: str, name: Optional[str] + ) -> Optional[ResourceInstructions]: if self._resource_instructions is not None: return self._resource_instructions else: @@ -43,33 +53,43 @@ def get_resource_instructions(self, type: str, name: Optional[str]) -> Optional[ if self._generate_mocks: file_path = self._get_mock_file_path("resource_instructions") - with open(file_path, 'w') as f: + with open(file_path, "w") as f: f.write(json.dumps(data or {}, indent=2)) f.close() - logging.warning(f"A mock file was generated for you at {file_path} with the contentof dal.get_resource_instructions({type}, {name})") + logging.warning( + f"A mock file was generated for you at {file_path} with the contentof dal.get_resource_instructions({type}, {name})" + ) return data - def _get_mock_file_path(self, entity_type:str): - return f"{self._test_case_folder}/{entity_type}.json{AUTO_GENERATED_FILE_SUFFIX}" + def _get_mock_file_path(self, entity_type: str): + return ( + f"{self._test_case_folder}/{entity_type}.json{AUTO_GENERATED_FILE_SUFFIX}" + ) + pydantic_resource_instructions = TypeAdapter(ResourceInstructions) -def load_mock_dal(test_case_folder:Path, generate_mocks:bool): + +def load_mock_dal(test_case_folder: Path, generate_mocks: bool): issue_data_mock_path = test_case_folder.joinpath(Path("issue_data.json")) issue_data = None if issue_data_mock_path.exists(): issue_data = json.loads(read_file(issue_data_mock_path)) - resource_instructions_mock_path = test_case_folder.joinpath(Path("resource_instructions.json")) + resource_instructions_mock_path = test_case_folder.joinpath( + Path("resource_instructions.json") + ) resource_instructions = None if resource_instructions_mock_path.exists(): - resource_instructions = pydantic_resource_instructions.validate_json(read_file(Path(resource_instructions_mock_path))) + resource_instructions = pydantic_resource_instructions.validate_json( + read_file(Path(resource_instructions_mock_path)) + ) return MockSupabaseDal( test_case_folder=test_case_folder, issue_data=issue_data, resource_instructions=resource_instructions, - generate_mocks=generate_mocks + generate_mocks=generate_mocks, ) diff --git a/tests/llm/utils/mock_toolset.py b/tests/llm/utils/mock_toolset.py index 104cb26..f25bafd 100644 --- a/tests/llm/utils/mock_toolset.py +++ b/tests/llm/utils/mock_toolset.py @@ -1,5 +1,3 @@ - - from typing import Dict, List, Optional from holmes.core.tools import Tool, 
Toolset, ToolsetStatusEnum from holmes.plugins.toolsets import load_builtin_toolsets @@ -9,14 +7,18 @@ from tests.llm.utils.constants import AUTO_GENERATED_FILE_SUFFIX -ansi_escape = re.compile(r'\x1B\[([0-9]{1,3}(;[0-9]{1,2};?)?)?[mGK]') +ansi_escape = re.compile(r"\x1B\[([0-9]{1,3}(;[0-9]{1,2};?)?)?[mGK]") + + def strip_ansi(text): - return ansi_escape.sub('', text) + return ansi_escape.sub("", text) + class MockMetadata(BaseModel): - toolset_name:str + toolset_name: str tool_name: str - match_params: Optional[Dict] = None # None will match all params + match_params: Optional[Dict] = None # None will match all params + class ToolMock(MockMetadata): source_file: str @@ -28,38 +30,40 @@ class SaveMockTool(Tool): Tool that raises an exception if invoked. It is used to fail tests if not all invoked tool calls are mocked. This ensures stable test conditions """ + toolset_name: str unmocked_tool: Tool test_case_folder: str - def __init__(self, unmocked_tool:Tool, test_case_folder:str, toolset_name:str = "Unknown"): + + def __init__( + self, unmocked_tool: Tool, test_case_folder: str, toolset_name: str = "Unknown" + ): super().__init__( - name = unmocked_tool.name, - description = unmocked_tool.description, - parameters = unmocked_tool.parameters, - user_description = unmocked_tool.user_description, - toolset_name = toolset_name, - unmocked_tool = unmocked_tool, - test_case_folder = test_case_folder, + name=unmocked_tool.name, + description=unmocked_tool.description, + parameters=unmocked_tool.parameters, + user_description=unmocked_tool.user_description, + toolset_name=toolset_name, + unmocked_tool=unmocked_tool, + test_case_folder=test_case_folder, ) def _get_mock_file_path(self): return f"{self.test_case_folder}/{self.name}.txt{AUTO_GENERATED_FILE_SUFFIX}" - def _auto_generate_mock_file(self, params:Dict): + def _auto_generate_mock_file(self, params: Dict): mock_file_path = self._get_mock_file_path() logging.warning(f"Writing mock file for your convenience at {mock_file_path}") mock_metadata_json = MockMetadata( - toolset_name=self.toolset_name, - tool_name=self.name, - match_params=params + toolset_name=self.toolset_name, tool_name=self.name, match_params=params ).model_dump_json() logging.info(f"Invoking tool {self.unmocked_tool}") output = self.unmocked_tool.invoke(params) output = strip_ansi(output) - with open(mock_file_path, 'w') as f: - f.write(mock_metadata_json + '\n') + with open(mock_file_path, "w") as f: + f.write(mock_metadata_json + "\n") f.write(output) return output @@ -67,33 +71,35 @@ def _auto_generate_mock_file(self, params:Dict): def invoke(self, params) -> str: return self._auto_generate_mock_file(params) - def get_parameterized_one_liner(self, params) -> str: return self.unmocked_tool.get_parameterized_one_liner(params) class MockToolWrapper(Tool): - unmocked_tool:Tool + unmocked_tool: Tool mocks: List[ToolMock] = [] - def __init__(self, unmocked_tool:Tool): + + def __init__(self, unmocked_tool: Tool): super().__init__( name=unmocked_tool.name, description=unmocked_tool.description, parameters=unmocked_tool.parameters, user_description=unmocked_tool.user_description, - unmocked_tool=unmocked_tool + unmocked_tool=unmocked_tool, ) - def find_matching_mock(self, params:Dict) -> Optional[ToolMock]: + def find_matching_mock(self, params: Dict) -> Optional[ToolMock]: for mock in self.mocks: - if not mock.match_params: # wildcard + if not mock.match_params: # wildcard return mock - match = all(key in params and params[key] == mock_val or mock_val == "*" for key, mock_val in 
mock.match_params.items()) + match = all( + key in params and params[key] == mock_val or mock_val == "*" + for key, mock_val in mock.match_params.items() + ) if match: return mock - def invoke(self, params) -> str: mock = self.find_matching_mock(params) if mock: @@ -104,6 +110,7 @@ def invoke(self, params) -> str: def get_parameterized_one_liner(self, params) -> str: return self.unmocked_tool.get_parameterized_one_liner(params) + class MockToolsets: unmocked_toolsets: List[Toolset] mocked_toolsets: List[Toolset] @@ -111,7 +118,7 @@ class MockToolsets: generate_mocks: bool test_case_folder: str - def __init__(self, test_case_folder:str, generate_mocks: bool = True) -> None: + def __init__(self, test_case_folder: str, generate_mocks: bool = True) -> None: self.unmocked_toolsets = load_builtin_toolsets() self.generate_mocks = generate_mocks self.test_case_folder = test_case_folder @@ -119,20 +126,29 @@ def __init__(self, test_case_folder:str, generate_mocks: bool = True) -> None: self.mocked_toolsets = [] self._update() - def mock_tool(self, tool_mock:ToolMock): + def mock_tool(self, tool_mock: ToolMock): self._mocks.append(tool_mock) self._update() - def _find_mocks_for_tool(self, toolset_name:str, tool_name:str) -> List[ToolMock]: + def _find_mocks_for_tool(self, toolset_name: str, tool_name: str) -> List[ToolMock]: found_mocks = [] for tool_mock in self._mocks: - if tool_mock.toolset_name == toolset_name and tool_mock.tool_name == tool_name: + if ( + tool_mock.toolset_name == toolset_name + and tool_mock.tool_name == tool_name + ): found_mocks.append(tool_mock) return found_mocks - def _wrap_tool_with_exception_if_required(self, tool:Tool, toolset_name:str) -> Tool: + def _wrap_tool_with_exception_if_required( + self, tool: Tool, toolset_name: str + ) -> Tool: if self.generate_mocks: - return SaveMockTool(unmocked_tool=tool, toolset_name=toolset_name, test_case_folder=self.test_case_folder) + return SaveMockTool( + unmocked_tool=tool, + toolset_name=toolset_name, + test_case_folder=self.test_case_folder, + ) else: return tool @@ -142,8 +158,12 @@ def _update(self): mocked_tools = [] for i in range(len(toolset.tools)): tool = toolset.tools[i] - mocks = self._find_mocks_for_tool(toolset_name=toolset.name, tool_name=tool.name) - wrapped_tool = self._wrap_tool_with_exception_if_required(tool=tool, toolset_name=toolset.name) + mocks = self._find_mocks_for_tool( + toolset_name=toolset.name, tool_name=tool.name + ) + wrapped_tool = self._wrap_tool_with_exception_if_required( + tool=tool, toolset_name=toolset.name + ) if len(mocks) > 0: mock_tool = MockToolWrapper(unmocked_tool=wrapped_tool) @@ -153,10 +173,10 @@ def _update(self): mocked_tools.append(wrapped_tool) mocked_toolset = Toolset( - name = toolset.name, - prerequisites = toolset.prerequisites, - tools = toolset.tools, - description=toolset.description + name=toolset.name, + prerequisites=toolset.prerequisites, + tools=toolset.tools, + description=toolset.description, ) mocked_toolset.tools = mocked_tools mocked_toolset._status = ToolsetStatusEnum.ENABLED diff --git a/tests/llm/utils/mock_utils.py b/tests/llm/utils/mock_utils.py index a08d240..c509775 100644 --- a/tests/llm/utils/mock_utils.py +++ b/tests/llm/utils/mock_utils.py @@ -1,4 +1,3 @@ - import json from typing_extensions import Dict import yaml @@ -14,36 +13,46 @@ from tests.llm.utils.constants import AUTO_GENERATED_FILE_SUFFIX from tests.llm.utils.mock_toolset import MockMetadata, ToolMock -def read_file(file_path:Path): - with open(file_path, 'r', encoding='utf-8') as file: + 
+def read_file(file_path: Path): + with open(file_path, "r", encoding="utf-8") as file: return file.read().strip() -TEST_CASE_ID_PATTERN = r'^[\d+]_(?:[a-z]+_)*[a-z]+$' + +TEST_CASE_ID_PATTERN = r"^[\d+]_(?:[a-z]+_)*[a-z]+$" CONFIG_FILE_NAME = "test_case.yaml" + class LLMEvaluation(BaseModel): faithfulness: float = 0.3 correctness: float = 0.3 context: float = 0 + class Message(BaseModel): message: str -T = TypeVar('T') + +T = TypeVar("T") + class HolmesTestCase(BaseModel): id: str folder: str - generate_mocks: bool = False # If True, generate mocks - expected_output: Union[str, List[str]] # Whether an output is expected + generate_mocks: bool = False # If True, generate mocks + expected_output: Union[str, List[str]] # Whether an output is expected evaluation: LLMEvaluation = LLMEvaluation() - retrieval_context: List[str] = [] # Elements helping to evaluate the correctness of the LLM response + retrieval_context: List[ + str + ] = [] # Elements helping to evaluate the correctness of the LLM response tool_mocks: List[ToolMock] = [] before_test: Optional[str] = None after_test: Optional[str] = None + class AskHolmesTestCase(HolmesTestCase, BaseModel): - user_prompt: str # The user's question to ask holmes + user_prompt: str # The user's question to ask holmes + class InvestigateTestCase(HolmesTestCase, BaseModel): investigate_request: InvestigateRequest @@ -51,14 +60,16 @@ class InvestigateTestCase(HolmesTestCase, BaseModel): resource_instructions: Optional[ResourceInstructions] expected_sections: Optional[Dict[str, List[str]]] = None + pydantic_tool_mock = TypeAdapter(MockMetadata) + def parse_mock_metadata(text) -> Optional[MockMetadata]: """ Expects the mock metadata to be the first line of the text and be a JSON string. """ try: - match = re.match(r'^(.*)$', text, re.MULTILINE) + match = re.match(r"^(.*)$", text, re.MULTILINE) if match: first_line = match.group(0) metadata = json.loads(first_line) @@ -69,13 +80,11 @@ def parse_mock_metadata(text) -> Optional[MockMetadata]: return None -class MockHelper(): - - def __init__(self, test_cases_folder:Path) -> None: +class MockHelper: + def __init__(self, test_cases_folder: Path) -> None: super().__init__() self._test_cases_folder = test_cases_folder - def load_investigate_test_cases(self) -> List[InvestigateTestCase]: return cast(List[InvestigateTestCase], self.load_test_cases()) @@ -83,32 +92,43 @@ def load_ask_holmes_test_cases(self) -> List[AskHolmesTestCase]: return cast(List[AskHolmesTestCase], self.load_test_cases()) def load_test_cases(self) -> List[HolmesTestCase]: - - test_cases:List[HolmesTestCase] = [] - test_cases_ids:List[str] = os.listdir(self._test_cases_folder) + test_cases: List[HolmesTestCase] = [] + test_cases_ids: List[str] = os.listdir(self._test_cases_folder) for test_case_id in test_cases_ids: test_case_folder = self._test_cases_folder.joinpath(test_case_id) logging.info("Evaluating potential test case folder: {test_case_folder}") try: - config_dict = yaml.safe_load(read_file(test_case_folder.joinpath(CONFIG_FILE_NAME))) + config_dict = yaml.safe_load( + read_file(test_case_folder.joinpath(CONFIG_FILE_NAME)) + ) config_dict["id"] = test_case_id config_dict["folder"] = str(test_case_folder) if config_dict.get("user_prompt"): - test_case = TypeAdapter(AskHolmesTestCase).validate_python(config_dict) + test_case = TypeAdapter(AskHolmesTestCase).validate_python( + config_dict + ) else: - config_dict["investigate_request"] = load_investigate_request(test_case_folder) + config_dict["investigate_request"] = 
load_investigate_request( + test_case_folder + ) config_dict["issue_data"] = load_issue_data(test_case_folder) - config_dict["resource_instructions"] = load_resource_instructions(test_case_folder) + config_dict["resource_instructions"] = load_resource_instructions( + test_case_folder + ) config_dict["request"] = TypeAdapter(InvestigateRequest) - test_case = TypeAdapter(InvestigateTestCase).validate_python(config_dict) + test_case = TypeAdapter(InvestigateTestCase).validate_python( + config_dict + ) logging.info(f"Successfully loaded test case {test_case_id}") except FileNotFoundError: - logging.info(f"Folder {self._test_cases_folder}/{test_case_id} ignored because it is missing a {CONFIG_FILE_NAME} file.") + logging.info( + f"Folder {self._test_cases_folder}/{test_case_id} ignored because it is missing a {CONFIG_FILE_NAME} file." + ) continue - mock_file_names:List[str] = os.listdir(test_case_folder) + mock_file_names: List[str] = os.listdir(test_case_folder) for mock_file_name in mock_file_names: if mock_file_name == CONFIG_FILE_NAME: @@ -121,16 +141,18 @@ def load_test_cases(self) -> List[HolmesTestCase]: mock_text = read_file(mock_file_path) metadata = parse_mock_metadata(mock_text) - mock_value = mock_text[mock_text.find('\n') + 1:] # remove first line + mock_value = mock_text[mock_text.find("\n") + 1 :] # remove first line if not metadata: - logging.warning(f"Failed to parse metadata from test case file at {str(mock_file_path)}. It will be skipped") + logging.warning( + f"Failed to parse metadata from test case file at {str(mock_file_path)}. It will be skipped" + ) continue tool_mock = ToolMock( source_file=mock_file_name, - toolset_name= metadata.toolset_name, - tool_name= metadata.tool_name, - match_params= metadata.match_params, - return_value=mock_value + toolset_name=metadata.toolset_name, + tool_name=metadata.tool_name, + match_params=metadata.match_params, + return_value=mock_value, ) logging.info(f"Successfully loaded tool mock {tool_mock}") test_case.tool_mocks.append(tool_mock) @@ -140,22 +162,34 @@ def load_test_cases(self) -> List[HolmesTestCase]: return test_cases -def load_issue_data(test_case_folder:Path) -> Optional[Dict]: - +def load_issue_data(test_case_folder: Path) -> Optional[Dict]: issue_data_mock_path = test_case_folder.joinpath(Path("issue_data.json")) if issue_data_mock_path.exists(): return json.loads(read_file(issue_data_mock_path)) return None -def load_resource_instructions(test_case_folder:Path) -> Optional[ResourceInstructions]: - resource_instructions_mock_path = test_case_folder.joinpath(Path("resource_instructions.json")) +def load_resource_instructions( + test_case_folder: Path, +) -> Optional[ResourceInstructions]: + resource_instructions_mock_path = test_case_folder.joinpath( + Path("resource_instructions.json") + ) if resource_instructions_mock_path.exists(): - return TypeAdapter(ResourceInstructions).validate_json(read_file(Path(resource_instructions_mock_path))) + return TypeAdapter(ResourceInstructions).validate_json( + read_file(Path(resource_instructions_mock_path)) + ) return None -def load_investigate_request(test_case_folder:Path) -> InvestigateRequest: - investigate_request_path = test_case_folder.joinpath(Path("investigate_request.json")) + +def load_investigate_request(test_case_folder: Path) -> InvestigateRequest: + investigate_request_path = test_case_folder.joinpath( + Path("investigate_request.json") + ) if investigate_request_path.exists(): - return 
TypeAdapter(InvestigateRequest).validate_json(read_file(Path(investigate_request_path))) - raise Exception(f"Investigate test case declared in folder {str(test_case_folder)} should have an investigate_request.json file but none is present") + return TypeAdapter(InvestigateRequest).validate_json( + read_file(Path(investigate_request_path)) + ) + raise Exception( + f"Investigate test case declared in folder {str(test_case_folder)} should have an investigate_request.json file but none is present" + ) diff --git a/tests/llm/utils/system.py b/tests/llm/utils/system.py index 3b43b57..8167f87 100644 --- a/tests/llm/utils/system.py +++ b/tests/llm/utils/system.py @@ -8,8 +8,7 @@ def get_active_branch_name(): - - head_dir = Path(".",".git","HEAD") + head_dir = Path(".", ".git", "HEAD") with head_dir.open("r") as f: content = f.read().splitlines() @@ -19,6 +18,7 @@ def get_active_branch_name(): return "Unknown" + def get_machine_state_tags() -> Dict[str, str]: return { "username": pwd.getpwuid(os.getuid()).pw_name, @@ -27,5 +27,6 @@ def get_machine_state_tags() -> Dict[str, str]: "hostname": socket.gethostname(), } + def readable_timestamp(): return datetime.now().strftime("%Y%m%d_%H%M%S") diff --git a/tests/test_fetch_url.py b/tests/test_fetch_url.py index 2769daf..1050a2d 100644 --- a/tests/test_fetch_url.py +++ b/tests/test_fetch_url.py @@ -39,6 +39,7 @@ def parse_fixture_id(file_name: str) -> str: else: raise Exception(f"Could not find fixture id in filename {file_name}") + class Fixture(BaseModel): id: str input: str @@ -61,23 +62,28 @@ def load_all_fixtures() -> List[Fixture]: if output_file.exists(): input_content = read_file(input_file) output_content = read_file(output_file) - test_cases.append(Fixture(id=number, input=input_content, expected_output=output_content)) + test_cases.append( + Fixture(id=number, input=input_content, expected_output=output_content) + ) assert len(test_cases) > 0 return test_cases + def idfn(val): if isinstance(val, Fixture): return f"Fixture #{val.id}" else: return str(val) -@pytest.mark.parametrize("fixture", load_all_fixtures(),ids=idfn) -def test_html_to_markdown(fixture:Fixture): + +@pytest.mark.parametrize("fixture", load_all_fixtures(), ids=idfn) +def test_html_to_markdown(fixture: Fixture): actual_output = html_to_markdown(fixture.input) print(actual_output) assert actual_output.strip() == fixture.expected_output.strip() + def test_fetch_webpage(): toolset = InternetToolset() toolset._status = ToolsetStatusEnum.ENABLED diff --git a/tests/test_format_tags.py b/tests/test_format_tags.py index 01e4f40..22a88dc 100644 --- a/tests/test_format_tags.py +++ b/tests/test_format_tags.py @@ -2,40 +2,46 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags -@pytest.mark.parametrize("input, expected_output", [ - ( - 'What is the status of << { "type": "service", "namespace": "default", "kind": "Deployment", "name": "nginx" } >>?', - 'What is the status of service nginx (namespace=default, kind=Deployment)?' - ), - ( - 'why did << { "type": "job", "namespace": "my-namespace", "name": "my-job" } >> fail?', - 'why did job my-job (namespace=my-namespace) fail?' - ), - ( - 'why did << { "type": "pod", "namespace": "my-namespace", "name": "runner-2323" } >> fail?', - 'why did pod runner-2323 (namespace=my-namespace) fail?' - ), - ( - 'how many pods are running on << { "type": "node", "name": "my-node" } >>?', - 'how many pods are running on node my-node?' 
- ), - ( - 'What caused << { "type": "issue", "id": "issue-id", "name": "KubeJobFailed", "subject_namespace": "my-namespace", "subject_name": "my-pod" } >>?', - 'What caused issue issue-id (name=KubeJobFailed, subject_namespace=my-namespace, subject_name=my-pod)?' - ), - ( - 'tell me about << {"type":"service","namespace":"sock-shop","kind":"Deployment","name":"carts"} >> and << { "type": "node", "name": "my-node" } >> and << {"type":"service","namespace":"sock-shop","kind":"Deployment","name":"front-end"} >>', - 'tell me about service carts (namespace=sock-shop, kind=Deployment) and node my-node and service front-end (namespace=sock-shop, kind=Deployment)' - ) -]) + +@pytest.mark.parametrize( + "input, expected_output", + [ + ( + 'What is the status of << { "type": "service", "namespace": "default", "kind": "Deployment", "name": "nginx" } >>?', + "What is the status of service nginx (namespace=default, kind=Deployment)?", + ), + ( + 'why did << { "type": "job", "namespace": "my-namespace", "name": "my-job" } >> fail?', + "why did job my-job (namespace=my-namespace) fail?", + ), + ( + 'why did << { "type": "pod", "namespace": "my-namespace", "name": "runner-2323" } >> fail?', + "why did pod runner-2323 (namespace=my-namespace) fail?", + ), + ( + 'how many pods are running on << { "type": "node", "name": "my-node" } >>?', + "how many pods are running on node my-node?", + ), + ( + 'What caused << { "type": "issue", "id": "issue-id", "name": "KubeJobFailed", "subject_namespace": "my-namespace", "subject_name": "my-pod" } >>?', + "What caused issue issue-id (name=KubeJobFailed, subject_namespace=my-namespace, subject_name=my-pod)?", + ), + ( + 'tell me about << {"type":"service","namespace":"sock-shop","kind":"Deployment","name":"carts"} >> and << { "type": "node", "name": "my-node" } >> and << {"type":"service","namespace":"sock-shop","kind":"Deployment","name":"front-end"} >>', + "tell me about service carts (namespace=sock-shop, kind=Deployment) and node my-node and service front-end (namespace=sock-shop, kind=Deployment)", + ), + ], +) def test_format_tags_in_string(input, expected_output): assert format_tags_in_string(input) == expected_output + def test_parse_message_tags(): - assert parse_messages_tags([{ - "role": "user", - "content": 'how many pods are running on << { "type": "node", "name": "my-node" } >>?' - }])[0] == { - "role": "user", - "content": 'how many pods are running on node my-node?' 
- } + assert parse_messages_tags( + [ + { + "role": "user", + "content": 'how many pods are running on << { "type": "node", "name": "my-node" } >>?', + } + ] + )[0] == {"role": "user", "content": "how many pods are running on node my-node?"} diff --git a/tests/test_investigate_structured_output.py b/tests/test_investigate_structured_output.py index 5b97718..21453d9 100644 --- a/tests/test_investigate_structured_output.py +++ b/tests/test_investigate_structured_output.py @@ -1,22 +1,29 @@ -from holmes.core.investigation_structured_output import DEFAULT_SECTIONS, get_output_format_for_investigation +from holmes.core.investigation_structured_output import ( + DEFAULT_SECTIONS, + get_output_format_for_investigation, +) from holmes.plugins.prompts import load_and_render_prompt def test_prompt_sections_formatting(): - issue = { - "source_type": "prometheus" - } - prompt = load_and_render_prompt("builtin://generic_investigation.jinja2", {"issue": issue, "sections": DEFAULT_SECTIONS}) + issue = {"source_type": "prometheus"} + prompt = load_and_render_prompt( + "builtin://generic_investigation.jinja2", + {"issue": issue, "sections": DEFAULT_SECTIONS}, + ) assert len(DEFAULT_SECTIONS) > 0 for title, description in DEFAULT_SECTIONS.items(): expected_section = f"- {title}: {description}" - assert expected_section in prompt, f"Expected section \"{title}\" not found in formatted prompt" + assert ( + expected_section in prompt + ), f'Expected section "{title}" not found in formatted prompt' def test_get_output_format_for_investigation(): - - output_format = get_output_format_for_investigation({"Title1": "Description1", "Title2": "Description2"}) + output_format = get_output_format_for_investigation( + {"Title1": "Description1", "Title2": "Description2"} + ) assert output_format assert output_format["json_schema"] @@ -24,17 +31,8 @@ def test_get_output_format_for_investigation(): assert output_format["json_schema"]["schema"]["properties"] assert output_format["json_schema"]["schema"]["properties"] == { - "Title1": { - "type": ["string", "null"], - "description": "Description1" - }, - "Title2": { - "type": ["string", "null"], - "description": "Description2" - } + "Title1": {"type": ["string", "null"], "description": "Description1"}, + "Title2": {"type": ["string", "null"], "description": "Description2"}, } - assert output_format["json_schema"]["schema"]["required"] == [ - "Title1", - "Title2" - ] + assert output_format["json_schema"]["schema"]["required"] == ["Title1", "Title2"] diff --git a/tests/test_issue_investigator.py b/tests/test_issue_investigator.py index cefce21..33b3448 100644 --- a/tests/test_issue_investigator.py +++ b/tests/test_issue_investigator.py @@ -1,18 +1,19 @@ - from holmes.core.issue import Issue from holmes.core.models import InvestigateRequest -from holmes.core.tool_calling_llm import ResourceInstructionDocument, ResourceInstructions +from holmes.core.tool_calling_llm import ( + ResourceInstructionDocument, + ResourceInstructions, +) from rich.console import Console from holmes.config import Config -from holmes.common.env_vars import ( - HOLMES_POST_PROCESSING_PROMPT -) +from holmes.common.env_vars import HOLMES_POST_PROCESSING_PROMPT + def _test_investigate_issue_using_fetch_webpage(): investigate_request = InvestigateRequest( source="prometheus", title="starting container process caused", - description="starting container process caused \"exec: \"mycommand\": executable file not found in $PATH\"", + description='starting container process caused "exec: "mycommand": executable 
file not found in $PATH"', subject=dict(), context=dict(), source_instance_id="ApiRequest", @@ -24,14 +25,11 @@ def _test_investigate_issue_using_fetch_webpage(): runbook_url = "https://containersolutions.github.io/runbooks/posts/kubernetes/create-container-error/" resource_instructions = ResourceInstructions( - instructions=[], - documents=[ResourceInstructionDocument(url=runbook_url)] + instructions=[], documents=[ResourceInstructionDocument(url=runbook_url)] ) console = Console() config = Config.load_from_env() - ai = config.create_issue_investigator( - console, allowed_toolsets='*' - ) + ai = config.create_issue_investigator(console, allowed_toolsets="*") issue = Issue( id="", @@ -48,16 +46,22 @@ def _test_investigate_issue_using_fetch_webpage(): instructions=resource_instructions, ) - webpage_tool_calls = list(filter(lambda tool_call: tool_call.tool_name == "fetch_webpage", investigation.tool_calls)) + webpage_tool_calls = list( + filter( + lambda tool_call: tool_call.tool_name == "fetch_webpage", + investigation.tool_calls, + ) + ) assert len(webpage_tool_calls) == 1 assert runbook_url in webpage_tool_calls[0].description + def _test_investigate_issue_without_fetch_webpage(): investigate_request = InvestigateRequest( source="prometheus", title="starting container process caused", - description="starting container process caused \"exec: \"mycommand\": executable file not found in $PATH\"", + description='starting container process caused "exec: "mycommand": executable file not found in $PATH"', subject=dict(), context=dict(), source_instance_id="ApiRequest", @@ -66,15 +70,10 @@ def _test_investigate_issue_without_fetch_webpage(): prompt_template="builtin://generic_investigation.jinja2", ) raw_data = investigate_request.model_dump() - resource_instructions = ResourceInstructions( - instructions=[], - documents=[] - ) + resource_instructions = ResourceInstructions(instructions=[], documents=[]) console = Console() config = Config.load_from_env() - ai = config.create_issue_investigator( - console, allowed_toolsets='*' - ) + ai = config.create_issue_investigator(console, allowed_toolsets="*") issue = Issue( id="", @@ -92,6 +91,11 @@ def _test_investigate_issue_without_fetch_webpage(): instructions=resource_instructions, ) - webpage_tool_calls = list(filter(lambda tool_call: tool_call.tool_name == "fetch_webpage", investigation.tool_calls)) + webpage_tool_calls = list( + filter( + lambda tool_call: tool_call.tool_name == "fetch_webpage", + investigation.tool_calls, + ) + ) assert len(webpage_tool_calls) == 0
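
Note on the mock-matching logic reformatted above in tests/llm/utils/mock_toolset.py: MockToolWrapper.find_matching_mock treats a mock with no match_params as a wildcard that matches any tool call, and a "*" value as a per-key wildcard. A minimal, standalone sketch of that rule follows; the Mock dataclass and the usage at the bottom are illustrative stand-ins, not code from this patch.

from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Mock:
    # Illustrative stand-in for ToolMock: match_params=None matches every call.
    match_params: Optional[Dict] = None
    return_value: str = ""


def find_matching_mock(mocks: List[Mock], params: Dict) -> Optional[Mock]:
    for mock in mocks:
        if not mock.match_params:  # wildcard mock: matches any params
            return mock
        # Every declared key must equal the call's value, or be the "*" wildcard.
        match = all(
            (key in params and params[key] == mock_val) or mock_val == "*"
            for key, mock_val in mock.match_params.items()
        )
        if match:
            return mock
    return None


if __name__ == "__main__":
    mocks = [Mock(match_params={"namespace": "default", "name": "*"}, return_value="ok")]
    assert find_matching_mock(mocks, {"namespace": "default", "name": "nginx"}).return_value == "ok"
    assert find_matching_mock(mocks, {"namespace": "kube-system", "name": "nginx"}) is None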