Skip to content

Commit

Permalink
Merge branch 'master' into fix_workload_health
Browse files Browse the repository at this point in the history
  • Loading branch information
nherment authored Jan 30, 2025
2 parents 234e6ba + 415f637 commit 3d8c877
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 63 deletions.
32 changes: 15 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -631,31 +631,31 @@ Using Grafana Loki

HolmesGPT can consult logs from [Loki](https://grafana.com/oss/loki/) by proxying through a [Grafana](https://grafana.com/oss/grafana/) instance.

There are 2 parts to configuring access to Grafana Loki: Access/Authentication and search terms.
To configure the Loki toolset:

For access and authentication, add the following environment variables:

* `GRAFANA_URL` - e.g. https://my-org.grafana.net
* `GRAFANA_API_KEY` - e.g. glsa_bsm6ZS_sdfs25f
```yaml
toolsets:
grafana/loki:
enabled: true
config:
api_key: "{{ env.GRAFANA_API_KEY }}"
url: "http://loki-url"
```
For search terms, you can optionally tweak the search terms used by the toolset.
This is done by appending the following to your Holmes configuration file:
This is done by appending the following to your Holmes grafana/loki configuration:
```yaml
grafana:
url: https://my-org.grafana.net #
api_key: glsa_bsm6ZS_sdfs25f
loki:
pod_name_search_key: "pod"
namespace_search_key: "namespace"
node_name_search_key: "node"
pod_name_search_key: "pod"
namespace_search_key: "namespace"
node_name_search_key: "node"
```
> You only need to tweak the configuration file if your Loki logs settings for pod, namespace and node differ from the above defaults.
The Loki toolset is configured using the same Grafana settings as the Grafana Tempo toolset.
</details>
<details>
<summary>
Using Grafana Tempo
</summary>
Expand All @@ -664,8 +664,6 @@ HolmesGPT can fetch trace information from Grafana Tempo to debug performance re
Tempo is configured using the same Grafana settings as the Grafana Loki toolset.
grafana:
url: https://my-org.grafana.net #
</details>
Expand Down Expand Up @@ -875,7 +873,7 @@ Configure Slack to send notifications to specific channels. Provide your Slack t
<summary>OpenSearch Integration</summary>

The OpenSearch toolset (`opensearch`) allows Holmes to consult an OpenSearch cluster for its health, settings, and shard information.
The toolset supports multiple opensearch or elasticsearch clusters that are configured by editing Holmes' configuration file (or in cluster to the configuration secret):
The toolset supports multiple opensearch or elasticsearch clusters that are configured by editing Holmes' configuration file:

```
opensearch_clusters:
Expand Down
3 changes: 3 additions & 0 deletions holmes/core/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,9 @@ def check_prerequisites(self):

self._status = ToolsetStatusEnum.ENABLED

def get_example_config(self) -> Dict[str, Any]:
    """Return a sample configuration for this toolset.

    This implementation returns an empty mapping, i.e. it advertises no
    example configuration.
    """
    return dict()


class YAMLToolset(Toolset):
tools: List[YAMLTool]
Expand Down
21 changes: 16 additions & 5 deletions holmes/plugins/toolsets/grafana/base_grafana_toolset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any
from typing import Any, ClassVar, Type
from holmes.core.tools import (
Tool,
Toolset,
Expand All @@ -11,17 +11,21 @@


class BaseGrafanaToolset(Toolset):
def __init__(self, name: str, description: str, icon_url: str, tools: list[Tool]):
config_class: ClassVar[Type[GrafanaConfig]] = GrafanaConfig

def __init__(self, name: str, description: str, icon_url: str, tools: list[Tool], doc_url: str):
super().__init__(
name=name,
description=description,
icon_url=icon_url,
docs_url=doc_url,
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
tools=tools,
tags=[
ToolsetTag.CORE,
],
enabled=False
enabled=False,
is_default=True,
)

def prerequisites_callable(self, config: dict[str, Any]) -> bool:
Expand All @@ -30,10 +34,17 @@ def prerequisites_callable(self, config: dict[str, Any]) -> bool:
return False

try:
self._grafana_config = GrafanaConfig(**config)
is_healthy = get_health(self._grafana_config.url, self._grafana_config.api_key)
self._grafana_config = BaseGrafanaToolset.config_class(**config)
is_healthy = get_health(
self._grafana_config.url, self._grafana_config.api_key
)
return is_healthy

except Exception:
logging.exception("Failed to set up grafana toolset")
return False

def get_example_config(self):
    """Return a placeholder configuration for this Grafana toolset.

    The example is built from ``self.config_class`` rather than a hard-coded
    model, so a subclass that declares its own config model gets that
    model's extra fields (and their defaults) in the example.

    Returns:
        The ``model_dump()`` of a config instance whose ``api_key`` and
        ``url`` are placeholder strings.
    """
    example_config = self.config_class(
        api_key="YOUR API KEY", url="YOUR GRAFANA URL"
    )
    return example_config.model_dump()

13 changes: 1 addition & 12 deletions holmes/plugins/toolsets/grafana/common.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
from typing import Dict, Optional, Union
import uuid
import time
import os
from pydantic import BaseModel


GRAFANA_URL_ENV_NAME = "GRAFANA_URL"
GRAFANA_API_KEY_ENV_NAME = "GRAFANA_API_KEY"
ONE_HOUR_IN_SECONDS = 3600


class GrafanaLokiConfig(BaseModel):
pod_name_search_key: str = "pod"
namespace_search_key: str = "namespace"
node_name_search_key: str = "node"


class GrafanaConfig(BaseModel):
loki: GrafanaLokiConfig = GrafanaLokiConfig()
api_key: str
url: str

Expand Down Expand Up @@ -61,5 +50,5 @@ def get_datasource_id(dict: Dict, param: str) -> str:
return f"uid/{datasource_id}"
except:
pass

return datasource_id
21 changes: 18 additions & 3 deletions holmes/plugins/toolsets/grafana/toolset_grafana_loki.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from holmes.core.tools import Tool, ToolParameter
from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset
from holmes.plugins.toolsets.grafana.common import (
GrafanaConfig,
get_datasource_id,
get_param_or_raise,
process_timestamps,
Expand All @@ -17,6 +18,12 @@
)


class GrafanaLokiConfig(GrafanaConfig):
    """Grafana connection settings extended with Loki search keys."""

    # Search keys handed to the Loki log queries for pods, namespaces and
    # nodes; override them only when the Loki setup uses different names.
    pod_name_search_key: str = "pod"
    namespace_search_key: str = "namespace"
    node_name_search_key: str = "node"


class ListLokiDatasources(Tool):

def __init__(self, toolset: BaseGrafanaToolset):
Expand Down Expand Up @@ -84,7 +91,7 @@ def invoke(self, params: Dict) -> str:
api_key=self._toolset._grafana_config.api_key,
loki_datasource_id=get_datasource_id(params, "loki_datasource_id"),
node_name=get_param_or_raise(params, "node_name"),
node_name_search_key=self._toolset._grafana_config.loki.node_name_search_key,
node_name_search_key=self._toolset._grafana_config.node_name_search_key,
start=start,
end=end,
limit=int(get_param_or_raise(params, "limit")),
Expand Down Expand Up @@ -208,8 +215,8 @@ def invoke(self, params: Dict) -> str:
loki_datasource_id=get_datasource_id(params, "loki_datasource_id"),
pod_regex=get_param_or_raise(params, "pod_regex"),
namespace=get_param_or_raise(params, "namespace"),
namespace_search_key=self._toolset._grafana_config.loki.namespace_search_key,
pod_name_search_key=self._toolset._grafana_config.loki.pod_name_search_key,
namespace_search_key=self._toolset._grafana_config.namespace_search_key,
pod_name_search_key=self._toolset._grafana_config.pod_name_search_key,
start=start,
end=end,
limit=int(get_param_or_raise(params, "limit")),
Expand All @@ -221,15 +228,23 @@ def get_parameterized_one_liner(self, params: Dict) -> str:


class GrafanaLokiToolset(BaseGrafanaToolset):
    """Toolset registered under the name ``grafana/loki``."""

    # Config model for this toolset: Grafana settings plus Loki search keys.
    config_class = GrafanaLokiConfig

    def __init__(self):
        """Register the Loki tools and the toolset metadata."""
        loki_tools = [
            ListLokiDatasources(self),
            GetLokiLogsByNode(self),
            GetLokiLogsByPod(self),
            GetLokiLogsByLabel(self),
        ]
        super().__init__(
            name="grafana/loki",
            description="Fetchs kubernetes pods and node logs from Loki",
            icon_url="https://grafana.com/media/docs/loki/logo-grafana-loki.png",
            doc_url="https://grafana.com/oss/loki/",
            tools=loki_tools,
        )

    def get_example_config(self):
        """Return a placeholder Loki configuration as a plain dict."""
        return GrafanaLokiConfig(
            api_key="YOUR API KEY", url="YOUR GRAFANA URL"
        ).model_dump()

1 change: 1 addition & 0 deletions holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def __init__(self):
name="grafana/tempo",
description="Fetchs kubernetes traces from Tempo",
icon_url="https://grafana.com/static/assets/img/blog/tempo.png",
doc_url="https://grafana.com/oss/tempo/",
tools=[
ListAllDatasources(self),
GetTempoTracesByMinDuration(self),
Expand Down
81 changes: 64 additions & 17 deletions holmes/plugins/toolsets/opensearch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Any, Dict, List, Optional

from pydantic import ConfigDict
from pydantic import BaseModel, ConfigDict
from holmes.core.tools import (
CallablePrerequisite,
Tool,
Expand All @@ -12,18 +12,46 @@
from opensearchpy import OpenSearch


class OpenSearchHttpAuth(BaseModel):
    """Username/password pair used as a cluster's ``http_auth`` setting."""

    username: str
    password: str


class OpenSearchHost(BaseModel):
    """One host entry of a cluster; ``port`` defaults to 9200."""

    host: str
    port: int = 9200


class OpenSearchCluster(BaseModel):
    """Settings for a single cluster.

    The toolset dumps this model to a dict and passes it as keyword
    arguments to ``OpenSearchClient``.
    """

    hosts: list[OpenSearchHost]
    headers: Optional[dict[str, Any]] = None
    # SSL is on by default while hostname and certificate checks are off.
    use_ssl: bool = True
    ssl_assert_hostname: bool = False
    verify_certs: bool = False
    ssl_show_warn: bool = False
    http_auth: Optional[OpenSearchHttpAuth] = None


class OpenSearchConfig(BaseModel):
    """Top-level toolset configuration: the list of clusters to connect to."""

    opensearch_clusters: list[OpenSearchCluster]


class OpenSearchClient:
    """Wrapper that builds an ``OpenSearch`` client from keyword arguments.

    All keyword arguments are forwarded to ``OpenSearch``. ``http_auth`` is
    normalised first, because configuration supplies it as a mapping with
    ``username`` and ``password`` keys.
    """

    def __init__(self, **kwargs):
        """Create the underlying client.

        Args:
            **kwargs: Options forwarded to ``OpenSearch``. An ``http_auth``
                mapping is converted to a ``(username, password)`` tuple;
                ``None`` is omitted; any other value is passed unchanged.
        """
        http_auth = kwargs.pop("http_auth", None)
        if isinstance(http_auth, dict):
            kwargs["http_auth"] = (
                http_auth.get("username"),
                http_auth.get("password"),
            )
        elif http_auth is not None:
            # Keep values that are not mappings (e.g. an already-built
            # tuple); they used to be popped and silently discarded, which
            # created an unauthenticated client.
            kwargs["http_auth"] = http_auth

        self.client = OpenSearch(**kwargs)

def get_client(clients:List[OpenSearchClient], host:Optional[str]):

def get_client(clients: List[OpenSearchClient], host: Optional[str]):
if len(clients) == 1:
return clients[0]

Expand Down Expand Up @@ -133,7 +161,7 @@ def __init__(self):
enabled=False,
description="Provide cluster metadata information like health, shards, settings.",
docs_url="https://opensearch.org/docs/latest/clients/python-low-level/",
icon_url="https://upload.wikimedia.org/wikipedia/commons/9/91/Opensearch_Logo.svg",
icon_url="https://opensearch.org/assets/brand/PNG/Mark/opensearch_mark_default.png",
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
tools=[
ListShards(self),
Expand All @@ -143,21 +171,40 @@ def __init__(self):
tags=[
ToolsetTag.CORE,
],
is_default=False,
is_default=True,
)

def prerequisites_callable(self, config: dict[str, Any]) -> bool:
    """Validate the configuration and connect to each configured cluster.

    Args:
        config: Raw toolset configuration; expected to carry an
            ``opensearch_clusters`` list.

    Returns:
        ``True`` when at least one client is registered in
        ``self.clients``. ``False`` when the configuration is empty or
        invalid, or when no cluster answered the health check.
    """
    if not config:
        return False

    try:
        os_config = OpenSearchConfig(**config)

        for cluster in os_config.opensearch_clusters:
            # A failing cluster is logged and skipped so that the remaining
            # clusters still get a chance to be registered.
            try:
                logging.info("Setting up OpenSearch client")
                cluster_kwargs = cluster.model_dump()
                client = OpenSearchClient(**cluster_kwargs)
                if client.client.cluster.health(params={"timeout": 5}):
                    self.clients.append(client)
            except Exception:
                logging.exception("Failed to set up opensearch client")

        return len(self.clients) > 0
    except Exception:
        logging.exception("Failed to set up opensearch toolset")
        return False

def get_example_config(self) -> Dict[str, Any]:
    """Return a placeholder OpenSearch configuration as a plain dict.

    The example describes one cluster with a single placeholder host and an
    ``Authorization`` header whose value is the
    ``{{ env.OPENSEARCH_BEARER_TOKEN }}`` template expression.
    """
    example_config = OpenSearchConfig(
        opensearch_clusters=[
            OpenSearchCluster(
                hosts=[OpenSearchHost(host="YOUR OPENSEARCH HOST")],
                headers={"Authorization": "{{ env.OPENSEARCH_BEARER_TOKEN }}"},
                use_ssl=True,
                ssl_assert_hostname=False,
            )
        ]
    )
    return example_config.model_dump()
4 changes: 4 additions & 0 deletions holmes/utils/default_toolset_installation_guide.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ holmes:
toolsets:
{{toolset_name}}:
enabled: true
{% if example_config %}
config:
{{ example_config | indent(8) }}
{% endif %}
```

{% endif %}
Expand Down
19 changes: 10 additions & 9 deletions holmes/utils/holmes_sync_toolsets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from datetime import datetime
from typing import Any

import yaml


from holmes.config import Config
Expand Down Expand Up @@ -47,19 +50,17 @@ def holmes_sync_toolsets_status(dal: SupabaseDal, config: Config) -> None:

def render_default_installation_instructions_for_toolset(toolset: Toolset) -> str:
    """Render the installation guide for a toolset.

    Args:
        toolset: The toolset whose name, enabled/default flags, environment
            variables and example configuration feed the template.

    Returns:
        The rendered guide. Default toolsets use
        ``default_toolset_installation_guide.jinja2``; all others use
        ``installation_guide.jinja2``.
    """
    env_vars = toolset.get_environment_variables()
    example_config = toolset.get_example_config()
    context: dict[str, Any] = {
        "env_vars": env_vars if env_vars else [],
        "toolset_name": toolset.name,
        "enabled": toolset.enabled,
        "default_toolset": toolset.is_default,
        # yaml.dump({}) returns the non-empty string "{}\n", which a template
        # "if example_config" check treats as truthy. Pass an empty string
        # when the toolset has no example configuration.
        "example_config": yaml.dump(example_config) if example_config else "",
    }
    template = (
        "file://holmes/utils/default_toolset_installation_guide.jinja2"
        if toolset.is_default
        else "file://holmes/utils/installation_guide.jinja2"
    )
    return load_and_render_prompt(template, context)

0 comments on commit 3d8c877

Please sign in to comment.