From 9f32e9524df147cd5fb39dfcff0ef31c5375570d Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 21 Sep 2023 16:46:16 -0500 Subject: [PATCH 01/12] use scheduled tasks to decide whether to check image and audio data --- rollup_health_and_sanity_metrics.py | 35 +++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 81ecb9d..048dae3 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -3,6 +3,7 @@ import pandas as pd import logging import sage_data_client +import requests from typing import NamedTuple from utils import ( load_node_table, @@ -196,6 +197,32 @@ } +def get_scheduled_tasks_by_node(): + """ + Queries the cloud scheduler and returns a map of VSN -> [Plugin names across all running jobs for VSN] + """ + r = requests.get("https://es.sagecontinuum.org/api/v1/jobs/list") + r.raise_for_status() + jobs = list(r.json().values()) + + # filter only running jobs + jobs = [job for job in jobs if job["state"]["last_state"] == "Running"] + + tasks_by_node = {} + + for job in jobs: + plugins = job.get("plugins") or [] + nodes = job.get("nodes") or {} + + for plugin in plugins: + for vsn in nodes.keys(): + if vsn not in tasks_by_node: + tasks_by_node[vsn] = [] + tasks_by_node[vsn].append(plugin["name"]) + + return tasks_by_node + + def get_health_records_for_window(nodes, start, end, window): records = [] @@ -242,6 +269,8 @@ def add_device_health_check_record(vsn, device, value): vsn_groups = df.groupby(["meta.vsn"]) + scheduled_tasks_by_node = get_scheduled_tasks_by_node() + for node in nodes: try: df_vsn = vsn_groups.get_group(node.vsn) @@ -261,9 +290,15 @@ def check_publishing_frequency_for_device(device, window): except KeyError: yield task, name, 0.0 + scheduled_tasks = scheduled_tasks_by_node.get(node.vsn, []) + def check_publishing_sla_for_device(device, window, sla): healthy = True + for task, name, f in check_publishing_frequency_for_device(device, window): + # skip image and audio sampler tasks which are not scheduled + if "sampler" in task and task not in scheduled_tasks: + continue if f < sla: healthy = False logging.info( From 018878590312ed7b063c2fb31ce73bc07601872c Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 21 Sep 2023 16:49:36 -0500 Subject: [PATCH 02/12] fixed formatting --- rollup_health_and_sanity_metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 048dae3..992aa29 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -311,6 +311,7 @@ def check_publishing_sla_for_device(device, window, sla): name, f, ) + return healthy node_healthy = True From e324d5e282999b7b4e0a6e0a30cfc8ba9622e620 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 21 Sep 2023 22:17:01 -0500 Subject: [PATCH 03/12] ignoring sys.cooling sys.freq sys.gps metrics until we can finish debugging collection --- rollup_health_and_sanity_metrics.py | 68 ++++++++++++++++------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 992aa29..a2d0391 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -15,24 +15,30 @@ ) +# these metrics are coming in inconsistently. we should debug later +# but to make the health report less red, we'll comment them out. +# sys.cooling* +# sys.freq* +# sys.gps* + sys_from_nxcore = { "sys.boot_time", - "sys.cooling", - "sys.cooling_max", + # "sys.cooling", + # "sys.cooling_max", "sys.cpu_seconds", - "sys.freq.ape", - "sys.freq.cpu", - "sys.freq.cpu_max", - "sys.freq.cpu_min", - "sys.freq.cpu_perc", - "sys.freq.emc", - "sys.freq.emc_max", - "sys.freq.emc_min", - "sys.freq.emc_perc", - "sys.freq.gpu", - "sys.freq.gpu_max", - "sys.freq.gpu_min", - "sys.freq.gpu_perc", + # "sys.freq.ape", + # "sys.freq.cpu", + # "sys.freq.cpu_max", + # "sys.freq.cpu_min", + # "sys.freq.cpu_perc", + # "sys.freq.emc", + # "sys.freq.emc_max", + # "sys.freq.emc_min", + # "sys.freq.emc_perc", + # "sys.freq.gpu", + # "sys.freq.gpu_max", + # "sys.freq.gpu_min", + # "sys.freq.gpu_perc", "sys.fs.avail", "sys.fs.size", "sys.hwmon", @@ -59,7 +65,7 @@ # "sys.gps.epy", # not sent with no GPS fix # "sys.gps.epv", # not sent with no GPS fix # "sys.gps.satellites", # not sent with no GPS fix - "sys.gps.mode", + # "sys.gps.mode", } sys_from_dellblade = { @@ -103,22 +109,22 @@ sys_from_nxagent = { "sys.boot_time", - "sys.cooling", - "sys.cooling_max", + # "sys.cooling", + # "sys.cooling_max", "sys.cpu_seconds", - "sys.freq.ape", - "sys.freq.cpu", - "sys.freq.cpu_max", - "sys.freq.cpu_min", - "sys.freq.cpu_perc", - "sys.freq.emc", - "sys.freq.emc_max", - "sys.freq.emc_min", - "sys.freq.emc_perc", - "sys.freq.gpu", - "sys.freq.gpu_max", - "sys.freq.gpu_min", - "sys.freq.gpu_perc", + # "sys.freq.ape", + # "sys.freq.cpu", + # "sys.freq.cpu_max", + # "sys.freq.cpu_min", + # "sys.freq.cpu_perc", + # "sys.freq.emc", + # "sys.freq.emc_max", + # "sys.freq.emc_min", + # "sys.freq.emc_perc", + # "sys.freq.gpu", + # "sys.freq.gpu_max", + # "sys.freq.gpu_min", + # "sys.freq.gpu_perc", "sys.fs.avail", "sys.fs.size", "sys.hwmon", From cb28142e0f6506d793f8e89ae39e26ad946335a1 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Tue, 26 Sep 2023 11:48:07 -0500 Subject: [PATCH 04/12] bug fix: split sys tasks per device --- rollup_health_and_sanity_metrics.py | 20 ++++++++++++-------- utils.py | 27 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index a2d0391..6935936 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -4,7 +4,6 @@ import logging import sage_data_client import requests -from typing import NamedTuple from utils import ( load_node_table, parse_time, @@ -188,10 +187,10 @@ # the possible devices on a node. the frequency is the minimum expected # publishing frequency device_output_table = { - "nxcore": [("sys", name, "120s") for name in sys_from_nxcore], - "nxagent": [("sys", name, "120s") for name in sys_from_nxagent], - "rpi": [("sys", name, "120s") for name in sys_from_rpi], - "dell": [("sys", name, "60s") for name in sys_from_dellblade], + "nxcore": [("nxcore", name, "120s") for name in sys_from_nxcore], + "nxagent": [("nxagent", name, "120s") for name in sys_from_nxagent], + "rpi": [("rpi", name, "120s") for name in sys_from_rpi], + "dell": [("dell", name, "60s") for name in sys_from_dellblade], "bme280": [("wes-iio-bme280", name, "30s") for name in outputs_from_bme], "bme680": [("wes-iio-bme680", name, "30s") for name in outputs_from_bme], "raingauge": [("wes-raingauge", name, "30s") for name in outputs_from_raingauge], @@ -269,9 +268,14 @@ def add_device_health_check_record(vsn, device, value): } ) - # NOTE metrics agent doesn't add a task name, so we set task name - # to system for system metrics. - df.loc[df["name"].str.startswith("sys."), "meta.task"] = "sys" + # NOTE derive task name from sys metrics using host + is_sys = df["name"].str.startswith("sys.") + df.loc[is_sys & df["meta.host"].str.endswith("nxcore"), "meta.task"] = "nxcore" + df.loc[is_sys & df["meta.host"].str.endswith("nxagent"), "meta.task"] = "nxagent" + # NOTE this will not really work for nodes with multiple rpis. we need to rethink this a bit + # in the future. for now, we want to fix the urgent problem of differentiating most sys metrics. + df.loc[is_sys & df["meta.host"].str.endswith("rpi"), "meta.task"] = "rpi" + df.loc[is_sys & df["meta.host"].str.endswith("sbcore"), "meta.task"] = "dell" vsn_groups = df.groupby(["meta.vsn"]) diff --git a/utils.py b/utils.py index f60d3ab..c006de9 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,13 @@ - import influxdb_client -from influxdb_client.client.write_api import WriteOptions, WritePrecision, Point, WriteType +from influxdb_client.client.write_api import ( + WriteOptions, + WritePrecision, + Point, + WriteType, +) import pandas as pd import requests -from typing import NamedTuple +from dataclasses import dataclass def write_results_to_influxdb(url, token, org, bucket, records): @@ -18,9 +22,14 @@ def write_results_to_influxdb(url, token, org, bucket, records): p = p.time(int(r["timestamp"].timestamp()), write_precision=WritePrecision.S) data.append(p) - with influxdb_client.InfluxDBClient(url=url, token=token, org=org) as client, \ - client.write_api(write_options=WriteOptions(batch_size=10000)) as write_api: - write_api.write(bucket=bucket, org=org, record=data, write_precision=WritePrecision.S) + with influxdb_client.InfluxDBClient( + url=url, token=token, org=org + ) as client, client.write_api( + write_options=WriteOptions(batch_size=10000) + ) as write_api: + write_api.write( + bucket=bucket, org=org, record=data, write_precision=WritePrecision.S + ) def check_publishing_frequency(df, freq, window): @@ -29,7 +38,8 @@ def check_publishing_frequency(df, freq, window): return total_samples / expected_samples -class Node(NamedTuple): +@dataclass +class Node: id: str vsn: str type: str @@ -54,11 +64,8 @@ def load_node_table_item(item): devices.add("nxagent") if item["shield"] is True: devices.add("rpi") - if item["shield"] is True: devices.add("raingauge") - if item["shield"] is True: devices.add("bme680") - if item["shield"] is True: devices.add("microphone") # add cameras From 5e1e4906eca02f645c9f9a35e90a080543fadf5e Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 28 Sep 2023 17:07:26 -0500 Subject: [PATCH 05/12] bumped sage-data-client to 0.7.1 to address loading large / small numbers --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e4b399b..b088a0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -sage-data-client==0.6.0 +sage-data-client==0.7.1 slackclient influxdb-client[ciso] requests From 00d949d9085b2ba13dd2435f48100dda313fd34f Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Thu, 28 Sep 2023 17:13:53 -0500 Subject: [PATCH 06/12] upped version python 3.11 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 2305cac..76602e3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8 +FROM python:3.11 WORKDIR /app COPY requirements.txt . RUN pip3 install --no-cache-dir -r requirements.txt From 2664a415e553a0f3c04139c5fa4d3819d287a060 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:30:20 -0500 Subject: [PATCH 07/12] bumped sage-data-client version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b088a0f..783fbab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -sage-data-client==0.7.1 +sage-data-client==0.8.0 slackclient influxdb-client[ciso] requests From 3cfccda3ac9580ffd4994e658f128df9dbbefd05 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:32:17 -0500 Subject: [PATCH 08/12] INFLUXDB_ vars can use env vars. rename time_windows to get_time_windows and return list. added reverse flag. using experimental count func for plugin stats. --- rollup_health_and_sanity_metrics.py | 13 +++- rollup_plugin_counts.py | 110 ++++++++++++++++++---------- test_utils.py | 16 ++-- utils.py | 4 +- 4 files changed, 95 insertions(+), 48 deletions(-) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 6935936..70dc312 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -1,5 +1,6 @@ import argparse import os +from os import getenv import pandas as pd import logging import sage_data_client @@ -8,7 +9,7 @@ load_node_table, parse_time, get_rollup_range, - time_windows, + get_time_windows, write_results_to_influxdb, check_publishing_frequency, ) @@ -183,6 +184,10 @@ "env.raingauge.total_acc", } +# add a stuct here which either has count or interval so we can check this + +"nxcore": Check("nxcore", name, mean_publish_interval="3min") + # device_output_table describes the output publishing policy for each of # the possible devices on a node. the frequency is the minimum expected # publishing frequency @@ -426,8 +431,8 @@ def time_arg(s): ) if not args.dry_run: - INFLUXDB_URL = "https://influxdb.sagecontinuum.org" - INFLUXDB_ORG = "waggle" + INFLUXDB_URL = getenv("INFLUXDB_URL", "https://influxdb.sagecontinuum.org") + INFLUXDB_ORG = getenv("INFLUXDB_ORG", "waggle") INFLUXDB_TOKEN = os.environ["INFLUXDB_TOKEN"] nodes = load_node_table() @@ -436,7 +441,7 @@ def time_arg(s): logging.info("current time is %s", now) - for start, end in time_windows(start, end, window): + for start, end in get_time_windows(start, end, window): logging.info("getting health records in %s %s", start, end) health_records = get_health_records_for_window(nodes, start, end, window) diff --git a/rollup_plugin_counts.py b/rollup_plugin_counts.py index ccaa7f5..f0d6cef 100644 --- a/rollup_plugin_counts.py +++ b/rollup_plugin_counts.py @@ -1,70 +1,100 @@ import argparse import os +from os import getenv import pandas as pd import logging import sage_data_client -import json -from utils import load_node_table, parse_time, get_rollup_range, time_windows, write_results_to_influxdb +from utils import ( + load_node_table, + parse_time, + get_rollup_range, + get_time_windows, + write_results_to_influxdb, +) def get_plugin_counts_for_window(nodes, start, end, convert_timestamps=False): - df = sage_data_client.query(start=start, end=end, filter={ - "plugin": ".*" - }) + df = sage_data_client.query( + start=start, + end=end, + filter={"plugin": ".*"}, + experimental_func="count", + ) df["timestamp"] = df["timestamp"].dt.round("1h") - df["total"] = 1 + # experimental_func count returns the total counts as the value field + df["total"] = df["value"] # ignore records with "plugin.duration" for now df = df[df["name"].str.contains("plugin.duration") == False] - table = df.groupby(["meta.node", "meta.vsn", 'meta.plugin'])[["total"]].sum() + table = df.groupby(["meta.node", "meta.vsn", "meta.plugin"])[["total"]].sum() records = [] for node in nodes: try: r = table.loc[(node.id, node.vsn)] - plugin_totals = r["total"] + plugin_totals = r["total"] except KeyError: - continue # ignore if id & vsn combination not found + continue # ignore if id & vsn combination not found for plugin_name, total in plugin_totals.items(): - records.append({ - "measurement": "total", - "tags": { - "vsn": node.vsn, - "node": node.id, - "plugin": plugin_name - }, - "fields": { - "value": int(total), # is this necessary? - }, - "timestamp": start.isoformat()+'Z' if convert_timestamps else start, - }) + records.append( + { + "measurement": "total", + "tags": {"vsn": node.vsn, "node": node.id, "plugin": plugin_name}, + "fields": { + "value": int(total), # is this necessary? + }, + "timestamp": ( + start.isoformat() + "Z" if convert_timestamps else start + ), + } + ) return records def main(): - INFLUXDB_URL = "https://influxdb.sagecontinuum.org" - INFLUXDB_ORG = "waggle" - INFLUXDB_TOKEN = os.environ["INFLUXDB_TOKEN"] - now = pd.to_datetime("now", utc=True) + def time_arg(s): return parse_time(s, now=now) parser = argparse.ArgumentParser() - parser.add_argument("--start", default="-1h", type=time_arg, help="relative start time") + parser.add_argument( + "--dry-run", + action="store_true", + help="perform dry run to view logs. will skip writing results to influxdb.", + ) + parser.add_argument( + "--start", default="-1h", type=time_arg, help="relative start time" + ) parser.add_argument("--end", default="now", type=time_arg, help="relative end time") - parser.add_argument("--window", default="1h", type=pd.Timedelta, help="window duration to aggreagate over") + parser.add_argument( + "--window", + default="1h", + type=pd.Timedelta, + help="window duration to aggreagate over", + ) + parser.add_argument( + "--reverse", + action="store_true", + help="reverse the rollup starting so it works from most recent to least recent", + ) args = parser.parse_args() logging.basicConfig( level=logging.INFO, format="%(asctime)s %(message)s", - datefmt="%Y/%m/%d %H:%M:%S") + datefmt="%Y/%m/%d %H:%M:%S", + ) + + if not args.dry_run: + INFLUXDB_URL = getenv("INFLUXDB_URL", "https://influxdb.sagecontinuum.org") + INFLUXDB_ORG = getenv("INFLUXDB_ORG", "waggle") + INFLUXDB_TOKEN = os.environ["INFLUXDB_TOKEN"] nodes = load_node_table() start, end = get_rollup_range(args.start, args.end) @@ -72,19 +102,25 @@ def time_arg(s): logging.info("current time is %s", now) - for start, end in time_windows(start, end, window): + time_windows = get_time_windows(start, end, window) + + if args.reverse: + time_windows = reversed(time_windows) + + for start, end in time_windows: logging.info("getting plugin counts for %s %s", start, end) records = get_plugin_counts_for_window(nodes, start, end) - # print(json.dumps(records)) - logging.info("writing %d sanity health records...", len(records)) - write_results_to_influxdb( - url=INFLUXDB_URL, - org=INFLUXDB_ORG, - token=INFLUXDB_TOKEN, - bucket="plugin-stats", - records=records) + if not args.dry_run: + logging.info("writing %d plugin stats records...", len(records)) + write_results_to_influxdb( + url=INFLUXDB_URL, + org=INFLUXDB_ORG, + token=INFLUXDB_TOKEN, + bucket="plugin-stats", + records=records, + ) logging.info("done!") diff --git a/test_utils.py b/test_utils.py index 5c61765..15c5b53 100644 --- a/test_utils.py +++ b/test_utils.py @@ -1,4 +1,4 @@ -from utils import load_node_table, parse_time, get_rollup_range, time_windows +from utils import load_node_table, parse_time, get_rollup_range, get_time_windows import pandas as pd import unittest @@ -18,18 +18,24 @@ def test_parse_time(self): now = datetime("2021-10-11 10:34:23") self.assertEqual(parse_time("-3h", now=now), datetime("2021-10-11 07:34:23")) # check absolute time - self.assertEqual(parse_time("2021-10-11 11:22:33", now=now), datetime("2021-10-11 11:22:33")) + self.assertEqual( + parse_time("2021-10-11 11:22:33", now=now), datetime("2021-10-11 11:22:33") + ) def test_get_rollup_range(self): now = datetime("2021-10-11 10:34:23") - start, end = get_rollup_range(parse_time("-3h", now=now), parse_time("-1h", now=now)) + start, end = get_rollup_range( + parse_time("-3h", now=now), parse_time("-1h", now=now) + ) self.assertEqual(start, datetime("2021-10-11 07:00:00")) self.assertEqual(end, datetime("2021-10-11 09:00:00")) def test_time_windows(self): now = datetime("2021-10-11 10:01:23") - start, end = get_rollup_range(parse_time("-3h", now=now), parse_time("-1h", now=now)) - windows = list(time_windows(start, end, "1h")) + start, end = get_rollup_range( + parse_time("-3h", now=now), parse_time("-1h", now=now) + ) + windows = list(get_time_windows(start, end, "1h")) expect = [ (datetime("2021-10-11 07:00:00"), datetime("2021-10-11 08:00:00")), (datetime("2021-10-11 08:00:00"), datetime("2021-10-11 09:00:00")), diff --git a/utils.py b/utils.py index c006de9..402d000 100644 --- a/utils.py +++ b/utils.py @@ -82,9 +82,9 @@ def load_node_table_item(item): ) -def time_windows(start, end, freq): +def get_time_windows(start, end, freq): windows = pd.date_range(start, end, freq=freq) - return zip(windows[:-1], windows[1:]) + return list(zip(windows[:-1], windows[1:])) def parse_time(s, now=None): From a6776a827cdc7e0a50f8ef183a4b4418639087a2 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:37:32 -0500 Subject: [PATCH 09/12] added bucket env vars (with same defaults as before). added --reverse flag to health / sanity rollup --- rollup_health_and_sanity_metrics.py | 20 ++++++++++++++++---- rollup_plugin_counts.py | 3 ++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 70dc312..19468fd 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -422,6 +422,11 @@ def time_arg(s): type=pd.Timedelta, help="window duration to aggreagate over", ) + parser.add_argument( + "--reverse", + action="store_true", + help="reverse the rollup starting so it works from most recent to least recent", + ) args = parser.parse_args() logging.basicConfig( @@ -434,6 +439,8 @@ def time_arg(s): INFLUXDB_URL = getenv("INFLUXDB_URL", "https://influxdb.sagecontinuum.org") INFLUXDB_ORG = getenv("INFLUXDB_ORG", "waggle") INFLUXDB_TOKEN = os.environ["INFLUXDB_TOKEN"] + INFLUXDB_BUCKET_HEALTH = getenv("INFLUXDB_BUCKET_HEALTH", "health-check-test") + INFLUXDB_BUCKET_SANITY = getenv("INFLUXDB_BUCKET_SANITY", "downsampled-test") nodes = load_node_table() start, end = get_rollup_range(args.start, args.end) @@ -441,7 +448,12 @@ def time_arg(s): logging.info("current time is %s", now) - for start, end in get_time_windows(start, end, window): + time_windows = get_time_windows(start, end, window) + + if args.reverse: + time_windows = reversed(time_windows) + + for start, end in time_windows: logging.info("getting health records in %s %s", start, end) health_records = get_health_records_for_window(nodes, start, end, window) @@ -451,7 +463,7 @@ def time_arg(s): url=INFLUXDB_URL, org=INFLUXDB_ORG, token=INFLUXDB_TOKEN, - bucket="health-check-test", + bucket=INFLUXDB_BUCKET_HEALTH, records=health_records, ) @@ -459,12 +471,12 @@ def time_arg(s): sanity_records = get_sanity_records_for_window(nodes, start, end) if not args.dry_run: - logging.info("writing %d sanity health records...", len(sanity_records)) + logging.info("writing %d sanity records...", len(sanity_records)) write_results_to_influxdb( url=INFLUXDB_URL, org=INFLUXDB_ORG, token=INFLUXDB_TOKEN, - bucket="downsampled-test", + bucket=INFLUXDB_BUCKET_SANITY, records=sanity_records, ) diff --git a/rollup_plugin_counts.py b/rollup_plugin_counts.py index f0d6cef..c5c38a6 100644 --- a/rollup_plugin_counts.py +++ b/rollup_plugin_counts.py @@ -95,6 +95,7 @@ def time_arg(s): INFLUXDB_URL = getenv("INFLUXDB_URL", "https://influxdb.sagecontinuum.org") INFLUXDB_ORG = getenv("INFLUXDB_ORG", "waggle") INFLUXDB_TOKEN = os.environ["INFLUXDB_TOKEN"] + INFLUXDB_BUCKET = getenv("INFLUXDB_BUCKET", "plugin-stats") nodes = load_node_table() start, end = get_rollup_range(args.start, args.end) @@ -118,7 +119,7 @@ def time_arg(s): url=INFLUXDB_URL, org=INFLUXDB_ORG, token=INFLUXDB_TOKEN, - bucket="plugin-stats", + bucket=INFLUXDB_BUCKET, records=records, ) From a85c40c90461349951b076eccbf13e7afdd33995 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:47:43 -0500 Subject: [PATCH 10/12] fixed line which was supposed to be a comment --- rollup_health_and_sanity_metrics.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rollup_health_and_sanity_metrics.py b/rollup_health_and_sanity_metrics.py index 19468fd..932dda0 100644 --- a/rollup_health_and_sanity_metrics.py +++ b/rollup_health_and_sanity_metrics.py @@ -185,8 +185,7 @@ } # add a stuct here which either has count or interval so we can check this - -"nxcore": Check("nxcore", name, mean_publish_interval="3min") +# "nxcore": Check("nxcore", name, mean_publish_interval="3min") # device_output_table describes the output publishing policy for each of # the possible devices on a node. the frequency is the minimum expected From c0f0aa3b2605625e8ad72c7cfae22b0cf2692ab4 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:48:54 -0500 Subject: [PATCH 11/12] added gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7275bb --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv/ From fd2148e08584e9815edb3072f99b5f4ca3d3fe25 Mon Sep 17 00:00:00 2001 From: Sean Shahkarami Date: Wed, 17 Jul 2024 13:49:26 -0500 Subject: [PATCH 12/12] update .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f7275bb..d0ee3b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ venv/ +__pycache__/ +*.pyc