diff --git a/docs/book/src/plugins/grafana-v1-alpha.md b/docs/book/src/plugins/grafana-v1-alpha.md index 92ca74e5c27..9f8c277be0b 100644 --- a/docs/book/src/plugins/grafana-v1-alpha.md +++ b/docs/book/src/plugins/grafana-v1-alpha.md @@ -59,9 +59,9 @@ See an example of how to use the plugin in your project: 1. Copy the JSON file 2. Visit `/dashboard/import` to [import a new dashboard](https://grafana.com/docs/grafana/latest/dashboards/export-import/#import-dashboard). 3. Paste the JSON content to `Import via panel json`, then press `Load` button - Screen Shot 2022-06-28 at 3 40 22 AM + 4. Select the data source for Prometheus metrics - Screen Shot 2022-06-28 at 3 41 26 AM + 5. Once the json is imported in Grafana, the dashboard is ready. ### Grafana Dashboard @@ -77,7 +77,7 @@ See an example of how to use the plugin in your project: - Description: - Per-second rate of total reconciliation as measured over the last 5 minutes - Per-second rate of reconciliation errors as measured over the last 5 minutes -- Sample: +- Sample: #### Controller CPU & Memory Usage @@ -90,7 +90,7 @@ See an example of how to use the plugin in your project: - Description: - Per-second rate of CPU usage as measured over the last 5 minutes - Allocated Memory for the running controller -- Sample: +- Sample: #### Seconds of P50/90/99 Items Stay in Work Queue @@ -100,7 +100,7 @@ See an example of how to use the plugin in your project: - histogram_quantile(0.50, sum(rate(workqueue_queue_duration_seconds_bucket{job="$job", namespace="$namespace"}[5m])) by (instance, name, le)) - Description - Seconds an item stays in workqueue before being requested. 
-- Sample: +- Sample: #### Seconds of P50/90/99 Items Processed in Work Queue @@ -120,7 +120,7 @@ See an example of how to use the plugin in your project: - sum(rate(workqueue_adds_total{job="$job", namespace="$namespace"}[5m])) by (instance, name) - Description - Per-second rate of items added to work queue -- Sample: +- Sample: #### Retries Rate in Work Queue @@ -130,7 +130,37 @@ See an example of how to use the plugin in your project: - sum(rate(workqueue_retries_total{job="$job", namespace="$namespace"}[5m])) by (instance, name) - Description - Per-second rate of retries handled by workqueue -- Sample: +- Sample: + +#### Number of Workers in Use + +- Metrics + - controller_runtime_active_workers +- Query: + - controller_runtime_active_workers{job="$job", namespace="$namespace"} +- Description + - The number of active controller workers +- Sample: + +#### WorkQueue Depth + +- Metrics + - workqueue_depth +- Query: + - workqueue_depth{job="$job", namespace="$namespace"} +- Description + - Current depth of workqueue +- Sample: + +#### Unfinished Seconds + +- Metrics + - workqueue_unfinished_work_seconds +- Query: + - rate(workqueue_unfinished_work_seconds{job="$job", namespace="$namespace"}[5m]) +- Description + - How many seconds of work has been done that is in progress and hasn't been observed by work_duration. 
+- Sample: ### Visualize Custom Metrics diff --git a/pkg/plugins/optional/grafana/v1alpha/scaffolds/internal/templates/runtime.go b/pkg/plugins/optional/grafana/v1alpha/scaffolds/internal/templates/runtime.go index 85321d63005..01c2e856fe2 100644 --- a/pkg/plugins/optional/grafana/v1alpha/scaffolds/internal/templates/runtime.go +++ b/pkg/plugins/optional/grafana/v1alpha/scaffolds/internal/templates/runtime.go @@ -105,6 +105,62 @@ const controllerRuntimeTemplate = `{ "title": "Reconciliation Metrics", "type": "row" }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 24, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "controller_runtime_active_workers{job=\"$job\", namespace=\"$namespace\"}", + "interval": "", + "legendFormat": "{{controller}} {{instance}}", + "refId": "A" + } + ], + "title": "Number of workers in use", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "Total number of reconciliations per controller", @@ -114,6 +170,8 @@ const controllerRuntimeTemplate = `{ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -160,17 +218,18 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, + "h": 8, + "w": 11, + "x": 3, "y": 1 }, "id": 7, "options": { "legend": { "calcs": [], - "displayMode": "list", 
- "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -201,6 +260,8 @@ const controllerRuntimeTemplate = `{ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -247,17 +308,18 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, + "h": 8, + "w": 10, + "x": 14, "y": 1 }, "id": 6, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -285,13 +347,69 @@ const controllerRuntimeTemplate = `{ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 9 }, "id": 11, "panels": [], "title": "Work Queue Metrics", "type": "row" }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 10 + }, + "id": 22, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "workqueue_depth{job=\"$job\", namespace=\"$namespace\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "WorkQueue Depth", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "How long in seconds an item stays in workqueue before being requested", @@ -301,6 +419,8 @@ const controllerRuntimeTemplate = `{ "mode": "palette-classic" }, "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -347,10 +467,10 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 9 + "h": 8, + "w": 11, + "x": 3, + "y": 10 }, "id": 13, "options": { @@ -359,8 +479,9 @@ const controllerRuntimeTemplate = `{ "max", "mean" ], - "displayMode": "list", - "placement": "right" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -406,6 +527,8 @@ const controllerRuntimeTemplate = `{ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -452,17 +575,18 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 9 + "h": 8, + "w": 10, + "x": 14, + "y": 10 }, "id": 15, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -483,6 +607,64 @@ const controllerRuntimeTemplate = `{ "title": "Work Queue Add Rate", "type": "timeseries" }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "How many seconds of work has done that is in progress and hasn't been observed by work_duration.\nLarge values indicate stuck threads.\nOne can deduce the number of stuck threads by observing the rate at which this increases.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 0, + "y": 18 + }, + "id": 23, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": 
"", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "rate(workqueue_unfinished_work_seconds{job=\"$job\", namespace=\"$namespace\"}[5m])", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Unfinished Seconds", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "How long in seconds processing an item from workqueue takes.", @@ -492,6 +674,8 @@ const controllerRuntimeTemplate = `{ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -538,10 +722,10 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 16 + "h": 9, + "w": 11, + "x": 3, + "y": 18 }, "id": 19, "options": { @@ -551,7 +735,8 @@ const controllerRuntimeTemplate = `{ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -598,6 +783,8 @@ const controllerRuntimeTemplate = `{ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -644,17 +831,18 @@ const controllerRuntimeTemplate = `{ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 16 + "h": 9, + "w": 10, + "x": 14, + "y": 18 }, "id": 17, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", diff --git a/testdata/project-v4-with-grafana/grafana/controller-runtime-metrics.json b/testdata/project-v4-with-grafana/grafana/controller-runtime-metrics.json index 70023a42d82..c8eea4cb434 100644 --- 
a/testdata/project-v4-with-grafana/grafana/controller-runtime-metrics.json +++ b/testdata/project-v4-with-grafana/grafana/controller-runtime-metrics.json @@ -58,6 +58,62 @@ "title": "Reconciliation Metrics", "type": "row" }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 24, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "controller_runtime_active_workers{job=\"$job\", namespace=\"$namespace\"}", + "interval": "", + "legendFormat": "{{controller}} {{instance}}", + "refId": "A" + } + ], + "title": "Number of workers in use", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "Total number of reconciliations per controller", @@ -67,6 +123,8 @@ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -113,17 +171,18 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, + "h": 8, + "w": 11, + "x": 3, "y": 1 }, "id": 7, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -154,6 +213,8 @@ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -200,17 +261,18 @@ "overrides": [] }, "gridPos": { - "h": 7, 
- "w": 12, - "x": 12, + "h": 8, + "w": 10, + "x": 14, "y": 1 }, "id": 6, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -238,13 +300,69 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 9 }, "id": 11, "panels": [], "title": "Work Queue Metrics", "type": "row" }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 3, + "x": 0, + "y": 10 + }, + "id": 22, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "workqueue_depth{job=\"$job\", namespace=\"$namespace\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "WorkQueue Depth", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "How long in seconds an item stays in workqueue before being requested", @@ -254,6 +372,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -300,10 +420,10 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 9 + "h": 8, + "w": 11, + "x": 3, + "y": 10 }, "id": 13, "options": { @@ -312,8 +432,9 @@ "max", "mean" ], - "displayMode": "list", - "placement": "right" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -359,6 +480,8 @@ "mode": "continuous-GrYlRd" }, "custom": { + 
"axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -405,17 +528,18 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 9 + "h": 8, + "w": 10, + "x": 14, + "y": 10 }, "id": 15, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -436,6 +560,64 @@ "title": "Work Queue Add Rate", "type": "timeseries" }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "How many seconds of work has done that is in progress and hasn't been observed by work_duration.\nLarge values indicate stuck threads.\nOne can deduce the number of stuck threads by observing the rate at which this increases.", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 3, + "x": 0, + "y": 18 + }, + "id": 23, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.5.3", + "targets": [ + { + "datasource": "${DS_PROMETHEUS}", + "exemplar": true, + "expr": "rate(workqueue_unfinished_work_seconds{job=\"$job\", namespace=\"$namespace\"}[5m])", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Unfinished Seconds", + "type": "gauge" + }, { "datasource": "${DS_PROMETHEUS}", "description": "How long in seconds processing an item from workqueue takes.", @@ -445,6 +627,8 @@ "mode": "palette-classic" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ 
-491,10 +675,10 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 16 + "h": 9, + "w": 11, + "x": 3, + "y": 18 }, "id": 19, "options": { @@ -504,7 +688,8 @@ "mean" ], "displayMode": "table", - "placement": "right" + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single", @@ -551,6 +736,8 @@ "mode": "continuous-GrYlRd" }, "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, @@ -597,17 +784,18 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 16 + "h": 9, + "w": 10, + "x": 14, + "y": 18 }, "id": 17, "options": { "legend": { "calcs": [], - "displayMode": "list", - "placement": "bottom" + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, "tooltip": { "mode": "single",