diff --git a/deployments/with-creds/metrics/README.md b/deployments/with-creds/metrics/README.md index 158b476..e984237 100644 --- a/deployments/with-creds/metrics/README.md +++ b/deployments/with-creds/metrics/README.md @@ -1,13 +1,16 @@ # Metrics -The `metrics` deployment consists of the whole configuration and stack for monitoring targets that run in the `hush-house` cluster. +The `metrics` deployment consists of the whole configuration and stack for monitoring targets that run in the `hush-house` Kubernetes cluster. It's composed of two dependent charts: - [prometheus](https://github.com/helm/charts/blob/master/stable/prometheus/README.md); and - [grafana](https://github.com/helm/charts/blob/master/stable/grafana/README.md). -And a publicly accessible URL for dashboards: https://metrics-hush-house.concourse-ci.org/dashboards +And a publicly accessible URL for dashboards: +- https://metrics-hush-house.concourse-ci.org/dashboards + +NOTE: this stack is meant to monitor *all* deployments in the cluster, not only the `hush-house` deployment. ## Prometheus @@ -25,21 +28,15 @@ To have **services** from your deployment scraped by the Prometheus instance man - `prometheus.io/path`: If the metrics path is not `/metrics` override this; and - `prometheus.io/port`: If the metrics are exposed on a different port to the service then set this appropriately. -To have **pods** scraped: - -- `prometheus.io/scrape`: Only scrape pods that have a value of `true`; -- `prometheus.io/path`: If the metrics path is not `/metrics` override this; and -- `prometheus.io/port`: Scrape the pod on the indicated port instead of the default of `9102`. - ### Accessing the console Not being publicly exposed, its console can be accessed through port-forwarding: ```sh -# Set up a local proxy that allows us to connect -# to the `metrics-prometheus-server` service by -# hitting `127.0.0.1:9090`. 
+# Set up a local proxy that allows us to connect to the +# `metrics-prometheus-server` service by hitting `127.0.0.1:9090`. +# kubectl port-forward \ --namespace metrics \ service/metrics-prometheus-server \ @@ -70,5 +67,3 @@ The dashboards can be found under [`./dashboards`](./dashboards), while the temp Given that all of the state lives under [`./dashboards`](./dashboards) and in-place updates are not allowed in the Grafana Web UI, to update a dashboard, copy the JSON configuration and paste it under the corresponding dashboard. Once a revision gets created, the `ConfigMap` update will be noticed by a [sidecar](https://kubernetes.io/docs/concepts/workloads/pods/pod-overview/) container under the Grafana pod and update the instance to catch up with the update. - -NOTE: There's an intermittent [bug](https://github.com/helm/helm/issues/3785) with helm not correctly calculating the patch for configMaps. You'll have to delete the configMap for the dashboard you want to update before running `make deploy-metrics`. If grafana still doesn't show your changes then delete the grafana pod and let k8s controller bring it back. diff --git a/deployments/with-creds/metrics/dashboards/concourse/concourse.json b/deployments/with-creds/metrics/dashboards/concourse/concourse.json index 6923f05..6ae7379 100644 --- a/deployments/with-creds/metrics/dashboards/concourse/concourse.json +++ b/deployments/with-creds/metrics/dashboards/concourse/concourse.json @@ -17,7 +17,8 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "iteration": 1558180667167, + "id": 4, + "iteration": 1574455228918, "links": [ { "icon": "info", @@ -32,6 +33,7 @@ "panels": [ { "collapsed": false, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -45,6 +47,7 @@ }, { "content": "1. **Overall**: not specific to a particular instance, represents the internal work that Concourse as a service performs;\n2. 
**Web**: per-ATC statistics for throubleshooting ATC problems on both container and application level; and\n3. **Workers**: per-worker container-level statistics.\n\nRegardless of the section you're looking at, you should expect:\n\n- Each panel contains a description associated with it - hover over the `i` icon on the top left of each panel to know more about the metric being kept track of;\n- A threshold that identifies what is considered *good* behavior for such telemetry (if none, see the panel description)\n", + "datasource": null, "gridPos": { "h": 6, "w": 12, @@ -54,12 +57,14 @@ "id": 41, "links": [], "mode": "markdown", + "options": {}, "title": "About the sections", "transparent": true, "type": "text" }, { "content": "This row is intended for the operator to better understand at a glance what is the load that each worker is under at.\n\nThe metrics displayed there are from the perspective of both the [pod][k8s-pod] and/or container (not the host).\n\n**ATTENTION**: For workers that are external to the cluster, K8S metrics (like CPU usage) can't be retrieved - only those that are exported by ATC can be fetched.\n\n\n[k8s-pod]: https://kubernetes.io/docs/concepts/workloads/pods/pod/\n\n", + "datasource": null, "gridPos": { "h": 4, "w": 12, @@ -69,12 +74,14 @@ "id": 15, "links": [], "mode": "markdown", + "options": {}, "title": "Workers section", "transparent": true, "type": "text" }, { "content": "At this moment, the environments accessible here are being maintained by the Concourse for PCF team in Toronto.\n\nYou can reach us at the `concourse-hush-house` Slack channel.", + "datasource": null, "gridPos": { "h": 4, "w": 12, @@ -84,12 +91,14 @@ "id": 44, "links": [], "mode": "markdown", + "options": {}, "title": "Contact", "transparent": true, "type": "text" }, { "content": "Metrics like memory and CPU usage are always relative to the total amount of resources conceded to the container, thus, \ncontainers without resource limits **don't** have their resources 
tracked. ", + "datasource": null, "gridPos": { "h": 3, "w": 12, @@ -99,12 +108,14 @@ "id": 51, "links": [], "mode": "markdown", + "options": {}, "title": "Container resource metrics", "transparent": true, "type": "text" }, { "collapsed": false, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -122,45 +133,63 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 1, + "fillGradient": 3, "gridPos": { "h": 8, "w": 8, "x": 0, "y": 11 }, - "id": 28, + "id": 56, "legend": { - "alignAsTable": true, "avg": false, - "current": true, - "hideEmpty": true, - "max": true, - "min": true, + "current": false, + "max": false, + "min": false, "show": false, - "sort": "current", - "sortDesc": true, "total": false, - "values": true + "values": false }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, - "pointradius": 0.5, - "points": true, + "pointradius": 1, + "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "succeeded", + "color": "#73BF69" + }, + { + "alias": "aborted", + "color": "rgb(255, 151, 0)" + }, + { + "alias": "errored", + "color": "#F2495C" + }, + { + "alias": "failed", + "color": "#FADE2A" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.995, sum by(route,le)(rate(concourse_http_responses_duration_seconds_bucket{kubernetes_namespace=~\"$namespace\"}[10m])))", + "expr": "sum by (status) (increase(concourse_builds_finished{kubernetes_pod_name=~\"$atc\",kubernetes_namespace=~\"$namespace\"}[$period]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "{{ route }}", + "legendFormat": "{{ status }}", "refId": "A" } ], @@ -168,7 +197,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "q99 response times", + "title": "Builds /$period", "tooltip": { "shared": true, "sort": 2, @@ -184,12 +213,11 @@ 
}, "yaxes": [ { - "decimals": 2, - "format": "s", + "format": "short", "label": null, - "logBase": 1, + "logBase": 2, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -211,43 +239,57 @@ "bars": false, "dashLength": 10, "dashes": false, - "description": "", - "fill": 1, + "datasource": null, + "fill": 0, + "fillGradient": 0, "gridPos": { "h": 8, "w": 8, "x": 8, "y": 11 }, - "id": 20, + "id": 50, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, + "hideZero": true, "max": true, "min": true, "show": false, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, - "pointradius": 0.5, - "points": true, + "pointradius": 1, + "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/^avg.*/", + "color": "rgba(0, 255, 87, 0.33)", + "dashes": true + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "max by (team, pipeline) (rate(concourse_scheduling_loading_duration_seconds_total{kubernetes_namespace=~\"$namespace\"}[5m]))", + "expr": "sum by (kubernetes_pod_name) (increase(concourse_scheduling_total{kubernetes_namespace=~\"$namespace\"}[$period]))", "format": "time_series", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{ kubernetes_pod_name }}", "refId": "A" } ], @@ -255,7 +297,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Loading versions duration", + "title": "Schedulings /$period", "tooltip": { "shared": true, "sort": 2, @@ -271,9 +313,9 @@ }, "yaxes": [ { - "decimals": 3, - "format": "s", - "label": null, + "decimals": 1, + "format": "short", + "label": "", "logBase": 1, "max": null, "min": "0", @@ -298,31 +340,39 @@ "bars": false, "dashLength": 10, "dashes": false, - "fill": 1, + "datasource": null, + "fill": 0, + "fillGradient": 
0, "gridPos": { "h": 8, "w": 8, "x": 16, "y": 11 }, - "id": 19, + "id": 28, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": true, "max": true, "min": true, "show": false, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 0.5, - "points": true, + "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, @@ -330,11 +380,10 @@ "steppedLine": false, "targets": [ { - "expr": "max by (team, pipeline) (rate(concourse_scheduling_full_duration_seconds_total{kubernetes_namespace=~\"$namespace\"}[5m]))", + "expr": "topk($top, histogram_quantile(\n\t0.995, \n\tsum by (route,le) (rate(\n\t\tconcourse_http_responses_duration_seconds_bucket{\n\t\t\tkubernetes_namespace=~\"$namespace\"\n\t\t}[10m]))\n\t)\n)", "format": "time_series", - "instant": false, "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{ route }}", "refId": "A" } ], @@ -342,7 +391,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Full scheduling duration", + "title": "q99 response times (top$top)", "tooltip": { "shared": true, "sort": 2, @@ -385,14 +434,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "fill": 0, + "datasource": "stackdriver", + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 8, "x": 0, "y": 19 }, - "id": 56, + "id": 59, "legend": { "avg": false, "current": false, @@ -406,469 +457,183 @@ "linewidth": 1, "links": [], "nullPointMode": "null", - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "succeeded", - "color": "#73BF69" - }, - { - "alias": "aborted", - "color": "rgb(255, 151, 0)" - }, - { - "alias": "errored", - "color": "#F2495C" - }, - { - "alias": "failed", - "color": "#FADE2A" - }, - { - "alias": "/^avg success.*/", - "color": 
"rgba(0, 255, 56, 0.36)", - "dashes": true - }, - { - "alias": "/^avg failure.*/", - "color": "rgba(255, 245, 0, 0.45)", - "dashes": true - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (status) (irate(concourse_builds_finished{kubernetes_pod_name=~\"$atc\",kubernetes_namespace=~\"$namespace\"}[3m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ status }}", - "refId": "A" - }, - { - "expr": "sum (rate(concourse_builds_finished{kubernetes_namespace=~\"$namespace\",status=\"succeeded\"}[24h]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg success (24h)", - "refId": "B" - }, - { - "expr": "sum (rate(concourse_builds_finished{kubernetes_namespace=~\"$namespace\",status=\"failed\"}[24h]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg failure (24h)", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Builds /s", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "fill": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 19 - }, - "id": 49, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true + "options": { + "dataLinks": [] }, - "lines": true, - "linewidth": 
1, - "links": [], - "nullPointMode": "null", "percentage": false, - "pointradius": 1, + "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/^avg.*/", - "color": "rgba(0, 255, 117, 0.35)", - "dashes": true - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (kubernetes_pod_name) (irate(concourse_resource_checks_total{kubernetes_namespace=~\"$namespace\"}[3m]))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "{{ kubernetes_pod_name }}", - "refId": "A" - }, - { - "expr": "sum (rate(concourse_resource_checks_total{kubernetes_namespace=~\"$namespace\"}[24h]))\n /\ncount(count by (kubernetes_pod_name) (concourse_resource_checks_total))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "avg (24h)", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Resource checks /s", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "fill": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 19 - }, - "id": 48, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": false, - "max": true, - "min": true, - "show": false, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - 
"percentage": false, - "pointradius": 1, - "points": true, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.999, sum by(pipeline,team,le)(rate(concourse_builds_duration_seconds_bucket{kubernetes_namespace=~\"$namespace\"}[3m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "q999 build finish times", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "fill": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 27 - }, - "id": 50, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "/^avg.*/", - "color": "rgba(0, 255, 87, 0.33)", - "dashes": true - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (kubernetes_pod_name) (irate(concourse_scheduling_total{kubernetes_namespace=~\"$namespace\"}[3m]))", - "format": "time_series", - "intervalFactor": 
2, - "legendFormat": "{{ kubernetes_pod_name }}", - "refId": "A" - }, - { - "expr": "sum (rate(concourse_scheduling_total{kubernetes_namespace=~\"$namespace\"}[24h]))\n /\ncount(count by (kubernetes_pod_name) (concourse_resource_checks_total))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "avg (24h)", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Schedulings /s", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "fill": 0, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 27 - }, - "id": 53, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "show": false, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "bars": false, - "color": "rgba(6, 255, 221, 0.29)", - "lines": true - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": true, - "targets": [ - { - "expr": "sum by (message) (rate(concourse_error_logs{kubernetes_pod_name=~\"$atc\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ message }}", - "refId": "A" - }, - { - "expr": "sum 
(rate(concourse_error_logs{kubernetes_pod_name=~\"$atc\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "total", - "refId": "B" + "aliasBy": "{{ metric.label.action }}", + "alignOptions": [ + { + "expanded": true, + "label": "Alignment options", + "options": [ + { + "label": "delta", + "metricKinds": [ + "CUMULATIVE", + "DELTA" + ], + "text": "delta", + "value": "ALIGN_DELTA", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY", + "DISTRIBUTION" + ] + }, + { + "label": "rate", + "metricKinds": [ + "CUMULATIVE", + "DELTA" + ], + "text": "rate", + "value": "ALIGN_RATE", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + }, + { + "label": "min", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "min", + "value": "ALIGN_MIN", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + }, + { + "label": "max", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "max", + "value": "ALIGN_MAX", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + }, + { + "label": "mean", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "mean", + "value": "ALIGN_MEAN", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + }, + { + "label": "count", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "count", + "value": "ALIGN_COUNT", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY", + "BOOL" + ] + }, + { + "label": "sum", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "sum", + "value": "ALIGN_SUM", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY", + "DISTRIBUTION" + ] + }, + { + "label": "stddev", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "stddev", + "value": "ALIGN_STDDEV", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + }, + { + "label": "percent change", + "metricKinds": [ + "GAUGE", + "DELTA" + ], + "text": "percent change", + "value": "ALIGN_PERCENT_CHANGE", + "valueTypes": [ + "INT64", + "DOUBLE", + "MONEY" + ] + } + ] + } + ], + "alignmentPeriod": 
"stackdriver-auto", + "crossSeriesReducer": "REDUCE_MEAN", + "defaultProject": "loading project...", + "filters": [], + "groupBys": [ + "metric.label.action" + ], + "lastQuery": "aggregation.alignmentPeriod=+60s&aggregation.crossSeriesReducer=REDUCE_MEAN&aggregation.groupByFields=metric.label.action&aggregation.perSeriesAligner=ALIGN_DELTA&filter=metric.type=\"logging.googleapis.com/user/worker-errors\"&interval.endTime=2019-05-18T12:02:01Z&interval.startTime=2019-05-18T06:02:01Z&view=FULL", + "lastQueryError": "", + "metricKind": "DELTA", + "metricType": "logging.googleapis.com/user/worker-errors", + "perSeriesAligner": "ALIGN_DELTA", + "refId": "A", + "service": "", + "usedAlignmentPeriod": 60, + "valueType": "INT64" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Web error rate (/s)", + "title": "Web errors", "tooltip": { "shared": true, - "sort": 2, + "sort": 0, "value_type": "individual" }, "type": "graph", @@ -881,12 +646,11 @@ }, "yaxes": [ { - "decimals": 1, "format": "short", - "label": "", + "label": null, "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -910,11 +674,12 @@ "dashes": false, "datasource": "stackdriver", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 8, - "x": 16, - "y": 27 + "x": 8, + "y": 19 }, "id": 58, "legend": { @@ -930,6 +695,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 2, "points": false, @@ -1137,8 +905,203 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 19 + }, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "show": false, + "sort": "current", + "sortDesc": true, + "total": false, + 
"values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^avg.*/", + "color": "rgba(0, 255, 117, 0.35)", + "dashes": true + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (kubernetes_pod_name) (irate(concourse_resource_checks_total{kubernetes_namespace=~\"$namespace\"}[3m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{ kubernetes_pod_name }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Resource checks /s", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 0, + "fillGradient": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 27 + }, + "id": 19, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "show": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"topk($top, max by (pipeline) (\n\trate(concourse_scheduling_full_duration_seconds_total{\n\t\tkubernetes_namespace=~\"$namespace\"\n\t}[$period]))\n)", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "{{pipeline}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Scheduling Duration (top$top)", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 2, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -1152,12 +1115,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 27 + "y": 12 }, "id": 32, "legend": { @@ -1176,6 +1141,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1281,12 +1249,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 27 + "y": 12 }, "id": 11, "legend": { @@ -1303,6 +1273,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1419,12 +1392,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 27 + "y": 12 }, "id": 29, "legend": { @@ -1443,6 +1418,9 @@ "linewidth": 1, 
"links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1524,12 +1502,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 34 + "y": 19 }, "id": 37, "legend": { @@ -1546,6 +1526,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1610,12 +1593,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 34 + "y": 19 }, "id": 33, "legend": { @@ -1632,6 +1617,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1697,12 +1685,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 34 + "y": 19 }, "id": 36, "legend": { @@ -1719,6 +1709,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1783,12 +1776,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 41 + "y": 26 }, "id": 7, "legend": { @@ -1808,6 +1803,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1820,6 +1818,10 @@ { "alias": "/^backend: .*/", "color": "#FADE2A" + }, + { + "alias": "/^gc: .*/", + "color": "#73BF69" } ], "spaceLength": 10, @@ -1882,12 +1884,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 41 + "y": 26 }, "id": 38, 
"legend": { @@ -1904,6 +1908,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1968,12 +1975,14 @@ "bars": true, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 41 + "y": 26 }, "id": 52, "legend": { @@ -1990,6 +1999,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 1, "points": false, @@ -2056,6 +2068,7 @@ }, { "collapsed": true, + "datasource": null, "gridPos": { "h": 1, "w": 24, @@ -2103,12 +2116,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 28 + "y": 13 }, "id": 13, "legend": { @@ -2127,6 +2142,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, "pointradius": 5, @@ -2209,12 +2227,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 28 + "y": 13 }, "id": 3, "legend": { @@ -2231,6 +2251,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, "pointradius": 1, @@ -2335,12 +2358,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 28 + "y": 13 }, "id": 2, "legend": { @@ -2357,6 +2382,9 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, "pointradius": 1, @@ -2437,12 +2465,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 0, - "y": 35 + "y": 20 }, "id": 30, 
"legend": { @@ -2461,25 +2491,37 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/^TX: .*/", + "color": "#FADE2A" + }, + { + "alias": "/^RX: .*/", + "color": "#5794F2" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name=~\"$worker\",image!=\"\"}[2m])) * 8", + "expr": "topk($top,\n\tsum by (pod_name) (irate(container_network_receive_bytes_total{\n\t\tpod_name=~\"$worker\",\n\t\timage!=\"\"\n\t}[2m])) * 8\n)", "format": "time_series", "intervalFactor": 2, "legendFormat": "RX: {{ pod_name }}", "refId": "A" }, { - "expr": "sum by (pod_name) (irate(container_network_transmit_bytes_total{pod_name=~\"$worker\",image!=\"\"}[2m])) * 8", + "expr": "topk($top,\n\tsum by (pod_name) (irate(container_network_transmit_bytes_total{\n\t\tpod_name=~\"$worker\",\n\t\timage!=\"\"\n\t}[2m])) * 8\n)", "format": "time_series", "intervalFactor": 2, "legendFormat": "TX: {{ pod_name }}", @@ -2490,7 +2532,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Network RX/TX", + "title": "Network RX/TX (top$top)", "tooltip": { "shared": true, "sort": 2, @@ -2572,14 +2614,16 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "decimals": null, "description": "[Persistent Volume][pv] disk utilization as seen by the [Kubelet][kubelet] in the node that runs the Concourse worker [pod][pod].\n\n[pod]: https://kubernetes.io/docs/concepts/workloads/pods/pod/\n\n[pv]: https://kubernetes.io/docs/concepts/storage/persistent-volumes/\n\n[kubelet]: https://kubernetes.io/docs/concepts/overview/components/#kubelet", "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 8, - "y": 35 + "y": 20 }, "id": 46, "legend": { @@ -2596,6 +2640,9 @@ 
"linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, "pointradius": 5, @@ -2607,7 +2654,7 @@ "steppedLine": false, "targets": [ { - "expr": "1 - (kubelet_volume_stats_available_bytes{persistentvolumeclaim=~\"concourse-work-dir-($worker)\"} / kubelet_volume_stats_capacity_bytes)", + "expr": "topk($top,\n\t1 - (\n\t\tkubelet_volume_stats_available_bytes{\n\t\t\tpersistentvolumeclaim=~\"concourse-work-dir-($worker)\"\n\t\t} / kubelet_volume_stats_capacity_bytes\n\t)\n)", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -2677,12 +2724,14 @@ "bars": false, "dashLength": 10, "dashes": false, + "datasource": null, "fill": 0, + "fillGradient": 0, "gridPos": { "h": 7, "w": 8, "x": 16, - "y": 35 + "y": 20 }, "id": 39, "legend": { @@ -2699,9 +2748,12 @@ "linewidth": 1, "links": [], "nullPointMode": "null", + "options": { + "dataLinks": [] + }, "paceLength": 10, "percentage": false, - "pointradius": 5, + "pointradius": 1, "points": false, "renderer": "flot", "seriesOverrides": [], @@ -2710,7 +2762,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (pod_name) (irate(container_cpu_usage_seconds_total{pod_name=~\"$worker\",container_name!=\"POD\",image!=\"\"}[2m])) \n / \n(sum by (pod_name) (container_spec_cpu_quota{pod_name=~\"$worker\",container_name!=\"POD\",image!=\"\"})/100000)", + "expr": "topk($top,\n\tsum by (pod_name) (rate(container_cpu_usage_seconds_total{pod_name=~\"$worker\",container_name!=\"POD\",image!=\"\"}[5m])) \n\t\t/ \n\t(sum by (pod_name) (container_spec_cpu_quota{pod_name=~\"$worker\",container_name!=\"POD\",image!=\"\"})/100000)\n)", "format": "time_series", "instant": false, "intervalFactor": 2, @@ -2722,7 +2774,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "CPU Usage (top$top)", "tooltip": { "shared": true, "sort": 2, @@ -2767,7 +2819,7 @@ } ], "refresh": "1m", - "schemaVersion": 18, + "schemaVersion": 
20, "style": "dark", "tags": [], "templating": { @@ -2775,15 +2827,16 @@ { "allValue": null, "current": { - "text": "hush-house", - "value": "hush-house" + "tags": [], + "text": "ci", + "value": "ci" }, "datasource": "prometheus", "definition": "", "hide": 0, "includeAll": false, "label": "Namespace", - "multi": true, + "multi": false, "name": "namespace", "options": [], "query": "label_values(concourse_db_connections, kubernetes_namespace)", @@ -2850,14 +2903,106 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "15m", + "value": "15m" + }, + "hide": 0, + "includeAll": false, + "label": "period", + "multi": false, + "name": "period", + "options": [ + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": true, + "text": "15m", + "value": "15m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "3h", + "value": "3h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "24h", + "value": "24h" + } + ], + "query": "5m,15m,30m,1h,3h,6h,12h,24h", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "5", + "value": "5" + }, + "hide": 0, + "includeAll": false, + "label": "top", + "multi": false, + "name": "top", + "options": [ + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "30", + "value": "30" + } + ], + "query": "5,15,30", + "skipUrlSync": false, + "type": "custom" } ] }, "time": { "from": "now-3h", - "to": "now" + "to": "now-1m" }, "timepicker": { + "nowDelay": "1m", "refresh_intervals": [ "5s", "10s", diff --git a/deployments/with-creds/metrics/dashboards/concourse/postgres.json 
b/deployments/with-creds/metrics/dashboards/concourse/postgres.json deleted file mode 100644 index ecae136..0000000 --- a/deployments/with-creds/metrics/dashboards/concourse/postgres.json +++ /dev/null @@ -1,1569 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "", - "editable": true, - "gnetId": 455, - "graphTooltip": 1, - "iteration": 1544360840068, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 24, - "panels": [], - "repeat": null, - "title": "Application", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 19, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "pg_locks_count{datname=~\"$db\",kubernetes_namespace=~\"$namespace\",kubernetes_name=~\"$container\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ mode }} ({{ datname }})", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Locks", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { 
- "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 13, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": true, - "hideZero": true, - "max": true, - "min": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(pg_stat_database_numbackends{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}) by (datname)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ datname }}", - "refId": "A", - "step": 240 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Active Connections", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - 
"dashes": false, - "datasource": null, - "fill": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 15, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[3m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "commit", - "refId": "A" - }, - { - "expr": "sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[3m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "rollback", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "QPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": false, - 
"sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "fetched", - "measurement": "postgresql", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "returned", - "measurement": "postgresql", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": 
"sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "inserted", - "measurement": "postgresql", - "policy": "default", - "refId": "C", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "updated", - "measurement": "postgresql", - "policy": "default", - "refId": "D", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",kubernetes_namespace=~\"$namespace\"}[5m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "deleted", - "measurement": "postgresql", - "policy": "default", - "refId": "E", - "resultFormat": "time_series", - 
"select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Rows", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 26, - "panels": [], - "title": "Container", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 16 - }, - "id": 17, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod_name) (irate(container_cpu_usage_seconds_total{container_name=~\"$container\",namespace=~\"$namespace\",image!=\"\"}[2m])) \n / \n(sum by (pod_name) 
(container_spec_cpu_quota{container_name=~\"$container\",namespace=~\"$namespace\",image!=\"\"})/100000)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ pod_name }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 16 - }, - "id": 16, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "container_memory_working_set_bytes{container_name=~\"$container\",namespace=~\"$namespace\"} / container_spec_memory_limit_bytes", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "{{ pod_name 
}}", - "measurement": "postgresql", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 16 - }, - "id": 20, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum by (pod_name) (irate(container_network_receive_bytes_total{pod_name=~\"[[container]]-.*\",namespace=~\"$namespace\"}[2m]))", - 
"format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "RX: {{ pod_name }}", - "measurement": "postgresql", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum by (pod_name) (irate(container_network_transmit_bytes_total{pod_name=~\"[[container]]-.*\",namespace=~\"$namespace\"}[2m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "TX: {{ pod_name }}", - "measurement": "postgresql", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Utilization", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, 
- "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 23 - }, - "id": 21, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, - "min": true, - "rightSide": false, - "show": false, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum by (pod_name) (irate(container_fs_writes_bytes_total{container_name=~\"$container\"}[2m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "TX: {{ pod_name }}", - "measurement": "postgresql", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - }, - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "sum by (pod_name) (irate(container_fs_reads_bytes_total{container_name=~\"$container\"}[2m]))", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - 
"intervalFactor": 2, - "legendFormat": "RX: {{ pod_name }}", - "measurement": "postgresql", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "IO Utilization", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "editable": true, - "error": false, - "fill": 0, - "grid": {}, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 23 - }, - "id": 22, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": true, - "min": true, - "rightSide": false, - "show": false, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "fetched", - "dsType": "prometheus", - "expr": "1 - (kubelet_volume_stats_available_bytes{persistentvolumeclaim=~\"$container\"} / 
kubelet_volume_stats_capacity_bytes)", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 2, - "legendFormat": "", - "measurement": "postgresql", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "tup_fetched" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - }, - { - "params": [ - "10s" - ], - "type": "non_negative_derivative" - } - ] - ], - "step": 120, - "tags": [ - { - "key": "host", - "operator": "=~", - "value": "/^$host$/" - } - ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Persistent Volume Utilization", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 1, - "format": "percentunit", - "label": null, - "logBase": 1, - "max": "1", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": "1m", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": "", - "current": { - "text": "All", - "value": [ - "$__all" - ] - }, - "datasource": "prometheus", - "hide": 0, - "includeAll": true, - "label": "Database", - "multi": true, - "name": "db", - "options": [], - "query": "label_values(pg_stat_database_tup_fetched, datname)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "hush-house", - "value": "hush-house" - }, - "datasource": "prometheus", - 
"hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - "query": "label_values(pg_exporter_scrapes_total, kubernetes_namespace)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod-postgresql", - "value": "prod-postgresql" - }, - "datasource": "prometheus", - "hide": 0, - "includeAll": false, - "label": "container", - "multi": false, - "name": "container", - "options": [], - "query": "label_values(pg_exporter_scrapes_total, kubernetes_name)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Postgres", - "uid": "wGgaPlciz", - "version": 6 -} \ No newline at end of file diff --git a/deployments/with-creds/metrics/requirements.lock b/deployments/with-creds/metrics/requirements.lock index 3fcdfea..888728f 100644 --- a/deployments/with-creds/metrics/requirements.lock +++ b/deployments/with-creds/metrics/requirements.lock @@ -1,9 +1,9 @@ dependencies: - name: prometheus repository: https://kubernetes-charts.storage.googleapis.com/ - version: 8.10.3 + version: 9.3.1 - name: grafana repository: https://kubernetes-charts.storage.googleapis.com/ - version: 3.3.10 -digest: sha256:92944b3f98fdaf43bb63bde7d994d2c79dc58705f4fb2b29ccab7db235ad212e -generated: 2019-05-29T11:31:38.381872-04:00 + version: 4.0.4 +digest: sha256:926e89c882d860fabd502e9437853e03e0f34a46d9d5157a2a17335277f59bfc +generated: 
"2019-11-22T15:18:47.470062-05:00" diff --git a/deployments/with-creds/metrics/requirements.yaml b/deployments/with-creds/metrics/requirements.yaml index e07264c..6b68bb7 100644 --- a/deployments/with-creds/metrics/requirements.yaml +++ b/deployments/with-creds/metrics/requirements.yaml @@ -1,11 +1,11 @@ --- dependencies: - name: prometheus - version: 8.10.3 + version: 9.3.1 repository: https://kubernetes-charts.storage.googleapis.com/ condition: prometheus.enabled - name: grafana - version: 3.3.10 + version: 4.0.4 repository: https://kubernetes-charts.storage.googleapis.com/ condition: grafana.enabled diff --git a/deployments/with-creds/metrics/values.yaml b/deployments/with-creds/metrics/values.yaml index 312fe8e..4e73e23 100644 --- a/deployments/with-creds/metrics/values.yaml +++ b/deployments/with-creds/metrics/values.yaml @@ -10,14 +10,31 @@ tls: ## Configuration for the prometheus dependency. ## prometheus: - enabled: true - pushgateway: { enabled: false } - alertmanager: { enabled: false } + pushgateway: + enabled: false + alertmanager: + enabled: false nodeExporter: enabled: true + kubeStateMetrics: - nodeSelector: { cloud.google.com/gke-nodepool: generic-1 } enabled: true + nodeSelector: { cloud.google.com/gke-nodepool: generic-1 } + + server: + nodeSelector: { cloud.google.com/gke-nodepool: generic-1 } + persistentVolume: + enabled: true + size: 300Gi + storageClass: ssd + resources: + limits: + cpu: 2000m + memory: 8Gi + requests: + cpu: 2000m + memory: 8Gi + serverFiles: prometheus.yml: scrape_configs: @@ -45,20 +62,15 @@ prometheus: target_label: __address__ regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 - - action: labelmap - regex: __meta_kubernetes_service_label_(.+) - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - - source_labels: [__meta_kubernetes_service_name] + - source_labels: [__meta_kubernetes_pod_name] action: replace - target_label: kubernetes_name + target_label: kubernetes_pod_name - 
source_labels: [__meta_kubernetes_pod_node_name] action: replace target_label: kubernetes_node - - source_labels: [__meta_kubernetes_pod_name] - action: replace - target_label: kubernetes_pod_name - job_name: kubernetes-nodes-cadvisor scheme: https @@ -67,9 +79,13 @@ prometheus: insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: [{role: node}] + + metric_relabel_configs: + - source_labels: [namespace] + regex: '(ci|ci-pr|hush-house|vault|workers)' + action: keep + relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] @@ -85,8 +101,6 @@ prometheus: bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: [{role: node}] relabel_configs: - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] @@ -100,19 +114,6 @@ prometheus: rules: - record: node:node_num_cpu:sum expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode) - server: - nodeSelector: { cloud.google.com/gke-nodepool: generic-1 } - persistentVolume: - enabled: true - size: 30Gi - storageClass: ssd - resources: - limits: - cpu: 2000m - memory: 8Gi - requests: - cpu: 2000m - memory: 8Gi ## Configuration for the Grafana dependency.