From f9c69937564fae6f621e4c0ffc9071a464413b89 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 6 Dec 2023 14:24:20 +0530 Subject: [PATCH 01/18] API-Performance-Overview Dashboard --- .../api-performance-overview/panels.libsonnet | 61 ++++++ .../queries.libsonnet | 190 ++++++++++++++++++ .../variables.libsonnet | 75 +++++++ .../CPT/api-performance-overview-v2.jsonnet | 52 +++++ 4 files changed, 378 insertions(+) create mode 100644 assets/api-performance-overview/panels.libsonnet create mode 100644 assets/api-performance-overview/queries.libsonnet create mode 100644 assets/api-performance-overview/variables.libsonnet create mode 100644 templates/CPT/api-performance-overview-v2.jsonnet diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet new file mode 100644 index 0000000..5ceff92 --- /dev/null +++ b/assets/api-performance-overview/panels.libsonnet @@ -0,0 +1,61 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + timeSeries: { + local timeSeries = g.panel.timeSeries, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + /* base: time-series panel with the given title, unit, query targets and gridPos {x, y, w, h}; shared line-style defaults for every panel in this file. */ base(title, unit, targets, gridPos): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.datasource.withType('prometheus') /* targets are built with g.query.prometheus and $Datasource is a prometheus datasource variable, so the panel type must be prometheus */ + + timeSeries.datasource.withUid('$Datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withDrawStyle("line") + + custom.withLineInterpolation("linear") + + custom.withBarAlignment(0) + + custom.withLineWidth(1) + + custom.withFillOpacity(10) + + custom.withGradientMode("none") + + custom.withSpanNulls(false) + + custom.withPointSize(5) + + custom.withSpanNulls(false) /* repeats the withSpanNulls(false) two lines above; same value, kept so the hunk line count stays 61 */ + + custom.stacking.withGroup("A") + + custom.stacking.withMode("none") + +
custom.withShowPoints('never'), + + /* withCommonAggregations: base panel plus a table legend on the right showing the lastNotNull value and a multi-series tooltip. */ withCommonAggregations(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([ + 'lastNotNull' + ]) + + options.legend.withShowLegend(true) + + options.legend.withDisplayMode('table') + + options.legend.withPlacement('right') + + options.tooltip.withMode('multi'), + + /* withReadWriteSettings: base panel plus a list legend at the bottom and a multi-series tooltip. */ withReadWriteSettings(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.tooltip.withMode('multi') + + options.legend.withShowLegend(true) + + options.legend.withDisplayMode('list') + + options.legend.withPlacement('bottom') + + options.tooltip.withMode('multi'), /* NOTE(review): repeats the tooltip.withMode('multi') call four lines above with the same argument */ + + /* withRequestWaitDurationAggregations: withCommonAggregations with the legend calcs re-specified as mean, max and lastNotNull. */ withRequestWaitDurationAggregations(title, unit, targets, gridPos): + self.withCommonAggregations(title, unit, targets, gridPos) + + options.legend.withCalcs([ + 'mean', + 'max', + 'lastNotNull' + ]) + + + + } +} \ No newline at end of file diff --git a/assets/api-performance-overview/queries.libsonnet b/assets/api-performance-overview/queries.libsonnet new file mode 100644 index 0000000..4c2c7eb --- /dev/null +++ b/assets/api-performance-overview/queries.libsonnet @@ -0,0 +1,190 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; +local prometheus = g.query.prometheus; + +{ + /* Each entry exposes query(), returning one Prometheus target (or an array of targets) bound to the $Datasource variable. */ requestDuration99thQuantile: { + query(): + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",subresource!=\"log\",verb!~\"WATCH|WATCHLIST|PROXY\"}[$interval])) by(verb,le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{verb}}') + + prometheus.withDatasource('$Datasource') + }, + + requestRateByInstance: { + query(): +
prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\",verb=~\"$verb\"}[$interval])) by(instance)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}}') + + prometheus.withDatasource('$Datasource') + }, + + /* NOTE(review): "Duaration" is a misspelling; the dashboard template in this patch references this exact key, so rename both together. 99th-percentile request latency per resource. */ requestDuarationByResource: { + query(): + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",subresource!=\"log\",verb!~\"WATCH|WATCHLIST|PROXY\"}[$interval])) by(resource,le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{resource}}') + + prometheus.withDatasource('$Datasource') + }, + + /* NOTE(review): despite the name, this expression is the request rate grouped by resource (apiserver_request_total), not a latency quantile. */ requestDurationBy99Quatile: { + query(): + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\",verb=~\"$verb\"}[$interval])) by(resource)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{resource}}') + + prometheus.withDatasource('$Datasource') + }, + + /* Two targets: 99th-percentile latency for read verbs (LIST|GET) and for write verbs (POST|PUT|PATCH|UPDATE|DELETE). */ requestDurationReadWrite: { + query(): + [ prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"LIST|GET\"}[$interval])) by(le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('read') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"POST|PUT|PATCH|UPDATE|DELETE\"}[$interval])) by(le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) +
+ prometheus.withLegendFormat('write') + + prometheus.withDatasource('$Datasource')] + }, + + requestRateReadWrite: { + query(): + [prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"LIST|GET\"}[$interval]))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('read') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"POST|PUT|PATCH|UPDATE|DELETE\"}[$interval]))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('write') + + prometheus.withDatasource('$Datasource') + ] + }, + + requestRateDropped: { + query(): + prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{instance=~\"$instance\"}[$interval])) by (requestKind)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('') + + prometheus.withDatasource('$Datasource') + }, + + requestRateTerminated: { + query(): + prometheus.withExpr('sum(rate(apiserver_request_terminations_total{instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\"}[$interval])) by(component)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('') + + prometheus.withDatasource('$Datasource') + }, + + requestRateStatus: { + query(): + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\",code=~\"$code\"}[$interval])) by(code)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{code}}') + + prometheus.withDatasource('$Datasource') + }, + + requestsLongRunning: { + query(): + 
prometheus.withExpr('sum(apiserver_longrunning_gauge{instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\"}) by(instance)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}}') + + prometheus.withDatasource('$Datasource') + }, + + requestInFlight: { + query(): + prometheus.withExpr('sum(apiserver_current_inflight_requests{instance=~\"$instance\"}) by (instance,requestKind)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{requestKind}}-{{instance}}') + + prometheus.withDatasource('$Datasource') + }, + + requestRejectPandF: { + query(): + prometheus.withExpr('sum(rate(apiserver_flowcontrol_rejected_requests_total{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by (reason)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('') + + prometheus.withDatasource('$Datasource') + }, + + responseSize99Quatile: { + query(): + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_response_sizes_bucket{instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\"}[$interval])) by(instance,le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}}') + + prometheus.withDatasource('$Datasource') + }, + + requestQueueLengthPandF: { + query(): + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema, priorityLevel, le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + + prometheus.withDatasource('$Datasource') + }, + + requestWaitDuration99QuatilePandF: { + query(): + 
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~\"$instance\"}[$interval])) by(flow_schema, priority_level, le))') /* rate window follows the $interval dashboard variable, like every other rate() query in this file */ + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('') + + prometheus.withDatasource('$Datasource') + }, + + /* Rate of dispatched flow-control requests per flowSchema/priorityLevel. */ requestDispatchRatePandF: { + query(): + prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema,priorityLevel)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + + prometheus.withDatasource('$Datasource') + }, + + /* 99th-percentile flow-control request execution time per flowSchema/priorityLevel. */ requestExecutionDurationPandF: { + query(): + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema, priorityLevel, le))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + + prometheus.withDatasource('$Datasource') + }, + + /* Requests currently waiting in flow-control queues per flowSchema/priorityLevel. */ PendingInQueuePandF: { + query(): + prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}) by (flowSchema,priorityLevel)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + + prometheus.withDatasource('$Datasource') + }, + + /* Concurrency limit per instance and priorityLevel; the instance matcher is fixed to port 6443 rather than $instance. */ ConcurrencyLimitByKubeapiserverPandF: { + query(): + prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{instance=~\".*:6443\",priorityLevel=~\"$priorityLevel\"}) by (instance,priorityLevel)') + + prometheus.withFormat('time_series') + +
prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('') + + prometheus.withDatasource('$Datasource') + } + +} \ No newline at end of file diff --git a/assets/api-performance-overview/variables.libsonnet b/assets/api-performance-overview/variables.libsonnet new file mode 100644 index 0000000..b18374a --- /dev/null +++ b/assets/api-performance-overview/variables.libsonnet @@ -0,0 +1,75 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + /* Datasource: prometheus datasource picker; every query variable below is bound to it. */ Datasource: + var.datasource.new('Datasource','prometheus') + + var.datasource.withRegex("") + + var.query.generalOptions.withLabel('Datasource') + + var.query.withRefresh(1), + /* apiserver: values of the apiserver label on apiserver_request_duration_seconds_bucket. */ apiserver: + var.query.new('apiserver','label_values(apiserver_request_duration_seconds_bucket, apiserver)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('apiserver') /* label shown in the dashboard variable bar */ + + var.query.withRefresh(2), + + /* instance: values of the instance label on apiserver_request_total. */ instance: + var.query.new('instance','label_values(apiserver_request_total, instance)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('instance') + + var.query.withRefresh(2), + + /* resource: values of the resource label on apiserver_request_duration_seconds_bucket. */ resource: + var.query.new('resource','label_values(apiserver_request_duration_seconds_bucket, resource)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('resource') + + var.query.withRefresh(2), + + /* code: values of the code label across all metrics (label_values without a metric selector). */ code: + var.query.new('code','label_values(code)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('code') +
+ var.query.withRefresh(2), + + verb: + var.query.new('verb','label_values(verb)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('verb') + + var.query.withRefresh(2), + + flowSchema: + var.query.new('flowSchema','label_values(flowSchema)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('flow-schema') + + var.query.withRefresh(2), + + priorityLevel: + var.query.new('priorityLevel','label_values(priorityLevel)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('priority-level') + + var.query.withRefresh(2), + + interval: + var.interval.new('interval',['1m','5m']) + + var.query.withDatasourceFromVariable(self.Datasource) + + var.interval.generalOptions.withLabel('interval') + + var.interval.withAutoOption(count=30, minInterval='10s') + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + +} \ No newline at end of file diff --git a/templates/CPT/api-performance-overview-v2.jsonnet b/templates/CPT/api-performance-overview-v2.jsonnet new file mode 100644 index 0000000..3730714 --- /dev/null +++ b/templates/CPT/api-performance-overview-v2.jsonnet @@ -0,0 +1,52 @@ +local panels = import '../../assets/api-performance-overview/panels.libsonnet'; +local queries = import '../../assets/api-performance-overview/queries.libsonnet'; +local variables = import '../../assets/api-performance-overview/variables.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +g.dashboard.new('API-Performance') ++ 
g.dashboard.withDescription(||| + Dashboard for Api-performance-overview +|||) ++ g.dashboard.withTags('Api-performance') ++ g.dashboard.time.withFrom('now-1h') ++ g.dashboard.time.withTo('now') ++ g.dashboard.withTimezone('utc') ++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) ++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) ++ g.dashboard.withRefresh('30s') ++ g.dashboard.withEditable(true) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([ + variables.Datasource, + variables.apiserver, + variables.instance, + variables.resource, + variables.code, + variables.verb, + variables.flowSchema, + variables.priorityLevel, + variables.interval, +]) + ++ g.dashboard.withPanels([ /* panels are laid out two per row: 12-wide, 8-high cells on a 24-column grid */ + + panels.timeSeries.withCommonAggregations('request duration - 99th quantile', 'short', queries.requestDuration99thQuantile.query(), { x: 0, y: 0, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('request rate - by instance', 'short', queries.requestRateByInstance.query(), { x: 12, y: 0, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('request duration - 99th quantile - by resource', 'short', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('request rate - by resource', 'short', queries.requestDurationBy99Quatile.query(), { x: 12, y: 8, w: 12, h: 8 }), /* this query is sum(rate(apiserver_request_total)) by(resource), a rate rather than a latency quantile, so the title says so */ + panels.timeSeries.withReadWriteSettings('request duration - read vs write', 'short', queries.requestDurationReadWrite.query(), { x: 0, y: 16, w: 12, h: 8 }), + panels.timeSeries.withReadWriteSettings('request rate - read vs write', 'short', queries.requestRateReadWrite.query(), { x: 12, y: 16, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('requests dropped rate', 'short', queries.requestRateDropped.query(), { x: 0, y: 24, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('requests
terminated rate', 'short', queries.requestRateTerminated.query(), { x: 12, y: 24, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('requests status rate', 'short', queries.requestRateStatus.query(), { x: 0, y: 32, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('long running requests', 'short', queries.requestsLongRunning.query(), { x: 12, y: 32, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('request in flight', 'short', queries.requestInFlight.query(), { x: 0, y: 40, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - requests rejected', 'short', queries.requestRejectPandF.query(), { x: 12, y: 40, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('response size - 99th quantile', 'short', queries.responseSize99Quatile.query(), { x: 0, y: 48, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - request queue length', 'short', queries.requestQueueLengthPandF.query(), { x: 12, y: 48, w: 12, h: 8 }), + panels.timeSeries.withRequestWaitDurationAggregations('p&f - request wait duration - 99th quantile', 'short', queries.requestWaitDuration99QuatilePandF.query(), { x: 0, y: 56, w: 24, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - request dispatch rate', 'short', queries.requestDispatchRatePandF.query(), { x: 0, y: 64, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - pending in queue', 'short', queries.PendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - concurrency limit by kube-apiserver', 'short', queries.ConcurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), +]) \ No newline at end of file From 2f092c6840ca291ddbaee28c56386122c7429430 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Mon, 18 Dec 2023 15:11:13 +0530 Subject: 
[PATCH 02/18] Cilium K8s Performance Dashboard --- assets/cilium-k8s-perf/panels.libsonnet | 92 ++++++ assets/cilium-k8s-perf/queries.libsonnet | 364 +++++++++++++++++++++ assets/cilium-k8s-perf/variables.libsonnet | 53 +++ templates/CPT/cilium-k8s-perf-v2.jsonnet | 73 +++++ 4 files changed, 582 insertions(+) create mode 100644 assets/cilium-k8s-perf/panels.libsonnet create mode 100644 assets/cilium-k8s-perf/queries.libsonnet create mode 100644 assets/cilium-k8s-perf/variables.libsonnet create mode 100644 templates/CPT/cilium-k8s-perf-v2.jsonnet diff --git a/assets/cilium-k8s-perf/panels.libsonnet b/assets/cilium-k8s-perf/panels.libsonnet new file mode 100644 index 0000000..b7d9d6b --- /dev/null +++ b/assets/cilium-k8s-perf/panels.libsonnet @@ -0,0 +1,92 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + timeSeries: { + local timeSeries = g.panel.timeSeries, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + /* base: time-series panel with the given title, unit, query targets and gridPos {x, y, w, h}; shared line-style defaults for the aggregations below. */ base(title, unit, targets, gridPos): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.datasource.withType('prometheus') /* targets are built with g.query.prometheus and $Datasource is a prometheus datasource variable, so the panel type must be prometheus */ + + timeSeries.datasource.withUid('$Datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withDrawStyle("line") + + custom.withLineInterpolation("linear") + + custom.withBarAlignment(0) + + custom.withLineWidth(1) + + custom.withFillOpacity(10) + + custom.withGradientMode("none") + + custom.withSpanNulls(false) + + custom.withPointSize(5) + + custom.withSpanNulls(false) /* repeats the withSpanNulls(false) two lines above; same value, kept so the hunk line count stays 92 */ + + custom.stacking.withMode("none") + + custom.withShowPoints('never'), + + /* withCiliumAgg: base panel plus a descending multi-series tooltip and a bottom table legend (calcs continue on the following lines). */ withCiliumAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + +
options.legend.withShowLegend(true) + + options.legend.withPlacement('bottom') + + options.legend.withDisplayMode('table') + + options.legend.withCalcs([ + 'mean', + 'max' + ]), + + /* withClusterAgg: base panel plus a descending multi-series tooltip and a bottom table legend with no calcs. */ withClusterAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + + options.legend.withShowLegend(true) + + options.legend.withPlacement('bottom') + + options.legend.withDisplayMode('table') + + options.legend.withCalcs([]) + + + + + + + + + + }, + + stat: { + local stat = g.panel.stat, + local options = stat.options, + + /* base: stat panel with the given title, unit, query targets and gridPos {x, y, w, h}. */ base(title, unit, targets, gridPos): + stat.new(title) + + stat.datasource.withType('prometheus') /* targets are built with g.query.prometheus and $Datasource is a prometheus datasource variable, so the panel type must be prometheus */ + + stat.datasource.withUid('$Datasource') + + stat.standardOptions.withUnit(unit) + + stat.queryOptions.withTargets(targets) + + stat.gridPos.withX(gridPos.x) + + stat.gridPos.withY(gridPos.y) + + stat.gridPos.withH(gridPos.h) + + stat.gridPos.withW(gridPos.w) + + options.withJustifyMode("auto") + + options.withGraphMode("area") + + options.text.withTitleSize(12), + + + /* withclusterAgg: stat base reduced with the 'last' calc and an empty threshold step list. */ withclusterAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.reduceOptions.withCalcs([ + 'last', + ]) + + stat.standardOptions.thresholds.withSteps([]), + + } + + + +} \ No newline at end of file diff --git a/assets/cilium-k8s-perf/queries.libsonnet b/assets/cilium-k8s-perf/queries.libsonnet new file mode 100644 index 0000000..6e76273 --- /dev/null +++ b/assets/cilium-k8s-perf/queries.libsonnet @@ -0,0 +1,364 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; +local prometheus = g.query.prometheus; + +{ + ciliumControllerFailures: { + query(): + prometheus.withExpr('cilium_controllers_failing') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') + + prometheus.withDatasource('$Datasource')
+ }, + + /* Raw cilium_ip_addresses gauge, one series per pod and address family. */ ciliumIPAddressAllocation: { + query(): + prometheus.withExpr('cilium_ip_addresses') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ pod }} - {{ family }}') + + prometheus.withDatasource('$Datasource') + }, + + /* CPU usage (percent) of cilium containers, excluding cilium-operator. */ ciliumContainerCPU: { + query(): + prometheus.withExpr('sum(irate(container_cpu_usage_seconds_total{container=~\"cilium.*\",container!~\"cilium-operator.*\",namespace!=\"\"}[$interval])) by (instance,pod,container,namespace,name,service) * 100') /* the exclusion uses the regex matcher !~ ; the equality matcher != would compare against the literal text and exclude nothing */ + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') + + prometheus.withDatasource('$Datasource') + }, + + /* RSS memory of cilium containers. */ ciliumConatinerMemory: { + query(): + prometheus.withExpr('container_memory_rss{container=~\"cilium.*\",namespace!=\"\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') + + prometheus.withDatasource('$Datasource') + }, + + ciliumNetworkPolicesPerAgent: { + query(): + prometheus.withExpr('cilium_policy') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') + + prometheus.withDatasource('$Datasource') + }, + + /* BPF map operation rate over a fixed 2m window. */ ciliumBPFOperations: { + query(): + prometheus.withExpr('sum by (instance,map_name,operation,outcome)(rate(cilium_bpf_map_ops_total[2m]))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}} - {{map_name}} - {{operation}}') + + prometheus.withDatasource('$Datasource') + }, + + currentNodeCount: { + query(): + [ + prometheus.withExpr('sum(kube_node_info{})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Number of nodes') + + prometheus.withDatasource('$Datasource') , + prometheus.withExpr('sum(kube_node_status_condition{status=\"true\"})
by (condition) > 0') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Node: {{ condition }}') + + prometheus.withDatasource('$Datasource') + ] + }, + + currentNamespaceCount: { + query(): + prometheus.withExpr('sum(kube_namespace_status_phase) by (phase)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ phase }}') + + prometheus.withDatasource('$Datasource') + }, + + currentPodCount: { + query(): + prometheus.withExpr('sum(kube_pod_status_phase{}) by (phase) > 0') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ phase }} Pods') + + prometheus.withDatasource('$Datasource') + }, + + numberOfNodes: { + query(): + [ + prometheus.withExpr('sum(kube_node_info{})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Number of nodes') + + prometheus.withDatasource('$Datasource'), + prometheus.withExpr('sum(kube_node_status_condition{status=\"true\"}) by (condition) > 0') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Node: {{ condition }}') + + prometheus.withDatasource('$Datasource') + ] + }, + + namespaceCount: { + query(): + prometheus.withExpr('sum(kube_namespace_status_phase) by (phase) > 0') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ phase }} namespaces') + + prometheus.withDatasource('$Datasource') + }, + + podCount: { + query(): + prometheus.withExpr('sum(kube_pod_status_phase{}) by (phase)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{phase}} pods') + + prometheus.withDatasource('$Datasource') + }, + + secretConfigmapCount: { + query(): + [prometheus.withExpr('count(kube_secret_info{})') + + 
prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('secrets') + + prometheus.withDatasource('$Datasource'), + prometheus.withExpr('count(kube_configmap_info{})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Configmaps') + + prometheus.withDatasource('$Datasource') + ] + }, + + deploymentCount: { + query(): + + prometheus.withExpr('count(kube_deployment_labels{})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Deployments') + + prometheus.withDatasource('$Datasource') + }, + + serviceCount: { + query(): + prometheus.withExpr('count(kube_service_info{})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Services') + + prometheus.withDatasource('$Datasource') + }, + + top10ContainerRSS: { + query(): + prometheus.withExpr('topk(10, container_memory_rss{namespace!=\"\",container!=\"POD\",name!=\"\"})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ namespace }} - {{ name }}') + + prometheus.withDatasource('$Datasource') + }, + + top10ContainerCPU: { + query(): + prometheus.withExpr('topk(10,irate(container_cpu_usage_seconds_total{namespace!=\"\",container!=\"POD\",name!=\"\"}[$interval])*100)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ namespace }} - {{ name }}') + + prometheus.withDatasource('$Datasource') + }, + + goroutinesCount: { + query(): + prometheus.withExpr('topk(10, sum(go_goroutines{}) by (job,instance))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ job }} - {{ instance }}') + + prometheus.withDatasource('$Datasource') + }, + + podDistribution: { + query(): + prometheus.withExpr('count(kube_pod_info{}) by 
(exported_node)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ exported_node }}') /* legend label must be the label kept by the by (exported_node) grouping */ + + prometheus.withDatasource('$Datasource') + }, + + /* CPU time percentage per instance and mode for the selected worker node(s). */ CPUBasic: { + query(): + prometheus.withExpr('sum by (instance, mode)(rate(node_cpu_seconds_total{node=~\"$_worker_node\",job=~\".*\"}[$interval])) * 100') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Busy {{mode}}') + + prometheus.withDatasource('$Datasource') + }, + + /* Four memory series for the selected worker node(s); each legend names the metric its expression reads. */ SystemMemory: { + query(): + [ + prometheus.withExpr('node_memory_Active_bytes{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Active') + + prometheus.withDatasource('$Datasource') , + + prometheus.withExpr('node_memory_MemTotal_bytes{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Total') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('node_memory_Cached_bytes{node=~\"$_worker_node\"} + node_memory_Buffers_bytes{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Cached + Buffers') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('node_memory_MemAvailable_bytes{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('Available') + + prometheus.withDatasource('$Datasource') + + ] + }, + + /* Read and write byte rates per block device on the selected worker node(s). */ DiskThroughput: { + query(): + [ + prometheus.withExpr('rate(node_disk_read_bytes_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ device }} - read') + + prometheus.withDatasource('$Datasource') , + +
prometheus.withExpr('rate(node_disk_written_bytes_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ device }} - write') + + prometheus.withDatasource('$Datasource') + ] + }, + + DiskIOPS: { + query(): + [ + prometheus.withExpr('rate(node_disk_reads_completed_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ device }} - read') + + prometheus.withDatasource('$Datasource') , + + prometheus.withExpr('rate(node_disk_writes_completed_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ device }} - write') + + prometheus.withDatasource('$Datasource') + ] + }, + + networkUtilization: { + query(): + [ + prometheus.withExpr('rate(node_network_receive_bytes_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval]) * 8') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}} - {{device}} - RX') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('rate(node_network_transmit_bytes_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval]) * 8') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}} - {{device}} - TX') + + prometheus.withDatasource('$Datasource') + ] + }, + + networkPackets: { + query(): + [ + prometheus.withExpr('rate(node_network_receive_packets_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}} - {{device}} - RX') + + 
prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('rate(node_network_transmit_packets_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval])') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{instance}} - {{device}} - TX') + + prometheus.withDatasource('$Datasource') + + + ] + }, + + networkPacketDrop: { + query(): + [ + prometheus.withExpr('topk(10, rate(node_network_receive_drop_total{node=~\"$_worker_node\"}[$interval]))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('rx-drop-{{ device }}') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('topk(10,rate(node_network_transmit_drop_total{node=~\"$_worker_node\"}[$interval]))') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('tx-drop-{{ device }}') + + prometheus.withDatasource('$Datasource') + ] + }, + + conntrackStats: { + query(): + [ + prometheus.withExpr('node_nf_conntrack_entries{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('conntrack_entries') + + prometheus.withDatasource('$Datasource'), + + prometheus.withExpr('node_nf_conntrack_entries_limit{node=~\"$_worker_node\"}') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('conntrack_limit') + + prometheus.withDatasource('$Datasource') + ] + }, + + top10ContainerCPUNode: { + query(): + prometheus.withExpr('topk(10, sum(irate(container_cpu_usage_seconds_total{container!=\"POD\",name!=\"\",instance=~\"$_worker_node\",namespace!=\"\",namespace=~\"$namespace\"}[$interval])) by (pod,container,namespace,name,service) * 100)') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ pod }}: {{ container }}') + + 
prometheus.withDatasource('$Datasource') + }, + + top10ContainerRSSNode: { + query(): + prometheus.withExpr('topk(10, container_memory_rss{container!=\"POD\",name!=\"\",instance=~\"$_worker_node\",namespace!=\"\",namespace=~\"$namespace\"})') + + prometheus.withFormat('time_series') + + prometheus.withIntervalFactor(2) + + prometheus.withLegendFormat('{{ pod }}: {{ container }}') + + prometheus.withDatasource('$Datasource') + }, + + + + + + +} \ No newline at end of file diff --git a/assets/cilium-k8s-perf/variables.libsonnet b/assets/cilium-k8s-perf/variables.libsonnet new file mode 100644 index 0000000..049d303 --- /dev/null +++ b/assets/cilium-k8s-perf/variables.libsonnet @@ -0,0 +1,53 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + Datasource: + var.datasource.new('Datasource','prometheus') + + var.datasource.withRegex("") + + var.query.generalOptions.withLabel('Datasource') + + var.query.withRefresh(1), + + _worker_node: + var.query.new('_worker_node','label_values(kube_node_labels{}, exported_node)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.generalOptions.withLabel('Worker') + + var.query.withRefresh(2), + + namespace: + var.query.new('namespace','label_values(kube_pod_info, exported_namespace)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('Namespace') + + var.query.withRefresh(2), + + block_device: + var.query.new('block_device','label_values(node_disk_written_bytes_total,device)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.datasource.withRegex('/^(?:(?!dm|rb).)*$/') + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(true) + + 
var.query.generalOptions.withLabel('Block device') + + var.query.withRefresh(2), + + net_device: + var.query.new('net_device','label_values(node_network_receive_bytes_total,device)') + + var.query.withDatasourceFromVariable(self.Datasource) + + var.datasource.withRegex('/^((br|en|et).*)$/') + + var.query.selectionOptions.withMulti(true) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.generalOptions.withLabel('Network device') + + var.query.withRefresh(2), + + interval: + var.interval.new('interval',['2m','3m','4m','5m']) + + var.query.withDatasourceFromVariable(self.Datasource) + + var.interval.generalOptions.withLabel('interval') + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(true) + +} \ No newline at end of file diff --git a/templates/CPT/cilium-k8s-perf-v2.jsonnet b/templates/CPT/cilium-k8s-perf-v2.jsonnet new file mode 100644 index 0000000..902c692 --- /dev/null +++ b/templates/CPT/cilium-k8s-perf-v2.jsonnet @@ -0,0 +1,73 @@ +local panels = import '../../assets/cilium-k8s-perf/panels.libsonnet'; +local queries = import '../../assets/cilium-k8s-perf/queries.libsonnet'; +local variables = import '../../assets/cilium-k8s-perf/variables.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +g.dashboard.new('Cilium k8s Performance') ++ g.dashboard.time.withFrom('now-1h') ++ g.dashboard.time.withTo('now') ++ g.dashboard.withTimezone('utc') ++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) ++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) ++ g.dashboard.withRefresh('30s') ++ g.dashboard.withEditable(true) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([ + variables.Datasource, + variables._worker_node, + variables.namespace, + variables.block_device, + variables.net_device, + 
variables.interval, +]) + ++ g.dashboard.withPanels([ + g.panel.row.new('Cilium Details') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.withCiliumAgg('Cilium Controller Failures', 'none', queries.ciliumControllerFailures.query(), { x: 0, y: 1, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Cilium IP Address Allocation', 'none', queries.ciliumIPAddressAllocation.query(), { x: 12, y: 1, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Cilium Container CPU', 'percent', queries.ciliumContainerCPU.query(), { x: 0, y: 9, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Cilium Container Memory', 'bytes', queries.ciliumConatinerMemory.query(), { x: 12, y: 9, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Cilium Network Polices Per Agent', 'none', queries.ciliumNetworkPolicesPerAgent.query(), { x: 0, y: 17, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Cilium BPF Operations', 'none', queries.ciliumBPFOperations.query(), { x: 12, y: 17, w: 12, h: 8 }), + ]), + g.panel.row.new('Cluster Details') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.stat.withclusterAgg('Current Node Count', 'none', queries.currentNodeCount.query(), { x: 0, y: 26, w: 8, h: 3 }), + panels.stat.withclusterAgg('Current namespace Count', 'none', queries.currentNamespaceCount.query(), { x: 8, y: 26, w: 8, h: 3 }), + panels.stat.withclusterAgg('Current Pod Count', 'none', queries.currentPodCount.query(), { x: 16, y: 26, w: 8, h: 3 }), + panels.timeSeries.withClusterAgg('Number of nodes', 'none', queries.numberOfNodes.query(), { x: 0, y: 29, w: 8, h: 8 }), + panels.timeSeries.withClusterAgg('Namespace count', 'none', queries.namespaceCount.query(), { x: 8, y: 29, w: 8, h: 8 }), + panels.timeSeries.withClusterAgg('Pod count', 'none', queries.podCount.query(), { x: 16, y: 29, w: 8, h: 8 }), + 
panels.timeSeries.withClusterAgg('Secret & configmap count', 'none', queries.secretConfigmapCount.query(), { x: 0, y: 37, w: 8, h: 8 }), + panels.timeSeries.withClusterAgg('Deployment count', 'none', queries.deploymentCount.query(), { x: 8, y: 37, w: 8, h: 8 }), + panels.timeSeries.withClusterAgg('Services count', 'none', queries.serviceCount.query(), { x: 16, y: 37, w: 8, h: 8 }), + panels.timeSeries.withCiliumAgg('Top 10 container RSS', 'bytes', queries.top10ContainerRSS.query(), { x: 0, y: 45, w: 24, h: 8 }), + panels.timeSeries.withCiliumAgg('Top 10 container CPU', 'percent', queries.top10ContainerCPU.query(), { x: 0, y: 53, w: 12, h: 8 }), + panels.timeSeries.withClusterAgg('Goroutines count', 'none', queries.goroutinesCount.query(), { x: 12, y: 53, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Pod Distribution', 'none', queries.podDistribution.query(), { x: 0, y: 61, w: 24, h: 8 }), + ]), + + g.panel.row.new('Node: $_worker_node') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withRepeat('_worker_node') + + g.panel.row.withPanels([ + panels.timeSeries.withCiliumAgg('CPU Basic: $_worker_node', 'percent', queries.CPUBasic.query(), { x: 0, y: 70, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('System Memory: $_worker_node', 'bytes', queries.SystemMemory.query(), { x: 12, y: 70, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Disk throughput: $_worker_node', 'Bps', queries.DiskThroughput.query(), { x: 0, y: 78, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Disk IOPS: $_worker_node', 'iops', queries.DiskIOPS.query(), { x: 12, y: 78, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Network Utilization: $_worker_node', 'bps', queries.networkUtilization.query(), { x: 0, y: 86, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Network Packets: $_worker_node', 'pps', queries.networkPackets.query(), { x: 12, y: 86, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Network packets drop: 
$_worker_node', 'pps', queries.networkPacketDrop.query(), { x: 0, y: 94, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Conntrack stats: $_worker_node', '', queries.conntrackStats.query(), { x: 12, y: 94, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Top 10 container CPU: $_worker_node', 'percent', queries.top10ContainerCPUNode.query(), { x: 0, y: 102, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Top 10 container RSS: $_worker_node', 'bytes', queries.top10ContainerRSSNode.query(), { x: 12, y: 102, w: 12, h: 8 }), + ]), + + +]) From 602a159cf2d17f744a9ab365bc0776897b81a42c Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 20 Dec 2023 15:56:41 +0530 Subject: [PATCH 03/18] Cilium k8s performance dashboard changes updated --- assets/cilium-k8s-perf/panels.libsonnet | 14 ------ assets/cilium-k8s-perf/queries.libsonnet | 64 +++++++++++------------- templates/CPT/cilium-k8s-perf-v2.jsonnet | 9 ++-- 3 files changed, 33 insertions(+), 54 deletions(-) diff --git a/assets/cilium-k8s-perf/panels.libsonnet b/assets/cilium-k8s-perf/panels.libsonnet index b7d9d6b..76ba885 100644 --- a/assets/cilium-k8s-perf/panels.libsonnet +++ b/assets/cilium-k8s-perf/panels.libsonnet @@ -48,15 +48,6 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + options.legend.withPlacement('bottom') + options.legend.withDisplayMode('table') + options.legend.withCalcs([]) - - - - - - - - - }, stat: { @@ -77,16 +68,11 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + options.withGraphMode("area") + options.text.withTitleSize(12), - withclusterAgg(title, unit, targets, gridPos): self.base(title, unit, targets, gridPos) + options.reduceOptions.withCalcs([ 'last', ]) + stat.standardOptions.thresholds.withSteps([]), - } - - - } \ No newline at end of file diff --git a/assets/cilium-k8s-perf/queries.libsonnet b/assets/cilium-k8s-perf/queries.libsonnet index 6e76273..9ca634c 100644 --- 
a/assets/cilium-k8s-perf/queries.libsonnet +++ b/assets/cilium-k8s-perf/queries.libsonnet @@ -23,7 +23,7 @@ local prometheus = g.query.prometheus; ciliumContainerCPU: { query(): - prometheus.withExpr('sum(irate(container_cpu_usage_seconds_total{container=~\"cilium.*\",container!=\"cilium-operator.*\",namespace!=\"\"}[$interval])) by (instance,pod,container,namespace,name,service) * 100') + prometheus.withExpr('sum(irate(container_cpu_usage_seconds_total{container=~"cilium.*",container!="cilium-operator.*",namespace!=""}[$interval])) by (instance,pod,container,namespace,name,service) * 100') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') @@ -32,7 +32,7 @@ local prometheus = g.query.prometheus; ciliumConatinerMemory: { query(): - prometheus.withExpr('container_memory_rss{container=~\"cilium.*\",namespace!=\"\"}') + prometheus.withExpr('container_memory_rss{container=~"cilium.*",namespace!=""}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ instance }} - {{ pod }}') @@ -65,7 +65,8 @@ local prometheus = g.query.prometheus; + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Number of nodes') + prometheus.withDatasource('$Datasource') , - prometheus.withExpr('sum(kube_node_status_condition{status=\"true\"}) by (condition) > 0') + + prometheus.withExpr('sum(kube_node_status_condition{status="true"}) by (condition) > 0') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Node: {{ condition }}') @@ -99,7 +100,8 @@ local prometheus = g.query.prometheus; + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Number of nodes') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('sum(kube_node_status_condition{status=\"true\"}) by (condition) > 0') + + prometheus.withExpr('sum(kube_node_status_condition{status="true"}) by (condition) > 0') 
+ prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Node: {{ condition }}') @@ -161,7 +163,7 @@ local prometheus = g.query.prometheus; top10ContainerRSS: { query(): - prometheus.withExpr('topk(10, container_memory_rss{namespace!=\"\",container!=\"POD\",name!=\"\"})') + prometheus.withExpr('topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ namespace }} - {{ name }}') @@ -170,7 +172,7 @@ local prometheus = g.query.prometheus; top10ContainerCPU: { query(): - prometheus.withExpr('topk(10,irate(container_cpu_usage_seconds_total{namespace!=\"\",container!=\"POD\",name!=\"\"}[$interval])*100)') + prometheus.withExpr('topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ namespace }} - {{ name }}') @@ -197,35 +199,35 @@ local prometheus = g.query.prometheus; CPUBasic: { query(): - prometheus.withExpr('sum by (instance, mode)(rate(node_cpu_seconds_total{node=~\"$_worker_node\",job=~\".*\"}[$interval])) * 100') + prometheus.withExpr('sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"$_worker_node",job=~".*"}[$interval])) * 100') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Busy {{mode}}') + prometheus.withDatasource('$Datasource') }, - SystemMemory: { + systemMemory: { query(): [ - prometheus.withExpr('node_memory_Active_bytes{node=~\"$_worker_node\"}') + prometheus.withExpr('node_memory_Active_bytes{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Active') + prometheus.withDatasource('$Datasource') , - prometheus.withExpr('node_memory_MemTotal_bytes{node=~\"$_worker_node\"}') + 
prometheus.withExpr('node_memory_MemTotal_bytes{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Total') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('node_memory_Cached_bytes{node=~\"$_worker_node\"} + node_memory_Buffers_bytes{node=~\"$_worker_node\"}') + prometheus.withExpr('node_memory_Cached_bytes{node=~"$_worker_node"} + node_memory_Buffers_bytes{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Total') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('node_memory_MemAvailable_bytes{node=~\"$_worker_node\"}') + prometheus.withExpr('node_memory_MemAvailable_bytes{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('Total') @@ -234,16 +236,16 @@ local prometheus = g.query.prometheus; ] }, - DiskThroughput: { + diskThroughput: { query(): [ - prometheus.withExpr('rate(node_disk_read_bytes_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + prometheus.withExpr('rate(node_disk_read_bytes_total{device=~"$block_device",node=~"$_worker_node"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ device }} - read') + prometheus.withDatasource('$Datasource') , - prometheus.withExpr('rate(node_disk_written_bytes_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + prometheus.withExpr('rate(node_disk_written_bytes_total{device=~"$block_device",node=~"$_worker_node"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ device }} - write') @@ -251,16 +253,16 @@ local prometheus = g.query.prometheus; ] }, - DiskIOPS: { + diskIOPS: { query(): [ - 
prometheus.withExpr('rate(node_disk_reads_completed_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + prometheus.withExpr('rate(node_disk_reads_completed_total{device=~"$block_device",node=~"$_worker_node"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ device }} - read') + prometheus.withDatasource('$Datasource') , - prometheus.withExpr('rate(node_disk_writes_completed_total{device=~\"$block_device\",node=~\"$_worker_node\"}[$interval])') + prometheus.withExpr('rate(node_disk_writes_completed_total{device=~"$block_device",node=~"$_worker_node"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ device }} - write') @@ -271,13 +273,13 @@ local prometheus = g.query.prometheus; networkUtilization: { query(): [ - prometheus.withExpr('rate(node_network_receive_bytes_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval]) * 8') + prometheus.withExpr('rate(node_network_receive_bytes_total{node=~"$_worker_node",device=~"$net_device"}[$interval]) * 8') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}} - {{device}} - RX') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('rate(node_network_transmit_bytes_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval]) * 8') + prometheus.withExpr('rate(node_network_transmit_bytes_total{node=~"$_worker_node",device=~"$net_device"}[$interval]) * 8') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}} - {{device}} - TX') @@ -288,13 +290,13 @@ local prometheus = g.query.prometheus; networkPackets: { query(): [ - prometheus.withExpr('rate(node_network_receive_packets_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval])') + 
prometheus.withExpr('rate(node_network_receive_packets_total{node=~"$_worker_node",device=~"$net_device"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}} - {{device}} - RX') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('rate(node_network_transmit_packets_total{node=~\"$_worker_node\",device=~\"$net_device\"}[$interval])') + prometheus.withExpr('rate(node_network_transmit_packets_total{node=~"$_worker_node",device=~"$net_device"}[$interval])') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}} - {{device}} - TX') @@ -307,13 +309,13 @@ local prometheus = g.query.prometheus; networkPacketDrop: { query(): [ - prometheus.withExpr('topk(10, rate(node_network_receive_drop_total{node=~\"$_worker_node\"}[$interval]))') + prometheus.withExpr('topk(10, rate(node_network_receive_drop_total{node=~"$_worker_node"}[$interval]))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('rx-drop-{{ device }}') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('topk(10,rate(node_network_transmit_drop_total{node=~\"$_worker_node\"}[$interval]))') + prometheus.withExpr('topk(10,rate(node_network_transmit_drop_total{node=~"$_worker_node"}[$interval]))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('tx-drop-{{ device }}') @@ -324,13 +326,13 @@ local prometheus = g.query.prometheus; conntrackStats: { query(): [ - prometheus.withExpr('node_nf_conntrack_entries{node=~\"$_worker_node\"}') + prometheus.withExpr('node_nf_conntrack_entries{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('conntrack_entries') + prometheus.withDatasource('$Datasource'), - 
prometheus.withExpr('node_nf_conntrack_entries_limit{node=~\"$_worker_node\"}') + prometheus.withExpr('node_nf_conntrack_entries_limit{node=~"$_worker_node"}') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('conntrack_limit') @@ -340,7 +342,7 @@ local prometheus = g.query.prometheus; top10ContainerCPUNode: { query(): - prometheus.withExpr('topk(10, sum(irate(container_cpu_usage_seconds_total{container!=\"POD\",name!=\"\",instance=~\"$_worker_node\",namespace!=\"\",namespace=~\"$namespace\"}[$interval])) by (pod,container,namespace,name,service) * 100)') + prometheus.withExpr('topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD", instance=~"$_worker_node", namespace=~"$namespace"}[$interval])) by (pod, container) * 100)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ pod }}: {{ container }}') @@ -349,16 +351,10 @@ local prometheus = g.query.prometheus; top10ContainerRSSNode: { query(): - prometheus.withExpr('topk(10, container_memory_rss{container!=\"POD\",name!=\"\",instance=~\"$_worker_node\",namespace!=\"\",namespace=~\"$namespace\"})') + prometheus.withExpr('topk(10, container_memory_rss{container!="POD",name!="",instance=~"$_worker_node",namespace!="",namespace=~"$namespace"})') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{ pod }}: {{ container }}') + prometheus.withDatasource('$Datasource') }, - - - - - - } \ No newline at end of file diff --git a/templates/CPT/cilium-k8s-perf-v2.jsonnet b/templates/CPT/cilium-k8s-perf-v2.jsonnet index 902c692..65f162b 100644 --- a/templates/CPT/cilium-k8s-perf-v2.jsonnet +++ b/templates/CPT/cilium-k8s-perf-v2.jsonnet @@ -51,16 +51,15 @@ g.dashboard.new('Cilium k8s Performance') panels.timeSeries.withClusterAgg('Goroutines count', 'none', queries.goroutinesCount.query(), { x: 12, y: 53, w: 12, h: 8 }), 
panels.timeSeries.withCiliumAgg('Pod Distribution', 'none', queries.podDistribution.query(), { x: 0, y: 61, w: 24, h: 8 }), ]), - g.panel.row.new('Node: $_worker_node') + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + g.panel.row.withCollapsed(true) + g.panel.row.withRepeat('_worker_node') + g.panel.row.withPanels([ panels.timeSeries.withCiliumAgg('CPU Basic: $_worker_node', 'percent', queries.CPUBasic.query(), { x: 0, y: 70, w: 12, h: 8 }), - panels.timeSeries.withCiliumAgg('System Memory: $_worker_node', 'bytes', queries.SystemMemory.query(), { x: 12, y: 70, w: 12, h: 8 }), - panels.timeSeries.withCiliumAgg('Disk throughput: $_worker_node', 'Bps', queries.DiskThroughput.query(), { x: 0, y: 78, w: 12, h: 8 }), - panels.timeSeries.withCiliumAgg('Disk IOPS: $_worker_node', 'iops', queries.DiskIOPS.query(), { x: 12, y: 78, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('System Memory: $_worker_node', 'bytes', queries.systemMemory.query(), { x: 12, y: 70, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Disk throughput: $_worker_node', 'Bps', queries.diskThroughput.query(), { x: 0, y: 78, w: 12, h: 8 }), + panels.timeSeries.withCiliumAgg('Disk IOPS: $_worker_node', 'iops', queries.diskIOPS.query(), { x: 12, y: 78, w: 12, h: 8 }), panels.timeSeries.withCiliumAgg('Network Utilization: $_worker_node', 'bps', queries.networkUtilization.query(), { x: 0, y: 86, w: 12, h: 8 }), panels.timeSeries.withCiliumAgg('Network Packets: $_worker_node', 'pps', queries.networkPackets.query(), { x: 12, y: 86, w: 12, h: 8 }), panels.timeSeries.withCiliumAgg('Network packets drop: $_worker_node', 'pps', queries.networkPacketDrop.query(), { x: 0, y: 94, w: 12, h: 8 }), @@ -68,6 +67,4 @@ g.dashboard.new('Cilium k8s Performance') panels.timeSeries.withCiliumAgg('Top 10 container CPU: $_worker_node', 'percent', queries.top10ContainerCPUNode.query(), { x: 0, y: 102, w: 12, h: 8 }), panels.timeSeries.withCiliumAgg('Top 10 container RSS: $_worker_node', 'bytes', 
queries.top10ContainerRSSNode.query(), { x: 12, y: 102, w: 12, h: 8 }), ]), - - ]) From 98c259c3320f3f6aea609806fa5418c9fb01ce18 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 20 Dec 2023 17:25:37 +0530 Subject: [PATCH 04/18] api-performance dashboard changes update --- .../queries.libsonnet | 47 +++++++++---------- .../variables.libsonnet | 1 - .../CPT/api-performance-overview-v2.jsonnet | 4 +- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/assets/api-performance-overview/queries.libsonnet b/assets/api-performance-overview/queries.libsonnet index 4c2c7eb..6f871db 100644 --- a/assets/api-performance-overview/queries.libsonnet +++ b/assets/api-performance-overview/queries.libsonnet @@ -5,7 +5,7 @@ local prometheus = g.query.prometheus; { requestDuration99thQuantile: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",subresource!=\"log\",verb!~\"WATCH|WATCHLIST|PROXY\"}[$interval])) by(verb,le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(verb,le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{verb}}') @@ -14,7 +14,7 @@ local prometheus = g.query.prometheus; requestRateByInstance: { query(): - prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\",verb=~\"$verb\"}[$interval])) by(instance)') + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",code=~"$code",verb=~"$verb"}[$interval])) by(instance)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + 
prometheus.withLegendFormat('{{instance}}') @@ -23,7 +23,7 @@ local prometheus = g.query.prometheus; requestDuarationByResource: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",subresource!=\"log\",verb!~\"WATCH|WATCHLIST|PROXY\"}[$interval])) by(resource,le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(resource,le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{resource}}') @@ -32,7 +32,7 @@ local prometheus = g.query.prometheus; requestDurationBy99Quatile: { query(): - prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\",verb=~\"$verb\"}[$interval])) by(resource)') + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",code=~"$code",verb=~"$verb"}[$interval])) by(resource)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{resource}}') @@ -41,13 +41,13 @@ local prometheus = g.query.prometheus; requestDurationReadWrite: { query(): - [ prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"LIST|GET\"}[$interval])) by(le))') + [ prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",verb=~"LIST|GET"}[$interval])) by(le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('read') + 
prometheus.withDatasource('$Datasource'), - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"POST|PUT|PATCH|UPDATE|DELETE\"}[$interval])) by(le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[$interval])) by(le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('write') @@ -56,13 +56,13 @@ local prometheus = g.query.prometheus; requestRateReadWrite: { query(): - [prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"LIST|GET\"}[$interval]))') + [prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",verb=~"LIST|GET"}[$interval]))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('read') + prometheus.withDatasource('$Datasource'), - prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"POST|PUT|PATCH|UPDATE|DELETE\"}[$interval]))') + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",verb=~"POST|PUT|PATCH|UPDATE|DELETE"}[$interval]))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('write') @@ -72,7 +72,7 @@ local prometheus = g.query.prometheus; requestRateDropped: { query(): - prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{instance=~\"$instance\"}[$interval])) by (requestKind)') + prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{instance=~"$instance"}[$interval])) by (requestKind)') + 
prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('') @@ -81,7 +81,7 @@ local prometheus = g.query.prometheus; requestRateTerminated: { query(): - prometheus.withExpr('sum(rate(apiserver_request_terminations_total{instance=~\"$instance\",resource=~\"$resource\",code=~\"$code\"}[$interval])) by(component)') + prometheus.withExpr('sum(rate(apiserver_request_terminations_total{instance=~"$instance",resource=~"$resource",code=~"$code"}[$interval])) by(component)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('') @@ -90,7 +90,7 @@ local prometheus = g.query.prometheus; requestRateStatus: { query(): - prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~\"$apiserver\",instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\",code=~\"$code\"}[$interval])) by(code)') + prometheus.withExpr('sum(rate(apiserver_request_total{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",verb=~"$verb",code=~"$code"}[$interval])) by(code)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{code}}') @@ -99,7 +99,7 @@ local prometheus = g.query.prometheus; requestsLongRunning: { query(): - prometheus.withExpr('sum(apiserver_longrunning_gauge{instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\"}) by(instance)') + prometheus.withExpr('sum(apiserver_longrunning_gauge{instance=~"$instance",resource=~"$resource",verb=~"$verb"}) by(instance)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}}') @@ -108,7 +108,7 @@ local prometheus = g.query.prometheus; requestInFlight: { query(): - prometheus.withExpr('sum(apiserver_current_inflight_requests{instance=~\"$instance\"}) by (instance,requestKind)') + prometheus.withExpr('sum(apiserver_current_inflight_requests{instance=~"$instance"}) by (instance,requestKind)') + 
prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{requestKind}}-{{instance}}') @@ -117,7 +117,7 @@ local prometheus = g.query.prometheus; requestRejectPandF: { query(): - prometheus.withExpr('sum(rate(apiserver_flowcontrol_rejected_requests_total{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by (reason)') + prometheus.withExpr('sum(rate(apiserver_flowcontrol_rejected_requests_total{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by (reason)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('') @@ -126,7 +126,7 @@ local prometheus = g.query.prometheus; responseSize99Quatile: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_response_sizes_bucket{instance=~\"$instance\",resource=~\"$resource\",verb=~\"$verb\"}[$interval])) by(instance,le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_response_sizes_bucket{instance=~"$instance",resource=~"$resource",verb=~"$verb"}[$interval])) by(instance,le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{instance}}') @@ -135,7 +135,7 @@ local prometheus = g.query.prometheus; requestQueueLengthPandF: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema, priorityLevel, le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema, priorityLevel, le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + 
prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') @@ -144,7 +144,7 @@ local prometheus = g.query.prometheus; requestWaitDuration99QuatilePandF: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~\"$instance\"}[5m])) by(flow_schema, priority_level, le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~"$instance"}[5m])) by(flow_schema, priority_level, le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('') @@ -153,7 +153,7 @@ local prometheus = g.query.prometheus; requestDispatchRatePandF: { query(): - prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema,priorityLevel)') + prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema,priorityLevel)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') @@ -162,29 +162,28 @@ local prometheus = g.query.prometheus; requestExecutionDurationPandF: { query(): - prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}[$interval])) by(flowSchema, priorityLevel, le))') + prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema, priorityLevel, le))') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + 
prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + prometheus.withDatasource('$Datasource') }, - PendingInQueuePandF: { + pendingInQueuePandF: { query(): - prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{instance=~\"$instance\",flowSchema=~\"$flowSchema\",priorityLevel=~\"$priorityLevel\"}) by (flowSchema,priorityLevel)') + prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}) by (flowSchema,priorityLevel)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}') + prometheus.withDatasource('$Datasource') }, - ConcurrencyLimitByKubeapiserverPandF: { + concurrencyLimitByKubeapiserverPandF: { query(): - prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{instance=~\".*:6443\",priorityLevel=~\"$priorityLevel\"}) by (instance,priorityLevel)') + prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{instance=~".*:6443",priorityLevel=~"$priorityLevel"}) by (instance,priorityLevel)') + prometheus.withFormat('time_series') + prometheus.withIntervalFactor(2) + prometheus.withLegendFormat('') + prometheus.withDatasource('$Datasource') } - } \ No newline at end of file diff --git a/assets/api-performance-overview/variables.libsonnet b/assets/api-performance-overview/variables.libsonnet index b18374a..6a522ca 100644 --- a/assets/api-performance-overview/variables.libsonnet +++ b/assets/api-performance-overview/variables.libsonnet @@ -71,5 +71,4 @@ local var = g.dashboard.variable; + var.query.withRefresh(2) + var.query.selectionOptions.withMulti(false) + var.query.selectionOptions.withIncludeAll(true) - } \ No newline at end of file diff --git a/templates/CPT/api-performance-overview-v2.jsonnet b/templates/CPT/api-performance-overview-v2.jsonnet index 3730714..354739d 100644 --- 
a/templates/CPT/api-performance-overview-v2.jsonnet +++ b/templates/CPT/api-performance-overview-v2.jsonnet @@ -47,6 +47,6 @@ g.dashboard.new('API-Performance') panels.timeSeries.withRequestWaitDurationAggregations('p&f - request wait duration - 99th quantile', 'short', queries.requestWaitDuration99QuatilePandF.query(), { x: 0, y: 56, w: 24, h: 8 }), panels.timeSeries.withCommonAggregations('p&f - request dispatch rate', 'short', queries.requestDispatchRatePandF.query(), { x: 0, y: 64, w: 12, h: 8 }), panels.timeSeries.withCommonAggregations('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - pending in queue', 'short', queries.PendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - concurrency limit by kube-apiserver', 'short', queries.ConcurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - pending in queue', 'short', queries.pendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), + panels.timeSeries.withCommonAggregations('p&f - concurrency limit by kube-apiserver', 'short', queries.concurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), ]) \ No newline at end of file From 28c8bf9edfa4174d1b089424ee9d8307ecdd55d0 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 26 Dec 2023 22:44:22 +0530 Subject: [PATCH 05/18] etcd on cluster Dashboard --- .../panels.libsonnet | 116 ++++++++++++++ .../queries.libsonnet | 148 ++++++++++++++++++ .../variables.libsonnet | 14 ++ .../CPT/etcd-on-cluster-dashboard-v2.jsonnet | 69 ++++++++ 4 files changed, 347 insertions(+) create mode 100644 assets/etcd-on-cluster-dashboard/panels.libsonnet create mode 100644 assets/etcd-on-cluster-dashboard/queries.libsonnet create mode 100644 assets/etcd-on-cluster-dashboard/variables.libsonnet create mode 100644 
templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet diff --git a/assets/etcd-on-cluster-dashboard/panels.libsonnet b/assets/etcd-on-cluster-dashboard/panels.libsonnet new file mode 100644 index 0000000..da18039 --- /dev/null +++ b/assets/etcd-on-cluster-dashboard/panels.libsonnet @@ -0,0 +1,116 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + timeSeries: { + local timeSeries = g.panel.timeSeries, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + base(title, unit, targets, gridPos): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.datasource.withType('elasticsearch') + + timeSeries.datasource.withUid('$Datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withDrawStyle("line") + + custom.withLineInterpolation("linear") + + custom.withBarAlignment(0) + + custom.withLineWidth(1) + + custom.withFillOpacity(10) + + custom.withGradientMode("none") + + custom.withSpanNulls(false) + + custom.withPointSize(5) + + custom.withSpanNulls(false) + + custom.stacking.withMode("none") + + custom.withShowPoints('never') + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + + options.legend.withShowLegend(true) + + options.legend.withPlacement('bottom'), + + generalUsageAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([ + 'mean', + 'max' + ]) + + options.legend.withDisplayMode('table'), + + withoutCalcsAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([]) + + options.legend.withDisplayMode('table'), + + GeneralInfoAgg(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([ + 'mean', + 'max' + 
]) + + options.legend.withDisplayMode('list'), + + GeneralInfo(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([]) + + options.legend.withDisplayMode('list'), + }, + + stat: { + local stat = g.panel.stat, + local options = stat.options, + + base(title, unit, targets, gridPos): + stat.new(title) + + stat.datasource.withType('elasticsearch') + + stat.datasource.withUid('$Datasource') + + stat.standardOptions.withUnit(unit) + + stat.queryOptions.withTargets(targets) + + stat.gridPos.withX(gridPos.x) + + stat.gridPos.withY(gridPos.y) + + stat.gridPos.withH(gridPos.h) + + stat.gridPos.withW(gridPos.w) + + options.withJustifyMode("auto") + + options.withGraphMode("none") + + options.text.withTitleSize(12) + + stat.standardOptions.color.withMode('thresholds') + + options.withColorMode('none'), + + + etcdLeader(title, unit, target, gridPos): + self.base(title, unit, target, gridPos) + + stat.options.reduceOptions.withCalcs([ + 'mean' + ]) + + stat.standardOptions.withMappings({ + "type": "value", + "options": { + "0": { + "text": "NO" + }, + "1": { + "text": "YES" + } + } + }), + + failedProposalsSeen(title, unit, target, gridPos): + self.base(title, unit, target, gridPos) + + stat.options.reduceOptions.withCalcs([ + 'mean' + ]) + + stat.standardOptions.withMappings( + { + "type": "special", + "options": { + "match": "null", + "result": { + "text": "N/A" + } + } + }), + } +} \ No newline at end of file diff --git a/assets/etcd-on-cluster-dashboard/queries.libsonnet b/assets/etcd-on-cluster-dashboard/queries.libsonnet new file mode 100644 index 0000000..483e4f1 --- /dev/null +++ b/assets/etcd-on-cluster-dashboard/queries.libsonnet @@ -0,0 +1,148 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; + +local generateTimeSeriesQuery(query, legend) = [ + local prometheusQuery = g.query.prometheus; + 
prometheusQuery.new('$'+variables.Datasource.name, query) + + prometheusQuery.withFormat('time_series') + + prometheusQuery.withIntervalFactor(2) + + prometheusQuery.withLegendFormat(legend), +]; + +{ + CPUUsage: { + query(): + generateTimeSeriesQuery('sum(irate(container_cpu_usage_seconds_total{namespace="openshift-etcd", container="etcd"}[2m])) by (pod) * 100','{{ pod }}') + }, + + memoryUsage: { + query(): + generateTimeSeriesQuery('sum(avg_over_time(container_memory_working_set_bytes{container="",pod!="", namespace=~"openshift-etcd.*"}[2m])) BY (pod, namespace)','{{ pod }}') + }, + + diskWalSyncDuration: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, sum(irate(etcd_disk_wal_fsync_duration_seconds_bucket{namespace="openshift-etcd"}[2m])) by (pod, le))','{{pod}} WAL fsync') + }, + + diskBackendSyncDuration: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, sum(irate(etcd_disk_backend_commit_duration_seconds_bucket{namespace="openshift-etcd"}[2m])) by (pod, le))','{{pod}} DB fsync') + }, + + etcdContainerDiskWrites: { + query(): + generateTimeSeriesQuery('rate(container_fs_writes_bytes_total{namespace="openshift-etcd",container="etcd",device!~".+dm.+"}[2m])','{{ pod }}: {{ device }}') + }, + + dbSize: { + query(): + generateTimeSeriesQuery('etcd_mvcc_db_total_size_in_bytes{namespace="openshift-etcd"}','{{pod}} DB physical size') + + generateTimeSeriesQuery('etcd_mvcc_db_total_size_in_use_in_bytes{namespace="openshift-etcd"}','{{pod}} DB logical size') + }, + + containerNetworkTraffic: { + query(): + generateTimeSeriesQuery('sum(rate(container_network_receive_bytes_total{ container="etcd", namespace=~"openshift-etcd.*"}[2m])) BY (namespace, pod)','rx {{ pod }}') + + generateTimeSeriesQuery('sum(rate(container_network_transmit_bytes_total{ container="etcd", namespace=~"openshift-etcd.*"}[2m])) BY (namespace, pod)','tx {{ pod }}') + }, + + p99PeerToPeerLatency: { + query(): + generateTimeSeriesQuery('histogram_quantile(0.99, 
rate(etcd_network_peer_round_trip_time_seconds_bucket{namespace="openshift-etcd"}[2m]))','{{pod}}') + }, + + peerNetworkTraffic: { + query(): + generateTimeSeriesQuery('rate(etcd_network_peer_received_bytes_total{namespace="openshift-etcd"}[2m])','rx {{pod}} Peer Traffic') + + generateTimeSeriesQuery('rate(etcd_network_peer_sent_bytes_total{namespace="openshift-etcd"}[2m])','tx {{pod}} Peer Traffic') + }, + + gRPCNetworkTraffic: { + query(): + generateTimeSeriesQuery('rate(etcd_network_client_grpc_received_bytes_total{namespace="openshift-etcd"}[2m])','rx {{pod}}') + + generateTimeSeriesQuery('rate(etcd_network_client_grpc_sent_bytes_total{namespace="openshift-etcd"}[2m])','tx {{pod}}') + }, + + activeStreams: { + query(): + generateTimeSeriesQuery('sum(grpc_server_started_total{namespace="openshift-etcd",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{namespace="openshift-etcd",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})','Watch Streams') + + generateTimeSeriesQuery('sum(grpc_server_started_total{namespace="openshift-etcd",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{namespace="openshift-etcd",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})','Lease Streams') + }, + + snapshotDuration: { + query(): + generateTimeSeriesQuery('sum(rate(etcd_debugging_snap_save_total_duration_seconds_sum{namespace="openshift-etcd"}[2m]))','the total latency distributions of save called by snapshot') + }, + + dbSpaceUsed: { + query(): + generateTimeSeriesQuery('(etcd_mvcc_db_total_size_in_bytes{namespace="openshift-etcd"} / etcd_server_quota_backend_bytes{namespace="openshift-etcd"})*100','{{pod}}') + }, + + dbLeftCapacity: { + query(): + generateTimeSeriesQuery('etcd_server_quota_backend_bytes{namespace="openshift-etcd"} - etcd_mvcc_db_total_size_in_bytes{namespace="openshift-etcd"}','{{pod}}') + }, + + dbSizeLimit: { + query(): + 
generateTimeSeriesQuery('etcd_server_quota_backend_bytes{namespace="openshift-etcd"}','{{ pod }} Quota Bytes') + }, + + raftProposals: { + query(): + generateTimeSeriesQuery('sum(rate(etcd_server_proposals_failed_total{namespace="openshift-etcd"}[2m]))','Proposal Failure Rate') + + generateTimeSeriesQuery('sum(etcd_server_proposals_pending{namespace="openshift-etcd"})','Proposal Pending Total') + + generateTimeSeriesQuery('sum(rate(etcd_server_proposals_committed_total{namespace="openshift-etcd"}[2m]))','Proposal Commit Rate') + + generateTimeSeriesQuery('sum(rate(etcd_server_proposals_applied_total{namespace="openshift-etcd"}[2m]))','Proposal Apply Rate') + }, + + numberOfLeaderChangesSeen: { + query(): + generateTimeSeriesQuery('sum(rate(etcd_server_leader_changes_seen_total{namespace="openshift-etcd"}[2m]))','') + }, + + etcdHasALeader: { + query(): + generateTimeSeriesQuery('max(etcd_server_has_leader{namespace="openshift-etcd"})','') + }, + + totalNumberOfProposalsSeen: { + query(): + generateTimeSeriesQuery('max(etcd_server_proposals_committed_total{namespace="openshift-etcd"})','') + }, + + keys: { + query(): + generateTimeSeriesQuery('etcd_debugging_mvcc_keys_total{namespace="openshift-etcd"}','{{ pod }} Num keys') + }, + + leaderElectionsPerDay: { + query(): + generateTimeSeriesQuery('changes(etcd_server_leader_changes_seen_total{namespace="openshift-etcd"}[1d])','{{instance}} Total Leader Elections Per Day') + }, + + slowOperations: { + query(): + generateTimeSeriesQuery('delta(etcd_server_slow_apply_total{namespace="openshift-etcd"}[2m])','{{ pod }} slow applies') + + generateTimeSeriesQuery('delta(etcd_server_slow_read_indexes_total{namespace="openshift-etcd"}[2m])','{{ pod }} slow read indexes') + }, + + keyOperations: { + query(): + generateTimeSeriesQuery('rate(etcd_mvcc_put_total{namespace="openshift-etcd"}[2m])','{{ pod }} puts/s') + + generateTimeSeriesQuery('rate(etcd_mvcc_delete_total{namespace="openshift-etcd"}[2m])','{{ pod }} deletes/s') + }, 
+ + heartBeatFailure: { + query(): + generateTimeSeriesQuery('etcd_server_heartbeat_send_failures_total{namespace="openshift-etcd"}','{{ pod }} heartbeat failures') + + generateTimeSeriesQuery('etcd_server_health_failures{namespace="openshift-etcd"}','{{ pod }} health failures') + }, + + compactedKeys: { + query(): + generateTimeSeriesQuery('etcd_debugging_mvcc_db_compaction_keys_total{namespace="openshift-etcd"}','{{ pod }} keys compacted') + } +} \ No newline at end of file diff --git a/assets/etcd-on-cluster-dashboard/variables.libsonnet b/assets/etcd-on-cluster-dashboard/variables.libsonnet new file mode 100644 index 0000000..405085d --- /dev/null +++ b/assets/etcd-on-cluster-dashboard/variables.libsonnet @@ -0,0 +1,14 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + Datasource: + var.datasource.new('Datasource','prometheus') + + var.datasource.withRegex("") + + var.query.generalOptions.withLabel('Datasource') + + var.query.withRefresh(1) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(false), + + +} \ No newline at end of file diff --git a/templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet b/templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet new file mode 100644 index 0000000..c11d3ce --- /dev/null +++ b/templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet @@ -0,0 +1,69 @@ +local panels = import '../../assets/etcd-on-cluster-dashboard/panels.libsonnet'; +local queries = import '../../assets/etcd-on-cluster-dashboard/queries.libsonnet'; +local variables = import '../../assets/etcd-on-cluster-dashboard/variables.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +g.dashboard.new('etcd-cluster-info') ++ g.dashboard.time.withFrom('now-1h') ++ g.dashboard.time.withTo('now') ++ g.dashboard.withTimezone('utc') ++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', 
'30m', '1h', '2h', '1d']) ++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) ++ g.dashboard.withRefresh('') ++ g.dashboard.withEditable(true) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([ + variables.Datasource, +]) + ++ g.dashboard.withPanels([ + g.panel.row.new('General Resource Usage') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.generalUsageAgg('CPU usage', 'percent', queries.CPUUsage.query(), { x: 0, y: 1, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('Memory usage', 'bytes', queries.memoryUsage.query(), { x: 12, y: 1, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('Disk WAL Sync Duration', 's', queries.diskWalSyncDuration.query(), { x: 0, y: 8, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('Disk Backend Sync Duration', 's', queries.diskBackendSyncDuration.query(), { x: 12, y: 8, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('Etcd container disk writes', 'Bps', queries.etcdContainerDiskWrites.query(), { x: 0, y: 16, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('DB Size', 'bytes', queries.dbSize.query(), { x: 12, y: 16, w: 12, h: 8 }), + ]), + + g.panel.row.new('Network Usage') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.generalUsageAgg('Container network traffic', 'Bps', queries.containerNetworkTraffic.query(), { x: 0, y: 1, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('p99 peer to peer latency', 's', queries.p99PeerToPeerLatency.query(), { x: 12, y: 1, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('Peer network traffic', 'Bps', queries.peerNetworkTraffic.query(), { x: 0, y: 8, w: 12, h: 8 }), + panels.timeSeries.generalUsageAgg('gRPC network traffic', 'Bps', queries.gRPCNetworkTraffic.query(), { x: 12, y: 8, w: 12, h: 8 }), + 
panels.timeSeries.withoutCalcsAgg('Active Streams', '', queries.activeStreams.query(), { x: 0, y: 16, w: 12, h: 8 }), + panels.timeSeries.withoutCalcsAgg('Snapshot duration', 's', queries.snapshotDuration.query(), { x: 12, y: 16, w: 12, h: 8 }), + ]), + + g.panel.row.new('DB Info per Member') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.withoutCalcsAgg('% DB Space Used', 'percent', queries.dbSpaceUsed.query(), { x: 0, y: 8, w: 8, h: 8 }), + panels.timeSeries.withoutCalcsAgg('DB Left capacity (with fragmented space)', 'bytes', queries.dbLeftCapacity.query(), { x: 8, y: 8, w: 8, h: 8 }), + panels.timeSeries.withoutCalcsAgg('DB Size Limit (Backend-bytes)', 'bytes', queries.dbSizeLimit.query(), { x: 16, y: 8, w: 8, h: 8 }), + ]), + + g.panel.row.new('General Info') + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withCollapsed(true) + + g.panel.row.withPanels([ + panels.timeSeries.GeneralInfo('Raft Proposals', '', queries.raftProposals.query(), { x: 0, y: 1, w: 12, h: 8 }), + panels.timeSeries.GeneralInfo('Number of leader changes seen', '', queries.numberOfLeaderChangesSeen.query(), { x: 12, y: 1, w: 12, h: 8 }), + panels.stat.etcdLeader('Etcd has a leader?', 'none', queries.etcdHasALeader.query(), { x: 0, y: 8, w: 6, h: 2 }), + panels.stat.failedProposalsSeen('Total number of failed proposals seen', 'none', queries.totalNumberOfProposalsSeen.query(), { x: 6, y: 8, w: 6, h: 2 }), + panels.timeSeries.GeneralInfo('Keys', 'short', queries.keys.query(), { x: 12, y: 12, w: 12, h: 8 }), + panels.timeSeries.GeneralInfo('Leader Elections Per Day', 'short', queries.leaderElectionsPerDay.query(), { x: 0, y: 12, w: 12, h: 6 }), + panels.timeSeries.GeneralInfo('Slow Operations', 'ops', queries.slowOperations.query(), { x: 0, y: 20, w: 12, h: 8 }), + panels.timeSeries.GeneralInfo('Key Operations', 'ops', queries.keyOperations.query(), { x: 12, y: 20, w: 12, h: 
8 }), + panels.timeSeries.GeneralInfo('Heartbeat Failures', 'short', queries.heartBeatFailure.query(), { x: 0, y: 28, w: 12, h: 8 }), + panels.timeSeries.GeneralInfo('Compacted Keys', 'short', queries.compactedKeys.query(), { x: 12, y: 28, w: 12, h: 8 }), + ]), + +]) From c6b5712b5f11f639bacaf490732aaf205df2bcbe Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 26 Dec 2023 22:50:12 +0530 Subject: [PATCH 06/18] etcd on cluster dashboard --- assets/etcd-on-cluster-dashboard/variables.libsonnet | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/assets/etcd-on-cluster-dashboard/variables.libsonnet b/assets/etcd-on-cluster-dashboard/variables.libsonnet index 405085d..ded8516 100644 --- a/assets/etcd-on-cluster-dashboard/variables.libsonnet +++ b/assets/etcd-on-cluster-dashboard/variables.libsonnet @@ -8,7 +8,5 @@ local var = g.dashboard.variable; + var.query.generalOptions.withLabel('Datasource') + var.query.withRefresh(1) + var.query.selectionOptions.withMulti(false) - + var.query.selectionOptions.withIncludeAll(false), - - + + var.query.selectionOptions.withIncludeAll(false), } \ No newline at end of file From 1f34f56f66acd70090b4c2ed3503921284a131bc Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 27 Dec 2023 17:49:53 +0530 Subject: [PATCH 07/18] Updated the dashboard --- assets/etcd-on-cluster-dashboard/panels.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/etcd-on-cluster-dashboard/panels.libsonnet b/assets/etcd-on-cluster-dashboard/panels.libsonnet index da18039..483c98e 100644 --- a/assets/etcd-on-cluster-dashboard/panels.libsonnet +++ b/assets/etcd-on-cluster-dashboard/panels.libsonnet @@ -9,7 +9,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn base(title, unit, targets, gridPos): timeSeries.new(title) + timeSeries.queryOptions.withTargets(targets) - + timeSeries.datasource.withType('elasticsearch') + + timeSeries.datasource.withType('prometheus') + 
timeSeries.datasource.withUid('$Datasource') + timeSeries.standardOptions.withUnit(unit) + timeSeries.gridPos.withX(gridPos.x) @@ -65,7 +65,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn base(title, unit, targets, gridPos): stat.new(title) - + stat.datasource.withType('elasticsearch') + + stat.datasource.withType('prometheus') + stat.datasource.withUid('$Datasource') + stat.standardOptions.withUnit(unit) + stat.queryOptions.withTargets(targets) From 9b2537f6856565eeea3bfc1a118c3c2d88b594d1 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 27 Dec 2023 17:55:47 +0530 Subject: [PATCH 08/18] updated the cilium k8s performance dashboard --- assets/cilium-k8s-perf/panels.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/cilium-k8s-perf/panels.libsonnet b/assets/cilium-k8s-perf/panels.libsonnet index 76ba885..8df9447 100644 --- a/assets/cilium-k8s-perf/panels.libsonnet +++ b/assets/cilium-k8s-perf/panels.libsonnet @@ -9,7 +9,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn base(title, unit, targets, gridPos): timeSeries.new(title) + timeSeries.queryOptions.withTargets(targets) - + timeSeries.datasource.withType('elasticsearch') + + timeSeries.datasource.withType('prometheus') + timeSeries.datasource.withUid('$Datasource') + timeSeries.standardOptions.withUnit(unit) + timeSeries.gridPos.withX(gridPos.x) @@ -56,7 +56,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn base(title, unit, targets, gridPos): stat.new(title) - + stat.datasource.withType('elasticsearch') + + stat.datasource.withType('prometheus') + stat.datasource.withUid('$Datasource') + stat.standardOptions.withUnit(unit) + stat.queryOptions.withTargets(targets) From 901237bdee155967f6c5cdba2325a93e12b6c478 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 27 Dec 2023 17:59:29 +0530 Subject: [PATCH 09/18] updates in api-perf-overview dashboard --- 
assets/api-performance-overview/panels.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index 5ceff92..fd45bab 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -9,7 +9,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn base(title, unit, targets, gridPos): timeSeries.new(title) + timeSeries.queryOptions.withTargets(targets) - + timeSeries.datasource.withType('elasticsearch') + + timeSeries.datasource.withType('prometheus') + timeSeries.datasource.withUid('$Datasource') + timeSeries.standardOptions.withUnit(unit) + timeSeries.gridPos.withX(gridPos.x) From 7b63ba6bbe158226b19886fcba2ecf0ecd6ea5f0 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Fri, 29 Dec 2023 15:50:58 +0530 Subject: [PATCH 10/18] k8s Performance Dashboard --- assets/k8s-perf/panels.libsonnet | 76 ++++++++++++++ assets/k8s-perf/queries.libsonnet | 138 ++++++++++++++++++++++++++ assets/k8s-perf/variables.libsonnet | 54 ++++++++++ templates/General/k8s-perf-v2.jsonnet | 61 ++++++++++++ 4 files changed, 329 insertions(+) create mode 100644 assets/k8s-perf/panels.libsonnet create mode 100644 assets/k8s-perf/queries.libsonnet create mode 100644 assets/k8s-perf/variables.libsonnet create mode 100644 templates/General/k8s-perf-v2.jsonnet diff --git a/assets/k8s-perf/panels.libsonnet b/assets/k8s-perf/panels.libsonnet new file mode 100644 index 0000000..0aeed13 --- /dev/null +++ b/assets/k8s-perf/panels.libsonnet @@ -0,0 +1,76 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +{ + stat: { + local stat = g.panel.stat, + local options = stat.options, + + base(title, unit, targets, gridPos): + stat.new(title) + + stat.datasource.withType('prometheus') + + stat.datasource.withUid('$Datasource') + + stat.standardOptions.withUnit(unit) + + 
stat.queryOptions.withTargets(targets) + + stat.gridPos.withX(gridPos.x) + + stat.gridPos.withY(gridPos.y) + + stat.gridPos.withH(gridPos.h) + + stat.gridPos.withW(gridPos.w) + + options.withJustifyMode("auto") + + options.withGraphMode("none") + + options.text.withTitleSize(12) + + stat.standardOptions.color.withMode('thresholds') + + options.withColorMode('none'), + + genericStatLegendPanel(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + stat.options.reduceOptions.withCalcs([ + 'last' + ]) + }, + + timeSeries: { + local timeSeries = g.panel.timeSeries, + local custom = timeSeries.fieldConfig.defaults.custom, + local options = timeSeries.options, + + base(title, unit, targets, gridPos): + timeSeries.new(title) + + timeSeries.queryOptions.withTargets(targets) + + timeSeries.datasource.withType('prometheus') + + timeSeries.datasource.withUid('$Datasource') + + timeSeries.standardOptions.withUnit(unit) + + timeSeries.gridPos.withX(gridPos.x) + + timeSeries.gridPos.withY(gridPos.y) + + timeSeries.gridPos.withH(gridPos.h) + + timeSeries.gridPos.withW(gridPos.w) + + custom.withDrawStyle("line") + + custom.withLineInterpolation("linear") + + custom.withBarAlignment(0) + + custom.withLineWidth(1) + + custom.withFillOpacity(10) + + custom.withGradientMode("none") + + custom.withSpanNulls(false) + + custom.withPointSize(5) + + custom.withSpanNulls(false) + + custom.stacking.withMode("none") + + custom.withShowPoints('never') + + options.tooltip.withMode('multi') + + options.tooltip.withSort('desc') + + options.legend.withShowLegend(true) + + options.legend.withPlacement('bottom'), + + genericTimeSeriesPanel(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([]) + + options.legend.withDisplayMode('table'), + + genericTimeSeriesLegendPanel(title, unit, targets, gridPos): + self.base(title, unit, targets, gridPos) + + options.legend.withCalcs([ + "mean", + "max" + ]) + + 
options.legend.withDisplayMode('table'), + + }, +} \ No newline at end of file diff --git a/assets/k8s-perf/queries.libsonnet b/assets/k8s-perf/queries.libsonnet new file mode 100644 index 0000000..5bc9a12 --- /dev/null +++ b/assets/k8s-perf/queries.libsonnet @@ -0,0 +1,138 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local variables = import './variables.libsonnet'; + +local generateTimeSeriesQuery(query, legend) = [ + local prometheusQuery = g.query.prometheus; + prometheusQuery.new('$'+variables.Datasource.name, query) + + prometheusQuery.withFormat('time_series') + + prometheusQuery.withIntervalFactor(2) + + prometheusQuery.withLegendFormat(legend), +]; + +{ + currentNodeCount: { + query(): + generateTimeSeriesQuery('sum(kube_node_info{})','Number of nodes') + + generateTimeSeriesQuery('sum(kube_node_status_condition{status="true"}) by (condition) > 0','Node: {{ condition }}') + }, + + currentNamespaceCount: { + query(): + generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase)','{{ phase }}') + }, + + currentPodCount: { + query(): + generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase) > 0','{{ phase}} Pods') + }, + + numberOfNodes: { + query(): + generateTimeSeriesQuery('sum(kube_node_info{})','Number of nodes') + + generateTimeSeriesQuery('sum(kube_node_status_condition{status="true"}) by (condition) > 0','Node: {{ condition }}') + }, + + namespaceCount: { + query(): + generateTimeSeriesQuery('sum(kube_namespace_status_phase) by (phase) > 0','{{ phase }} namespaces') + }, + + podCount: { + query(): + generateTimeSeriesQuery('sum(kube_pod_status_phase{}) by (phase)','{{phase}} pods') + }, + + secretAndConfigMapCount: { + query(): + generateTimeSeriesQuery('count(kube_secret_info{})','secrets') + + generateTimeSeriesQuery('count(kube_configmap_info{})','Configmaps') + }, + deployCount: { + query(): + generateTimeSeriesQuery('count(kube_deployment_labels{})','Deployments') + }, + + 
serviceCount: { + query(): + generateTimeSeriesQuery('count(kube_service_info{})','Services') + }, + + top10ContainerRSS: { + query(): + generateTimeSeriesQuery('topk(10, container_memory_rss{namespace!="",container!="POD",name!=""})','{{ namespace }} - {{ name }}') + }, + + top10ContainerCPU: { + query(): + generateTimeSeriesQuery('topk(10,irate(container_cpu_usage_seconds_total{namespace!="",container!="POD",name!=""}[$interval])*100)','{{ namespace }} - {{ name }}') + }, + + goroutinesCount: { + query(): + generateTimeSeriesQuery('topk(10, sum(go_goroutines{}) by (job,instance))','{{ job }} - {{ instance }}') + }, + + podDistribution: { + query(): + generateTimeSeriesQuery('count(kube_pod_info{}) by (exported_node)','{{ exported_node }}') + }, + + basicCPU: { + query(nodeName): + generateTimeSeriesQuery('sum by (instance, mode)(rate(node_cpu_seconds_total{node=~"' + nodeName + '",job=~".*"}[$interval])) * 100','Busy {{mode}}') + }, + + systemMemory: { + query(nodeName): + generateTimeSeriesQuery('node_memory_Active_bytes{node=~"' + nodeName + '"}','Active') + + generateTimeSeriesQuery('node_memory_MemTotal_bytes{node=~"' + nodeName + '"}','Total') + + generateTimeSeriesQuery('node_memory_Cached_bytes{node=~"' + nodeName + '"} + node_memory_Buffers_bytes{node=~"' + nodeName + '"}','Cached + Buffers') + + generateTimeSeriesQuery('node_memory_MemAvailable_bytes{node=~"' + nodeName + '"}','Available') + }, + + diskThroughput: { + query(nodeName): + generateTimeSeriesQuery('rate(node_disk_read_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])','{{ device }} - read') + + generateTimeSeriesQuery('rate(node_disk_written_bytes_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])','{{ device }} - write') + }, + + diskIOPS: { + query(nodeName): + generateTimeSeriesQuery('rate(node_disk_reads_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])','{{ device }} - read') + +
generateTimeSeriesQuery('rate(node_disk_writes_completed_total{device=~"$block_device",node=~"' + nodeName + '"}[$interval])','{{ device }} - write') + }, + + networkUtilization: { + query(nodeName): + generateTimeSeriesQuery('rate(node_network_receive_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8','{{instance}} - {{device}} - RX') + + generateTimeSeriesQuery('rate(node_network_transmit_bytes_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval]) * 8','{{instance}} - {{device}} - TX') + }, + + networkPackets: { + query(nodeName): + generateTimeSeriesQuery('rate(node_network_receive_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])','{{instance}} - {{device}} - RX') + + generateTimeSeriesQuery('rate(node_network_transmit_packets_total{node=~"' + nodeName + '",device=~"$net_device"}[$interval])','{{instance}} - {{device}} - TX') + }, + + networkDrop: { + query(nodeName): + generateTimeSeriesQuery('topk(10, rate(node_network_receive_drop_total{node=~"' + nodeName + '"}[$interval]))','rx-drop-{{ device }}') + + generateTimeSeriesQuery('topk(10,rate(node_network_transmit_drop_total{node=~"' + nodeName + '"}[$interval]))','tx-drop-{{ device }}') + }, + + conntrackStats: { + query(nodeName): + generateTimeSeriesQuery('node_nf_conntrack_entries{node=~"' + nodeName + '"}','conntrack_entries') + + generateTimeSeriesQuery('node_nf_conntrack_entries_limit{node=~"' + nodeName + '"}','conntrack_limit') + }, + + top10ContainersCPU: { + query(nodeName): + generateTimeSeriesQuery('topk(10, sum(irate(container_cpu_usage_seconds_total{container!="POD",name!="",instance=~"' + nodeName + '",namespace!="",namespace=~"$namespace"}[$interval])) by (pod,container,namespace,name,service) * 100)','{{ pod }}: {{ container }}') + }, + + top10ContainersRSS: { + query(nodeName): + generateTimeSeriesQuery('topk(10, container_memory_rss{container!="POD",name!="",instance=~"' + nodeName + 
'",namespace!="",namespace=~"$namespace"})','{{ pod }}: {{ container }}') + } +} \ No newline at end of file diff --git a/assets/k8s-perf/variables.libsonnet b/assets/k8s-perf/variables.libsonnet new file mode 100644 index 0000000..9ea1ecc --- /dev/null +++ b/assets/k8s-perf/variables.libsonnet @@ -0,0 +1,54 @@ +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; +local var = g.dashboard.variable; + +{ + Datasource: + var.datasource.new('Datasource','prometheus') + + var.datasource.withRegex('') + + var.query.withRefresh(1) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.selectionOptions.withMulti(false), + + _worker_node: + var.query.new('_worker_node','label_values(kube_node_labels{}, exported_node)') + + var.query.generalOptions.withLabel('Worker') + + var.query.withSort(0) + + var.query.withRefresh(2) + + var.query.selectionOptions.withIncludeAll(false) + + var.query.selectionOptions.withMulti(true), + + namespace: + var.query.new('namespace','label_values(kube_pod_info, exported_namespace)') + + var.query.generalOptions.withLabel('Namespace') + + var.query.withSort(0) + + var.query.withRefresh(2) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.selectionOptions.withMulti(false), + + block_device: + var.query.new('block_device','label_values(node_disk_written_bytes_total,device)') + + var.query.generalOptions.withLabel('Block device') + + var.query.withSort(0) + + var.datasource.withRegex('/^(?:(?!dm|rb).)*$/') + + var.query.withRefresh(2) + + var.query.selectionOptions.withIncludeAll(true) + + var.query.selectionOptions.withMulti(true), + + net_device: + var.query.new('net_device','label_values(node_network_receive_bytes_total,device)') + + var.query.generalOptions.withLabel('Network device') + + var.query.withSort(0) + + var.datasource.withRegex('/^((br|en|et).*)$/') + + var.query.withRefresh(2) + + var.query.selectionOptions.withIncludeAll(true) + + 
var.query.selectionOptions.withMulti(true), + + interval: + var.interval.new('interval',['2m','3m','4m','5m']) + + var.query.withDatasourceFromVariable(self.Datasource) + + var.interval.generalOptions.withLabel('interval') + + var.interval.withAutoOption(count=30, minInterval='10s') + + var.query.withRefresh(2) + + var.query.selectionOptions.withMulti(false) + + var.query.selectionOptions.withIncludeAll(false) +} \ No newline at end of file diff --git a/templates/General/k8s-perf-v2.jsonnet b/templates/General/k8s-perf-v2.jsonnet new file mode 100644 index 0000000..7f28d3d --- /dev/null +++ b/templates/General/k8s-perf-v2.jsonnet @@ -0,0 +1,61 @@ +local panels = import '../../assets/k8s-perf/panels.libsonnet'; +local queries = import '../../assets/k8s-perf/queries.libsonnet'; +local variables = import '../../assets/k8s-perf/variables.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +g.dashboard.new('k8s Performance') ++ g.dashboard.time.withFrom('now-1h') ++ g.dashboard.time.withTo('now') ++ g.dashboard.withTimezone('utc') ++ g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) ++ g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) ++ g.dashboard.withRefresh('30s') ++ g.dashboard.withEditable(false) ++ g.dashboard.graphTooltip.withSharedCrosshair() ++ g.dashboard.withVariables([ + variables.Datasource, + variables._worker_node, + variables.namespace, + variables.block_device, + variables.net_device, + variables.interval, +]) + ++ g.dashboard.withPanels([ + g.panel.row.new('Cluster Details') + + g.panel.row.withCollapsed(true) + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withPanels([ + panels.stat.genericStatLegendPanel('Current Node Count', 'none', queries.currentNodeCount.query(), { x: 0, y: 4, w: 8, h: 3 }), + panels.stat.genericStatLegendPanel('Current namespace Count', 'none', 
queries.currentNamespaceCount.query(), { x: 8, y: 4, w: 8, h: 3 }), + panels.stat.genericStatLegendPanel('Current Pod Count', 'none', queries.currentPodCount.query(), { x: 16, y: 4, w: 8, h: 3 }), + panels.timeSeries.genericTimeSeriesPanel('Number of nodes', 'none', queries.numberOfNodes.query(), { x: 0, y: 12, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Namespace count', 'none', queries.namespaceCount.query(), { x: 8, y: 12, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Pod count', 'none', queries.podCount.query(), { x: 16, y: 12, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Secret & configmap count', 'none', queries.secretAndConfigMapCount.query(), { x: 0, y: 20, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Deployment count', 'none', queries.deployCount.query(), { x: 8, y: 20, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Services count', 'none', queries.serviceCount.query(), { x: 16, y: 20, w: 8, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 container RSS', 'bytes', queries.top10ContainerRSS.query(), { x: 0, y: 28, w: 24, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 container CPU', 'percent', queries.top10ContainerCPU.query(), { x: 0, y: 36, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesPanel('Goroutines count', 'none', queries.goroutinesCount.query(), { x: 12, y: 36, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Pod Distribution', 'none', queries.podDistribution.query(), { x: 0, y: 44, w: 24, h: 8 }), + ]), + + g.panel.row.new('Node: $_worker_node') + + g.panel.row.withCollapsed(true) + + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) + + g.panel.row.withRepeat('_worker_node') + + g.panel.row.withPanels([ + panels.timeSeries.genericTimeSeriesLegendPanel('CPU Basic: $_worker_node ', 'percent', queries.basicCPU.query('$_worker_node'), { x: 0, y: 0, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('System 
Memory: $_worker_node ', 'bytes', queries.systemMemory.query('$_worker_node'), { x: 12, y: 0, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Disk throughput: $_worker_node ', 'Bps', queries.diskThroughput.query('$_worker_node'), { x: 0, y: 8, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Disk IOPS: $_worker_node', 'iops', queries.diskIOPS.query('$_worker_node'), { x: 12, y: 8, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Network Utilization: $_worker_node', 'bps', queries.networkUtilization.query('$_worker_node'), { x: 0, y: 16, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Network Packets: $_worker_node', 'pps', queries.networkPackets.query('$_worker_node'), { x: 12, y: 16, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Network packets drop: $_worker_node', 'pps', queries.networkDrop.query('$_worker_node'), { x: 0, y: 24, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Conntrack stats: $_worker_node', '', queries.conntrackStats.query('$_worker_node'), { x: 12, y: 24, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 container CPU: $_worker_node', 'percent', queries.top10ContainersCPU.query('$_worker_node'), { x: 0, y: 32, w: 12, h: 8 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 container RSS: $_worker_node', 'bytes', queries.top10ContainersRSS.query('$_worker_node'), { x: 12, y: 32, w: 12, h: 8 }), + + ]), +]) From a8c06c1889b5d776003caa935d8edfae93f1f0c4 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 9 Jan 2024 16:17:28 +0530 Subject: [PATCH 11/18] Upadated the dashboard without spaces --- assets/api-performance-overview/panels.libsonnet | 5 +---- .../{CPT => General}/api-performance-overview-v2.jsonnet | 8 +++----- 2 files changed, 4 insertions(+), 9 deletions(-) rename templates/{CPT => General}/api-performance-overview-v2.jsonnet (98%) diff --git
a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index fd45bab..a09d430 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -53,9 +53,6 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn 'mean', 'max', 'lastNotNull' - ]) - - - + ]) } } \ No newline at end of file diff --git a/templates/CPT/api-performance-overview-v2.jsonnet b/templates/General/api-performance-overview-v2.jsonnet similarity index 98% rename from templates/CPT/api-performance-overview-v2.jsonnet rename to templates/General/api-performance-overview-v2.jsonnet index 354739d..d4550d0 100644 --- a/templates/CPT/api-performance-overview-v2.jsonnet +++ b/templates/General/api-performance-overview-v2.jsonnet @@ -3,7 +3,7 @@ local queries = import '../../assets/api-performance-overview/queries.libsonnet' local variables = import '../../assets/api-performance-overview/variables.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -g.dashboard.new('API-Performance') +g.dashboard.new('API Performance Dashboard') + g.dashboard.withDescription(||| Dashboard for Api-performance-overview |||) @@ -14,7 +14,7 @@ g.dashboard.new('API-Performance') + g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) + g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) + g.dashboard.withRefresh('30s') -+ g.dashboard.withEditable(true) ++ g.dashboard.withEditable(false) + g.dashboard.graphTooltip.withSharedCrosshair() + g.dashboard.withVariables([ variables.Datasource, @@ -27,9 +27,7 @@ g.dashboard.new('API-Performance') variables.priorityLevel, variables.interval, ]) - + g.dashboard.withPanels([ - panels.timeSeries.withCommonAggregations('request duration - 99th quantile', 'short', queries.requestDuration99thQuantile.query(), { x: 0, 
y: 0, w: 12, h: 8 }), panels.timeSeries.withCommonAggregations('request rate - by instance', 'short', queries.requestRateByInstance.query(), { x: 12, y: 0, w: 12, h: 8 }), panels.timeSeries.withCommonAggregations('request duration - 99th quantile - by resource', 'short', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }), @@ -49,4 +47,4 @@ g.dashboard.new('API-Performance') panels.timeSeries.withCommonAggregations('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }), panels.timeSeries.withCommonAggregations('p&f - pending in queue', 'short', queries.pendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), panels.timeSeries.withCommonAggregations('p&f - concurrency limit by kube-apiserver', 'short', queries.concurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), -]) \ No newline at end of file +]) From 1976d2a4554fc88cbe163d3a6dc9c3c5341b2b64 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 23 Jan 2024 16:45:22 +0530 Subject: [PATCH 12/18] changes in panel legend settings --- .../api-performance-overview/panels.libsonnet | 2 ++ .../queries.libsonnet | 2 +- .../api-performance-overview-v2.jsonnet | 36 +++++++++---------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index a09d430..8942261 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -37,6 +37,8 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + options.legend.withShowLegend(true) + options.legend.withDisplayMode('table') + options.legend.withPlacement('right') + + options.legend.withSortDesc(true) + + options.legend.withAsTable(true) + options.tooltip.withMode('multi'), withReadWriteSettings(title, unit, targets, gridPos): diff --git 
a/assets/api-performance-overview/queries.libsonnet b/assets/api-performance-overview/queries.libsonnet index 6f871db..e242160 100644 --- a/assets/api-performance-overview/queries.libsonnet +++ b/assets/api-performance-overview/queries.libsonnet @@ -3,7 +3,7 @@ local variables = import './variables.libsonnet'; local prometheus = g.query.prometheus; { - requestDuration99thQuantile: { + request_duration_99th_quantile: { query(): prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(verb,le))') + prometheus.withFormat('time_series') diff --git a/templates/General/api-performance-overview-v2.jsonnet b/templates/General/api-performance-overview-v2.jsonnet index d4550d0..ca835c7 100644 --- a/templates/General/api-performance-overview-v2.jsonnet +++ b/templates/General/api-performance-overview-v2.jsonnet @@ -28,23 +28,23 @@ g.dashboard.new('API Performance Dashboard') variables.interval, ]) + g.dashboard.withPanels([ - panels.timeSeries.withCommonAggregations('request duration - 99th quantile', 'short', queries.requestDuration99thQuantile.query(), { x: 0, y: 0, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('request rate - by instance', 'short', queries.requestRateByInstance.query(), { x: 12, y: 0, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('request duration - 99th quantile - by resource', 'short', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('request duration - 99th quantile', 'short', queries.requestDurationBy99Quatile.query(), { x: 12, y: 8, w: 12, h: 8 }), - panels.timeSeries.withReadWriteSettings('request duration - read vs write', 'short', queries.requestDurationReadWrite.query(), { x: 0, y: 16, w: 12, h: 8 }), - panels.timeSeries.withReadWriteSettings('request rate - read vs write', 'short', 
queries.requestRateReadWrite.query(), { x: 12, y: 16, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('requests dropped rate', 'short', queries.requestRateDropped.query(), { x: 0, y: 24, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('requests terminated rate', 'short', queries.requestRateTerminated.query(), { x: 12, y: 24, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('requests status rate', 'short', queries.requestRateStatus.query(), { x: 0, y: 32, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('long running requests', 'short', queries.requestsLongRunning.query(), { x: 12, y: 32, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('request in flight', 'short', queries.requestInFlight.query(), { x: 0, y: 40, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - requests rejected', 'short', queries.requestRejectPandF.query(), { x: 12, y: 40, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('response size - 99th quantile', 'short', queries.responseSize99Quatile.query(), { x: 0, y: 48, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - request queue length', 'short', queries.requestQueueLengthPandF.query(), { x: 12, y: 48, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('request duration - 99th quantile', 'short', queries.request_duration_99th_quantile.query(), { x: 0, y: 0, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('request rate - by instance', 'short', queries.requestRateByInstance.query(), { x: 12, y: 0, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('request duration - 99th quantile - by resource', 'short', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('request duration - 99th quantile', 'short', queries.requestDurationBy99Quatile.query(), { x: 12, y: 8, w: 12, h: 8 }), + panels.timeSeries.legendBottomPlacement('request duration - read vs write', 'short', 
queries.requestDurationReadWrite.query(), { x: 0, y: 16, w: 12, h: 8 }), + panels.timeSeries.legendBottomPlacement('request rate - read vs write', 'short', queries.requestRateReadWrite.query(), { x: 12, y: 16, w: 12, h: 8 }), + panels.timeSeries.legendBottomPlacement('requests dropped rate', 'short', queries.requestRateDropped.query(), { x: 0, y: 24, w: 12, h: 8 }), + panels.timeSeries.legendBottomPlacement('requests terminated rate', 'short', queries.requestRateTerminated.query(), { x: 12, y: 24, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('requests status rate', 'short', queries.requestRateStatus.query(), { x: 0, y: 32, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('long running requests', 'short', queries.requestsLongRunning.query(), { x: 12, y: 32, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('request in flight', 'short', queries.requestInFlight.query(), { x: 0, y: 40, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - requests rejected', 'short', queries.requestRejectPandF.query(), { x: 12, y: 40, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('response size - 99th quantile', 'short', queries.responseSize99Quatile.query(), { x: 0, y: 48, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - request queue length', 'short', queries.requestQueueLengthPandF.query(), { x: 12, y: 48, w: 12, h: 8 }), panels.timeSeries.withRequestWaitDurationAggregations('p&f - request wait duration - 99th quantile', 'short', queries.requestWaitDuration99QuatilePandF.query(), { x: 0, y: 56, w: 24, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - request dispatch rate', 'short', queries.requestDispatchRatePandF.query(), { x: 0, y: 64, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - pending in queue', 'short', 
queries.pendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), - panels.timeSeries.withCommonAggregations('p&f - concurrency limit by kube-apiserver', 'short', queries.concurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - request dispatch rate', 'short', queries.requestDispatchRatePandF.query(), { x: 0, y: 64, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - pending in queue', 'short', queries.pendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }), + panels.timeSeries.legendRightPlacement('p&f - concurrency limit by kube-apiserver', 'short', queries.concurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }), ]) From e96e79e03714ec632bf4c41c95cd378332b50ec7 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 23 Jan 2024 17:21:27 +0530 Subject: [PATCH 13/18] panel settings --- assets/api-performance-overview/panels.libsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index 8942261..9c9e678 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -29,7 +29,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + custom.stacking.withMode("none") + custom.withShowPoints('never'), - withCommonAggregations(title, unit, targets, gridPos): + legendRightPlacement(title, unit, targets, gridPos): self.base(title, unit, targets, gridPos) + options.legend.withCalcs([ 'lastNotNull' @@ -41,7 +41,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + options.legend.withAsTable(true) + options.tooltip.withMode('multi'), - withReadWriteSettings(title, 
unit, targets, gridPos): + legendBottomPlacement(title, unit, targets, gridPos): self.base(title, unit, targets, gridPos) + options.tooltip.withMode('multi') + options.legend.withShowLegend(true) From 3e4f1e08bfad532f9674ec5e5db9912695e219a2 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Tue, 23 Jan 2024 17:26:19 +0530 Subject: [PATCH 14/18] panel settings --- assets/api-performance-overview/panels.libsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index 9c9e678..e90e22b 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -50,7 +50,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + options.tooltip.withMode('multi'), withRequestWaitDurationAggregations(title, unit, targets, gridPos): - self.withCommonAggregations(title, unit, targets, gridPos) + self.legendRightPlacement(title, unit, targets, gridPos) + options.legend.withCalcs([ 'mean', 'max', From 7d9a1ec9bc8a2223aebcc264abc061542ae06dd4 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Thu, 1 Feb 2024 00:06:42 +0530 Subject: [PATCH 15/18] Adding legend calcs as Max to panels and tooltips as desc --- assets/api-performance-overview/panels.libsonnet | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/assets/api-performance-overview/panels.libsonnet b/assets/api-performance-overview/panels.libsonnet index e90e22b..68cf721 100644 --- a/assets/api-performance-overview/panels.libsonnet +++ b/assets/api-performance-overview/panels.libsonnet @@ -27,17 +27,20 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn + custom.withSpanNulls(false) + custom.stacking.withGroup("A") + custom.stacking.withMode("none") - + custom.withShowPoints('never'), + + custom.withShowPoints('never') + + options.tooltip.withSort('desc') + + 
timeSeries.queryOptions.withTimeFrom(null) + + timeSeries.queryOptions.withTimeShift(null) + + options.legend.withSortDesc(true), legendRightPlacement(title, unit, targets, gridPos): self.base(title, unit, targets, gridPos) + options.legend.withCalcs([ - 'lastNotNull' + 'max' ]) + options.legend.withShowLegend(true) + options.legend.withDisplayMode('table') + options.legend.withPlacement('right') - + options.legend.withSortDesc(true) + options.legend.withAsTable(true) + options.tooltip.withMode('multi'), From 73a6379a772de0e675fe6ab2496d0d428c4276d8 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 7 Feb 2024 00:15:09 +0530 Subject: [PATCH 16/18] Cilium dashboard update --- templates/{CPT => General}/cilium-k8s-perf-v2.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename templates/{CPT => General}/cilium-k8s-perf-v2.jsonnet (98%) diff --git a/templates/CPT/cilium-k8s-perf-v2.jsonnet b/templates/General/cilium-k8s-perf-v2.jsonnet similarity index 98% rename from templates/CPT/cilium-k8s-perf-v2.jsonnet rename to templates/General/cilium-k8s-perf-v2.jsonnet index 65f162b..67045cb 100644 --- a/templates/CPT/cilium-k8s-perf-v2.jsonnet +++ b/templates/General/cilium-k8s-perf-v2.jsonnet @@ -3,14 +3,14 @@ local queries = import '../../assets/cilium-k8s-perf/queries.libsonnet'; local variables = import '../../assets/cilium-k8s-perf/variables.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -g.dashboard.new('Cilium k8s Performance') +g.dashboard.new('Cilium k8s Performance dashboard') + g.dashboard.time.withFrom('now-1h') + g.dashboard.time.withTo('now') + g.dashboard.withTimezone('utc') + g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) + g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) + g.dashboard.withRefresh('30s') -+ g.dashboard.withEditable(true) ++ g.dashboard.withEditable(false) + 
g.dashboard.graphTooltip.withSharedCrosshair() + g.dashboard.withVariables([ variables.Datasource, From 2651d9663b750778e3aff7813dad86c4865b2008 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 7 Feb 2024 00:29:09 +0530 Subject: [PATCH 17/18] etcd-cluster dashboard update --- .../{CPT => General}/etcd-on-cluster-dashboard-v2.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename templates/{CPT => General}/etcd-on-cluster-dashboard-v2.jsonnet (98%) diff --git a/templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet b/templates/General/etcd-on-cluster-dashboard-v2.jsonnet similarity index 98% rename from templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet rename to templates/General/etcd-on-cluster-dashboard-v2.jsonnet index c11d3ce..f2d08b3 100644 --- a/templates/CPT/etcd-on-cluster-dashboard-v2.jsonnet +++ b/templates/General/etcd-on-cluster-dashboard-v2.jsonnet @@ -3,14 +3,14 @@ local queries = import '../../assets/etcd-on-cluster-dashboard/queries.libsonnet local variables = import '../../assets/etcd-on-cluster-dashboard/variables.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -g.dashboard.new('etcd-cluster-info') +g.dashboard.new('etcd-cluster-info dashboard') + g.dashboard.time.withFrom('now-1h') + g.dashboard.time.withTo('now') + g.dashboard.withTimezone('utc') + g.dashboard.timepicker.withRefreshIntervals(['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d']) + g.dashboard.timepicker.withTimeOptions(['5m', '15m', '1h', '6h', '12h', '24h', '2d', '7d', '30d']) + g.dashboard.withRefresh('') -+ g.dashboard.withEditable(true) ++ g.dashboard.withEditable(false) + g.dashboard.graphTooltip.withSharedCrosshair() + g.dashboard.withVariables([ variables.Datasource, From a75dacc13b25a53a6bb8f229404f51a0da194e71 Mon Sep 17 00:00:00 2001 From: Manda-supraja26 Date: Wed, 7 Feb 2024 00:34:45 +0530 Subject: [PATCH 18/18] K8S-perfromance dashboard update --- templates/General/k8s-perf-v2.jsonnet |
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/General/k8s-perf-v2.jsonnet b/templates/General/k8s-perf-v2.jsonnet index 7f28d3d..fd0f28d 100644 --- a/templates/General/k8s-perf-v2.jsonnet +++ b/templates/General/k8s-perf-v2.jsonnet @@ -3,7 +3,7 @@ local queries = import '../../assets/k8s-perf/queries.libsonnet'; local variables = import '../../assets/k8s-perf/variables.libsonnet'; local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; -g.dashboard.new('k8s Performance') +g.dashboard.new('k8s Performance dashboard') + g.dashboard.time.withFrom('now-1h') + g.dashboard.time.withTo('now') + g.dashboard.withTimezone('utc')