diff --git a/.gitignore b/.gitignore index fb16f45..88f4278 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ vendor tmp jsonnetfile.lock.json -./dashboards_out/lint +dashboards_out/.lint diff --git a/alerts/alerts.libsonnet b/alerts/alerts.libsonnet index 38c23a7..2fc2a9e 100644 --- a/alerts/alerts.libsonnet +++ b/alerts/alerts.libsonnet @@ -3,29 +3,32 @@ groups+: [ { name: 'argo-cd', - rules: [ + rules: std.prune([ { - alert: 'ArgoCdAppOutOfSync', + alert: 'ArgoCdAppSyncFailed', expr: ||| sum( - argocd_app_info{ - %(argoCdSelector)s, - sync_status!="Synced" - } - ) by (job, dest_server, project, name, sync_status) - > 0 + round( + increase( + argocd_app_sync_total{ + %(argoCdSelector)s, + phase!="Succeeded" + }[%(argoCdAppSyncInterval)s] + ) + ) + ) by (job, dest_server, project, name, phase) > 0 ||| % $._config, labels: { severity: 'warning', }, - 'for': $._config.argoCdAppOutOfSyncFor, + 'for': '1m', annotations: { - summary: 'An ArgoCD Application is Out Of Sync.', - description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past %s.' % $._config.argoCdAppOutOfSyncFor, + summary: 'An ArgoCD Application has Failed to Sync.', + description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past %s.' % $._config.argoCdAppSyncInterval, dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}', }, }, - { + if $._config.argoCdAppUnhealthyEnabled then { alert: 'ArgoCdAppUnhealthy', expr: ||| sum( @@ -46,49 +49,46 @@ dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}', }, }, - { - alert: 'ArgoCdAppAutoSyncDisabled', + if $._config.argoCdAppOutOfSyncEnabled then { + alert: 'ArgoCdAppOutOfSync', expr: ||| sum( argocd_app_info{ %(argoCdSelector)s, - autosync_enabled!="true", - name!~"%(argoAutoSyncDisabledIgnoredApps)s" + sync_status!="Synced" } - ) by (job, dest_server, project, name, autosync_enabled) + ) by (job, dest_server, project, name, sync_status) > 0 ||| % $._config, labels: { severity: 'warning', }, - 'for': $._config.argoCdAppAutoSyncDisabledFor, + 'for': $._config.argoCdAppOutOfSyncFor, annotations: { - summary: 'An ArgoCD Application has AutoSync Disabled.', - description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past %s.' % $._config.argoCdAppAutoSyncDisabledFor, + summary: 'An ArgoCD Application is Out Of Sync.', + description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past %s.' % $._config.argoCdAppOutOfSyncFor, dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}', }, }, - { - alert: 'ArgoCdAppSyncFailed', + if $._config.argoCdAppAutoSyncDisabledEnabled then { + alert: 'ArgoCdAppAutoSyncDisabled', expr: ||| sum( - round( - increase( - argocd_app_sync_total{ - %(argoCdSelector)s, - phase!="Succeeded" - }[%(argoCdAppSyncInterval)s] - ) - ) - ) by (job, dest_server, project, name, phase) > 0 + argocd_app_info{ + %(argoCdSelector)s, + autosync_enabled!="true", + name!~"%(argoAutoSyncDisabledIgnoredApps)s" + } + ) by (job, dest_server, project, name, autosync_enabled) + > 0 ||| % $._config, labels: { severity: 'warning', }, - 'for': '1m', + 'for': $._config.argoCdAppAutoSyncDisabledFor, annotations: { - summary: 'An ArgoCD Application has Failed to Sync.', - description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past %s.' % $._config.argoCdAppSyncInterval, + summary: 'An ArgoCD Application has AutoSync Disabled.', + description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past %s.' % $._config.argoCdAppAutoSyncDisabledFor, dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}', }, }, @@ -116,7 +116,7 @@ dashboard_url: $._config.notificationsOverviewDashboardUrl + '?var-job={{ $labels.job }}&var-exported_service={{ $labels.exported_service }}', }, }, - ], + ]), }, ], }, diff --git a/config.libsonnet b/config.libsonnet index fe66781..e5c92c3 100644 --- a/config.libsonnet +++ b/config.libsonnet @@ -23,8 +23,11 @@ local annotation = g.dashboard.annotation; tags: ['ci/cd', 'argo-cd'], + argoCdAppOutOfSyncEnabled: true, argoCdAppOutOfSyncFor: '15m', + argoCdAppUnhealthyEnabled: true, argoCdAppUnhealthyFor: '15m', + argoCdAppAutoSyncDisabledEnabled: true, argoCdAppAutoSyncDisabledFor: '2h', argoCdAppSyncInterval: '10m', argoCdNotificationDeliveryInterval: '10m', diff --git a/dashboards_out/.lint b/dashboards_out/.lint deleted file mode 100644 index dfc108c..0000000 --- a/dashboards_out/.lint +++ /dev/null @@ -1,5 +0,0 @@ -exclusions: - template-job-rule: - panel-job-instance-rule: - target-rate-interval-rule: - panel-datasource-rule: diff --git a/dashboards_out/argo-cd-application-overview.json b/dashboards_out/argo-cd-application-overview.json index f4e3200..ec2c6f3 100644 --- a/dashboards_out/argo-cd-application-overview.json +++ b/dashboards_out/argo-cd-application-overview.json @@ -64,7 +64,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -115,7 +115,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -166,7 +166,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -217,7 +217,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -247,7 +247,7 @@ "content": "No applications defined", "mode": "markdown" }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "title": "Application Badges", "type": "text" }, @@ -329,7 +329,7 @@ } ] }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -435,7 +435,7 @@ } ] }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -541,7 +541,7 @@ } ] }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -647,7 +647,7 @@ } ] }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -735,7 +735,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -785,7 +785,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -835,7 +835,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { diff --git a/dashboards_out/argo-cd-notifications-overview.json b/dashboards_out/argo-cd-notifications-overview.json index 55269a9..578f6e0 100644 --- a/dashboards_out/argo-cd-notifications-overview.json +++ b/dashboards_out/argo-cd-notifications-overview.json @@ -64,7 +64,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -115,7 +115,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { diff --git a/dashboards_out/argo-cd-operational-overview.json b/dashboards_out/argo-cd-operational-overview.json index f4af33b..4e413a4 100644 --- a/dashboards_out/argo-cd-operational-overview.json +++ b/dashboards_out/argo-cd-operational-overview.json @@ -44,7 +44,7 @@ "y": 1 }, "id": 2, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -74,7 +74,7 @@ "y": 1 }, "id": 3, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -104,7 +104,7 @@ "y": 1 }, "id": 4, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -194,7 +194,7 @@ "mode": "multi" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -286,7 +286,7 @@ "mode": "multi" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -349,7 +349,7 @@ } ] }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -441,7 +441,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -493,7 +493,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -557,7 +557,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -588,7 +588,7 @@ "y": 19 }, "id": 13, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -641,7 +641,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -693,7 +693,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -757,7 +757,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -809,7 +809,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -861,7 +861,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -925,7 +925,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -977,7 +977,7 @@ "sort": "desc" } }, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -1008,7 +1008,7 @@ "y": 45 }, "id": 23, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { @@ -1040,7 +1040,7 @@ "y": 45 }, "id": 24, - "pluginVersion": "v11.0.0", + "pluginVersion": "v11.4.0", "targets": [ { "datasource": { diff --git a/prometheus_alerts.yaml b/prometheus_alerts.yaml index 5b5ac8e..8ef7bfe 100644 --- a/prometheus_alerts.yaml +++ b/prometheus_alerts.yaml @@ -1,20 +1,23 @@ "groups": - "name": "argo-cd" "rules": - - "alert": "ArgoCdAppOutOfSync" + - "alert": "ArgoCdAppSyncFailed" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" - "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 15m." - "summary": "An ArgoCD Application is Out Of Sync." + "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past 10m." + "summary": "An ArgoCD Application has Failed to Sync." "expr": | sum( - argocd_app_info{ - job=~".*", - sync_status!="Synced" - } - ) by (job, dest_server, project, name, sync_status) - > 0 - "for": "15m" + round( + increase( + argocd_app_sync_total{ + job=~".*", + phase!="Succeeded" + }[10m] + ) + ) + ) by (job, dest_server, project, name, phase) > 0 + "for": "1m" "labels": "severity": "warning" - "alert": "ArgoCdAppUnhealthy" @@ -33,6 +36,22 @@ "for": "15m" "labels": "severity": "warning" + - "alert": "ArgoCdAppOutOfSync" + "annotations": + "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" + "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 15m." + "summary": "An ArgoCD Application is Out Of Sync." + "expr": | + sum( + argocd_app_info{ + job=~".*", + sync_status!="Synced" + } + ) by (job, dest_server, project, name, sync_status) + > 0 + "for": "15m" + "labels": + "severity": "warning" - "alert": "ArgoCdAppAutoSyncDisabled" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" @@ -50,25 +69,6 @@ "for": "2h" "labels": "severity": "warning" - - "alert": "ArgoCdAppSyncFailed" - "annotations": - "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" - "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past 10m." - "summary": "An ArgoCD Application has Failed to Sync." - "expr": | - sum( - round( - increase( - argocd_app_sync_total{ - job=~".*", - phase!="Succeeded" - }[10m] - ) - ) - ) by (job, dest_server, project, name, phase) > 0 - "for": "1m" - "labels": - "severity": "warning" - "alert": "ArgoCdNotificationDeliveryFailed" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-notifications-overview-kask/argocd-notifications-overview?var-job={{ $labels.job }}&var-exported_service={{ $labels.exported_service }}"