From ef89608c654ecd9cd59b6b3c304bc4017c1f6584 Mon Sep 17 00:00:00 2001 From: CHP Date: Wed, 10 Jul 2024 13:25:50 -0400 Subject: [PATCH 1/2] isSystem model and checkUserAlerts filter --- packages/api/src/models/alert.ts | 14 ++++++++++++++ packages/api/src/tasks/alerts/checkUserAlerts.ts | 1 + 2 files changed, 15 insertions(+) diff --git a/packages/api/src/models/alert.ts b/packages/api/src/models/alert.ts index 7ece34cdc..b1ae55d41 100644 --- a/packages/api/src/models/alert.ts +++ b/packages/api/src/models/alert.ts @@ -5,6 +5,12 @@ import { Chart } from './dashboard'; export type AlertType = 'presence' | 'absence'; +export enum SystemAlertName { + ANOMALOUS_ERRORS = 'Anomalous HTTP Server Errors', + ANOMALOUS_REQUESTS = 'Anomalous HTTP Server Requests', + ANOMALOUS_ERROR_LOGS = 'Anomalous General Error Logs', +} + export enum AlertState { ALERT = 'ALERT', DISABLED = 'DISABLED', @@ -82,6 +88,9 @@ export interface IAlert { until: Date; }; + // System + isSystem?: boolean; + customConfig?: AlertCustomConfig; historyWindow?: number; // in minutes @@ -198,6 +207,11 @@ const AlertSchema = new Schema( required: false, }, }, + isSystem: { + type: Boolean, + required: false, + default: false, + }, customConfig: { type: Schema.Types.Mixed, required: false, diff --git a/packages/api/src/tasks/alerts/checkUserAlerts.ts b/packages/api/src/tasks/alerts/checkUserAlerts.ts index dd97326cc..a11574dce 100644 --- a/packages/api/src/tasks/alerts/checkUserAlerts.ts +++ b/packages/api/src/tasks/alerts/checkUserAlerts.ts @@ -266,6 +266,7 @@ export const processAlert = async (now: Date, alert: AlertDocument) => { export default async () => { const now = new Date(); const alerts = await Alert.find({ + isSystem: { $ne: true }, state: { $ne: AlertState.DISABLED }, $or: [ { checker: { $exists: false } }, From 9d1358526f09fbc718896d5ca793d28fede811d5 Mon Sep 17 00:00:00 2001 From: CHP Date: Wed, 10 Jul 2024 16:40:34 -0400 Subject: [PATCH 2/2] add addsystemalerts task - still need documentation update --- packages/api/src/models/alert.ts | 4 +- .../api/src/tasks/alerts/addSystemAlerts.ts | 168 ++++++++++++++++++ packages/api/src/tasks/index.ts | 4 + 3 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 packages/api/src/tasks/alerts/addSystemAlerts.ts diff --git a/packages/api/src/models/alert.ts b/packages/api/src/models/alert.ts index b1ae55d41..1dd0cb018 100644 --- a/packages/api/src/models/alert.ts +++ b/packages/api/src/models/alert.ts @@ -7,8 +7,8 @@ export type AlertType = 'presence' | 'absence'; export enum SystemAlertName { ANOMALOUS_ERRORS = 'Anomalous HTTP Server Errors', - ANOMALOUS_REQUESTS = 'Anomalous HTTP Server Requests', - ANOMALOUS_ERROR_LOGS = 'Anomalous General Error Logs', + ANOMALOUS_REQUESTS = 'Anomalous HTTP Successful Requests', + ANOMALOUS_ERROR_EVENTS = 'Anomalous Error Events', } export enum AlertState { diff --git a/packages/api/src/tasks/alerts/addSystemAlerts.ts b/packages/api/src/tasks/alerts/addSystemAlerts.ts new file mode 100644 index 000000000..252214dcd --- /dev/null +++ b/packages/api/src/tasks/alerts/addSystemAlerts.ts @@ -0,0 +1,168 @@ +import { serializeError } from 'serialize-error'; + +import { AggFn } from '@/clickhouse'; +import Alert, { + AlertChannel, + AnomalyModel, + CheckerType, + SystemAlertName, +} from '@/models/alert'; +import Team from '@/models/team'; +import logger from '@/utils/logger'; + +type SystemAlertConfig = { + name: SystemAlertName; + where: string; + message: string; + models: AnomalyModel[]; + interval: string; +}; + +const SYSTEM_ALERT_CONFIGS: SystemAlertConfig[] = [ + { + name: SystemAlertName.ANOMALOUS_ERRORS, + where: 'level:"error" span.kind:"server"', + message: [ + `Alert for ${SystemAlertName.ANOMALOUS_ERRORS}`, + 'Observed {{value}} requests with errors returned in the past {{granularity}}(s).', + ].join('\n\n'), + models: [ + { + name: 'zscore', + enabled: true, + params: { + threshold: 10, + }, + }, + ], + interval: '5m', + }, + { + name: SystemAlertName.ANOMALOUS_REQUESTS, + where: 'level:"ok" span.kind:"server"', + message: [ + `Alert for ${SystemAlertName.ANOMALOUS_REQUESTS}`, + 'Observed {{value}} requests returned in the past {{granularity}}(s).', + ].join('\n\n'), + models: [ + { + name: 'zscore', + enabled: true, + params: { + threshold: 10, + }, + }, + ], + interval: '5m', + }, + { + name: SystemAlertName.ANOMALOUS_ERROR_EVENTS, + where: 'level:error', + message: [ + `Alert for ${SystemAlertName.ANOMALOUS_ERROR_EVENTS}`, + 'Observed {{value}} error logs returned in the past {{granularity}}(s).', + ].join('\n\n'), + models: [ + { + name: 'zscore', + enabled: true, + params: { + threshold: 10, + }, + }, + ], + interval: '5m', + }, +]; + +async function createAlertIfMissing( + teamId: string, + channel: AlertChannel, + config: SystemAlertConfig, +): Promise { + const { name, where, message, models, interval } = config; + try { + await Alert.create({ + team: teamId, + isSystem: true, + name: name, + interval: interval, + threshold: 1, + type: 'presence', + cron: '* * * * *', + timezone: 'UTC', + source: 'CUSTOM', + channel: channel, + checker: { + type: CheckerType.Anomaly, + config: { + mode: 'any', + models: models, + }, + }, + customConfig: { + series: [ + { + table: 'logs', + type: 'table', + where: where, + aggFn: AggFn.Count, + groupBy: [], + }, + ], + }, + historyWindow: 1440, + message: message, + }); + } catch (e) { + logger.error({ + message: 'error creating system alert', + teamId, + config, + error: serializeError(e), + }); + } +} + +export default async () => { + const teams = await Team.find({}); + logger.info(`Checking system alerts for ${teams.length} teams`); + const promises: Promise[] = []; + + for (const team of teams) { + logger.info(`Processing team ${team.id}`); + const teamId = team.id; + + for (const systemAlertConfig of SYSTEM_ALERT_CONFIGS) { + const existingAlert = await Alert.findOne( + { + team: teamId, + isSystem: true, + source: 'CUSTOM', + name: systemAlertConfig.name, + }, + {}, + ); + + if (!existingAlert) { + logger.info( + `Team ${teamId} is missing ${systemAlertConfig.name}, creating one`, + ); + + const defaultChannel: AlertChannel = { + type: 'webhook', + webhookId: 'YOUR_WEBHOOK_ID', + }; + + const alertPromise = createAlertIfMissing( + teamId, + defaultChannel, + systemAlertConfig, + ); + + promises.push(alertPromise); + } + } + await Promise.all(promises); + } +}; diff --git a/packages/api/src/tasks/index.ts b/packages/api/src/tasks/index.ts index 20a2a577f..c7b43e767 100644 --- a/packages/api/src/tasks/index.ts +++ b/packages/api/src/tasks/index.ts @@ -5,6 +5,7 @@ import { serializeError } from 'serialize-error'; import { IS_DEV } from '@/config'; import { connectDB, mongooseConnection } from '@/models'; +import addSystemAlerts from '@/tasks/alerts/addSystemAlerts'; import checkAnomalyAlerts from '@/tasks/alerts/checkAnomalyAlerts'; import checkUserAlerts from '@/tasks/alerts/checkUserAlerts'; import refreshPropertyTypeMappings from '@/tasks/refreshPropertyTypeMappings'; @@ -21,6 +22,9 @@ const main = async () => { const t0 = performance.now(); logger.info(`Task [${taskName}] started at ${new Date()}`); switch (taskName) { + case 'add-system-alerts': + await addSystemAlerts(); + break; // TODO: rename to check-users-alerts case 'check-alerts': await checkUserAlerts();