Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backend Port of System Alerts Schema Additions #468

Draft
wants to merge 2 commits into
base: cole/hdx-963-new-system-alerts-backend
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions packages/api/src/models/alert.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import { Chart } from './dashboard';

export type AlertType = 'presence' | 'absence';

/**
 * Human-readable names of the built-in system alerts.
 *
 * Each member's string value is the display name; the system-alert
 * provisioning task stores it as the alert's `name` and also uses it to look
 * up whether a team already has that alert.
 */
export enum SystemAlertName {
// Anomaly alert on HTTP server errors.
ANOMALOUS_ERRORS = 'Anomalous HTTP Server Errors',
// Anomaly alert on successful HTTP requests.
ANOMALOUS_REQUESTS = 'Anomalous HTTP Successful Requests',
// Anomaly alert on error events.
ANOMALOUS_ERROR_EVENTS = 'Anomalous Error Events',
}

export enum AlertState {
ALERT = 'ALERT',
DISABLED = 'DISABLED',
Expand Down Expand Up @@ -82,6 +88,9 @@ export interface IAlert {
until: Date;
};

// System
isSystem?: boolean;

customConfig?: AlertCustomConfig;
historyWindow?: number; // in minutes

Expand Down Expand Up @@ -198,6 +207,11 @@ const AlertSchema = new Schema<IAlert>(
required: false,
},
},
isSystem: {
type: Boolean,
required: false,
default: false,
},
customConfig: {
type: Schema.Types.Mixed,
required: false,
Expand Down
168 changes: 168 additions & 0 deletions packages/api/src/tasks/alerts/addSystemAlerts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { serializeError } from 'serialize-error';

import { AggFn } from '@/clickhouse';
import Alert, {
AlertChannel,
AnomalyModel,
CheckerType,
SystemAlertName,
} from '@/models/alert';
import Team from '@/models/team';
import logger from '@/utils/logger';

/**
 * Static definition of one system alert that the provisioning task creates
 * for a team when it is missing.
 */
type SystemAlertConfig = {
// Alert name; also the key used to check whether a team already has it.
name: SystemAlertName;
// Filter string passed as the `where` of the alert's single `logs` series.
where: string;
// Alert message text; the built-in configs embed `{{value}}` and
// `{{granularity}}` markers (presumably expanded at notification time —
// TODO confirm against the alert renderer).
message: string;
// Anomaly models forwarded to the alert's checker config.
models: AnomalyModel[];
// Alert interval string, e.g. '5m'.
interval: string;
};

/**
 * Builds one system alert definition.
 *
 * All built-in system alerts share the same shape: a two-paragraph message
 * (a heading naming the alert, then an observation sentence), a single enabled
 * `zscore` model with threshold 10, and a 5 minute interval. Only the name,
 * the filter and the observation sentence vary.
 *
 * A fresh `models` array is created on every call so the resulting configs
 * never share mutable state.
 */
const buildSystemAlertConfig = (
  name: SystemAlertName,
  where: string,
  observation: string,
): SystemAlertConfig => ({
  name,
  where,
  message: [`Alert for ${name}`, observation].join('\n\n'),
  models: [
    {
      name: 'zscore',
      enabled: true,
      params: {
        threshold: 10,
      },
    },
  ],
  interval: '5m',
});

/** The system alerts every team is expected to have. */
const SYSTEM_ALERT_CONFIGS: SystemAlertConfig[] = [
  buildSystemAlertConfig(
    SystemAlertName.ANOMALOUS_ERRORS,
    'level:"error" span.kind:"server"',
    'Observed {{value}} requests with errors returned in the past {{granularity}}(s).',
  ),
  buildSystemAlertConfig(
    SystemAlertName.ANOMALOUS_REQUESTS,
    'level:"ok" span.kind:"server"',
    'Observed {{value}} requests returned in the past {{granularity}}(s).',
  ),
  buildSystemAlertConfig(
    SystemAlertName.ANOMALOUS_ERROR_EVENTS,
    'level:error',
    'Observed {{value}} error logs returned in the past {{granularity}}(s).',
  ),
];

/**
 * Creates a system anomaly alert for a team from a static config.
 *
 * Despite the name, this function does not itself check for an existing
 * alert; the caller is expected to have done that lookup already.
 *
 * Failures are logged (with the team id and the offending config) and then
 * swallowed, so the returned promise never rejects because of a creation error.
 *
 * @param teamId  Team that will own the alert.
 * @param channel Notification channel stored on the alert.
 * @param config  Static definition supplying name, filter, message, models and interval.
 */
async function createAlertIfMissing(
  teamId: string,
  channel: AlertChannel,
  config: SystemAlertConfig,
): Promise<void> {
  try {
    // The payload is kept inline so its string literals are checked against
    // the Alert model's field types.
    await Alert.create({
      team: teamId,
      isSystem: true,
      name: config.name,
      interval: config.interval,
      threshold: 1,
      type: 'presence',
      cron: '* * * * *',
      timezone: 'UTC',
      source: 'CUSTOM',
      channel,
      checker: {
        type: CheckerType.Anomaly,
        config: {
          mode: 'any',
          models: config.models,
        },
      },
      customConfig: {
        series: [
          {
            table: 'logs',
            type: 'table',
            where: config.where,
            aggFn: AggFn.Count,
            groupBy: [],
          },
        ],
      },
      historyWindow: 1440,
      message: config.message,
    });
  } catch (err) {
    logger.error({
      message: 'error creating system alert',
      teamId,
      config,
      error: serializeError(err),
    });
  }
}

/**
 * Task entry point: makes sure every team has each alert listed in
 * SYSTEM_ALERT_CONFIGS, creating the ones that are missing.
 *
 * Teams are processed one at a time. For each team the existence lookups run
 * sequentially, while the creations for that team run concurrently and are
 * awaited before moving on to the next team. Creation errors are logged inside
 * `createAlertIfMissing` and do not abort the run.
 */
export default async () => {
  const teams = await Team.find({});
  logger.info(`Checking system alerts for ${teams.length} teams`);

  for (const team of teams) {
    logger.info(`Processing team ${team.id}`);
    const teamId = team.id;

    // Scoped per team: a single accumulator shared across teams would be
    // re-awaited on every iteration and would grow with the number of teams.
    const pendingCreations: Promise<void>[] = [];

    for (const systemAlertConfig of SYSTEM_ALERT_CONFIGS) {
      const existingAlert = await Alert.findOne(
        {
          team: teamId,
          isSystem: true,
          source: 'CUSTOM',
          name: systemAlertConfig.name,
        },
        {},
      );

      if (existingAlert) {
        continue;
      }

      logger.info(
        `Team ${teamId} is missing ${systemAlertConfig.name}, creating one`,
      );

      // NOTE(review): 'YOUR_WEBHOOK_ID' is a placeholder, not a real webhook
      // id. Alerts created here cannot notify until this is replaced with a
      // real per-team channel — confirm the intended source before shipping.
      const defaultChannel: AlertChannel = {
        type: 'webhook',
        webhookId: 'YOUR_WEBHOOK_ID',
      };

      pendingCreations.push(
        createAlertIfMissing(teamId, defaultChannel, systemAlertConfig),
      );
    }

    await Promise.all(pendingCreations);
  }
};
1 change: 1 addition & 0 deletions packages/api/src/tasks/alerts/checkUserAlerts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ export const processAlert = async (now: Date, alert: AlertDocument) => {
export default async () => {
const now = new Date();
const alerts = await Alert.find({
isSystem: { $ne: true },
state: { $ne: AlertState.DISABLED },
$or: [
{ checker: { $exists: false } },
Expand Down
4 changes: 4 additions & 0 deletions packages/api/src/tasks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { serializeError } from 'serialize-error';

import { IS_DEV } from '@/config';
import { connectDB, mongooseConnection } from '@/models';
import addSystemAlerts from '@/tasks/alerts/addSystemAlerts';
import checkAnomalyAlerts from '@/tasks/alerts/checkAnomalyAlerts';
import checkUserAlerts from '@/tasks/alerts/checkUserAlerts';
import refreshPropertyTypeMappings from '@/tasks/refreshPropertyTypeMappings';
Expand All @@ -21,6 +22,9 @@ const main = async () => {
const t0 = performance.now();
logger.info(`Task [${taskName}] started at ${new Date()}`);
switch (taskName) {
case 'add-system-alerts':
await addSystemAlerts();
break;
// TODO: rename to check-users-alerts
case 'check-alerts':
await checkUserAlerts();
Expand Down