From 9bf5ed50749b548a074b29111bff82cd689d1cc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 11:29:33 +0200 Subject: [PATCH 01/13] fix: move event listeners outside metric collection --- src/lib/metrics.ts | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 6d08ffde5c7b..9a7775593196 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -460,30 +460,6 @@ export default class MetricsMonitor { .set(stage.duration); }); - eventBus.on( - events.STAGE_ENTERED, - (entered: { stage: string; feature: string }) => { - if (flagResolver.isEnabled('trackLifecycleMetrics')) { - logger.info( - `STAGE_ENTERED listened ${JSON.stringify(entered)}`, - ); - } - featureLifecycleStageEnteredCounter.increment({ - stage: entered.stage, - }); - }, - ); - - eventBus.on( - events.EXCEEDS_LIMIT, - ({ - resource, - limit, - }: { resource: string; limit: number }) => { - exceedsLimitErrorCounter.increment({ resource, limit }); - }, - ); - featureLifecycleStageCountByProject.reset(); stageCountByProjectResult.forEach((stageResult) => featureLifecycleStageCountByProject @@ -723,6 +699,27 @@ export default class MetricsMonitor { 0, // no jitter ); + eventBus.on( + events.EXCEEDS_LIMIT, + ({ resource, limit }: { resource: string; limit: number }) => { + exceedsLimitErrorCounter.increment({ resource, limit }); + }, + ); + + eventBus.on( + events.STAGE_ENTERED, + (entered: { stage: string; feature: string }) => { + if (flagResolver.isEnabled('trackLifecycleMetrics')) { + logger.info( + `STAGE_ENTERED listened ${JSON.stringify(entered)}`, + ); + } + featureLifecycleStageEnteredCounter.increment({ + stage: entered.stage, + }); + }, + ); + eventBus.on( events.REQUEST_TIME, ({ path, method, time, statusCode, appName }) => { From 7b585f1d006ee4af4745335fb850c5df9aef345c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 11:33:23 +0200 Subject: [PATCH 02/13] Migrate max_feature_environment_strategies --- src/lib/metrics.ts | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 9a7775593196..64e0caac2b39 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -122,10 +122,19 @@ export default class MetricsMonitor { help: 'Number of feature flags', labelNames: ['version'], }); - const maxFeatureEnvironmentStrategies = createGauge({ + dbMetrics.registerGaugeDbMetric({ name: 'max_feature_environment_strategies', help: 'Maximum number of environment strategies in one feature', labelNames: ['feature', 'environment'], + query: () => + stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), + map: (result) => ({ + count: result.count, + labels: { + environment: result.environment, + feature: result.feature, + }, + }), }); dbMetrics.registerGaugeDbMetric({ @@ -408,7 +417,6 @@ export default class MetricsMonitor { const stats = await instanceStatsService.getStats(); const [ - maxEnvironmentStrategies, maxConstraintValuesResult, maxConstraintsPerStrategyResult, stageCountByProjectResult, @@ -419,7 +427,6 @@ export default class MetricsMonitor { instanceOnboardingMetrics, projectsOnboardingMetrics, ] = await Promise.all([ - stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), stores.featureStrategiesReadModel.getMaxConstraintValues(), stores.featureStrategiesReadModel.getMaxConstraintsPerStrategy(), stores.featureLifecycleReadModel.getStageCountByProject(), @@ -488,16 +495,6 @@ export default class MetricsMonitor { legacyTokensActive.reset(); legacyTokensActive.set(deprecatedTokens.activeLegacyTokens); - if (maxEnvironmentStrategies) { - maxFeatureEnvironmentStrategies.reset(); - maxFeatureEnvironmentStrategies - .labels({ - environment: maxEnvironmentStrategies.environment, - feature: maxEnvironmentStrategies.feature, - }) - .set(maxEnvironmentStrategies.count); - } - if (maxConstraintValuesResult) { maxConstraintValues.reset(); maxConstraintValues From 8cda5cd986f289c6219ad4c9e6ce95d29754cf4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 11:38:05 +0200 Subject: [PATCH 03/13] Migrate feature_toggles_total --- src/lib/metrics.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 64e0caac2b39..429f3478f469 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -117,10 +117,12 @@ export default class MetricsMonitor { help: 'Number of times a feature flag has been used', labelNames: ['toggle', 'active', 'appName'], }); - const featureFlagsTotal = createGauge({ + dbMetrics.registerGaugeDbMetric({ name: 'feature_toggles_total', help: 'Number of feature flags', labelNames: ['version'], + query: () => instanceStatsService.getToggleCount(), + map: (count) => ({ count, labels: { version } }), }); dbMetrics.registerGaugeDbMetric({ name: 'max_feature_environment_strategies', @@ -446,9 +448,6 @@ export default class MetricsMonitor { : Promise.resolve([]), ]); - featureFlagsTotal.reset(); - featureFlagsTotal.labels({ version }).set(stats.featureToggles); - featureTogglesArchivedTotal.reset(); featureTogglesArchivedTotal.set(stats.archivedFeatureToggles); From e2cd347e8e0262e18e492db947304b6d8a51aa98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 12:45:29 +0200 Subject: [PATCH 04/13] Ability to execute a task immediately --- src/lib/metrics-gauge.ts | 10 ++++++---- src/lib/metrics.ts | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index 7938759f6d59..87a0269ed4ac 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -22,17 +22,19 @@ export class DbMetricsMonitor { constructor() {} - registerGaugeDbMetric(definition: GaugeDefinition) { + registerGaugeDbMetric(definition: GaugeDefinition): Task { const gauge = createGauge(definition); this.gauges.set(definition.name, gauge); - this.tasks.add(async () => { + const task = async () => { const result = await definition.query(); - if (result) { + if (result !== null && result !== undefined) { const { count, labels } = definition.map(result); gauge.reset(); gauge.labels(labels).set(count); } - }); + }; + this.tasks.add(task); + return task; } refreshDbMetrics = async () => { diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 429f3478f469..d276e61e44d6 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -117,13 +117,16 @@ export default class MetricsMonitor { help: 'Number of times a feature flag has been used', labelNames: ['toggle', 'active', 'appName'], }); - dbMetrics.registerGaugeDbMetric({ + + // schedule and execute immediately + await dbMetrics.registerGaugeDbMetric({ name: 'feature_toggles_total', help: 'Number of feature flags', labelNames: ['version'], query: () => instanceStatsService.getToggleCount(), map: (count) => ({ count, labels: { version } }), - }); + })(); + dbMetrics.registerGaugeDbMetric({ name: 'max_feature_environment_strategies', help: 'Maximum number of environment strategies in one feature', From 73f355984de468ee83c4271d24e9681dd05391aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 13:15:51 +0200 Subject: [PATCH 05/13] Remove jitter and migrate tasks to execute immediately --- src/lib/metrics-gauge.ts | 40 ++++++++++++++++++++++++++++++---------- src/lib/metrics.ts | 19 ++++++++++--------- 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index 87a0269ed4ac..880b26259b36 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -1,11 +1,18 @@ +import type { Logger } from './logger'; +import type { IUnleashConfig } from './types'; import { createGauge, type Gauge } from './util/metrics'; type RestrictedRecord = Record; type Query = () => Promise; -type MapResult = (result: R) => { - count: number; - labels: RestrictedRecord['labelNames']>; -}; +type MapResult = (result: R) => + | { + count: number; + labels: RestrictedRecord['labelNames']>; + } + | { + count: number; + labels: RestrictedRecord['labelNames']>; + }[]; type GaugeDefinition = { name: string; @@ -19,18 +26,31 @@ type Task = () => Promise; export class DbMetricsMonitor { private tasks: Set = new Set(); private gauges: Map> = new Map(); + private logger: Logger; + + constructor(config: IUnleashConfig) { + this.logger = config.getLogger('gauge-metrics'); + } - constructor() {} + private asArray(value: T | T[]): T[] { + return Array.isArray(value) ? value : [value]; + } registerGaugeDbMetric(definition: GaugeDefinition): Task { const gauge = createGauge(definition); this.gauges.set(definition.name, gauge); const task = async () => { - const result = await definition.query(); - if (result !== null && result !== undefined) { - const { count, labels } = definition.map(result); - gauge.reset(); - gauge.labels(labels).set(count); + try { + const result = await definition.query(); + if (result !== null && result !== undefined) { + const results = this.asArray(definition.map(result)); + gauge.reset(); + for (const r of results) { + gauge.labels(r.labels).set(r.count); + } + } + } catch (e) { + this.logger.warn(`Failed to refresh ${definition.name}`, e); } }; this.tasks.add(task); diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index d276e61e44d6..75b7c966ed58 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -57,7 +57,7 @@ export default class MetricsMonitor { const { eventStore, environmentStore } = stores; const { flagResolver } = config; - const dbMetrics = new DbMetricsMonitor(); + const dbMetrics = new DbMetricsMonitor(config); const cachedEnvironments: () => Promise = memoizee( async () => environmentStore.getAll(), @@ -260,11 +260,18 @@ export default class MetricsMonitor { help: 'Number of strategies', }); - const clientAppsTotal = createGauge({ + // execute immediately to get initial values + await dbMetrics.registerGaugeDbMetric({ name: 'client_apps_total', help: 'Number of registered client apps aggregated by range by last seen', labelNames: ['range'], - }); + query: () => instanceStatsService.getLabeledAppCounts(), + map: (result) => + Object.entries(result).map(([range, count]) => ({ + count, + labels: { range }, + })), + })(); const samlEnabled = createGauge({ name: 'saml_enabled', @@ -628,11 +635,6 @@ export default class MetricsMonitor { oidcEnabled.reset(); oidcEnabled.set(stats.OIDCenabled ? 1 : 0); - clientAppsTotal.reset(); - stats.clientApps.forEach(({ range, count }) => - clientAppsTotal.labels({ range }).set(count), - ); - rateLimits.reset(); rateLimits .labels({ @@ -695,7 +697,6 @@ export default class MetricsMonitor { collectStaticCounters.bind(this), hoursToMilliseconds(2), 'collectStaticCounters', - 0, // no jitter ); eventBus.on( From 4806106fc4d1b3ec22c5070ab77c2f01bc01eb8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Tue, 15 Oct 2024 13:22:48 +0200 Subject: [PATCH 06/13] Add type for MetricValue --- src/lib/metrics-gauge.ts | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index 880b26259b36..e4f62d74bdd4 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -4,15 +4,11 @@ import { createGauge, type Gauge } from './util/metrics'; type RestrictedRecord = Record; type Query = () => Promise; -type MapResult = (result: R) => - | { - count: number; - labels: RestrictedRecord['labelNames']>; - } - | { - count: number; - labels: RestrictedRecord['labelNames']>; - }[]; +type MetricValue = { + count: number; + labels: RestrictedRecord['labelNames']>; +}; +type MapResult = (result: R) => MetricValue | MetricValue[]; type GaugeDefinition = { name: string; From 05a338c48797ff700d9f505993216cbdbfbfeba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Wed, 16 Oct 2024 23:12:09 +0200 Subject: [PATCH 07/13] Get closer to prom-client types --- src/lib/metrics-gauge.test.ts | 114 ++++++++++++++++++++++++++++++++++ src/lib/metrics-gauge.ts | 50 ++++++++++----- src/lib/metrics.ts | 8 +-- 3 files changed, 152 insertions(+), 20 deletions(-) create mode 100644 src/lib/metrics-gauge.test.ts diff --git a/src/lib/metrics-gauge.test.ts b/src/lib/metrics-gauge.test.ts new file mode 100644 index 000000000000..e024563f9628 --- /dev/null +++ b/src/lib/metrics-gauge.test.ts @@ -0,0 +1,114 @@ +import { register } from 'prom-client'; +import { createTestConfig } from '../test/config/test-config'; +import type { IUnleashConfig } from './types'; +import { DbMetricsMonitor } from './metrics-gauge'; + +const prometheusRegister = register; +let config: IUnleashConfig; +let dbMetrics: DbMetricsMonitor; + +beforeAll(async () => { + config = createTestConfig({ + server: { + serverMetrics: true, + }, + }); +}); + +beforeEach(async () => { + dbMetrics = new DbMetricsMonitor(config); +}); + +test('should collect registered metrics', async () => { + dbMetrics.registerGaugeDbMetric({ + name: 'my_metric', + help: 'This is the answer to life, the univers, and everything', + labelNames: [], + query: () => Promise.resolve(42), + map: (result) => ({ value: result }), + }); + + await dbMetrics.refreshDbMetrics(); + + const metrics = await prometheusRegister.metrics(); + expect(metrics).toMatch(/my_metric 42/); +}); + +test('should collect registered metrics with labels', async () => { + dbMetrics.registerGaugeDbMetric({ + name: 'life_the_universe_and_everything', + help: 'This is the answer to life, the univers, and everything', + labelNames: ['test'], + query: () => Promise.resolve(42), + map: (result) => ({ value: result, labels: { test: 'case' } }), + }); + + await dbMetrics.refreshDbMetrics(); + + const metrics = await prometheusRegister.metrics(); + expect(metrics).toMatch( + /life_the_universe_and_everything\{test="case"\} 42/, + ); +}); + +test('should collect multiple registered metrics with and without labels', async () => { + dbMetrics.registerGaugeDbMetric({ + name: 'my_first_metric', + help: 'This is the answer to life, the univers, and everything', + labelNames: [], + query: () => Promise.resolve(42), + map: (result) => ({ value: result }), + }); + + dbMetrics.registerGaugeDbMetric({ + name: 'my_other_metric', + help: 'This is Eulers number', + labelNames: ['euler'], + query: () => Promise.resolve(Math.E), + map: (result) => ({ value: result, labels: { euler: 'number' } }), + }); + + await dbMetrics.refreshDbMetrics(); + + const metrics = await prometheusRegister.metrics(); + expect(metrics).toMatch(/my_first_metric 42/); + expect(metrics).toMatch(/my_other_metric\{euler="number"\} 2.71828/); +}); + +test('should support different label and value pairs', async () => { + dbMetrics.registerGaugeDbMetric({ + name: 'multi_dimensional', + help: 'This metric has different values for different labels', + labelNames: ['version', 'range'], + query: () => Promise.resolve(2), + map: (result) => [ + { value: result, labels: { version: '1', range: 'linear' } }, + { + value: result * result, + labels: { version: '2', range: 'square' }, + }, + { value: result / 2, labels: { version: '3', range: 'half' } }, + ], + }); + + await dbMetrics.refreshDbMetrics(); + + const metrics = await prometheusRegister.metrics(); + expect(metrics).toMatch( + /multi_dimensional\{version="1",range="linear"\} 2\nmulti_dimensional\{version="2",range="square"\} 4\nmulti_dimensional\{version="3",range="half"\} 1/, + ); + expect( + await dbMetrics.findValue('multi_dimensional', { range: 'linear' }), + ).toBe(2); + expect( + await dbMetrics.findValue('multi_dimensional', { range: 'half' }), + ).toBe(1); + expect( + await dbMetrics.findValue('multi_dimensional', { range: 'square' }), + ).toBe(4); + expect( + await dbMetrics.findValue('multi_dimensional', { range: 'x' }), + ).toBeUndefined(); + expect(await dbMetrics.findValue('multi_dimensional')).toBe(2); // first match + expect(await dbMetrics.findValue('other')).toBeUndefined(); +}); diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index e4f62d74bdd4..60accbeaba9b 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -2,37 +2,40 @@ import type { Logger } from './logger'; import type { IUnleashConfig } from './types'; import { createGauge, type Gauge } from './util/metrics'; -type RestrictedRecord = Record; type Query = () => Promise; -type MetricValue = { - count: number; - labels: RestrictedRecord['labelNames']>; +type MetricValue = { + value: number; + labels?: Record; }; -type MapResult = (result: R) => MetricValue | MetricValue[]; +type MapResult = ( + result: R, +) => MetricValue | MetricValue[]; -type GaugeDefinition = { +type GaugeDefinition = { name: string; help: string; - labelNames: string[]; + labelNames: L[]; query: Query; - map: MapResult; + map: MapResult; }; type Task = () => Promise; export class DbMetricsMonitor { private tasks: Set = new Set(); private gauges: Map> = new Map(); - private logger: Logger; + private log: Logger; - constructor(config: IUnleashConfig) { - this.logger = config.getLogger('gauge-metrics'); + constructor({ getLogger }: Pick) { + this.log = getLogger('gauge-metrics'); } private asArray(value: T | T[]): T[] { return Array.isArray(value) ? value : [value]; } - registerGaugeDbMetric(definition: GaugeDefinition): Task { + registerGaugeDbMetric( + definition: GaugeDefinition, + ): Task { const gauge = createGauge(definition); this.gauges.set(definition.name, gauge); const task = async () => { @@ -42,11 +45,15 @@ export class DbMetricsMonitor { const results = this.asArray(definition.map(result)); gauge.reset(); for (const r of results) { - gauge.labels(r.labels).set(r.count); + if (r.labels) { + gauge.labels(r.labels).set(r.value); + } else { + gauge.set(r.value); + } } } } catch (e) { - this.logger.warn(`Failed to refresh ${definition.name}`, e); + this.log.warn(`Failed to refresh ${definition.name}`, e); } }; this.tasks.add(task); @@ -59,10 +66,21 @@ export class DbMetricsMonitor { } }; - async getLastValue(name: string): Promise { + async findValue( + name: string, + labels?: Record, + ): Promise { const gauge = await this.gauges.get(name)?.gauge?.get(); if (gauge && gauge.values.length > 0) { - return gauge.values[0].value; + const values = labels + ? gauge.values.filter(({ labels: l }) => { + return Object.entries(labels).every( + ([key, value]) => l[key] === value, + ); + }) + : gauge.values; + // return first value + return values.map(({ value }) => value).shift(); } return undefined; } diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 75b7c966ed58..8b0a0e390e97 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -124,7 +124,7 @@ export default class MetricsMonitor { help: 'Number of feature flags', labelNames: ['version'], query: () => instanceStatsService.getToggleCount(), - map: (count) => ({ count, labels: { version } }), + map: (value) => ({ value, labels: { version } }), })(); dbMetrics.registerGaugeDbMetric({ @@ -134,7 +134,7 @@ export default class MetricsMonitor { query: () => stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), map: (result) => ({ - count: result.count, + value: result.count, labels: { environment: result.environment, feature: result.feature, @@ -149,7 +149,7 @@ export default class MetricsMonitor { query: () => stores.featureStrategiesReadModel.getMaxFeatureStrategies(), map: (result) => ({ - count: result.count, + value: result.count, labels: { feature: result.feature }, }), }); @@ -268,7 +268,7 @@ export default class MetricsMonitor { query: () => instanceStatsService.getLabeledAppCounts(), map: (result) => Object.entries(result).map(([range, count]) => ({ - count, + value: count, labels: { range }, })), })(); From f5cf84c8d3b2192612aba494525237270b157180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Wed, 16 Oct 2024 23:34:52 +0200 Subject: [PATCH 08/13] Renames to have gauge and task together --- src/lib/metrics-gauge.ts | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index 60accbeaba9b..86a0c639ec89 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -20,9 +20,13 @@ type GaugeDefinition = { }; type Task = () => Promise; + +interface GaugeUpdater { + target: Gauge; + task: Task; +} export class DbMetricsMonitor { - private tasks: Set = new Set(); - private gauges: Map> = new Map(); + private updaters: Map = new Map(); private log: Logger; constructor({ getLogger }: Pick) { @@ -37,7 +41,6 @@ export class DbMetricsMonitor { definition: GaugeDefinition, ): Task { const gauge = createGauge(definition); - this.gauges.set(definition.name, gauge); const task = async () => { try { const result = await definition.query(); @@ -56,12 +59,15 @@ export class DbMetricsMonitor { this.log.warn(`Failed to refresh ${definition.name}`, e); } }; - this.tasks.add(task); + this.updaters.set(definition.name, { target: gauge, task }); return task; } refreshDbMetrics = async () => { - for (const task of this.tasks) { + const tasks = Array.from(this.updaters.values()).map( + (updater) => updater.task, + ); + for (const task of tasks) { await task(); } }; @@ -70,7 +76,7 @@ export class DbMetricsMonitor { name: string, labels?: Record, ): Promise { - const gauge = await this.gauges.get(name)?.gauge?.get(); + const gauge = await this.updaters.get(name)?.target.gauge?.get(); if (gauge && gauge.values.length > 0) { const values = labels ? gauge.values.filter(({ labels: l }) => { From 5edd0f879fd61f3e8dff96c1f4fab78004174935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 17 Oct 2024 19:12:48 +0200 Subject: [PATCH 09/13] Split metric registration from refresh, use existing prometheus metrics for stats --- .../instance-stats-service.test.ts | 36 +- .../instance-stats/instance-stats-service.ts | 61 +- src/lib/metrics-gauge.ts | 7 +- src/lib/metrics.test.ts | 3 +- src/lib/metrics.ts | 1442 +++++++++-------- .../e2e/api/admin/instance-admin.e2e.test.ts | 11 + 6 files changed, 792 insertions(+), 768 deletions(-) diff --git a/src/lib/features/instance-stats/instance-stats-service.test.ts b/src/lib/features/instance-stats/instance-stats-service.test.ts index d3f423bec1a6..24fb34a0f9b8 100644 --- a/src/lib/features/instance-stats/instance-stats-service.test.ts +++ b/src/lib/features/instance-stats/instance-stats-service.test.ts @@ -4,11 +4,16 @@ import createStores from '../../../test/fixtures/store'; import VersionService from '../../services/version-service'; import { createFakeGetActiveUsers } from './getActiveUsers'; import { createFakeGetProductionChanges } from './getProductionChanges'; - +import { registerPrometheusMetrics } from '../../metrics'; +import { register } from 'prom-client'; +import type { IClientInstanceStore } from '../../types'; let instanceStatsService: InstanceStatsService; let versionService: VersionService; +let clientInstanceStore: IClientInstanceStore; beforeEach(() => { + register.clear(); + const config = createTestConfig(); const stores = createStores(); versionService = new VersionService( @@ -17,6 +22,7 @@ beforeEach(() => { createFakeGetActiveUsers(), createFakeGetProductionChanges(), ); + clientInstanceStore = stores.clientInstanceStore; instanceStatsService = new InstanceStatsService( stores, config, @@ -25,20 +31,25 @@ beforeEach(() => { createFakeGetProductionChanges(), ); - jest.spyOn(instanceStatsService, 'refreshAppCountSnapshot'); - jest.spyOn(instanceStatsService, 'getLabeledAppCounts'); + registerPrometheusMetrics( + config, + stores, + undefined as unknown as string, + config.eventBus, + instanceStatsService, + ); + + jest.spyOn(clientInstanceStore, 'getDistinctApplicationsCount'); jest.spyOn(instanceStatsService, 'getStats'); - // validate initial state without calls to these methods - expect(instanceStatsService.refreshAppCountSnapshot).toHaveBeenCalledTimes( - 0, - ); expect(instanceStatsService.getStats).toHaveBeenCalledTimes(0); }); test('get snapshot should not call getStats', async () => { - await instanceStatsService.refreshAppCountSnapshot(); - expect(instanceStatsService.getLabeledAppCounts).toHaveBeenCalledTimes(1); + await instanceStatsService.dbMetrics.refreshDbMetrics(); + expect( + clientInstanceStore.getDistinctApplicationsCount, + ).toHaveBeenCalledTimes(3); expect(instanceStatsService.getStats).toHaveBeenCalledTimes(0); // subsequent calls to getStatsSnapshot don't call getStats @@ -51,12 +62,11 @@ test('get snapshot should not call getStats', async () => { ]); } // after querying the stats snapshot no call to getStats should be issued - expect(instanceStatsService.getLabeledAppCounts).toHaveBeenCalledTimes(1); + expect( + clientInstanceStore.getDistinctApplicationsCount, + ).toHaveBeenCalledTimes(3); }); test('before the snapshot is refreshed we can still get the appCount', async () => { - expect(instanceStatsService.refreshAppCountSnapshot).toHaveBeenCalledTimes( - 0, - ); expect(instanceStatsService.getAppCountSnapshot('7d')).toBeUndefined(); }); diff --git a/src/lib/features/instance-stats/instance-stats-service.ts b/src/lib/features/instance-stats/instance-stats-service.ts index 3fb505fd6480..1f2e20fa21e2 100644 --- a/src/lib/features/instance-stats/instance-stats-service.ts +++ b/src/lib/features/instance-stats/instance-stats-service.ts @@ -29,6 +29,7 @@ import { CUSTOM_ROOT_ROLE_TYPE } from '../../util'; import type { GetActiveUsers } from './getActiveUsers'; import type { ProjectModeCount } from '../project/project-store'; import type { GetProductionChanges } from './getProductionChanges'; +import { DbMetricsMonitor } from '../../metrics-gauge'; export type TimeRange = 'allTime' | '30d' | '7d'; @@ -115,6 +116,8 @@ export class InstanceStatsService { private featureStrategiesReadModel: IFeatureStrategiesReadModel; + dbMetrics: DbMetricsMonitor; + constructor( { featureToggleStore, @@ -178,37 +181,20 @@ export class InstanceStatsService { this.clientMetricsStore = clientMetricsStoreV2; this.flagResolver = flagResolver; this.featureStrategiesReadModel = featureStrategiesReadModel; + this.dbMetrics = new DbMetricsMonitor({ getLogger }); } - async refreshAppCountSnapshot(): Promise< - Partial<{ [key in TimeRange]: number }> - > { - try { - this.appCount = await this.getLabeledAppCounts(); - return this.appCount; - } catch (error) { - this.logger.warn( - 'Unable to retrieve statistics. This will be retried', - error, - ); - return { - '7d': 0, - '30d': 0, - allTime: 0, - }; - } + async fromPrometheus( + name: string, + labels?: Record, + ): Promise { + return (await this.dbMetrics.findValue(name, labels)) ?? 0; } getProjectModeCount(): Promise { return this.projectStore.getProjectModeCounts(); } - getToggleCount(): Promise { - return this.featureToggleStore.count({ - archived: false, - }); - } - getArchivedToggleCount(): Promise { return this.featureToggleStore.count({ archived: true, @@ -263,7 +249,7 @@ export class InstanceStatsService { maxConstraintValues, maxConstraints, ] = await Promise.all([ - this.getToggleCount(), + this.fromPrometheus('feature_toggles_total'), this.getArchivedToggleCount(), this.userStore.count(), this.userStore.countServiceAccounts(), @@ -280,7 +266,7 @@ export class InstanceStatsService { this.strategyStore.count(), this.hasSAML(), this.hasOIDC(), - this.appCount ? this.appCount : this.refreshAppCountSnapshot(), + this.clientAppCounts(), this.eventStore.deprecatedFilteredCount({ type: FEATURES_EXPORTED, }), @@ -329,20 +315,19 @@ export class InstanceStatsService { maxConstraints: maxConstraints?.count ?? 0, }; } - - async getLabeledAppCounts(): Promise< - Partial<{ [key in TimeRange]: number }> - > { - const [t7d, t30d, allTime] = await Promise.all([ - this.clientInstanceStore.getDistinctApplicationsCount(7), - this.clientInstanceStore.getDistinctApplicationsCount(30), - this.clientInstanceStore.getDistinctApplicationsCount(), - ]); - return { - '7d': t7d, - '30d': t30d, - allTime, + async clientAppCounts(): Promise> { + this.appCount = { + '7d': await this.fromPrometheus('client_apps_total', { + range: '7d', + }), + '30d': await this.fromPrometheus('client_apps_total', { + range: '30d', + }), + allTime: await this.fromPrometheus('client_apps_total', { + range: 'allTime', + }), }; + return this.appCount; } getAppCountSnapshot(range: TimeRange): number | undefined { diff --git a/src/lib/metrics-gauge.ts b/src/lib/metrics-gauge.ts index 86a0c639ec89..7f1e5cc06265 100644 --- a/src/lib/metrics-gauge.ts +++ b/src/lib/metrics-gauge.ts @@ -64,10 +64,11 @@ export class DbMetricsMonitor { } refreshDbMetrics = async () => { - const tasks = Array.from(this.updaters.values()).map( - (updater) => updater.task, + const tasks = Array.from(this.updaters.entries()).map( + ([name, updater]) => ({ name, task: updater.task }), ); - for (const task of tasks) { + for (const { name, task } of tasks) { + this.log.debug(`Refreshing metric ${name}`); await task(); } }; diff --git a/src/lib/metrics.test.ts b/src/lib/metrics.test.ts index bf92983caf55..afff08c245c8 100644 --- a/src/lib/metrics.test.ts +++ b/src/lib/metrics.test.ts @@ -212,6 +212,7 @@ test('should collect metrics for function timings', async () => { }); test('should collect metrics for feature flag size', async () => { + await statsService.dbMetrics.refreshDbMetrics(); const metrics = await prometheusRegister.metrics(); expect(metrics).toMatch(/feature_toggles_total\{version="(.*)"\} 0/); }); @@ -222,7 +223,7 @@ test('should collect metrics for archived feature flag size', async () => { }); test('should collect metrics for total client apps', async () => { - await statsService.refreshAppCountSnapshot(); + await statsService.dbMetrics.refreshDbMetrics(); const metrics = await prometheusRegister.metrics(); expect(metrics).toMatch(/client_apps_total\{range="(.*)"\} 0/); }); diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 8b0a0e390e97..05ba0e98859f 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -37,393 +37,707 @@ import { } from './util/metrics'; import type { SchedulerService } from './services'; import type { IClientMetricsEnv } from './features/metrics/client-metrics/client-metrics-store-v2-type'; -import { DbMetricsMonitor } from './metrics-gauge'; -export default class MetricsMonitor { - constructor() {} +export function registerPrometheusMetrics( + config: IUnleashConfig, + stores: IUnleashStores, + version: string, + eventBus: EventEmitter, + instanceStatsService: InstanceStatsService, +) { + const resolveEnvironmentType = async ( + environment: string, + cachedEnvironments: () => Promise, + ): Promise => { + const environments = await cachedEnvironments(); + const env = environments.find((e) => e.name === environment); - async startMonitoring( - config: IUnleashConfig, - stores: IUnleashStores, - version: string, - eventBus: EventEmitter, - instanceStatsService: InstanceStatsService, - schedulerService: SchedulerService, - db: Knex, - ): Promise { - if (!config.server.serverMetrics) { - return Promise.resolve(); + if (env) { + return env.type; + } else { + return 'unknown'; } - - const { eventStore, environmentStore } = stores; - const { flagResolver } = config; - const dbMetrics = new DbMetricsMonitor(config); - - const cachedEnvironments: () => Promise = memoizee( - async () => environmentStore.getAll(), - { - promise: true, - maxAge: hoursToMilliseconds(1), - }, - ); - - collectDefaultMetrics(); - - const requestDuration = createSummary({ - name: 'http_request_duration_milliseconds', - help: 'App response time', - labelNames: ['path', 'method', 'status', 'appName'], - percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], - maxAgeSeconds: 600, - ageBuckets: 5, - }); - const schedulerDuration = createSummary({ - name: 'scheduler_duration_seconds', - help: 'Scheduler duration time', - labelNames: ['jobId'], - percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], - maxAgeSeconds: 600, - ageBuckets: 5, - }); - const dbDuration = createSummary({ - name: 'db_query_duration_seconds', - help: 'DB query duration time', - labelNames: ['store', 'action'], - percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], - maxAgeSeconds: 600, - ageBuckets: 5, - }); - const functionDuration = createSummary({ - name: 'function_duration_seconds', - help: 'Function duration time', - labelNames: ['functionName', 'className'], - percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], - maxAgeSeconds: 600, - ageBuckets: 5, - }); - const featureFlagUpdateTotal = createCounter({ - name: 'feature_toggle_update_total', - help: 'Number of times a toggle has been updated. Environment label would be "n/a" when it is not available, e.g. when a feature flag is created.', - labelNames: [ - 'toggle', - 'project', - 'environment', - 'environmentType', - 'action', - ], - }); - const featureFlagUsageTotal = createCounter({ - name: 'feature_toggle_usage_total', - help: 'Number of times a feature flag has been used', - labelNames: ['toggle', 'active', 'appName'], - }); - - // schedule and execute immediately - await dbMetrics.registerGaugeDbMetric({ - name: 'feature_toggles_total', - help: 'Number of feature flags', - labelNames: ['version'], - query: () => instanceStatsService.getToggleCount(), - map: (value) => ({ value, labels: { version } }), - })(); - - dbMetrics.registerGaugeDbMetric({ - name: 'max_feature_environment_strategies', - help: 'Maximum number of environment strategies in one feature', - labelNames: ['feature', 'environment'], - query: () => - stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), - map: (result) => ({ - value: result.count, - labels: { - environment: result.environment, - feature: result.feature, - }, + }; + + const { eventStore, environmentStore } = stores; + const { flagResolver } = config; + const dbMetrics = instanceStatsService.dbMetrics; + + const cachedEnvironments: () => Promise = memoizee( + async () => environmentStore.getAll(), + { + promise: true, + maxAge: hoursToMilliseconds(1), + }, + ); + + const requestDuration = createSummary({ + name: 'http_request_duration_milliseconds', + help: 'App response time', + labelNames: ['path', 'method', 'status', 'appName'], + percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], + maxAgeSeconds: 600, + ageBuckets: 5, + }); + const schedulerDuration = createSummary({ + name: 'scheduler_duration_seconds', + help: 'Scheduler duration time', + labelNames: ['jobId'], + percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], + maxAgeSeconds: 600, + ageBuckets: 5, + }); + const dbDuration = createSummary({ + name: 'db_query_duration_seconds', + help: 'DB query duration time', + labelNames: ['store', 'action'], + percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], + maxAgeSeconds: 600, + ageBuckets: 5, + }); + const functionDuration = createSummary({ + name: 'function_duration_seconds', + help: 'Function duration time', + labelNames: ['functionName', 'className'], + percentiles: [0.1, 0.5, 0.9, 0.95, 0.99], + maxAgeSeconds: 600, + ageBuckets: 5, + }); + const featureFlagUpdateTotal = createCounter({ + name: 'feature_toggle_update_total', + help: 'Number of times a toggle has been updated. Environment label would be "n/a" when it is not available, e.g. when a feature flag is created.', + labelNames: [ + 'toggle', + 'project', + 'environment', + 'environmentType', + 'action', + ], + }); + const featureFlagUsageTotal = createCounter({ + name: 'feature_toggle_usage_total', + help: 'Number of times a feature flag has been used', + labelNames: ['toggle', 'active', 'appName'], + }); + + dbMetrics.registerGaugeDbMetric({ + name: 'feature_toggles_total', + help: 'Number of feature flags', + labelNames: ['version'], + query: () => + stores.featureToggleStore.count({ + archived: false, }), - }); - - dbMetrics.registerGaugeDbMetric({ - name: 'max_feature_strategies', - help: 'Maximum number of strategies in one feature', - labelNames: ['feature'], - query: () => - stores.featureStrategiesReadModel.getMaxFeatureStrategies(), - map: (result) => ({ - value: result.count, - labels: { feature: result.feature }, - }), - }); - - const maxConstraintValues = createGauge({ - name: 'max_constraint_values', - help: 'Maximum number of constraint values used in a single constraint', - labelNames: ['feature', 'environment'], - }); - const maxConstraintsPerStrategy = createGauge({ - name: 'max_strategy_constraints', - help: 'Maximum number of constraints used on a single strategy', - labelNames: ['feature', 'environment'], - }); - const largestProjectEnvironment = createGauge({ - name: 'largest_project_environment_size', - help: 'The largest project environment size (bytes) based on strategies, constraints, variants and parameters', - labelNames: ['project', 'environment'], - }); - const largestFeatureEnvironment = createGauge({ - name: 'largest_feature_environment_size', - help: 'The largest feature environment size (bytes) base on strategies, constraints, variants and parameters', - labelNames: ['feature', 'environment'], - }); - - const featureTogglesArchivedTotal = createGauge({ - name: 'feature_toggles_archived_total', - help: 'Number of archived feature flags', - }); - const usersTotal = createGauge({ - name: 'users_total', - help: 'Number of users', - }); - const serviceAccounts = createGauge({ - name: 'service_accounts_total', - help: 'Number of service accounts', - }); - const apiTokens = createGauge({ - name: 'api_tokens_total', - help: 'Number of API tokens', - labelNames: ['type'], - }); - const enabledMetricsBucketsPreviousDay = createGauge({ - name: 'enabled_metrics_buckets_previous_day', - help: 'Number of hourly enabled/disabled metric buckets in the previous day', - }); - const variantMetricsBucketsPreviousDay = createGauge({ - name: 'variant_metrics_buckets_previous_day', - help: 'Number of hourly variant metric buckets in the previous day', - }); - const usersActive7days = createGauge({ - name: 'users_active_7', - help: 'Number of users active in the last 7 days', - }); - const usersActive30days = createGauge({ - name: 'users_active_30', - help: 'Number of users active in the last 30 days', - }); - const usersActive60days = createGauge({ - name: 'users_active_60', - help: 'Number of users active in the last 60 days', - }); - const usersActive90days = createGauge({ - name: 'users_active_90', - help: 'Number of users active in the last 90 days', - }); - const projectsTotal = createGauge({ - name: 'projects_total', - help: 'Number of projects', - labelNames: ['mode'], - }); - const environmentsTotal = createGauge({ - name: 'environments_total', - help: 'Number of environments', - }); - const groupsTotal = createGauge({ - name: 'groups_total', - help: 'Number of groups', - }); - - const rolesTotal = createGauge({ - name: 'roles_total', - help: 'Number of roles', - }); - - const customRootRolesTotal = createGauge({ - name: 'custom_root_roles_total', - help: 'Number of custom root roles', - }); - - const customRootRolesInUseTotal = createGauge({ - name: 'custom_root_roles_in_use_total', - help: 'Number of custom root roles in use', - }); - - const segmentsTotal = createGauge({ - name: 'segments_total', - help: 'Number of segments', - }); - - const contextTotal = createGauge({ - name: 'context_total', - help: 'Number of context', - }); - - const strategiesTotal = createGauge({ - name: 'strategies_total', - help: 'Number of strategies', - }); - - // execute immediately to get initial values - await dbMetrics.registerGaugeDbMetric({ - name: 'client_apps_total', - help: 'Number of registered client apps aggregated by range by last seen', - labelNames: ['range'], - query: () => instanceStatsService.getLabeledAppCounts(), - map: (result) => - Object.entries(result).map(([range, count]) => ({ - value: count, - labels: { range }, - })), - })(); - - const samlEnabled = createGauge({ - name: 'saml_enabled', - help: 'Whether SAML is enabled', - }); - - const oidcEnabled = createGauge({ - name: 'oidc_enabled', - help: 'Whether OIDC is enabled', - }); - - const clientSdkVersionUsage = createCounter({ - name: 'client_sdk_versions', - help: 'Which sdk versions are being used', - labelNames: [ - 'sdk_name', - 'sdk_version', - 'platform_name', - 'platform_version', - 'yggdrasil_version', - 'spec_version', - ], - }); - - const productionChanges30 = createGauge({ - name: 'production_changes_30', - help: 'Changes made to production environment last 30 days', - labelNames: ['environment'], - }); - const productionChanges60 = createGauge({ - name: 'production_changes_60', - help: 'Changes made to production environment last 60 days', - labelNames: ['environment'], - }); - const productionChanges90 = createGauge({ - name: 'production_changes_90', - help: 'Changes made to production environment last 90 days', - labelNames: ['environment'], - }); - - const rateLimits = createGauge({ - name: 'rate_limits', - help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs', - labelNames: ['endpoint', 'method'], - }); - const featureCreatedByMigration = createCounter({ - name: 'feature_created_by_migration_count', - help: 'Feature createdBy migration count', - }); - const eventCreatedByMigration = createCounter({ - name: 'event_created_by_migration_count', - help: 'Event createdBy migration count', - }); - const proxyRepositoriesCreated = createCounter({ - name: 'proxy_repositories_created', - help: 'Proxy repositories created', - }); - const frontendApiRepositoriesCreated = createCounter({ - name: 'frontend_api_repositories_created', - help: 'Frontend API repositories created', - }); - const mapFeaturesForClientDuration = createHistogram({ - name: 'map_features_for_client_duration', - help: 'Duration of mapFeaturesForClient function', - }); - - const featureLifecycleStageDuration = createGauge({ - name: 'feature_lifecycle_stage_duration', - labelNames: ['stage', 'project_id'], - help: 'Duration of feature lifecycle stages', - }); - - const onboardingDuration = createGauge({ - name: 'onboarding_duration', - labelNames: ['event'], - help: 'firstLogin, secondLogin, firstFeatureFlag, firstPreLive, firstLive from first user creation', - }); - const projectOnboardingDuration = createGauge({ - name: 'project_onboarding_duration', - labelNames: ['event', 'project'], - help: 'firstFeatureFlag, firstPreLive, firstLive from project creation', - }); - - const featureLifecycleStageCountByProject = createGauge({ - name: 'feature_lifecycle_stage_count_by_project', - help: 'Count features in a given stage by project id', - labelNames: ['stage', 'project_id'], - }); - - const featureLifecycleStageEnteredCounter = createCounter({ - name: 'feature_lifecycle_stage_entered', - help: 'Count how many features entered a given stage', - labelNames: ['stage'], - }); - - const projectActionsCounter = createCounter({ - name: 'project_actions_count', - help: 'Count project actions', - labelNames: ['action'], - }); - - const projectEnvironmentsDisabled = createCounter({ - name: 'project_environments_disabled', - help: 'How many "environment disabled" events we have received for each project', - labelNames: ['project_id'], - }); - - const orphanedTokensTotal = createGauge({ - name: 'orphaned_api_tokens_total', - help: 'Number of API tokens without a project', - }); - - const orphanedTokensActive = createGauge({ - name: 'orphaned_api_tokens_active', - help: 'Number of API tokens without a project, last seen within 3 months', - }); - - const legacyTokensTotal = createGauge({ - name: 'legacy_api_tokens_total', - help: 'Number of API tokens with v1 format', - }); + map: (value) => ({ value, labels: { version } }), + }); + + dbMetrics.registerGaugeDbMetric({ + name: 'max_feature_environment_strategies', + help: 'Maximum number of environment strategies in one feature', + labelNames: ['feature', 'environment'], + query: () => + stores.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), + map: (result) => ({ + value: result.count, + labels: { + environment: result.environment, + feature: result.feature, + }, + }), + }); + + dbMetrics.registerGaugeDbMetric({ + name: 'max_feature_strategies', + help: 'Maximum number of strategies in one feature', + labelNames: ['feature'], + query: () => + stores.featureStrategiesReadModel.getMaxFeatureStrategies(), + map: (result) => ({ + value: result.count, + labels: { feature: result.feature }, + }), + }); + + const maxConstraintValues = createGauge({ + name: 'max_constraint_values', + help: 'Maximum number of constraint values used in a single constraint', + labelNames: ['feature', 'environment'], + }); + const maxConstraintsPerStrategy = createGauge({ + name: 'max_strategy_constraints', + help: 'Maximum number of constraints used on a single strategy', + labelNames: ['feature', 'environment'], + }); + const largestProjectEnvironment = createGauge({ + name: 'largest_project_environment_size', + help: 'The largest project environment size (bytes) based on strategies, constraints, variants and parameters', + labelNames: ['project', 'environment'], + }); + const largestFeatureEnvironment = createGauge({ + name: 'largest_feature_environment_size', + help: 'The largest feature environment size (bytes) base on strategies, constraints, variants and parameters', + labelNames: ['feature', 'environment'], + }); + + const featureTogglesArchivedTotal = createGauge({ + name: 'feature_toggles_archived_total', + help: 'Number of archived feature flags', + }); + const usersTotal = createGauge({ + name: 'users_total', + help: 'Number of users', + }); + const serviceAccounts = createGauge({ + name: 'service_accounts_total', + help: 'Number of service accounts', + }); + const apiTokens = createGauge({ + name: 'api_tokens_total', + help: 'Number of API tokens', + labelNames: ['type'], + }); + const enabledMetricsBucketsPreviousDay = createGauge({ + name: 'enabled_metrics_buckets_previous_day', + help: 'Number of hourly enabled/disabled metric buckets in the previous day', + }); + const variantMetricsBucketsPreviousDay = createGauge({ + name: 'variant_metrics_buckets_previous_day', + help: 'Number of hourly variant metric buckets in the previous day', + }); + const usersActive7days = createGauge({ + name: 'users_active_7', + help: 'Number of users active in the last 7 days', + }); + const usersActive30days = createGauge({ + name: 'users_active_30', + help: 'Number of users active in the last 30 days', + }); + const usersActive60days = createGauge({ + name: 'users_active_60', + help: 'Number of users active in the last 60 days', + }); + const usersActive90days = createGauge({ + name: 'users_active_90', + help: 'Number of users active in the last 90 days', + }); + const projectsTotal = createGauge({ + name: 'projects_total', + help: 'Number of projects', + labelNames: ['mode'], + }); + const environmentsTotal = createGauge({ + name: 'environments_total', + help: 'Number of environments', + }); + const groupsTotal = createGauge({ + name: 'groups_total', + help: 'Number of groups', + }); + + const rolesTotal = createGauge({ + name: 'roles_total', + help: 'Number of roles', + }); + + const customRootRolesTotal = createGauge({ + name: 'custom_root_roles_total', + help: 'Number of custom root roles', + }); + + const customRootRolesInUseTotal = createGauge({ + name: 'custom_root_roles_in_use_total', + help: 'Number of custom root roles in use', + }); + + const segmentsTotal = createGauge({ + name: 'segments_total', + help: 'Number of segments', + }); + + const contextTotal = createGauge({ + name: 'context_total', + help: 'Number of context', + }); + + const strategiesTotal = createGauge({ + name: 'strategies_total', + help: 'Number of strategies', + }); + + dbMetrics.registerGaugeDbMetric({ + name: 'client_apps_total', + help: 'Number of registered client apps aggregated by range by last seen', + labelNames: ['range'], + query: async () => { + const [t7d, t30d, allTime] = await Promise.all([ + stores.clientInstanceStore.getDistinctApplicationsCount(7), + stores.clientInstanceStore.getDistinctApplicationsCount(30), + stores.clientInstanceStore.getDistinctApplicationsCount(), + ]); + return { + '7d': t7d, + '30d': t30d, + allTime, + }; + }, + map: (result) => + Object.entries(result).map(([range, count]) => ({ + value: count, + labels: { range }, + })), + }); + + const samlEnabled = createGauge({ + name: 'saml_enabled', + help: 'Whether SAML is enabled', + }); + + const oidcEnabled = createGauge({ + name: 'oidc_enabled', + help: 'Whether OIDC is enabled', + }); + + const clientSdkVersionUsage = createCounter({ + name: 'client_sdk_versions', + help: 'Which sdk versions are being used', + labelNames: [ + 'sdk_name', + 'sdk_version', + 'platform_name', + 'platform_version', + 'yggdrasil_version', + 'spec_version', + ], + }); + + const productionChanges30 = createGauge({ + name: 'production_changes_30', + help: 'Changes made to production environment last 30 days', + labelNames: ['environment'], + }); + const productionChanges60 = createGauge({ + name: 'production_changes_60', + help: 'Changes made to production environment last 60 days', + labelNames: ['environment'], + }); + const productionChanges90 = createGauge({ + name: 'production_changes_90', + help: 'Changes made to production environment last 90 days', + labelNames: ['environment'], + }); + + const rateLimits = createGauge({ + name: 'rate_limits', + help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs', + labelNames: ['endpoint', 'method'], + }); + const featureCreatedByMigration = createCounter({ + name: 'feature_created_by_migration_count', + help: 'Feature createdBy migration count', + }); + const eventCreatedByMigration = createCounter({ + name: 'event_created_by_migration_count', + help: 'Event createdBy migration count', + }); + const proxyRepositoriesCreated = createCounter({ + name: 'proxy_repositories_created', + help: 'Proxy repositories created', + }); + const frontendApiRepositoriesCreated = createCounter({ + name: 'frontend_api_repositories_created', + help: 'Frontend API repositories created', + }); + const mapFeaturesForClientDuration = createHistogram({ + name: 'map_features_for_client_duration', + help: 'Duration of mapFeaturesForClient function', + }); + + const featureLifecycleStageDuration = createGauge({ + name: 'feature_lifecycle_stage_duration', + labelNames: ['stage', 'project_id'], + help: 'Duration of feature lifecycle stages', + }); + + const onboardingDuration = createGauge({ + name: 'onboarding_duration', + labelNames: ['event'], + help: 'firstLogin, secondLogin, firstFeatureFlag, firstPreLive, firstLive from first user creation', + }); + const projectOnboardingDuration = createGauge({ + name: 'project_onboarding_duration', + labelNames: ['event', 'project'], + help: 'firstFeatureFlag, firstPreLive, firstLive from project creation', + }); + + const featureLifecycleStageCountByProject = createGauge({ + name: 'feature_lifecycle_stage_count_by_project', + help: 'Count features in a given stage by project id', + labelNames: ['stage', 'project_id'], + }); + + const featureLifecycleStageEnteredCounter = createCounter({ + name: 'feature_lifecycle_stage_entered', + help: 'Count how many features entered a given stage', + labelNames: ['stage'], + }); + + const projectActionsCounter = createCounter({ + name: 'project_actions_count', + help: 'Count project actions', + labelNames: ['action'], + }); + + const projectEnvironmentsDisabled = createCounter({ + name: 'project_environments_disabled', + help: 'How many "environment disabled" events we have received for each project', + labelNames: ['project_id'], + }); + + const orphanedTokensTotal = createGauge({ + name: 'orphaned_api_tokens_total', + help: 'Number of API tokens without a project', + }); + + const orphanedTokensActive = createGauge({ + name: 'orphaned_api_tokens_active', + help: 'Number of API tokens without a project, last seen within 3 months', + }); + + const legacyTokensTotal = createGauge({ + name: 'legacy_api_tokens_total', + help: 'Number of API tokens with v1 format', + }); + + const legacyTokensActive = createGauge({ + name: 'legacy_api_tokens_active', + help: 'Number of API tokens with v1 format, last seen within 3 months', + }); + + const exceedsLimitErrorCounter = createCounter({ + name: 'exceeds_limit_error', + help: 'The number of exceeds limit errors registered by this instance.', + labelNames: ['resource', 'limit'], + }); + + const requestOriginCounter = createCounter({ + name: 'request_origin_counter', + help: 'Number of authenticated requests, including origin information.', + labelNames: ['type', 'method', 'source'], + }); + + const resourceLimit = createGauge({ + name: 'resource_limit', + help: 'The maximum number of resources allowed.', + labelNames: ['resource'], + }); + + const addonEventsHandledCounter = createCounter({ + name: 'addon_events_handled', + help: 'Events handled by addons and the result.', + labelNames: ['result', 'destination'], + }); + + // register event listeners + eventBus.on( + events.EXCEEDS_LIMIT, + ({ resource, limit }: { resource: string; limit: number }) => { + exceedsLimitErrorCounter.increment({ resource, limit }); + }, + ); + + eventBus.on( + events.STAGE_ENTERED, + (entered: { stage: string; feature: string }) => { + if (flagResolver.isEnabled('trackLifecycleMetrics')) { + logger.info( + `STAGE_ENTERED listened ${JSON.stringify(entered)}`, + ); + } + featureLifecycleStageEnteredCounter.increment({ + stage: entered.stage, + }); + }, + ); - const legacyTokensActive = createGauge({ - name: 'legacy_api_tokens_active', - help: 'Number of API tokens with v1 format, last seen within 3 months', - }); + eventBus.on( + events.REQUEST_TIME, + ({ path, method, time, statusCode, appName }) => { + requestDuration + .labels({ + path, + method, + status: statusCode, + appName, + }) + .observe(time); + }, + ); + + eventBus.on(events.SCHEDULER_JOB_TIME, ({ jobId, time }) => { + schedulerDuration.labels(jobId).observe(time); + }); + + eventBus.on(events.FUNCTION_TIME, ({ functionName, className, time }) => { + functionDuration + .labels({ + functionName, + className, + }) + .observe(time); + }); + + eventBus.on(events.EVENTS_CREATED_BY_PROCESSED, ({ updated }) => { + eventCreatedByMigration.inc(updated); + }); + + eventBus.on(events.FEATURES_CREATED_BY_PROCESSED, ({ updated }) => { + featureCreatedByMigration.inc(updated); + }); + + eventBus.on(events.DB_TIME, ({ store, action, time }) => { + dbDuration + .labels({ + store, + action, + }) + .observe(time); + }); + + eventBus.on(events.PROXY_REPOSITORY_CREATED, () => { + proxyRepositoriesCreated.inc(); + }); + + eventBus.on(events.FRONTEND_API_REPOSITORY_CREATED, () => { + frontendApiRepositoriesCreated.inc(); + }); + + eventBus.on(events.PROXY_FEATURES_FOR_TOKEN_TIME, ({ duration }) => { + mapFeaturesForClientDuration.observe(duration); + }); + + events.onMetricEvent( + eventBus, + events.REQUEST_ORIGIN, + ({ type, method, source }) => { + if (flagResolver.isEnabled('originMiddleware')) { + requestOriginCounter.increment({ type, method, source }); + } + }, + ); + + eventStore.on(FEATURE_CREATED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'n/a', + environmentType: 'n/a', + action: 'created', + }); + }); + eventStore.on(FEATURE_VARIANTS_UPDATED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'n/a', + environmentType: 'n/a', + action: 'updated', + }); + }); + eventStore.on(FEATURE_METADATA_UPDATED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'n/a', + environmentType: 'n/a', + action: 'updated', + }); + }); + eventStore.on(FEATURE_UPDATED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'default', + environmentType: 'production', + action: 'updated', + }); + }); + eventStore.on( + FEATURE_STRATEGY_ADD, + async ({ featureName, project, environment }) => { + const environmentType = await resolveEnvironmentType( + environment, + cachedEnvironments, + ); + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment, + environmentType, + action: 'updated', + }); + }, + ); + eventStore.on( + FEATURE_STRATEGY_REMOVE, + async ({ featureName, project, environment }) => { + const environmentType = await resolveEnvironmentType( + environment, + cachedEnvironments, + ); + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment, + environmentType, + action: 'updated', + }); + }, + ); + eventStore.on( + FEATURE_STRATEGY_UPDATE, + async ({ featureName, project, environment }) => { + const environmentType = await resolveEnvironmentType( + environment, + cachedEnvironments, + ); + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment, + environmentType, + action: 'updated', + }); + }, + ); + eventStore.on( + FEATURE_ENVIRONMENT_DISABLED, + async ({ featureName, project, environment }) => { + const environmentType = await resolveEnvironmentType( + environment, + cachedEnvironments, + ); + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment, + environmentType, + action: 'updated', + }); + }, + ); + eventStore.on( + FEATURE_ENVIRONMENT_ENABLED, + async ({ featureName, project, environment }) => { + const environmentType = await resolveEnvironmentType( + environment, + cachedEnvironments, + ); + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment, + environmentType, + action: 'updated', + }); + }, + ); + eventStore.on(FEATURE_ARCHIVED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'n/a', + environmentType: 'n/a', + action: 'archived', + }); + }); + eventStore.on(FEATURE_REVIVED, ({ featureName, project }) => { + featureFlagUpdateTotal.increment({ + toggle: featureName, + project, + environment: 'n/a', + environmentType: 'n/a', + action: 'revived', + }); + }); + eventStore.on(PROJECT_CREATED, () => { + projectActionsCounter.increment({ action: PROJECT_CREATED }); + }); + eventStore.on(PROJECT_ARCHIVED, () => { + projectActionsCounter.increment({ action: PROJECT_ARCHIVED }); + }); + eventStore.on(PROJECT_REVIVED, () => { + projectActionsCounter.increment({ action: PROJECT_REVIVED }); + }); + eventStore.on(PROJECT_DELETED, () => { + projectActionsCounter.increment({ action: PROJECT_DELETED }); + }); + + const logger = config.getLogger('metrics.ts'); + eventBus.on(CLIENT_METRICS, (metrics: IClientMetricsEnv[]) => { + try { + for (const metric of metrics) { + featureFlagUsageTotal.increment( + { + toggle: metric.featureName, + active: 'true', + appName: metric.appName, + }, + metric.yes, + ); + featureFlagUsageTotal.increment( + { + toggle: metric.featureName, + active: 'false', + appName: metric.appName, + }, + metric.no, + ); + } + } catch (e) { + logger.warn('Metrics registration failed', e); + } + }); - const exceedsLimitErrorCounter = createCounter({ - name: 'exceeds_limit_error', - help: 'The number of exceeds limit errors registered by this instance.', - labelNames: ['resource', 'limit'], - }); + eventStore.on(CLIENT_REGISTER, (heartbeatEvent: ISdkHeartbeat) => { + if (!heartbeatEvent.sdkName || !heartbeatEvent.sdkVersion) { + return; + } - const requestOriginCounter = createCounter({ - name: 'request_origin_counter', - help: 'Number of authenticated requests, including origin information.', - labelNames: ['type', 'method', 'source'], - }); + if (flagResolver.isEnabled('extendedMetrics')) { + clientSdkVersionUsage.increment({ + sdk_name: heartbeatEvent.sdkName, + sdk_version: heartbeatEvent.sdkVersion, + platform_name: + heartbeatEvent.metadata?.platformName ?? 'not-set', + platform_version: + heartbeatEvent.metadata?.platformVersion ?? 'not-set', + yggdrasil_version: + heartbeatEvent.metadata?.yggdrasilVersion ?? 'not-set', + spec_version: heartbeatEvent.metadata?.specVersion ?? 'not-set', + }); + } else { + clientSdkVersionUsage.increment({ + sdk_name: heartbeatEvent.sdkName, + sdk_version: heartbeatEvent.sdkVersion, + platform_name: 'not-set', + platform_version: 'not-set', + yggdrasil_version: 'not-set', + spec_version: 'not-set', + }); + } + }); - const resourceLimit = createGauge({ - name: 'resource_limit', - help: 'The maximum number of resources allowed.', - labelNames: ['resource'], - }); + eventStore.on(PROJECT_ENVIRONMENT_REMOVED, ({ project }) => { + projectEnvironmentsDisabled.increment({ project_id: project }); + }); - const addonEventsHandledCounter = createCounter({ - name: 'addon_events_handled', - help: 'Events handled by addons and the result.', - labelNames: ['result', 'destination'], - }); + eventBus.on(events.ADDON_EVENTS_HANDLED, ({ result, destination }) => { + addonEventsHandledCounter.increment({ result, destination }); + }); - async function collectStaticCounters() { + // return an update function (temporarily) to allow for manual refresh + return { + collectStaticCounters: async () => { try { dbMetrics.refreshDbMetrics(); @@ -691,329 +1005,60 @@ export default class MetricsMonitor { config.rateLimiting.callSignalEndpointMaxPerSecond * 60, ); } catch (e) {} - } - - await schedulerService.schedule( - collectStaticCounters.bind(this), - hoursToMilliseconds(2), - 'collectStaticCounters', - ); - - eventBus.on( - events.EXCEEDS_LIMIT, - ({ resource, limit }: { resource: string; limit: number }) => { - exceedsLimitErrorCounter.increment({ resource, limit }); - }, - ); - - eventBus.on( - events.STAGE_ENTERED, - (entered: { stage: string; feature: string }) => { - if (flagResolver.isEnabled('trackLifecycleMetrics')) { - logger.info( - `STAGE_ENTERED listened ${JSON.stringify(entered)}`, - ); - } - featureLifecycleStageEnteredCounter.increment({ - stage: entered.stage, - }); - }, - ); - - eventBus.on( - events.REQUEST_TIME, - ({ path, method, time, statusCode, appName }) => { - requestDuration - .labels({ - path, - method, - status: statusCode, - appName, - }) - .observe(time); - }, - ); - - eventBus.on(events.SCHEDULER_JOB_TIME, ({ jobId, time }) => { - schedulerDuration.labels(jobId).observe(time); - }); - - eventBus.on( - events.FUNCTION_TIME, - ({ functionName, className, time }) => { - functionDuration - .labels({ - functionName, - className, - }) - .observe(time); - }, - ); - - eventBus.on(events.EVENTS_CREATED_BY_PROCESSED, ({ updated }) => { - eventCreatedByMigration.inc(updated); - }); - - eventBus.on(events.FEATURES_CREATED_BY_PROCESSED, ({ updated }) => { - featureCreatedByMigration.inc(updated); - }); - - eventBus.on(events.DB_TIME, ({ store, action, time }) => { - dbDuration - .labels({ - store, - action, - }) - .observe(time); - }); - - eventBus.on(events.PROXY_REPOSITORY_CREATED, () => { - proxyRepositoriesCreated.inc(); - }); + }, + }; +} +export default class MetricsMonitor { + constructor() {} - eventBus.on(events.FRONTEND_API_REPOSITORY_CREATED, () => { - frontendApiRepositoriesCreated.inc(); - }); + async startMonitoring( + config: IUnleashConfig, + stores: IUnleashStores, + version: string, + eventBus: EventEmitter, + instanceStatsService: InstanceStatsService, + schedulerService: SchedulerService, + db: Knex, + ): Promise { + if (!config.server.serverMetrics) { + return Promise.resolve(); + } - eventBus.on(events.PROXY_FEATURES_FOR_TOKEN_TIME, ({ duration }) => { - mapFeaturesForClientDuration.observe(duration); - }); + collectDefaultMetrics(); - events.onMetricEvent( + const { collectStaticCounters } = registerPrometheusMetrics( + config, + stores, + version, eventBus, - events.REQUEST_ORIGIN, - ({ type, method, source }) => { - if (flagResolver.isEnabled('originMiddleware')) { - requestOriginCounter.increment({ type, method, source }); - } - }, - ); - - eventStore.on(FEATURE_CREATED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'n/a', - environmentType: 'n/a', - action: 'created', - }); - }); - eventStore.on(FEATURE_VARIANTS_UPDATED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'n/a', - environmentType: 'n/a', - action: 'updated', - }); - }); - eventStore.on(FEATURE_METADATA_UPDATED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'n/a', - environmentType: 'n/a', - action: 'updated', - }); - }); - eventStore.on(FEATURE_UPDATED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'default', - environmentType: 'production', - action: 'updated', - }); - }); - eventStore.on( - FEATURE_STRATEGY_ADD, - async ({ featureName, project, environment }) => { - const environmentType = await this.resolveEnvironmentType( - environment, - cachedEnvironments, - ); - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment, - environmentType, - action: 'updated', - }); - }, - ); - eventStore.on( - FEATURE_STRATEGY_REMOVE, - async ({ featureName, project, environment }) => { - const environmentType = await this.resolveEnvironmentType( - environment, - cachedEnvironments, - ); - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment, - environmentType, - action: 'updated', - }); - }, - ); - eventStore.on( - FEATURE_STRATEGY_UPDATE, - async ({ featureName, project, environment }) => { - const environmentType = await this.resolveEnvironmentType( - environment, - cachedEnvironments, - ); - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment, - environmentType, - action: 'updated', - }); - }, - ); - eventStore.on( - FEATURE_ENVIRONMENT_DISABLED, - async ({ featureName, project, environment }) => { - const environmentType = await this.resolveEnvironmentType( - environment, - cachedEnvironments, - ); - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment, - environmentType, - action: 'updated', - }); - }, + instanceStatsService, ); - eventStore.on( - FEATURE_ENVIRONMENT_ENABLED, - async ({ featureName, project, environment }) => { - const environmentType = await this.resolveEnvironmentType( - environment, - cachedEnvironments, - ); - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment, - environmentType, - action: 'updated', - }); - }, - ); - eventStore.on(FEATURE_ARCHIVED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'n/a', - environmentType: 'n/a', - action: 'archived', - }); - }); - eventStore.on(FEATURE_REVIVED, ({ featureName, project }) => { - featureFlagUpdateTotal.increment({ - toggle: featureName, - project, - environment: 'n/a', - environmentType: 'n/a', - action: 'revived', - }); - }); - eventStore.on(PROJECT_CREATED, () => { - projectActionsCounter.increment({ action: PROJECT_CREATED }); - }); - eventStore.on(PROJECT_ARCHIVED, () => { - projectActionsCounter.increment({ action: PROJECT_ARCHIVED }); - }); - eventStore.on(PROJECT_REVIVED, () => { - projectActionsCounter.increment({ action: PROJECT_REVIVED }); - }); - eventStore.on(PROJECT_DELETED, () => { - projectActionsCounter.increment({ action: PROJECT_DELETED }); - }); - - const logger = config.getLogger('metrics.ts'); - eventBus.on(CLIENT_METRICS, (metrics: IClientMetricsEnv[]) => { - try { - for (const metric of metrics) { - featureFlagUsageTotal.increment( - { - toggle: metric.featureName, - active: 'true', - appName: metric.appName, - }, - metric.yes, - ); - featureFlagUsageTotal.increment( - { - toggle: metric.featureName, - active: 'false', - appName: metric.appName, - }, - metric.no, - ); - } - } catch (e) { - logger.warn('Metrics registration failed', e); - } - }); - - eventStore.on(CLIENT_REGISTER, (heartbeatEvent: ISdkHeartbeat) => { - if (!heartbeatEvent.sdkName || !heartbeatEvent.sdkVersion) { - return; - } - - if (flagResolver.isEnabled('extendedMetrics')) { - clientSdkVersionUsage.increment({ - sdk_name: heartbeatEvent.sdkName, - sdk_version: heartbeatEvent.sdkVersion, - platform_name: - heartbeatEvent.metadata?.platformName ?? 'not-set', - platform_version: - heartbeatEvent.metadata?.platformVersion ?? 'not-set', - yggdrasil_version: - heartbeatEvent.metadata?.yggdrasilVersion ?? 'not-set', - spec_version: - heartbeatEvent.metadata?.specVersion ?? 'not-set', - }); - } else { - clientSdkVersionUsage.increment({ - sdk_name: heartbeatEvent.sdkName, - sdk_version: heartbeatEvent.sdkVersion, - platform_name: 'not-set', - platform_version: 'not-set', - yggdrasil_version: 'not-set', - spec_version: 'not-set', - }); - } - }); - - eventStore.on(PROJECT_ENVIRONMENT_REMOVED, ({ project }) => { - projectEnvironmentsDisabled.increment({ project_id: project }); - }); - - eventBus.on(events.ADDON_EVENTS_HANDLED, ({ result, destination }) => { - addonEventsHandledCounter.increment({ result, destination }); - }); - await this.configureDbMetrics( + await this.registerPrometheusDbMetrics( db, eventBus, - schedulerService, stores.settingStore, ); + await schedulerService.schedule( + collectStaticCounters.bind(this), + hoursToMilliseconds(2), + 'collectStaticCounters', + ); + await schedulerService.schedule( + async () => + this.registerPoolMetrics.bind(this, db.client.pool, eventBus), + minutesToMilliseconds(1), + 'registerPoolMetrics', + 0, // no jitter + ); + return Promise.resolve(); } - async configureDbMetrics( + async registerPrometheusDbMetrics( db: Knex, eventBus: EventEmitter, - schedulerService: SchedulerService, settingStore: ISettingStore, ): Promise { if (db?.client) { @@ -1051,17 +1096,6 @@ export default class MetricsMonitor { dbPoolPendingAcquires.set(data.pendingAcquires); }); - await schedulerService.schedule( - async () => - this.registerPoolMetrics.bind( - this, - db.client.pool, - eventBus, - ), - minutesToMilliseconds(1), - 'registerPoolMetrics', - 0, // no jitter - ); const postgresVersion = await settingStore.postgresVersion(); const database_version = createGauge({ name: 'postgres_version', @@ -1084,26 +1118,8 @@ export default class MetricsMonitor { // eslint-disable-next-line no-empty } catch (e) {} } - - async resolveEnvironmentType( - environment: string, - cachedEnvironments: () => Promise, - ): Promise { - const environments = await cachedEnvironments(); - const env = environments.find((e) => e.name === environment); - - if (env) { - return env.type; - } else { - return 'unknown'; - } - } } export function createMetricsMonitor(): MetricsMonitor { return new MetricsMonitor(); } - -module.exports = { - createMetricsMonitor, -}; diff --git a/src/test/e2e/api/admin/instance-admin.e2e.test.ts b/src/test/e2e/api/admin/instance-admin.e2e.test.ts index 9b1cdb34ed1a..1a645195b430 100644 --- a/src/test/e2e/api/admin/instance-admin.e2e.test.ts +++ b/src/test/e2e/api/admin/instance-admin.e2e.test.ts @@ -6,6 +6,7 @@ import { import getLogger from '../../../fixtures/no-logger'; import type { IUnleashStores } from '../../../../lib/types'; import { ApiTokenType } from '../../../../lib/types/models/api-token'; +import { registerPrometheusMetrics } from '../../../../lib/metrics'; let app: IUnleashTest; let db: ITestDb; @@ -26,6 +27,14 @@ beforeAll(async () => { }, db.rawDatabase, ); + + registerPrometheusMetrics( + app.config, + stores, + undefined as unknown as string, + app.config.eventBus, + app.services.instanceStatsService, + ); }); afterAll(async () => { @@ -39,6 +48,8 @@ test('should return instance statistics', async () => { createdByUserId: 9999, }); + await app.services.instanceStatsService.dbMetrics.refreshDbMetrics(); + return app.request .get('/api/admin/instance-admin/statistics') .expect('Content-Type', /json/) From e7de20fc99c287033fd7fb5ea1eaf5ee520a15b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 17 Oct 2024 23:05:02 +0200 Subject: [PATCH 10/13] Instance stats should not rely on prometheus cause it can be disabled --- .../instance-stats-service.test.ts | 10 +- .../instance-stats/instance-stats-service.ts | 127 ++++++---- src/lib/metrics.test.ts | 32 ++- src/lib/metrics.ts | 226 +++++++++--------- .../e2e/api/admin/instance-admin.e2e.test.ts | 6 +- 5 files changed, 232 insertions(+), 169 deletions(-) diff --git a/src/lib/features/instance-stats/instance-stats-service.test.ts b/src/lib/features/instance-stats/instance-stats-service.test.ts index 24fb34a0f9b8..26a5f7009502 100644 --- a/src/lib/features/instance-stats/instance-stats-service.test.ts +++ b/src/lib/features/instance-stats/instance-stats-service.test.ts @@ -10,8 +10,10 @@ import type { IClientInstanceStore } from '../../types'; let instanceStatsService: InstanceStatsService; let versionService: VersionService; let clientInstanceStore: IClientInstanceStore; - +let updateMetrics: () => Promise; beforeEach(() => { + jest.clearAllMocks(); + register.clear(); const config = createTestConfig(); @@ -31,13 +33,14 @@ beforeEach(() => { createFakeGetProductionChanges(), ); - registerPrometheusMetrics( + const { collectDbMetrics } = registerPrometheusMetrics( config, stores, undefined as unknown as string, config.eventBus, instanceStatsService, ); + updateMetrics = collectDbMetrics; jest.spyOn(clientInstanceStore, 'getDistinctApplicationsCount'); jest.spyOn(instanceStatsService, 'getStats'); @@ -46,13 +49,12 @@ beforeEach(() => { }); test('get snapshot should not call getStats', async () => { - await instanceStatsService.dbMetrics.refreshDbMetrics(); + await updateMetrics(); expect( clientInstanceStore.getDistinctApplicationsCount, ).toHaveBeenCalledTimes(3); expect(instanceStatsService.getStats).toHaveBeenCalledTimes(0); - // subsequent calls to getStatsSnapshot don't call getStats for (let i = 0; i < 3; i++) { const { clientApps } = await instanceStatsService.getStats(); expect(clientApps).toStrictEqual([ diff --git a/src/lib/features/instance-stats/instance-stats-service.ts b/src/lib/features/instance-stats/instance-stats-service.ts index 1f2e20fa21e2..24e04ef3dfaa 100644 --- a/src/lib/features/instance-stats/instance-stats-service.ts +++ b/src/lib/features/instance-stats/instance-stats-service.ts @@ -29,7 +29,6 @@ import { CUSTOM_ROOT_ROLE_TYPE } from '../../util'; import type { GetActiveUsers } from './getActiveUsers'; import type { ProjectModeCount } from '../project/project-store'; import type { GetProductionChanges } from './getProductionChanges'; -import { DbMetricsMonitor } from '../../metrics-gauge'; export type TimeRange = 'allTime' | '30d' | '7d'; @@ -110,14 +109,12 @@ export class InstanceStatsService { private appCount?: Partial<{ [key in TimeRange]: number }>; - private getActiveUsers: GetActiveUsers; + getActiveUsers: GetActiveUsers; - private getProductionChanges: GetProductionChanges; + getProductionChanges: GetProductionChanges; private featureStrategiesReadModel: IFeatureStrategiesReadModel; - dbMetrics: DbMetricsMonitor; - constructor( { featureToggleStore, @@ -181,20 +178,18 @@ export class InstanceStatsService { this.clientMetricsStore = clientMetricsStoreV2; this.flagResolver = flagResolver; this.featureStrategiesReadModel = featureStrategiesReadModel; - this.dbMetrics = new DbMetricsMonitor({ getLogger }); - } - - async fromPrometheus( - name: string, - labels?: Record, - ): Promise { - return (await this.dbMetrics.findValue(name, labels)) ?? 0; } getProjectModeCount(): Promise { return this.projectStore.getProjectModeCounts(); } + getToggleCount(): Promise { + return this.featureToggleStore.count({ + archived: false, + }); + } + getArchivedToggleCount(): Promise { return this.featureToggleStore.count({ archived: true, @@ -217,9 +212,6 @@ export class InstanceStatsService { return settings?.enabled || false; } - /** - * use getStatsSnapshot for low latency, sacrificing data-freshness - */ async getStats(): Promise { const versionInfo = await this.versionService.getVersionInfo(); const [ @@ -249,24 +241,24 @@ export class InstanceStatsService { maxConstraintValues, maxConstraints, ] = await Promise.all([ - this.fromPrometheus('feature_toggles_total'), + this.getToggleCount(), this.getArchivedToggleCount(), - this.userStore.count(), - this.userStore.countServiceAccounts(), - this.apiTokenStore.countByType(), + this.getRegisteredUsers(), + this.countServiceAccounts(), + this.countApiTokensByType(), this.getActiveUsers(), this.getProjectModeCount(), - this.contextFieldStore.count(), - this.groupStore.count(), - this.roleStore.count(), - this.roleStore.filteredCount({ type: CUSTOM_ROOT_ROLE_TYPE }), - this.roleStore.filteredCountInUse({ type: CUSTOM_ROOT_ROLE_TYPE }), - this.environmentStore.count(), - this.segmentStore.count(), - this.strategyStore.count(), + this.contextFieldCount(), + this.groupCount(), + this.roleCount(), + this.customRolesCount(), + this.customRolesCountInUse(), + this.environmentCount(), + this.segmentCount(), + this.strategiesCount(), this.hasSAML(), this.hasOIDC(), - this.clientAppCounts(), + this.appCount ? this.appCount : this.getLabeledAppCounts(), this.eventStore.deprecatedFilteredCount({ type: FEATURES_EXPORTED, }), @@ -274,7 +266,7 @@ export class InstanceStatsService { type: FEATURES_IMPORTED, }), this.getProductionChanges(), - this.clientMetricsStore.countPreviousDayHourlyMetricsBuckets(), + this.countPreviousDayHourlyMetricsBuckets(), this.featureStrategiesReadModel.getMaxFeatureEnvironmentStrategies(), this.featureStrategiesReadModel.getMaxConstraintValues(), this.featureStrategiesReadModel.getMaxConstraintsPerStrategy(), @@ -315,17 +307,72 @@ export class InstanceStatsService { maxConstraints: maxConstraints?.count ?? 0, }; } - async clientAppCounts(): Promise> { + + groupCount(): Promise { + return this.groupStore.count(); + } + + roleCount(): Promise { + return this.roleStore.count(); + } + + customRolesCount(): Promise { + return this.roleStore.filteredCount({ type: CUSTOM_ROOT_ROLE_TYPE }); + } + + customRolesCountInUse(): Promise { + return this.roleStore.filteredCountInUse({ + type: CUSTOM_ROOT_ROLE_TYPE, + }); + } + + segmentCount(): Promise { + return this.segmentStore.count(); + } + + contextFieldCount(): Promise { + return this.contextFieldStore.count(); + } + + strategiesCount(): Promise { + return this.strategyStore.count(); + } + + environmentCount(): Promise { + return this.environmentStore.count(); + } + + countPreviousDayHourlyMetricsBuckets(): Promise<{ + enabledCount: number; + variantCount: number; + }> { + return this.clientMetricsStore.countPreviousDayHourlyMetricsBuckets(); + } + + countApiTokensByType(): Promise> { + return this.apiTokenStore.countByType(); + } + + getRegisteredUsers(): Promise { + return this.userStore.count(); + } + + countServiceAccounts(): Promise { + return this.userStore.countServiceAccounts(); + } + + async getLabeledAppCounts(): Promise< + Partial<{ [key in TimeRange]: number }> + > { + const [t7d, t30d, allTime] = await Promise.all([ + this.clientInstanceStore.getDistinctApplicationsCount(7), + this.clientInstanceStore.getDistinctApplicationsCount(30), + this.clientInstanceStore.getDistinctApplicationsCount(), + ]); this.appCount = { - '7d': await this.fromPrometheus('client_apps_total', { - range: '7d', - }), - '30d': await this.fromPrometheus('client_apps_total', { - range: '30d', - }), - allTime: await this.fromPrometheus('client_apps_total', { - range: 'allTime', - }), + '7d': t7d, + '30d': t30d, + allTime, }; return this.appCount; } diff --git a/src/lib/metrics.test.ts b/src/lib/metrics.test.ts index afff08c245c8..8cfe78aa5d0a 100644 --- a/src/lib/metrics.test.ts +++ b/src/lib/metrics.test.ts @@ -15,7 +15,11 @@ import { FEATURE_UPDATED, PROJECT_ENVIRONMENT_REMOVED, } from './types/events'; -import { createMetricsMonitor } from './metrics'; +import { + createMetricsMonitor, + registerPrometheusMetrics, + registerPrometheusPostgresMetrics, +} from './metrics'; import createStores from '../test/fixtures/store'; import { InstanceStatsService } from './features/instance-stats/instance-stats-service'; import VersionService from './services/version-service'; @@ -46,6 +50,7 @@ let schedulerService: SchedulerService; let featureLifeCycleStore: IFeatureLifecycleStore; let featureLifeCycleReadModel: IFeatureLifecycleReadModel; let db: ITestDb; +let refreshDbMetrics: () => Promise; beforeAll(async () => { const config = createTestConfig({ @@ -102,16 +107,16 @@ beforeAll(async () => { }, }; - await monitor.startMonitoring( - config, - stores, - '4.0.0', - eventBus, - statsService, - schedulerService, - // @ts-ignore - We don't want a full knex implementation for our tests, it's enough that it actually yields the numbers we want. - metricsDbConf, - ); + const { collectDbMetrics, collectStaticCounters } = + registerPrometheusMetrics( + config, + stores, + '4.0.0', + eventBus, + statsService, + ); + refreshDbMetrics = collectDbMetrics; + await collectStaticCounters(); }); afterAll(async () => { @@ -212,7 +217,7 @@ test('should collect metrics for function timings', async () => { }); test('should collect metrics for feature flag size', async () => { - await statsService.dbMetrics.refreshDbMetrics(); + await refreshDbMetrics(); const metrics = await prometheusRegister.metrics(); expect(metrics).toMatch(/feature_toggles_total\{version="(.*)"\} 0/); }); @@ -223,12 +228,13 @@ test('should collect metrics for archived feature flag size', async () => { }); test('should collect metrics for total client apps', async () => { - await statsService.dbMetrics.refreshDbMetrics(); + await refreshDbMetrics(); const metrics = await prometheusRegister.metrics(); expect(metrics).toMatch(/client_apps_total\{range="(.*)"\} 0/); }); test('Should collect metrics for database', async () => { + registerPrometheusPostgresMetrics(db.rawDatabase, eventBus, '15.0.0'); const metrics = await prometheusRegister.metrics(); expect(metrics).toMatch(/db_pool_max/); expect(metrics).toMatch(/db_pool_min/); diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 05ba0e98859f..1b69a462ac2e 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -25,7 +25,7 @@ import { PROJECT_DELETED, } from './types/events'; import type { IUnleashConfig } from './types/option'; -import type { ISettingStore, IUnleashStores } from './types/stores'; +import type { IUnleashStores } from './types/stores'; import { hoursToMilliseconds, minutesToMilliseconds } from 'date-fns'; import type { InstanceStatsService } from './features/instance-stats/instance-stats-service'; import type { IEnvironment, ISdkHeartbeat } from './types'; @@ -37,6 +37,56 @@ import { } from './util/metrics'; import type { SchedulerService } from './services'; import type { IClientMetricsEnv } from './features/metrics/client-metrics/client-metrics-store-v2-type'; +import { DbMetricsMonitor } from './metrics-gauge'; + +export function registerPrometheusPostgresMetrics( + db: Knex, + eventBus: EventEmitter, + postgresVersion: string, +) { + if (db?.client) { + const dbPoolMin = createGauge({ + name: 'db_pool_min', + help: 'Minimum DB pool size', + }); + dbPoolMin.set(db.client.pool.min); + const dbPoolMax = createGauge({ + name: 'db_pool_max', + help: 'Maximum DB pool size', + }); + dbPoolMax.set(db.client.pool.max); + const dbPoolFree = createGauge({ + name: 'db_pool_free', + help: 'Current free connections in DB pool', + }); + const dbPoolUsed = createGauge({ + name: 'db_pool_used', + help: 'Current connections in use in DB pool', + }); + const dbPoolPendingCreates = createGauge({ + name: 'db_pool_pending_creates', + help: 'how many asynchronous create calls are running in DB pool', + }); + const dbPoolPendingAcquires = createGauge({ + name: 'db_pool_pending_acquires', + help: 'how many acquires are waiting for a resource to be released in DB pool', + }); + + eventBus.on(DB_POOL_UPDATE, (data) => { + dbPoolFree.set(data.free); + dbPoolUsed.set(data.used); + dbPoolPendingCreates.set(data.pendingCreates); + dbPoolPendingAcquires.set(data.pendingAcquires); + }); + + const database_version = createGauge({ + name: 'postgres_version', + help: 'Which version of postgres is running (SHOW server_version)', + labelNames: ['version'], + }); + database_version.labels({ version: postgresVersion }).set(1); + } +} export function registerPrometheusMetrics( config: IUnleashConfig, @@ -60,8 +110,8 @@ export function registerPrometheusMetrics( }; const { eventStore, environmentStore } = stores; - const { flagResolver } = config; - const dbMetrics = instanceStatsService.dbMetrics; + const { flagResolver, db } = config; + const dbMetrics = new DbMetricsMonitor(config); const cachedEnvironments: () => Promise = memoizee( async () => environmentStore.getAll(), @@ -124,10 +174,7 @@ export function registerPrometheusMetrics( name: 'feature_toggles_total', help: 'Number of feature flags', labelNames: ['version'], - query: () => - stores.featureToggleStore.count({ - archived: false, - }), + query: () => instanceStatsService.getToggleCount(), map: (value) => ({ value, labels: { version } }), }); @@ -268,18 +315,7 @@ export function registerPrometheusMetrics( name: 'client_apps_total', help: 'Number of registered client apps aggregated by range by last seen', labelNames: ['range'], - query: async () => { - const [t7d, t30d, allTime] = await Promise.all([ - stores.clientInstanceStore.getDistinctApplicationsCount(7), - stores.clientInstanceStore.getDistinctApplicationsCount(30), - stores.clientInstanceStore.getDistinctApplicationsCount(), - ]); - return { - '7d': t7d, - '30d': t30d, - allTime, - }; - }, + query: () => instanceStatsService.getLabeledAppCounts(), map: (result) => Object.entries(result).map(([range, count]) => ({ value: count, @@ -735,13 +771,10 @@ export function registerPrometheusMetrics( addonEventsHandledCounter.increment({ result, destination }); }); - // return an update function (temporarily) to allow for manual refresh return { + collectDbMetrics: dbMetrics.refreshDbMetrics, collectStaticCounters: async () => { try { - dbMetrics.refreshDbMetrics(); - - const stats = await instanceStatsService.getStats(); const [ maxConstraintValuesResult, maxConstraintsPerStrategyResult, @@ -773,13 +806,17 @@ export function registerPrometheusMetrics( ]); featureTogglesArchivedTotal.reset(); - featureTogglesArchivedTotal.set(stats.archivedFeatureToggles); + featureTogglesArchivedTotal.set( + await instanceStatsService.getArchivedToggleCount(), + ); usersTotal.reset(); - usersTotal.set(stats.users); + usersTotal.set(await instanceStatsService.getRegisteredUsers()); serviceAccounts.reset(); - serviceAccounts.set(stats.serviceAccounts); + serviceAccounts.set( + await instanceStatsService.countServiceAccounts(), + ); stageDurationByProject.forEach((stage) => { featureLifecycleStageDuration @@ -802,7 +839,10 @@ export function registerPrometheusMetrics( apiTokens.reset(); - for (const [type, value] of stats.apiTokens) { + for (const [ + type, + value, + ] of await instanceStatsService.countApiTokensByType()) { apiTokens.labels({ type }).set(value); } @@ -887,67 +927,84 @@ export function registerPrometheusMetrics( resourceLimit.labels({ resource }).set(limit); } + const previousDayMetricsBucketsCount = + await instanceStatsService.countPreviousDayHourlyMetricsBuckets(); enabledMetricsBucketsPreviousDay.reset(); enabledMetricsBucketsPreviousDay.set( - stats.previousDayMetricsBucketsCount.enabledCount, + previousDayMetricsBucketsCount.enabledCount, ); variantMetricsBucketsPreviousDay.reset(); variantMetricsBucketsPreviousDay.set( - stats.previousDayMetricsBucketsCount.variantCount, + previousDayMetricsBucketsCount.variantCount, ); + const activeUsers = await instanceStatsService.getActiveUsers(); usersActive7days.reset(); - usersActive7days.set(stats.activeUsers.last7); + usersActive7days.set(activeUsers.last7); usersActive30days.reset(); - usersActive30days.set(stats.activeUsers.last30); + usersActive30days.set(activeUsers.last30); usersActive60days.reset(); - usersActive60days.set(stats.activeUsers.last60); + usersActive60days.set(activeUsers.last60); usersActive90days.reset(); - usersActive90days.set(stats.activeUsers.last90); + usersActive90days.set(activeUsers.last90); + const productionChanges = + await instanceStatsService.getProductionChanges(); productionChanges30.reset(); - productionChanges30.set(stats.productionChanges.last30); + productionChanges30.set(productionChanges.last30); productionChanges60.reset(); - productionChanges60.set(stats.productionChanges.last60); + productionChanges60.set(productionChanges.last60); productionChanges90.reset(); - productionChanges90.set(stats.productionChanges.last90); + productionChanges90.set(productionChanges.last90); + const projects = + await instanceStatsService.getProjectModeCount(); projectsTotal.reset(); - stats.projects.forEach((projectStat) => { + projects.forEach((projectStat) => { projectsTotal .labels({ mode: projectStat.mode }) .set(projectStat.count); }); environmentsTotal.reset(); - environmentsTotal.set(stats.environments); + environmentsTotal.set( + await instanceStatsService.environmentCount(), + ); groupsTotal.reset(); - groupsTotal.set(stats.groups); + groupsTotal.set(await instanceStatsService.groupCount()); rolesTotal.reset(); - rolesTotal.set(stats.roles); + rolesTotal.set(await instanceStatsService.roleCount()); customRootRolesTotal.reset(); - customRootRolesTotal.set(stats.customRootRoles); + customRootRolesTotal.set( + await instanceStatsService.customRolesCount(), + ); customRootRolesInUseTotal.reset(); - customRootRolesInUseTotal.set(stats.customRootRolesInUse); + customRootRolesInUseTotal.set( + await instanceStatsService.customRolesCountInUse(), + ); segmentsTotal.reset(); - segmentsTotal.set(stats.segments); + segmentsTotal.set(await instanceStatsService.segmentCount()); contextTotal.reset(); - contextTotal.set(stats.contextFields); + contextTotal.set( + await instanceStatsService.contextFieldCount(), + ); strategiesTotal.reset(); - strategiesTotal.set(stats.strategies); + strategiesTotal.set( + await instanceStatsService.strategiesCount(), + ); samlEnabled.reset(); - samlEnabled.set(stats.SAMLenabled ? 1 : 0); + samlEnabled.set((await instanceStatsService.hasSAML()) ? 1 : 0); oidcEnabled.reset(); - oidcEnabled.set(stats.OIDCenabled ? 1 : 0); + oidcEnabled.set((await instanceStatsService.hasOIDC()) ? 1 : 0); rateLimits.reset(); rateLimits @@ -1026,22 +1083,21 @@ export default class MetricsMonitor { collectDefaultMetrics(); - const { collectStaticCounters } = registerPrometheusMetrics( - config, - stores, - version, - eventBus, - instanceStatsService, - ); + const { collectStaticCounters, collectDbMetrics } = + registerPrometheusMetrics( + config, + stores, + version, + eventBus, + instanceStatsService, + ); - await this.registerPrometheusDbMetrics( - db, - eventBus, - stores.settingStore, - ); + const postgresVersion = await stores.settingStore.postgresVersion(); + registerPrometheusPostgresMetrics(db, eventBus, postgresVersion); await schedulerService.schedule( - collectStaticCounters.bind(this), + async () => + Promise.all([collectStaticCounters(), collectDbMetrics()]), hoursToMilliseconds(2), 'collectStaticCounters', ); @@ -1056,56 +1112,6 @@ export default class MetricsMonitor { return Promise.resolve(); } - async registerPrometheusDbMetrics( - db: Knex, - eventBus: EventEmitter, - settingStore: ISettingStore, - ): Promise { - if (db?.client) { - const dbPoolMin = createGauge({ - name: 'db_pool_min', - help: 'Minimum DB pool size', - }); - dbPoolMin.set(db.client.pool.min); - const dbPoolMax = createGauge({ - name: 'db_pool_max', - help: 'Maximum DB pool size', - }); - dbPoolMax.set(db.client.pool.max); - const dbPoolFree = createGauge({ - name: 'db_pool_free', - help: 'Current free connections in DB pool', - }); - const dbPoolUsed = createGauge({ - name: 'db_pool_used', - help: 'Current connections in use in DB pool', - }); - const dbPoolPendingCreates = createGauge({ - name: 'db_pool_pending_creates', - help: 'how many asynchronous create calls are running in DB pool', - }); - const dbPoolPendingAcquires = createGauge({ - name: 'db_pool_pending_acquires', - help: 'how many acquires are waiting for a resource to be released in DB pool', - }); - - eventBus.on(DB_POOL_UPDATE, (data) => { - dbPoolFree.set(data.free); - dbPoolUsed.set(data.used); - dbPoolPendingCreates.set(data.pendingCreates); - dbPoolPendingAcquires.set(data.pendingAcquires); - }); - - const postgresVersion = await settingStore.postgresVersion(); - const database_version = createGauge({ - name: 'postgres_version', - help: 'Which version of postgres is running (SHOW server_version)', - labelNames: ['version'], - }); - database_version.labels({ version: postgresVersion }).set(1); - } - } - // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types registerPoolMetrics(pool: any, eventBus: EventEmitter) { try { diff --git a/src/test/e2e/api/admin/instance-admin.e2e.test.ts b/src/test/e2e/api/admin/instance-admin.e2e.test.ts index 1a645195b430..a3e5c08c7386 100644 --- a/src/test/e2e/api/admin/instance-admin.e2e.test.ts +++ b/src/test/e2e/api/admin/instance-admin.e2e.test.ts @@ -11,6 +11,7 @@ import { registerPrometheusMetrics } from '../../../../lib/metrics'; let app: IUnleashTest; let db: ITestDb; let stores: IUnleashStores; +let refreshDbMetrics: () => Promise; beforeAll(async () => { db = await dbInit('instance_admin_api_serial', getLogger); @@ -28,13 +29,14 @@ beforeAll(async () => { db.rawDatabase, ); - registerPrometheusMetrics( + const { collectDbMetrics } = registerPrometheusMetrics( app.config, stores, undefined as unknown as string, app.config.eventBus, app.services.instanceStatsService, ); + refreshDbMetrics = collectDbMetrics; }); afterAll(async () => { @@ -48,7 +50,7 @@ test('should return instance statistics', async () => { createdByUserId: 9999, }); - await app.services.instanceStatsService.dbMetrics.refreshDbMetrics(); + await refreshDbMetrics(); return app.request .get('/api/admin/instance-admin/statistics') From e97f74891f81e27433662bb978e0435450b4e204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 17 Oct 2024 23:18:13 +0200 Subject: [PATCH 11/13] Replace removed method --- src/lib/features/scheduler/schedule-services.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/features/scheduler/schedule-services.ts b/src/lib/features/scheduler/schedule-services.ts index 95e03c114b19..dd00eb0574ce 100644 --- a/src/lib/features/scheduler/schedule-services.ts +++ b/src/lib/features/scheduler/schedule-services.ts @@ -60,7 +60,7 @@ export const scheduleServices = async ( ); schedulerService.schedule( - instanceStatsService.refreshAppCountSnapshot.bind(instanceStatsService), + instanceStatsService.getLabeledAppCounts.bind(instanceStatsService), minutesToMilliseconds(5), 'refreshAppCountSnapshot', ); From add3beda05b62d7c11ea86ac8b8c036ee2d9c1f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 17 Oct 2024 23:22:49 +0200 Subject: [PATCH 12/13] Add comment --- src/lib/features/scheduler/schedule-services.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/features/scheduler/schedule-services.ts b/src/lib/features/scheduler/schedule-services.ts index dd00eb0574ce..69d3a218fc1e 100644 --- a/src/lib/features/scheduler/schedule-services.ts +++ b/src/lib/features/scheduler/schedule-services.ts @@ -59,6 +59,10 @@ export const scheduleServices = async ( 'updateLastSeen', ); + // TODO this works fine for keeping labeledAppCounts up to date, but + // it would be nice if we can keep client_apps_total prometheus metric + // up to date. We'd need to have access to DbMetricsMonitor, which is + // where the metric is registered and call an update only for that metric schedulerService.schedule( instanceStatsService.getLabeledAppCounts.bind(instanceStatsService), minutesToMilliseconds(5), From 1a977fe347dad779bb94e7b683c1a69b2dfca992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gast=C3=B3n=20Fournier?= Date: Thu, 17 Oct 2024 23:38:50 +0200 Subject: [PATCH 13/13] Move rate limits outside the refresh function as they're static --- src/lib/metrics.ts | 105 +++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 56 deletions(-) diff --git a/src/lib/metrics.ts b/src/lib/metrics.ts index 1b69a462ac2e..4251dd3aa477 100644 --- a/src/lib/metrics.ts +++ b/src/lib/metrics.ts @@ -367,6 +367,55 @@ export function registerPrometheusMetrics( help: 'Rate limits (per minute) for METHOD/ENDPOINT pairs', labelNames: ['endpoint', 'method'], }); + rateLimits + .labels({ + endpoint: '/api/client/metrics', + method: 'POST', + }) + .set(config.metricsRateLimiting.clientMetricsMaxPerMinute); + rateLimits + .labels({ + endpoint: '/api/client/register', + method: 'POST', + }) + .set(config.metricsRateLimiting.clientRegisterMaxPerMinute); + rateLimits + .labels({ + endpoint: '/api/frontend/metrics', + method: 'POST', + }) + .set(config.metricsRateLimiting.frontendMetricsMaxPerMinute); + rateLimits + .labels({ + endpoint: '/api/frontend/register', + method: 'POST', + }) + .set(config.metricsRateLimiting.frontendRegisterMaxPerMinute); + rateLimits + .labels({ + endpoint: '/api/admin/user-admin', + method: 'POST', + }) + .set(config.rateLimiting.createUserMaxPerMinute); + rateLimits + .labels({ + endpoint: '/auth/simple', + method: 'POST', + }) + .set(config.rateLimiting.simpleLoginMaxPerMinute); + rateLimits + .labels({ + endpoint: '/auth/reset/password-email', + method: 'POST', + }) + .set(config.rateLimiting.passwordResetMaxPerMinute); + rateLimits + .labels({ + endpoint: '/api/signal-endpoint/:name', + method: 'POST', + }) + .set(config.rateLimiting.callSignalEndpointMaxPerSecond * 60); + const featureCreatedByMigration = createCounter({ name: 'feature_created_by_migration_count', help: 'Feature createdBy migration count', @@ -1005,62 +1054,6 @@ export function registerPrometheusMetrics( oidcEnabled.reset(); oidcEnabled.set((await instanceStatsService.hasOIDC()) ? 1 : 0); - - rateLimits.reset(); - rateLimits - .labels({ - endpoint: '/api/client/metrics', - method: 'POST', - }) - .set(config.metricsRateLimiting.clientMetricsMaxPerMinute); - rateLimits - .labels({ - endpoint: '/api/client/register', - method: 'POST', - }) - .set(config.metricsRateLimiting.clientRegisterMaxPerMinute); - rateLimits - .labels({ - endpoint: '/api/frontend/metrics', - method: 'POST', - }) - .set( - config.metricsRateLimiting.frontendMetricsMaxPerMinute, - ); - rateLimits - .labels({ - endpoint: '/api/frontend/register', - method: 'POST', - }) - .set( - config.metricsRateLimiting.frontendRegisterMaxPerMinute, - ); - rateLimits - .labels({ - endpoint: '/api/admin/user-admin', - method: 'POST', - }) - .set(config.rateLimiting.createUserMaxPerMinute); - rateLimits - .labels({ - endpoint: '/auth/simple', - method: 'POST', - }) - .set(config.rateLimiting.simpleLoginMaxPerMinute); - rateLimits - .labels({ - endpoint: '/auth/reset/password-email', - method: 'POST', - }) - .set(config.rateLimiting.passwordResetMaxPerMinute); - rateLimits - .labels({ - endpoint: '/api/signal-endpoint/:name', - method: 'POST', - }) - .set( - config.rateLimiting.callSignalEndpointMaxPerSecond * 60, - ); } catch (e) {} }, };