Skip to content

Commit

Permalink
feat(metrics): add ElapsedTimeSinceLastBackup and RPODuration gauges for schedules
Browse files Browse the repository at this point in the history
  • Loading branch information
ulya-sidorina committed Jan 22, 2025
1 parent 6252d73 commit 2fd817b
Showing 1 changed file with 42 additions and 4 deletions.
46 changes: 42 additions & 4 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,12 @@ type MetricsRegistryImpl struct {
backupsSucceededCount *prometheus.GaugeVec

// schedule metrics
scheduleActionFailedCount *prometheus.CounterVec
scheduleActionSucceededCount *prometheus.CounterVec
scheduleLastBackupTimestamp *prometheus.GaugeVec
scheduleRPOMarginRatio *prometheus.GaugeVec
scheduleActionFailedCount *prometheus.CounterVec
scheduleActionSucceededCount *prometheus.CounterVec
scheduleLastBackupTimestamp *prometheus.GaugeVec
scheduleRPOMarginRatio *prometheus.GaugeVec
scheduleElapsedTimeSinceLastBackup *prometheus.GaugeVec
scheduleRPODuration *prometheus.GaugeVec
}

func (s *MetricsRegistryImpl) ReportHealthCheck() {
Expand Down Expand Up @@ -241,6 +243,13 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
schedule.ID,
scheduleNameLabel,
).Set(float64(schedule.RecoveryPoint.Unix()))

s.scheduleElapsedTimeSinceLastBackup.WithLabelValues(
schedule.ContainerID,
schedule.DatabaseName,
schedule.ID,
scheduleNameLabel,
).Set(s.clock.Since(*schedule.RecoveryPoint).Seconds())
} else if schedule.Audit != nil && schedule.Audit.CreatedAt != nil {
// Report schedule creation time as last backup time if no backups were made
s.scheduleLastBackupTimestamp.WithLabelValues(
Expand All @@ -249,7 +258,24 @@ func (s *MetricsRegistryImpl) IncScheduleCounters(schedule *types.BackupSchedule
schedule.ID,
scheduleNameLabel,
).Set(float64(schedule.Audit.CreatedAt.AsTime().Unix()))

s.scheduleElapsedTimeSinceLastBackup.WithLabelValues(
schedule.ContainerID,
schedule.DatabaseName,
schedule.ID,
scheduleNameLabel,
).Set(s.clock.Since(schedule.Audit.CreatedAt.AsTime()).Seconds())
}

if schedule.ScheduleSettings.RecoveryPointObjective != nil {
s.scheduleRPODuration.WithLabelValues(
schedule.ContainerID,
schedule.DatabaseName,
schedule.ID,
scheduleNameLabel,
).Set(float64(schedule.ScheduleSettings.RecoveryPointObjective.Seconds))
}

info := schedule.GetBackupInfo(s.clock)
if info != nil {
s.scheduleRPOMarginRatio.WithLabelValues(
Expand Down Expand Up @@ -392,6 +418,18 @@ func newMetricsRegistry(ctx context.Context, wg *sync.WaitGroup, cfg *config.Met
Help: "if RPO is set for schedule, calculates a ratio to which RPO is satisfied",
}, []string{"container_id", "database", "schedule_id", "schedule_name"})

s.scheduleElapsedTimeSinceLastBackup = promauto.With(s.reg).NewGaugeVec(prometheus.GaugeOpts{
Subsystem: "schedules",
Name: "elapsed_seconds_since_last_backup",
Help: "Amount of time elapsed since last successful backup for this schedule",
}, []string{"container_id", "database", "schedule_id", "schedule_name"})

s.scheduleRPODuration = promauto.With(s.reg).NewGaugeVec(prometheus.GaugeOpts{
Subsystem: "schedules",
Name: "rpo_duration_seconds",
Help: "Maximum length of time permitted, that backup can be restored for this schedule",
}, []string{"container_id", "database", "schedule_id", "schedule_name"})

mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(s.reg, promhttp.HandlerOpts{Registry: s.reg}))

Expand Down

0 comments on commit 2fd817b

Please sign in to comment.