Skip to content

Commit

Permalink
Monitor zuul PVs for disk usage, simplify podmonitor
Browse files Browse the repository at this point in the history
For every statefulset used with Zuul, export disk usage for the PV
mounted on /var/lib/zuul (zuul-web is a deployment and doesn't use a
PV).
Remove specific zuul-monitor podmonitor and move zuul pod monitoring
to the generic sf-monitor.

Change-Id: Id94d27d525ade1303314dbc843477355a242de0c
  • Loading branch information
mhuin authored and morucci committed Dec 15, 2023
1 parent 08dc9dd commit 7035471
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 49 deletions.
14 changes: 14 additions & 0 deletions controllers/softwarefactory_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ func (r *SFController) cleanup() {
if r.GetM(GitServerIdent+"-monitor", &currentGSpm) {
r.DeleteR(&currentGSpm)
}
currentZPM := monitoringv1.PodMonitor{}
if r.GetM("zuul-monitor", &currentZPM) {
r.DeleteR(&currentZPM)
}
}

func (r *SFController) Step() sfv1.SoftwareFactoryStatus {
Expand Down Expand Up @@ -201,6 +205,16 @@ func (r *SFController) Step() sfv1.SoftwareFactoryStatus {
}

if services["Zuul"] {
monitoredPorts = append(
monitoredPorts,
sfmonitoring.GetTruncatedPortName("zuul-scheduler", sfmonitoring.NodeExporterPortNameSuffix),
sfmonitoring.GetTruncatedPortName("zuul-executor", sfmonitoring.NodeExporterPortNameSuffix),
sfmonitoring.GetTruncatedPortName("zuul-merger", sfmonitoring.NodeExporterPortNameSuffix),
sfmonitoring.GetTruncatedPortName("zuul-web", sfmonitoring.NodeExporterPortNameSuffix),
ZuulPrometheusPortName,
ZuulStatsdExporterPortName,
)
selectorRunList = append(selectorRunList, "zuul-scheduler", "zuul-executor", "zuul-merger", "zuul-web")
services["Config"] = r.SetupConfigJob()
if services["Config"] {
conds.RefreshCondition(&r.cr.Status.Conditions, "ConfigReady", metav1.ConditionTrue, "Ready", "Config is ready")
Expand Down
93 changes: 44 additions & 49 deletions controllers/zuul.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ const (
zuulExecutorPort = 7900

zuulPrometheusPort = 9090
zuulPrometheusPortName = "zuul-metrics"
ZuulPrometheusPortName = "zuul-metrics"
)

var (
zuulStatsdExporterPortName = monitoring.GetStatsdExporterPort("zuul")
ZuulStatsdExporterPortName = monitoring.GetStatsdExporterPort("zuul")

//go:embed static/zuul/zuul.conf
zuulDotconf string
Expand Down Expand Up @@ -352,8 +352,16 @@ func (r *SFController) EnsureZuulScheduler(cfg *ini.File) bool {
zuulContainers[0].Env = append(zuulContainers[0].Env, extraLoggingEnvVars...)

statsdSidecar := monitoring.MkStatsdExporterSideCarContainer("zuul", "statsd-config", relayAddress)
nodeExporterSidecar := monitoring.MkNodeExporterSideCarContainer(
"zuul-scheduler",
[]apiv1.VolumeMount{
{
Name: "zuul-scheduler",
MountPath: "/var/lib/zuul",
},
})

zuulContainers = append(zuulContainers, statsdSidecar)
zuulContainers = append(zuulContainers, statsdSidecar, nodeExporterSidecar)

var setAdditionalContainers = func(sts *appsv1.StatefulSet) {
sts.Spec.Template.Spec.InitContainers = []apiv1.Container{r.mkInitSchedulerConfigContainer()}
Expand All @@ -375,7 +383,7 @@ func (r *SFController) EnsureZuulScheduler(cfg *ini.File) bool {
zs.Spec.Template.Spec.Containers[0].LivenessProbe = base.MkLiveHTTPProbe("/health/live", zuulPrometheusPort)
zs.Spec.Template.Spec.Containers[0].StartupProbe = base.MkStartupHTTPProbe("/health/ready", zuulPrometheusPort)
zs.Spec.Template.Spec.Containers[0].Ports = []apiv1.ContainerPort{
base.MkContainerPort(zuulPrometheusPort, zuulPrometheusPortName),
base.MkContainerPort(zuulPrometheusPort, ZuulPrometheusPortName),
}

current, changed := r.ensureStatefulset(zs)
Expand Down Expand Up @@ -413,14 +421,29 @@ func (r *SFController) EnsureZuulExecutor(cfg *ini.File) bool {
ze.Spec.Template.Spec.Containers[0].ReadinessProbe = base.MkReadinessHTTPProbe("/health/ready", zuulPrometheusPort)
ze.Spec.Template.Spec.Containers[0].LivenessProbe = base.MkReadinessHTTPProbe("/health/live", zuulPrometheusPort)
ze.Spec.Template.Spec.Containers[0].Ports = []apiv1.ContainerPort{
base.MkContainerPort(zuulPrometheusPort, zuulPrometheusPortName),
base.MkContainerPort(zuulPrometheusPort, ZuulPrometheusPortName),
base.MkContainerPort(zuulExecutorPort, zuulExecutorPortName),
}
// NOTE(dpawlik): Zuul Executor needs to privileged pod, due error in the console log:
// "bwrap: Can't bind mount /oldroot/etc/resolv.conf on /newroot/etc/resolv.conf: Permission denied""
ze.Spec.Template.Spec.Containers[0].SecurityContext = base.MkSecurityContext(true)
ze.Spec.Template.Spec.Containers[0].SecurityContext.RunAsUser = pointer.Int64(1000)

nodeExporterSidecar := monitoring.MkNodeExporterSideCarContainer(
"zuul-executor",
[]apiv1.VolumeMount{
{
Name: "zuul-executor",
MountPath: "/var/lib/zuul",
},
})
ze.Spec.Template.Spec.Containers = append(ze.Spec.Template.Spec.Containers, nodeExporterSidecar)
// FIXME: OpenShift doesn't seem very happy when containers in the same pod don't share
// the same security context; or maybe it is because a volume is shared between the two?
// Anyhow, the simplest fix is to elevate privileges on the node exporter sidecar.
ze.Spec.Template.Spec.Containers[1].SecurityContext = base.MkSecurityContext(true)
ze.Spec.Template.Spec.Containers[1].SecurityContext.RunAsUser = pointer.Int64(1000)

current, changed := r.ensureStatefulset(ze)
if changed {
return false
Expand Down Expand Up @@ -454,12 +477,22 @@ func (r *SFController) EnsureZuulMerger(cfg *ini.File) bool {
extraLoggingEnvVars := logging.SetupLogForwarding(service, r.cr.Spec.FluentBitLogForwarding, zuulFluentBitLabels, annotations)
zm.Spec.Template.Spec.Containers[0].Env = append(zm.Spec.Template.Spec.Containers[0].Env, extraLoggingEnvVars...)

nodeExporterSidecar := monitoring.MkNodeExporterSideCarContainer(
service,
[]apiv1.VolumeMount{
{
Name: service,
MountPath: "/var/lib/zuul",
},
})
zm.Spec.Template.Spec.Containers = append(zm.Spec.Template.Spec.Containers, nodeExporterSidecar)

zm.Spec.Template.ObjectMeta.Annotations = annotations

zm.Spec.Template.Spec.Containers[0].ReadinessProbe = base.MkReadinessHTTPProbe("/health/ready", zuulPrometheusPort)
zm.Spec.Template.Spec.Containers[0].LivenessProbe = base.MkReadinessHTTPProbe("/health/live", zuulPrometheusPort)
zm.Spec.Template.Spec.Containers[0].Ports = []apiv1.ContainerPort{
base.MkContainerPort(zuulPrometheusPort, zuulPrometheusPortName),
base.MkContainerPort(zuulPrometheusPort, ZuulPrometheusPortName),
}

current, changed := r.ensureStatefulset(zm)
Expand Down Expand Up @@ -500,7 +533,7 @@ func (r *SFController) EnsureZuulWeb(cfg *ini.File) bool {
zw.Spec.Template.Spec.Containers[0].LivenessProbe = base.MkLiveHTTPProbe("/api/info", zuulWEBPort)
zw.Spec.Template.Spec.Containers[0].StartupProbe = base.MkStartupHTTPProbe("/api/info", zuulWEBPort)
zw.Spec.Template.Spec.Containers[0].Ports = []apiv1.ContainerPort{
base.MkContainerPort(zuulPrometheusPort, zuulPrometheusPortName),
base.MkContainerPort(zuulPrometheusPort, ZuulPrometheusPortName),
}

current := appsv1.Deployment{}
Expand Down Expand Up @@ -545,43 +578,6 @@ func (r *SFController) EnsureZuulComponents(cfg *ini.File) bool {
return zuulServices["scheduler"] && zuulServices["executor"] && zuulServices["web"] && zuulServices["merger"]
}

func (r *SFController) EnsureZuulPodMonitor() bool {
selector := metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "run",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"zuul-scheduler", "zuul-merger", "zuul-executor", "zuul-web"},
},
{
Key: "app",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"sf"},
},
},
}
desiredZuulPodMonitor := monitoring.MkPodMonitor("zuul-monitor", r.ns, []string{zuulPrometheusPortName, zuulStatsdExporterPortName}, selector)
// add annotations so we can handle lifecycle
annotations := map[string]string{
"version": "2",
}
desiredZuulPodMonitor.ObjectMeta.Annotations = annotations
currentZPM := monitoringv1.PodMonitor{}
if !r.GetM(desiredZuulPodMonitor.Name, &currentZPM) {
r.CreateR(&desiredZuulPodMonitor)
return false
} else {
if !utils.MapEquals(&currentZPM.ObjectMeta.Annotations, &annotations) {
r.log.V(1).Info("Zuul PodMonitor configuration changed, updating...")
currentZPM.Spec = desiredZuulPodMonitor.Spec
currentZPM.ObjectMeta.Annotations = annotations
r.UpdateR(&currentZPM)
return false
}
}
return true
}

// create default alerts
func (r *SFController) ensureZuulPromRule() bool {
/* Alert when a config-update job fails on the config repository */
Expand Down Expand Up @@ -610,7 +606,7 @@ func (r *SFController) ensureZuulPromRule() bool {
notEnoughExecutors := monitoring.MkPrometheusAlertRule(
"NotEnoughExecutors",
intstr.FromString(
"rate(zuul_executors_jobs_queued[1h]) > 0"),
"increase(zuul_executors_jobs_queued[1h]) > 0"),
"1h",
monitoring.WarningSeverityLabel,
notEnoughExecutorsAnnotations,
Expand All @@ -624,7 +620,7 @@ func (r *SFController) ensureZuulPromRule() bool {
notEnoughMergers := monitoring.MkPrometheusAlertRule(
"NotEnoughMergers",
intstr.FromString(
"rate(zuul_mergers_jobs_queued[1h]) > 0"),
"increase(zuul_mergers_jobs_queued[1h]) > 0"),
"1h",
monitoring.WarningSeverityLabel,
notEnoughMergersAnnotations,
Expand All @@ -638,7 +634,7 @@ func (r *SFController) ensureZuulPromRule() bool {
notEnoughNodes := monitoring.MkPrometheusAlertRule(
"NotEnoughTestNodes",
intstr.FromString(
"rate(zuul_nodepool_current_requests[1h]) > 0"),
"increase(zuul_nodepool_current_requests[1h]) > 0"),
"1h",
monitoring.WarningSeverityLabel,
notEnoughNodesAnnotations,
Expand Down Expand Up @@ -1020,8 +1016,7 @@ func (r *SFController) DeployZuul() bool {

r.EnsureZuulConfigSecret(cfgINI)
r.EnsureZuulComponentsFrontServices()
// We could condition readiness to the state of the PodMonitor, but we don't.
r.EnsureZuulPodMonitor()

r.ensureZuulPromRule()

return r.EnsureZuulComponents(cfgINI) && r.setupZuulIngress()
Expand Down

0 comments on commit 7035471

Please sign in to comment.