Skip to content

Commit

Permalink
added service and servicemonitor to collect MCE operator metrics (#550)
Browse files Browse the repository at this point in the history
Signed-off-by: Disaiah Bennett <[email protected]>
  • Loading branch information
dislbenn authored Nov 27, 2023
1 parent 526ed59 commit bdb6ac1
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 27 deletions.
20 changes: 10 additions & 10 deletions api/v1/multiclusterengine_methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ var MCEComponents = []string{

// LegacyPrometheusKind lists the resource kinds returned by GetLegacyPrometheusKind.
var LegacyPrometheusKind = []string{"PrometheusRule", "ServiceMonitor"}

// MCEPrometheusRules is a map that associates certain component names with their corresponding prometheus rules.
var MCEPrometheusRules = map[string]string{
// MCELegacyPrometheusRules maps a component name to the name of its legacy PrometheusRule resource.
// Components without an entry have no legacy PrometheusRule; GetLegacyPrometheusRulesName returns
// an error for them.
var MCELegacyPrometheusRules = map[string]string{
ConsoleMCE: "acm-console-prometheus-rules",
// Add other components here when PrometheusRules is required.
}

// MCEServiceMonitors is a map that associates certain component names with their corresponding service monitors.
var MCEServiceMonitors = map[string]string{
// MCELegacyServiceMonitors is a map that associates certain component names with their corresponding service monitors.
var MCELegacyServiceMonitors = map[string]string{
ClusterLifecycle: "clusterlifecycle-state-metrics-v2",
ConsoleMCE: "console-mce-monitor",
// Add other components here when ServiceMonitors is required.
Expand Down Expand Up @@ -217,18 +217,18 @@ func GetLegacyPrometheusKind() []string {
return LegacyPrometheusKind
}

// GetPrometheusRulesName returns the name of the PrometheusRules based on the provided component name.
func GetPrometheusRulesName(component string) (string, error) {
if val, ok := MCEPrometheusRules[component]; !ok {
// GetLegacyPrometheusRulesName returns the name of the legacy PrometheusRule registered for the
// given component in MCELegacyPrometheusRules.
//
// It returns an empty string and a non-nil error when the component has no entry in the map.
func GetLegacyPrometheusRulesName(component string) (string, error) {
	name, ok := MCELegacyPrometheusRules[component]
	if !ok {
		// A missing key yields the zero value, so return "" explicitly to make the contract obvious.
		return "", fmt.Errorf("failed to find PrometheusRules name for: %s component", component)
	}

	return name, nil
}

// GetServiceMonitorName returns the name of the ServiceMonitors based on the provided component name.
func GetServiceMonitorName(component string) (string, error) {
if val, ok := MCEServiceMonitors[component]; !ok {
// GetLegacyServiceMonitorName returns the name of the ServiceMonitors based on the provided component name.
func GetLegacyServiceMonitorName(component string) (string, error) {
if val, ok := MCELegacyServiceMonitors[component]; !ok {
return val, fmt.Errorf("failed to find ServiceMonitors name for: %s component", component)
} else {
return val, nil
Expand Down
24 changes: 12 additions & 12 deletions api/v1/multiclusterengine_methods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func TestGetLegacyPrometheusKind(t *testing.T) {
}
}

func TestGetPrometheusRulesName(t *testing.T) {
func TestGetLegacyPrometheusRulesName(t *testing.T) {
tests := []struct {
name string
component string
Expand All @@ -118,30 +118,30 @@ func TestGetPrometheusRulesName(t *testing.T) {
{
name: "console PrometheusRule",
component: api.ConsoleMCE,
want: api.MCEPrometheusRules[api.ConsoleMCE],
want: api.MCELegacyPrometheusRules[api.ConsoleMCE],
},
{
name: "unknown PrometheusRule",
component: "unknown",
want: api.MCEPrometheusRules["unknown"],
want: api.MCELegacyPrometheusRules["unknown"],
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := api.GetPrometheusRulesName(tt.component)
got, err := api.GetLegacyPrometheusRulesName(tt.component)
if err != nil && tt.component != "unknown" {
t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
}

if got != tt.want {
t.Errorf("GetPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want)
t.Errorf("GetLegacyPrometheusRulesName(%v) = %v, want: %v", tt.component, got, tt.want)
}
})
}
}

func TestGetServiceMonitorName(t *testing.T) {
func TestGetLegacyServiceMonitorName(t *testing.T) {
tests := []struct {
name string
component string
Expand All @@ -150,24 +150,24 @@ func TestGetServiceMonitorName(t *testing.T) {
{
name: "console ServiceMonitor",
component: api.ConsoleMCE,
want: api.MCEServiceMonitors[api.ConsoleMCE],
want: api.MCELegacyServiceMonitors[api.ConsoleMCE],
},
{
name: "unknown ServiceMonitor",
component: "unknown",
want: api.MCEServiceMonitors["unknown"],
want: api.MCELegacyServiceMonitors["unknown"],
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := api.GetServiceMonitorName(tt.component)
got, err := api.GetLegacyServiceMonitorName(tt.component)
if err != nil && tt.component != "unknown" {
t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, err.Error(), tt.want)
}

if got != tt.want {
t.Errorf("GetServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want)
t.Errorf("GetLegacyServiceMonitorName(%v) = %v, want: %v", tt.component, got, tt.want)
}
})
}
Expand Down
155 changes: 152 additions & 3 deletions controllers/backplaneconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/util/workqueue"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand Down Expand Up @@ -78,6 +79,8 @@ const (

trustBundleNameEnvVar = "TRUSTED_CA_BUNDLE"
defaultTrustBundleName = "trusted-ca-bundle"

controlPlane = "backplane-operator"
)

//+kubebuilder:rbac:groups=multicluster.openshift.io,resources=multiclusterengines,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -234,7 +237,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
MultiClusterEngine to avoid conflicts with the openshift-* namespace when deploying PrometheusRules and
ServiceMonitors in ACM and MCE.
*/
result, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig)
_, err = r.ensureOpenShiftNamespaceLabel(ctx, backplaneConfig)
if err != nil {
log.Error(err, "Failed to add to %s label to namespace: %s", utils.OpenShiftClusterMonitoringLabel,
backplaneConfig.Spec.TargetNamespace)
Expand Down Expand Up @@ -307,7 +310,7 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
}

for _, kind := range backplanev1.GetLegacyPrometheusKind() {
err = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind)
_ = r.removeLegacyPrometheusConfigurations(ctx, "openshift-monitoring", kind)
}

result, err = r.ensureToggleableComponents(ctx, backplaneConfig)
Expand All @@ -320,6 +323,16 @@ func (r *MultiClusterEngineReconciler) Reconcile(ctx context.Context, req ctrl.R
return result, err
}

result, err = r.createMetricsService(ctx, backplaneConfig)
if err != nil {
return result, err
}

result, err = r.createMetricsServiceMonitor(ctx, backplaneConfig)
if err != nil {
return result, err
}

result, err = r.ensureRemovalsGone(backplaneConfig)
if err != nil {
return result, err
Expand Down Expand Up @@ -440,7 +453,8 @@ func (r *MultiClusterEngineReconciler) SetupWithManager(mgr ctrl.Manager) error

// createTrustBundleConfigmap creates a configmap that will be injected with the
// trusted CA bundle for use with the OCP cluster wide proxy
func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context, mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Context,
mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
log := log.FromContext(ctx)

// Get Trusted Bundle configmap name
Expand Down Expand Up @@ -494,6 +508,141 @@ func (r *MultiClusterEngineReconciler) createTrustBundleConfigmap(ctx context.Co
return ctrl.Result{}, nil
}

// createMetricsService makes sure the Service fronting the operator's metrics endpoint is present
// in the MultiClusterEngine target namespace.
//
// An existing Service is left untouched. When the lookup reports NotFound, a Service named
// utils.MCEOperatorMetricsServiceName is built with a single TCP port called "metrics" (8080),
// labelled with and selecting on "control-plane": controlPlane, given the MultiClusterEngine as
// its controller reference, and created.
//
// Lookup and create failures are logged and returned together with a result that carries
// RequeueAfter: requeuePeriod. A failure to set the controller reference is returned wrapped,
// with an empty result.
func (r *MultiClusterEngineReconciler) createMetricsService(ctx context.Context,
	mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	const metricsPort = 8080

	key := types.NamespacedName{
		Namespace: mce.Spec.TargetNamespace,
		Name:      utils.MCEOperatorMetricsServiceName,
	}

	lookupErr := r.Client.Get(ctx, key, &corev1.Service{})
	if lookupErr == nil {
		// The Service is already there; nothing to create.
		return ctrl.Result{}, nil
	}

	if !apierrors.IsNotFound(lookupErr) {
		// Anything other than NotFound is unexpected: report it and ask for a requeue.
		logger.Error(lookupErr, fmt.Sprintf("error while getting multicluster-engine metrics service: %s/%s",
			key.Namespace, key.Name))
		return ctrl.Result{RequeueAfter: requeuePeriod}, lookupErr
	}

	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      key.Name,
			Namespace: key.Namespace,
			Labels:    map[string]string{"control-plane": controlPlane},
		},
		Spec: corev1.ServiceSpec{
			Selector: map[string]string{"control-plane": controlPlane},
			Ports: []corev1.ServicePort{{
				Name:       "metrics",
				Protocol:   corev1.ProtocolTCP,
				Port:       metricsPort,
				TargetPort: intstr.FromInt(metricsPort),
			}},
		},
	}

	if refErr := ctrl.SetControllerReference(mce, svc, r.Scheme); refErr != nil {
		return ctrl.Result{}, pkgerrors.Wrapf(
			refErr, "error setting controller reference on metrics service: %s", key.Name,
		)
	}

	if createErr := r.Client.Create(ctx, svc); createErr != nil {
		logger.Error(createErr, fmt.Sprintf("error creating multicluster-engine metrics service: %s", key.Name))
		return ctrl.Result{RequeueAfter: requeuePeriod}, createErr
	}

	logger.Info(fmt.Sprintf("Created multicluster-engine metrics service: %s", key.Name))

	return ctrl.Result{}, nil
}

// createMetricsServiceMonitor makes sure the ServiceMonitor for the operator's metrics is present
// in the MultiClusterEngine target namespace.
//
// An existing ServiceMonitor is left untouched. When the lookup reports NotFound, a ServiceMonitor
// named utils.MCEOperatorMetricsServiceMonitorName is built that selects objects labelled
// "control-plane": controlPlane in the target namespace, scrapes the port named "metrics" using the
// service account token file, is given the MultiClusterEngine as its controller reference, and is
// created.
//
// Lookup and create failures are logged and returned together with a result that carries
// RequeueAfter: requeuePeriod. A failure to set the controller reference is returned wrapped,
// with an empty result.
func (r *MultiClusterEngineReconciler) createMetricsServiceMonitor(ctx context.Context,
	mce *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
	log := log.FromContext(ctx)

	smName := utils.MCEOperatorMetricsServiceMonitorName
	smNamespace := mce.Spec.TargetNamespace

	namespacedName := types.NamespacedName{
		Name:      smName,
		Namespace: smNamespace,
	}

	// Check if the ServiceMonitor exists.
	err := r.Client.Get(ctx, namespacedName, &monitorv1.ServiceMonitor{})
	if err == nil {
		return ctrl.Result{}, nil
	}

	if !apierrors.IsNotFound(err) {
		// Unknown error. Requeue.
		log.Error(err, fmt.Sprintf("error while getting multicluster-engine metrics servicemonitor: %s/%s",
			smNamespace, smName))
		return ctrl.Result{RequeueAfter: requeuePeriod}, err
	}

	// Not found: build the ServiceMonitor.
	sm := &monitorv1.ServiceMonitor{
		ObjectMeta: metav1.ObjectMeta{
			Name:      smName,
			Namespace: smNamespace,
			Labels: map[string]string{
				"control-plane": controlPlane,
			},
		},
		Spec: monitorv1.ServiceMonitorSpec{
			Endpoints: []monitorv1.Endpoint{
				{
					BearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
					Port:            "metrics",
				},
			},
			NamespaceSelector: monitorv1.NamespaceSelector{
				MatchNames: []string{smNamespace},
			},
			Selector: metav1.LabelSelector{
				MatchLabels: map[string]string{
					"control-plane": controlPlane,
				},
			},
		},
	}

	if err := ctrl.SetControllerReference(mce, sm, r.Scheme); err != nil {
		return ctrl.Result{}, pkgerrors.Wrapf(
			err, "error setting controller reference on multicluster-engine metrics servicemonitor: %s", smName)
	}

	if err := r.Client.Create(ctx, sm); err != nil {
		// Error creating metrics servicemonitor.
		log.Error(err, fmt.Sprintf("error creating metrics servicemonitor: %s", smName))
		return ctrl.Result{RequeueAfter: requeuePeriod}, err
	}

	log.Info(fmt.Sprintf("Created multicluster-engine metrics servicemonitor: %s", smName))

	return ctrl.Result{}, nil
}

// DeployAlwaysSubcomponents ensures all subcomponents exist
func (r *MultiClusterEngineReconciler) DeployAlwaysSubcomponents(ctx context.Context, backplaneConfig *backplanev1.MultiClusterEngine) (ctrl.Result, error) {
log := log.FromContext(ctx)
Expand Down
4 changes: 2 additions & 2 deletions controllers/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,9 +167,9 @@ func (r *MultiClusterEngineReconciler) removeLegacyPrometheusConfigurations(ctx
for _, c := range backplanev1.MCEComponents {
res, err := func() (string, error) {
if configType == "PrometheusRule" {
return backplanev1.GetPrometheusRulesName(c)
return backplanev1.GetLegacyPrometheusRulesName(c)
}
return backplanev1.GetServiceMonitorName(c)
return backplanev1.GetLegacyServiceMonitorName(c)
}()

if err != nil {
Expand Down
14 changes: 14 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ const (
OpenShiftClusterMonitoringLabel = "openshift.io/cluster-monitoring"
)

const (
// MCEOperatorMetricsServiceName is the name of the Service used to expose the metrics
// endpoint for the multicluster-engine-operator.
MCEOperatorMetricsServiceName = "multicluster-engine-operator-metrics"

// MCEOperatorMetricsServiceMonitorName is the name of the ServiceMonitor created for the
// multicluster-engine-operator metrics. It is the same string as
// MCEOperatorMetricsServiceName; the two objects are of different kinds, so the names do
// not collide.
MCEOperatorMetricsServiceMonitorName = "multicluster-engine-operator-metrics"
)

var onComponents = []string{
backplanev1.AssistedService,
backplanev1.ClusterLifecycle,
Expand Down

0 comments on commit bdb6ac1

Please sign in to comment.