Skip to content

Commit

Permalink
koord-manager: add metrics for webhook (#2330)
Browse files Browse the repository at this point in the history
Signed-off-by: nce3xin <[email protected]>
Co-authored-by: nce3xin <[email protected]>
  • Loading branch information
nce3xin and nce3xin authored Feb 10, 2025
1 parent 37bd292 commit cc55177
Show file tree
Hide file tree
Showing 15 changed files with 319 additions and 0 deletions.
7 changes: 7 additions & 0 deletions pkg/webhook/cm/validating/validating_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"net/http"
"time"

admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
Expand All @@ -31,6 +32,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/util"
"github.com/koordinator-sh/koordinator/pkg/webhook/cm/plugins"
"github.com/koordinator-sh/koordinator/pkg/webhook/cm/plugins/sloconfig"
"github.com/koordinator-sh/koordinator/pkg/webhook/metrics"
)

// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch
Expand Down Expand Up @@ -100,9 +102,14 @@ func (h *ConfigMapValidatingHandler) Handle(ctx context.Context, req admission.R
pls := h.getPlugins()

for _, plugin := range pls {
start := time.Now()
if err = plugin.Validate(ctx, req, obj, oldObj); err != nil {
metrics.RecordWebhookDurationMilliseconds(metrics.ValidatingWebhook,
metrics.ConfigMap, string(req.Operation), err, plugin.Name(), time.Since(start).Seconds())
return admission.Errored(http.StatusBadRequest, err)
}
metrics.RecordWebhookDurationMilliseconds(metrics.ValidatingWebhook,
metrics.ConfigMap, string(req.Operation), nil, plugin.Name(), time.Since(start).Seconds())
}

return admission.ValidationResponse(true, "")
Expand Down
7 changes: 7 additions & 0 deletions pkg/webhook/elasticquota/mutating/mutating_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"encoding/json"
"net/http"
"reflect"
"time"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
Expand All @@ -31,6 +32,7 @@ import (
"github.com/koordinator-sh/koordinator/apis/thirdparty/scheduler-plugins/pkg/apis/scheduling/v1alpha1"

"github.com/koordinator-sh/koordinator/pkg/webhook/elasticquota"
"github.com/koordinator-sh/koordinator/pkg/webhook/metrics"
)

// ElasticQuotaMutatingHandler handles ElasticQuota
Expand Down Expand Up @@ -67,10 +69,15 @@ func (h *ElasticQuotaMutatingHandler) Handle(ctx context.Context, request admiss
klog.V(5).Infof("Webhook start mutating quota %s", obj.Name)

plugin := elasticquota.NewPlugin(h.Decoder, h.Client)
start := time.Now()
if err := plugin.AdmitQuota(ctx, request, copied); err != nil {
klog.Errorf("Failed to mutating Quota %s/%s by quotaTopology, err: %v", obj.Namespace, obj.Name, err)
metrics.RecordWebhookDurationMilliseconds(metrics.MutatingWebhook,
metrics.ElasticQuota, string(request.Operation), err, plugin.Name(), time.Since(start).Seconds())
return admission.Errored(http.StatusBadRequest, err)
}
metrics.RecordWebhookDurationMilliseconds(metrics.MutatingWebhook,
metrics.ElasticQuota, string(request.Operation), nil, plugin.Name(), time.Since(start).Seconds())

if reflect.DeepEqual(obj, copied) {
return admission.Allowed("")
Expand Down
4 changes: 4 additions & 0 deletions pkg/webhook/elasticquota/plugin_check_quota_meta_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ var (
}
)

// Name reports the fixed identifier of the QuotaMetaChecker plugin.
func (c *QuotaMetaChecker) Name() string {
	const pluginName = "QuotaMetaChecker"
	return pluginName
}

func NewPlugin(decoder *admission.Decoder, client client.Client) *QuotaMetaChecker {
quotaMetaCheck.Client = client
quotaMetaCheck.Decoder = decoder
Expand Down
2 changes: 2 additions & 0 deletions pkg/webhook/elasticquota/quota_topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

"github.com/koordinator-sh/koordinator/apis/extension"
utilclient "github.com/koordinator-sh/koordinator/pkg/util/client"
"github.com/koordinator-sh/koordinator/pkg/webhook/metrics"
)

type quotaTopology struct {
Expand Down Expand Up @@ -234,6 +235,7 @@ func (qt *quotaTopology) fillQuotaDefaultInformation(quota *v1alpha1.ElasticQuot
}
if sharedWeight, exist := quota.Annotations[extension.AnnotationSharedWeight]; !exist || len(sharedWeight) == 0 {
quota.Annotations[extension.AnnotationSharedWeight] = string(maxQuota)
metrics.RecordQuotaSharedWeight(quota.Name, quota.Spec.Max)
klog.V(5).Infof("fill quota %v sharedWeight as max", quota.Name)
}

Expand Down
7 changes: 7 additions & 0 deletions pkg/webhook/elasticquota/validating/validating_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"net/http"
"time"

v1 "k8s.io/api/admission/v1"
"k8s.io/klog/v2"
Expand All @@ -31,6 +32,7 @@ import (

"github.com/koordinator-sh/koordinator/pkg/util"
"github.com/koordinator-sh/koordinator/pkg/webhook/elasticquota"
"github.com/koordinator-sh/koordinator/pkg/webhook/metrics"
)

// +kubebuilder:rbac:groups=scheduling.sigs.k8s.io,resources=elasticquotas,verbs=get;list;watch
Expand Down Expand Up @@ -81,9 +83,14 @@ func (h *ElasticQuotaValidatingHandler) Handle(ctx context.Context, request admi
}()

plugin := elasticquota.NewPlugin(h.Decoder, h.Client)
start := time.Now()
if err = plugin.ValidateQuota(ctx, request, obj); err != nil {
metrics.RecordWebhookDurationMilliseconds(metrics.ValidatingWebhook,
metrics.ElasticQuota, string(request.Operation), err, plugin.Name(), time.Since(start).Seconds())
return admission.Errored(http.StatusBadRequest, err)
}
metrics.RecordWebhookDurationMilliseconds(metrics.ValidatingWebhook,
metrics.ElasticQuota, string(request.Operation), nil, plugin.Name(), time.Since(start).Seconds())

return admission.ValidationResponse(true, "")
}
Expand Down
42 changes: 42 additions & 0 deletions pkg/webhook/metrics/elasticquota.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"github.com/prometheus/client_golang/prometheus"
v1 "k8s.io/api/core/v1"
)

var (
// quotaSharedWeight is a gauge vector labeled by elastic quota name and
// resource name. Its samples are written by RecordQuotaSharedWeight.
quotaSharedWeight = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: KoordManagerWebhookSubsystem,
Name: "quota_shared_weight",
Help: "The shared weight of the quota",
},
[]string{ElasticQuotaNameKey, ResourceNameKey},
)
// ElasticQuotaCollector groups the elastic quota collectors of this package
// so they can be registered together.
ElasticQuotaCollector = []prometheus.Collector{
quotaSharedWeight,
}
)

// RecordQuotaSharedWeight sets one quota_shared_weight gauge sample per
// resource in max, labeled with quotaName and the resource name. A nil or
// empty max records nothing.
//
// Each quantity is converted with AsApproximateFloat64 so that fractional
// amounts (for example a CPU quantity of "500m") are exported as 0.5 rather
// than being rounded up to the next integer, which is what Quantity.Value does.
func RecordQuotaSharedWeight(quotaName string, max v1.ResourceList) {
	for resourceName, quantity := range max {
		quotaSharedWeight.WithLabelValues(quotaName, string(resourceName)).Set(quantity.AsApproximateFloat64())
	}
}
34 changes: 34 additions & 0 deletions pkg/webhook/metrics/elasticquota_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"testing"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)

// TestElasticQuotaCollectors is a smoke test: recording the shared weight for
// a quota with CPU and memory entries must complete without panicking.
func TestElasticQuotaCollectors(t *testing.T) {
	quotaMax := corev1.ResourceList{}
	quotaMax[corev1.ResourceCPU] = resource.MustParse("1")
	quotaMax[corev1.ResourceMemory] = resource.MustParse("1Gi")

	t.Run("test not panic", func(t *testing.T) {
		RecordQuotaSharedWeight("test-quota", quotaMax)
	})
}
26 changes: 26 additions & 0 deletions pkg/webhook/metrics/internal_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
kmmetrics "github.com/koordinator-sh/koordinator/pkg/util/metrics/koordmanager"
)

// init hands the collectors declared in this package to
// kmmetrics.InternalMustRegister when the package is initialized.
func init() {
// Per-plugin webhook duration histogram.
kmmetrics.InternalMustRegister(WebhookDurationCollectors...)
// Elastic quota shared weight gauge.
kmmetrics.InternalMustRegister(ElasticQuotaCollector...)
}
36 changes: 36 additions & 0 deletions pkg/webhook/metrics/util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

const (
// KoordManagerWebhookSubsystem is the Prometheus subsystem set on the
// metrics declared in this package.
KoordManagerWebhookSubsystem = "koord_manager_webhook"
// Label keys used by the metrics of this package.
ElasticQuotaNameKey = "elasticquota_name"
ResourceNameKey = "resource_name"
OperationKey = "operation"
PluginNameKey = "plugin_name"
StatusKey = "status"
// Values of the status label: StatusAllowed when the plugin returned no
// error, StatusRejected otherwise.
StatusAllowed = "allowed"
StatusRejected = "rejected"
ObjectTypeKey = "object_type"
WebhookTypeKey = "webhook_type"
// Values of the webhook_type label.
MutatingWebhook = "mutate"
ValidatingWebhook = "validate"
// Values of the object_type label.
ConfigMap = "configmap"
ElasticQuota = "elasticquota"
Node = "node"
Pod = "pod"
)
50 changes: 50 additions & 0 deletions pkg/webhook/metrics/webhook_duration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"github.com/prometheus/client_golang/prometheus"
)

var (
	// WebhookDurationMilliseconds is a histogram of webhook plugin durations,
	// observed in milliseconds and partitioned by webhook type, object type,
	// operation, plugin name and status.
	//
	// NOTE(review): the buckets run from 0.001 up to 0.001 * 2^19 (about 524),
	// so in milliseconds every observation slower than roughly half a second
	// lands in the +Inf bucket. Confirm this range was chosen for milliseconds
	// and not carried over from a seconds-based histogram.
	WebhookDurationMilliseconds = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Subsystem: KoordManagerWebhookSubsystem,
			Name:      "webhook_duration_milliseconds",
			Help:      "Duration in milliseconds of a webhook plugin invocation, by webhook type, object type, operation, plugin name and status",
			Buckets:   prometheus.ExponentialBuckets(0.001, 2, 20),
		},
		[]string{WebhookTypeKey, ObjectTypeKey, OperationKey, PluginNameKey, StatusKey},
	)
	// WebhookDurationCollectors groups the webhook duration collectors of this
	// package so they can be registered together.
	WebhookDurationCollectors = []prometheus.Collector{
		WebhookDurationMilliseconds,
	}
)

func RecordWebhookDurationMilliseconds(webhookType, objectType, operation string, err error, pluginName string, seconds float64) {
labels := prometheus.Labels{}
labels[WebhookTypeKey] = webhookType
labels[ObjectTypeKey] = objectType
labels[OperationKey] = operation
labels[PluginNameKey] = pluginName
labels[StatusKey] = StatusAllowed
// TODO Add detailed error codes for ACS integration to better identify specific issues
if err != nil {
labels[StatusKey] = StatusRejected
}
WebhookDurationMilliseconds.With(labels).Observe(seconds * 1000)
}
27 changes: 27 additions & 0 deletions pkg/webhook/metrics/webhook_duration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"testing"
)

// TestWebhookDurationCollectors is a smoke test: recording a successful
// mutating-webhook observation for a pod must complete without panicking.
func TestWebhookDurationCollectors(t *testing.T) {
	const (
		operation      = "CREATE"
		pluginName     = "test-plugin"
		elapsedSeconds = 0.1
	)

	t.Run("test not panic", func(t *testing.T) {
		RecordWebhookDurationMilliseconds(MutatingWebhook, Pod, operation, nil, pluginName, elapsedSeconds)
	})
}
7 changes: 7 additions & 0 deletions pkg/webhook/node/mutating/mutating_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ import (
"encoding/json"
"net/http"
"reflect"
"time"

admissionv1 "k8s.io/api/admission/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"github.com/koordinator-sh/koordinator/pkg/webhook/metrics"
"github.com/koordinator-sh/koordinator/pkg/webhook/node/plugins"
"github.com/koordinator-sh/koordinator/pkg/webhook/node/plugins/resourceamplification"
)
Expand Down Expand Up @@ -119,9 +121,14 @@ func (h *NodeMutatingHandler) Handle(ctx context.Context, req admission.Request)
clone := obj.DeepCopy()

for _, plugin := range nodeMutatingPlugins {
start := time.Now()
if err := plugin.Admit(ctx, req, obj, oldObj); err != nil {
metrics.RecordWebhookDurationMilliseconds(metrics.MutatingWebhook,
metrics.Node, string(req.Operation), err, plugin.Name(), time.Since(start).Seconds())
return admission.Errored(http.StatusInternalServerError, err)
}
metrics.RecordWebhookDurationMilliseconds(metrics.MutatingWebhook,
metrics.Node, string(req.Operation), nil, plugin.Name(), time.Since(start).Seconds())
}

if reflect.DeepEqual(obj, clone) {
Expand Down
Loading

0 comments on commit cc55177

Please sign in to comment.