Skip to content

Commit

Permalink
koord-manager: improve batch resource calculation (#1751)
Browse files Browse the repository at this point in the history
Signed-off-by: saintube <[email protected]>
  • Loading branch information
saintube authored Nov 28, 2023
1 parent 661dd71 commit e410b79
Show file tree
Hide file tree
Showing 11 changed files with 732 additions and 140 deletions.
4 changes: 4 additions & 0 deletions apis/slo/v1alpha1/nodemetric_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ type PodMetricInfo struct {
Name string `json:"name,omitempty"`
Namespace string `json:"namespace,omitempty"`
PodUsage ResourceMap `json:"podUsage,omitempty"`
// Priority class of the application
Priority apiext.PriorityClass `json:"priority,omitempty"`
// QoS class of the application
QoS apiext.QoSClass `json:"qos,omitempty"`
// Third party extensions for PodMetric
Extensions *ExtensionsMap `json:"extensions,omitempty"`
}
Expand Down
6 changes: 6 additions & 0 deletions config/crd/bases/slo.koordinator.sh_nodemetrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,12 @@ spec:
pairs.
type: object
type: object
priority:
description: Priority class of the application
type: string
qos:
description: QoS class of the application
type: string
type: object
type: array
prodReclaimableMetric:
Expand Down
2 changes: 1 addition & 1 deletion pkg/koordlet/prediction/peak_predictor.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (p *podReclaimablePredictor) AddPod(pod *v1.Pod) error {
}

if p.pods[string(pod.UID)] {
return fmt.Errorf("Pod %s already exist in the pod reclaimable predictor", util.GetPodKey(pod))
return fmt.Errorf("pod %s already exist in the pod reclaimable predictor", util.GetPodKey(pod))
}
p.pods[string(pod.UID)] = true

Expand Down
118 changes: 118 additions & 0 deletions pkg/koordlet/qosmanager/helpers/calculator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package helpers

import (
corev1 "k8s.io/api/core/v1"
quotav1 "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/klog/v2"

apiext "github.com/koordinator-sh/koordinator/apis/extension"
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
"github.com/koordinator-sh/koordinator/pkg/util"
)

// CalculateFilterPodsUsed sums the usage of the pods and host applications selected by the given
// filters and derives the remaining system usage of the node.
//
// It returns, in order:
//   - the summed usage of the pods for which podFilterFn returns true; a pod that has an entry in
//     podUsedMap but no entry with the same UID in podMetas is counted as filtered as well,
//   - the summed usage of the host applications for which hostAppFilterFn returns true; a host
//     application without an entry in hostAppMetrics is skipped entirely,
//   - the system usage, which is never lower than zero nor lower than nodeReserved.
//
// If hostApps is not passed, nothing is subtracted for host applications, so their usage stays
// inside the system usage.
//
// e.g. To calculate non-BE used,
//   - filterPodsUsed := sum(podUsed[i] if f_pod(pod[i]))
//   - filterHostAppUsed := sum(hostAppUsed[i] if f_hostApp(hostApp[i]))
//   - systemUsed := max(nodeReserved, nodeUsed - sum(podUsed[i]) - sum(hostAppUsed[i]))
func CalculateFilterPodsUsed(nodeUsed float64, nodeReserved float64,
	podMetas []*statesinformer.PodMeta, podUsedMap map[string]float64,
	hostApps []slov1alpha1.HostApplicationSpec, hostAppMetrics map[string]float64,
	podFilterFn func(*corev1.Pod) bool,
	hostAppFilterFn func(*slov1alpha1.HostApplicationSpec) bool) (float64, float64, float64) {
	// Index the pod metas by UID so that every usage entry can be matched to its pod.
	metaByUID := make(map[string]*statesinformer.PodMeta, len(podMetas))
	for _, meta := range podMetas {
		metaByUID[string(meta.Pod.UID)] = meta
	}

	var totalPodsUsed, selectedPodsUsed float64
	for uid, used := range podUsedMap {
		totalPodsUsed += used

		meta, known := metaByUID[uid]
		if !known {
			// NOTE: a pod whose meta is missing is treated as if it passed the filter.
			klog.V(4).Infof("pod metric not included in the podMetas, uid %v", uid)
			selectedPodsUsed += used
			continue
		}
		if podFilterFn(meta.Pod) {
			selectedPodsUsed += used
		}
	}

	var totalHostAppsUsed, selectedHostAppsUsed float64
	for i := range hostApps {
		// The filter receives a pointer to a per-iteration copy, not to the slice element.
		app := hostApps[i]

		used, reported := hostAppMetrics[app.Name]
		if !reported {
			klog.V(4).Infof("host app metric not included in the hostAppMetrics, name %v", app.Name)
			continue
		}

		totalHostAppsUsed += used
		if hostAppFilterFn(&app) {
			selectedHostAppsUsed += used
		}
	}

	// The system usage is what remains of the node usage once all pods and all reported host
	// applications (not only the filtered ones) have been subtracted.
	systemUsed := nodeUsed - totalPodsUsed - totalHostAppsUsed
	if systemUsed < 0 {
		// Never report a negative system usage.
		systemUsed = 0
	}
	if nodeReserved > systemUsed {
		// The node reservation is the lower bound of the system usage.
		systemUsed = nodeReserved
	}

	return selectedPodsUsed, selectedHostAppsUsed, systemUsed
}

// NotBatchOrFreePodFilter reports whether the priority class of the pod, as resolved by
// apiext.GetPodPriorityClassWithDefault, is neither apiext.PriorityBatch nor apiext.PriorityFree.
func NotBatchOrFreePodFilter(pod *corev1.Pod) bool {
	switch apiext.GetPodPriorityClassWithDefault(pod) {
	case apiext.PriorityBatch, apiext.PriorityFree:
		return false
	default:
		return true
	}
}

// NonBEPodFilter reports whether the pod is not best-effort: its raw koordinator QoS class must not
// be apiext.QoSBE and its Kubernetes QoS class must not be corev1.PodQOSBestEffort.
func NonBEPodFilter(pod *corev1.Pod) bool {
	if apiext.GetPodQoSClassRaw(pod) == apiext.QoSBE {
		return false
	}
	return util.GetKubeQosClass(pod) != corev1.PodQOSBestEffort
}

// NonBEHostAppFilter reports whether the host application is not best-effort.
// A host application only counts as best-effort when its QoS is apiext.QoSBE and it also has a
// cgroup path whose base is slov1alpha1.CgroupBaseTypeKubeBesteffort; anything else is non-BE.
func NonBEHostAppFilter(hostAppSpec *slov1alpha1.HostApplicationSpec) bool {
	if hostAppSpec.QoS != apiext.QoSBE {
		return true
	}
	cgroupPath := hostAppSpec.CgroupPath
	if cgroupPath == nil {
		return true
	}
	return cgroupPath.Base != slov1alpha1.CgroupBaseTypeKubeBesteffort
}

// NonePodHighPriority is a pod filter that ignores its argument and always returns false,
// i.e. it selects no pod.
func NonePodHighPriority(_ *corev1.Pod) bool {
return false
}

// NoneHostAppHighPriority is a host application filter that ignores its argument and always
// returns false, i.e. it selects no host application.
func NoneHostAppHighPriority(_ *slov1alpha1.HostApplicationSpec) bool {
return false
}

// GetNodeResourceReserved returns the resources reserved on the node.
// It starts from the reservation reported by util.GetNodeReservationFromKubelet and, when the node
// has annotations, combines it via quotav1.Max with the reservation parsed from those annotations:
//
//	nodeReserved := max(nodeKubeletReserved, nodeAnnoReserved)
func GetNodeResourceReserved(node *corev1.Node) corev1.ResourceList {
	kubeletReserved := util.GetNodeReservationFromKubelet(node)
	if node.Annotations == nil {
		// No annotations, so the kubelet reservation is the only source.
		return kubeletReserved
	}
	annoReserved := util.GetNodeReservationFromAnnotation(node.Annotations)
	return quotav1.Max(kubeletReserved, annoReserved)
}
188 changes: 188 additions & 0 deletions pkg/koordlet/qosmanager/helpers/calculator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
Copyright 2022 The Koordinator Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package helpers

import (
"testing"

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/koordinator-sh/koordinator/apis/extension"
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
)

// TestCalculateFilterPodsUsed checks the filtered pod usage, the filtered host application usage
// and the system usage returned by CalculateFilterPodsUsed for several filter combinations.
func TestCalculateFilterPodsUsed(t *testing.T) {
	// Fixtures shared by the cases below; CalculateFilterPodsUsed only reads them.
	lsPodMeta := &statesinformer.PodMeta{
		Pod: &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: "ls-pod",
				UID:  "xxxxxx",
				Labels: map[string]string{
					extension.LabelPodQoS: string(extension.QoSLS),
				},
			},
			Status: corev1.PodStatus{
				QOSClass: corev1.PodQOSBurstable,
			},
		},
	}
	bePodMeta := &statesinformer.PodMeta{
		Pod: &corev1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name: "be-pod",
				UID:  "yyyyyy",
				Labels: map[string]string{
					extension.LabelPodQoS: string(extension.QoSBE),
				},
			},
			Status: corev1.PodStatus{
				QOSClass: corev1.PodQOSBestEffort,
			},
		},
	}
	lsAndBEPods := []*statesinformer.PodMeta{lsPodMeta, bePodMeta}
	lsAndBEPodsUsed := map[string]float64{
		"xxxxxx": 20.0,
		"yyyyyy": 32.0,
	}
	lsAndBEHostApps := []slov1alpha1.HostApplicationSpec{
		{
			Name:     "ls-app",
			Priority: extension.PriorityProd,
			QoS:      extension.QoSLS,
		},
		{
			Name:     "be-app",
			Priority: extension.PriorityBatch,
			QoS:      extension.QoSBE,
			CgroupPath: &slov1alpha1.CgroupPath{
				Base: slov1alpha1.CgroupBaseTypeKubeBesteffort,
			},
		},
	}
	lsAndBEHostAppsUsed := map[string]float64{
		"ls-app": 4.0,
		"be-app": 8.0,
	}

	type input struct {
		nodeUsed        float64
		nodeReserved    float64
		podMetas        []*statesinformer.PodMeta
		podUsedMap      map[string]float64
		hostApps        []slov1alpha1.HostApplicationSpec
		hostAppMetrics  map[string]float64
		podFilterFn     func(*corev1.Pod) bool
		hostAppFilterFn func(*slov1alpha1.HostApplicationSpec) bool
	}
	cases := []struct {
		name             string
		in               input
		wantPodsUsed     float64
		wantHostAppsUsed float64
		wantSystemUsed   float64
	}{
		{
			name: "no pod to calculate",
			in: input{
				nodeUsed:        10.0,
				nodeReserved:    0,
				podFilterFn:     NonePodHighPriority,
				hostAppFilterFn: NoneHostAppHighPriority,
			},
			wantPodsUsed:     0,
			wantHostAppsUsed: 0,
			wantSystemUsed:   10.0,
		},
		{
			name: "calculate for non-BE",
			in: input{
				nodeUsed:        80.0,
				nodeReserved:    8.0,
				podMetas:        lsAndBEPods,
				podUsedMap:      lsAndBEPodsUsed,
				hostApps:        lsAndBEHostApps,
				hostAppMetrics:  lsAndBEHostAppsUsed,
				podFilterFn:     NonBEPodFilter,
				hostAppFilterFn: NonBEHostAppFilter,
			},
			wantPodsUsed:     20.0,
			wantHostAppsUsed: 4.0,
			wantSystemUsed:   16.0,
		},
		{
			name: "calculate for non-Batch",
			in: input{
				nodeUsed:        80.0,
				nodeReserved:    8.0,
				podMetas:        lsAndBEPods,
				podUsedMap:      lsAndBEPodsUsed,
				podFilterFn:     NotBatchOrFreePodFilter,
				hostAppFilterFn: NoneHostAppHighPriority,
			},
			wantPodsUsed:     20.0,
			wantHostAppsUsed: 0,
			wantSystemUsed:   28.0,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			in := tc.in
			podsUsed, hostAppsUsed, systemUsed := CalculateFilterPodsUsed(
				in.nodeUsed, in.nodeReserved,
				in.podMetas, in.podUsedMap,
				in.hostApps, in.hostAppMetrics,
				in.podFilterFn, in.hostAppFilterFn)
			assert.Equal(t, tc.wantPodsUsed, podsUsed)
			assert.Equal(t, tc.wantHostAppsUsed, hostAppsUsed)
			assert.Equal(t, tc.wantSystemUsed, systemUsed)
		})
	}
}
Loading

0 comments on commit e410b79

Please sign in to comment.