Skip to content

Commit

Permalink
feat: implement in-place pod request scaling
Browse files Browse the repository at this point in the history
If enabled, we make use of the (still alpha) feature gate
InPlacePodVerticalScaling to reduce the pod requests to a minimum on
scale down and increase them back to the original value on restore. This
has been marked as experimental: in addition to enabling the Kubernetes
feature gate, it also needs to be enabled by setting a flag on the
manager.
  • Loading branch information
ctrox committed Jun 1, 2024
1 parent 9b6091c commit 1439719
Show file tree
Hide file tree
Showing 14 changed files with 483 additions and 17 deletions.
27 changes: 24 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,13 +285,34 @@ the shim otherwise. For example, loading eBPF programs can be quite memory
intensive so they have been moved from the shim to the manager to keep the
shim memory usage as minimal as possible.

In addition to that it collects metrics from all the shim processes and
exposes those metrics on an HTTP endpoint.
These are the responsibilities of the manager:

- Loading eBPF programs that the shim(s) rely on.
- Collecting metrics from all shim processes and exposing them on HTTP for scraping.
- Subscribing to shim scaling events and adjusting Pod requests.

#### In-place Resource scaling (Experimental)

This makes use of the Kubernetes feature gate
[InPlacePodVerticalScaling](https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/1287-in-place-update-pod-resources)
to automatically reduce the pod resource requests to a minimum on scale down
events and restore them to their original values on scale up. Besides enabling
the Kubernetes feature gate, in-place scaling also has to be enabled on the
manager with the flag `-in-place-scaling=true`.

#### Flags

```
-metrics-addr=":8080" sets the address of the metrics server
-debug enables debug logging
-in-place-scaling=false enable in-place resource scaling, requires InPlacePodVerticalScaling feature flag
```
## Metrics
The zeropod-node pod exposes metrics on `0.0.0.0:8080/metrics` in Prometheus
format on each installed node. The following metrics are currently available:
format on each installed node. The metrics address can be configured with the
`-metrics-addr` flag. The following metrics are currently available:
```bash
# HELP zeropod_checkpoint_duration_seconds The duration of the last checkpoint in seconds.
Expand Down
21 changes: 19 additions & 2 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,18 @@ import (
)

// Command-line flags of the manager.
var (
	// metricsAddr is the address of the metrics server.
	metricsAddr = flag.String("metrics-addr", ":8080", "address of the metrics server")
	// debug lowers the default slog level to debug. It is opt-in: a
	// manager started without flags must not emit debug logs.
	debug = flag.Bool("debug", false, "enable debug logs")
	// inPlaceScaling registers the pod scaler as a status subscriber. It
	// is experimental and off by default.
	inPlaceScaling = flag.Bool("in-place-scaling", false,
		"enable in-place resource scaling, requires InPlacePodVerticalScaling feature flag")
)

func main() {
flag.Parse()

if *debug {
slog.SetLogLoggerLevel(slog.LevelDebug)
}
slog.Info("starting manager", "metrics-addr", *metricsAddr)

ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
Expand All @@ -36,7 +43,17 @@ func main() {
os.Exit(1)
}

if err := manager.StartSubscribers(ctx); err != nil {
subscribers := []manager.StatusHandler{}
if *inPlaceScaling {
podScaler, err := manager.NewPodScaler()
if err != nil {
slog.Error("podScaler init", "err", err)
os.Exit(1)
}
subscribers = append(subscribers, podScaler)
}

if err := manager.StartSubscribers(ctx, subscribers...); err != nil {
slog.Error("starting subscribers", "err", err)
os.Exit(1)
}
Expand Down
3 changes: 3 additions & 0 deletions config/base/node-daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ spec:
- name: manager
image: manager
imagePullPolicy: IfNotPresent
command: ["/zeropod-manager"]
args:
- -metrics-addr=:8080
ports:
- name: metrics
containerPort: 8080
Expand Down
33 changes: 30 additions & 3 deletions config/base/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: runtimeclass-installer
name: zeropod:runtimeclass-installer
rules:
- apiGroups:
- node.k8s.io
Expand All @@ -22,11 +22,38 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: runtimeclass-installer
name: zeropod:runtimeclass-installer
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: runtimeclass-installer
name: zeropod:runtimeclass-installer
subjects:
- kind: ServiceAccount
name: zeropod-node
namespace: zeropod-system
---
# the manager needs to get/update pods for dynamic resource requests
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: zeropod:pod-updater
rules:
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: zeropod:pod-updater
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: zeropod:pod-updater
subjects:
- kind: ServiceAccount
name: zeropod-node
Expand Down
6 changes: 5 additions & 1 deletion config/examples/nginx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: Deployment
metadata:
name: nginx
spec:
replicas: 3
replicas: 1
selector:
matchLabels:
app: nginx
Expand All @@ -21,3 +21,7 @@ spec:
name: nginx
ports:
- containerPort: 80
resources:
requests:
cpu: 100m
memory: 128Mi
7 changes: 7 additions & 0 deletions config/kind/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,10 @@ images:
- name: installer
newName: ghcr.io/ctrox/zeropod-installer
newTag: dev
patches:
- patch: |-
- op: add
path: /spec/template/spec/containers/0/args/-
value: -in-place-scaling=true
target:
kind: DaemonSet
29 changes: 29 additions & 0 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,13 @@ import (
"testing"
"time"

"github.com/ctrox/zeropod/manager"
"github.com/ctrox/zeropod/zeropod"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/utils/ptr"
)

Expand Down Expand Up @@ -193,6 +195,33 @@ func TestE2E(t *testing.T) {
// exec and should test the deletion in the restored state.
})

t.Run("resources scaling", func(t *testing.T) {
pod := testPod(scaleDownAfter(0), agnContainer("agn", 8080), resources(corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
},
}))

cleanupPod := createPodAndWait(t, ctx, client, pod)
defer cleanupPod()
require.Eventually(t, func() bool {
if err := client.Get(ctx, objectName(pod), pod); err != nil {
return false
}

resourcesScaledDown := false
for _, container := range pod.Status.ContainerStatuses {
t.Logf("allocated resources: %v", container.AllocatedResources)
resourcesScaledDown = container.AllocatedResources != nil &&
container.AllocatedResources[corev1.ResourceCPU] == manager.ScaledDownCPU &&
container.AllocatedResources[corev1.ResourceMemory] == manager.ScaledDownMemory
}

return resourcesScaledDown
}, time.Second*10, time.Second)
})

t.Run("metrics", func(t *testing.T) {
// create two pods to test metric merging
runningPod := testPod(scaleDownAfter(time.Hour))
Expand Down
2 changes: 2 additions & 0 deletions e2e/kind.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
featureGates:
InPlacePodVerticalScaling: true
nodes:
- role: control-plane
extraMounts:
Expand Down
11 changes: 11 additions & 0 deletions e2e/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ func startKind(t testing.TB, name string, port int) (c *rest.Config, err error)
if err := provider.Create(name,
cluster.CreateWithV1Alpha4Config(&v1alpha4.Cluster{
Name: name,
FeatureGates: map[string]bool{
"InPlacePodVerticalScaling": true,
},
Nodes: []v1alpha4.Node{{
Labels: map[string]string{zeropod.NodeLabel: "true"},
// setup port map for our node port
Expand Down Expand Up @@ -349,6 +352,14 @@ func portsAnnotation(portsMap string) podOption {
})
}

// resources returns a podOption that assigns res as the resource
// requirements of every container in the pod spec.
func resources(res corev1.ResourceRequirements) podOption {
	return func(p *pod) {
		// containers shares its backing array with p.Spec.Containers, so
		// writing through it updates the pod in place.
		containers := p.Spec.Containers
		for idx := 0; idx < len(containers); idx++ {
			containers[idx].Resources = res
		}
	}
}

const agnHostImage = "registry.k8s.io/e2e-test-images/agnhost:2.39"

func agnContainer(name string, port int) podOption {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ require (
github.com/docker/go-metrics v0.0.1 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.2 // indirect
github.com/evanphx/json-patch v5.6.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.7.0 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-errors/errors v1.4.2 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4=
github.com/evanphx/json-patch/v5 v5.7.0 h1:nJqP7uwL84RJInrohHfW0Fx3awjbm8qZeFv0nW9SYGc=
github.com/evanphx/json-patch/v5 v5.7.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
Expand Down
Loading

0 comments on commit 1439719

Please sign in to comment.