Commit
Merge pull request #10 from ctrox/release-v0.2.0
Prepare Release v0.2.0
ctrox authored May 9, 2024
2 parents 5004678 + 7cb11f4 commit f2d618f
Showing 4 changed files with 74 additions and 25 deletions.
4 changes: 2 additions & 2 deletions config/production/kustomization.yaml
@@ -3,10 +3,10 @@ resources:
images:
- name: manager
newName: ghcr.io/ctrox/zeropod-manager
newTag: v0.1.0
newTag: v0.2.0
- name: installer
newName: ghcr.io/ctrox/zeropod-installer
newTag: v0.1.0
newTag: v0.2.0
patches:
- patch: |-
- op: add
25 changes: 11 additions & 14 deletions e2e/e2e_test.go
@@ -164,31 +164,28 @@ func TestE2E(t *testing.T) {
cleanupPod := createPodAndWait(t, ctx, client, pod)
defer cleanupPod()

require.Eventually(t, func() bool {
return isCheckpointed(t, client, cfg, pod)
}, time.Second*10, time.Second)

stdout, stderr, err := podExec(cfg, pod, "date")
require.NoError(t, err)
t.Log(stdout, stderr)

// as we can't yet reliably check if the pod is fully checkpointed and
// ready for another exec, we simply retry
require.Eventually(t, func() bool {
stdout, stderr, err = podExec(cfg, pod, "date")
t.Log(stdout, stderr)
return err == nil
}, time.Second*10, time.Second)

assert.GreaterOrEqual(t, restoreCount(t, client, cfg, pod), 2, "pod should have been restored 2 times")
assert.GreaterOrEqual(t, restoreCount(t, client, cfg, pod), 1, "pod should have been restored at least once")
})

t.Run("delete in restored state", func(t *testing.T) {
// as we want to delete the pod when it is in a restored state, we
// first need to make sure it has checkpointed at least once. We give
// it 2 seconds to checkpoint initially and wait 5 seconds to ensure
// it has finished checkpointing.
pod := testPod(scaleDownAfter(time.Second * 2))
// first need to make sure it has checkpointed at least once.
pod := testPod(scaleDownAfter(0))
cleanupPod := createPodAndWait(t, ctx, client, pod)
defer cleanupPod()

time.Sleep(time.Second * 5)
require.Eventually(t, func() bool {
return isCheckpointed(t, client, cfg, pod)
}, time.Second*10, time.Second)

stdout, stderr, err := podExec(cfg, pod, "date")
require.NoError(t, err)
t.Log(stdout, stderr)
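The test change above replaces fixed sleeps with polling: testify's require.Eventually re-runs a condition function until it returns true or the timeout expires, here using isCheckpointed to wait for the pod to be scaled down. A minimal, self-contained sketch of that pattern (not taken from this repository; the readiness condition is a stand-in) looks roughly like this:

package e2e

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

func TestPollInsteadOfSleep(t *testing.T) {
	ready := time.Now().Add(2 * time.Second)

	// condition stands in for a check such as isCheckpointed(t, client, cfg, pod);
	// it is re-evaluated on every tick until it returns true or the timeout is hit
	condition := func() bool {
		return time.Now().After(ready)
	}

	// poll every second for at most 10 seconds, failing the test on timeout
	require.Eventually(t, condition, 10*time.Second, time.Second)
}

Compared to a fixed time.Sleep, this waits only as long as the slowest run actually needs while still bounding the total wait.
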
67 changes: 61 additions & 6 deletions e2e/setup_test.go
@@ -533,30 +533,85 @@ func podExec(cfg *rest.Config, pod *corev1.Pod, command string) (string, string,
func restoreCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) int {
mfs := getNodeMetrics(t, client, cfg)

running := prometheus.BuildFQName(zeropod.MetricsNamespace, "", zeropod.MetricRestoreDuration)
val, ok := mfs[running]
restoreDuration := prometheus.BuildFQName(zeropod.MetricsNamespace, "", zeropod.MetricRestoreDuration)
val, ok := mfs[restoreDuration]
if !ok {
t.Errorf("could not find expected metric: %s", restoreDuration)
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Errorf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
}

if metric.Histogram == nil {
t.Errorf("found metric that is not a histogram")
}

if metric.Histogram.SampleCount == nil {
t.Errorf("histogram sample count is nil")
}

return int(*metric.Histogram.SampleCount)
}

func checkpointCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) int {
mfs := getNodeMetrics(t, client, cfg)

checkpointDuration := prometheus.BuildFQName(zeropod.MetricsNamespace, "", zeropod.MetricCheckPointDuration)
val, ok := mfs[checkpointDuration]
if !ok {
t.Fatalf("could not find expected metric: %s", running)
t.Errorf("could not find expected metric: %s", checkpointDuration)
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Fatalf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
t.Errorf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
}

if metric.Histogram == nil {
t.Fatalf("found metric that is not a histogram")
t.Errorf("found metric that is not a histogram")
}

if metric.Histogram.SampleCount == nil {
t.Fatalf("histogram sample count is nil")
t.Errorf("histogram sample count is nil")
}

return int(*metric.Histogram.SampleCount)
}

func isCheckpointed(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) bool {
mfs := getNodeMetrics(t, client, cfg)

running := prometheus.BuildFQName(zeropod.MetricsNamespace, "", zeropod.MetricRunning)
val, ok := mfs[running]
if !ok {
t.Errorf("could not find expected metric: %s", running)
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Errorf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
}

if metric.Gauge == nil {
t.Errorf("found metric that is not a gauge")
}

if metric.Gauge.Value == nil {
t.Errorf("gauge value is nil")
}

return *metric.Gauge.Value == 0 && checkpointCount(t, client, cfg, pod) >= 1
}

func findMetricByLabelMatch(metrics []*dto.Metric, labels map[string]string) (*dto.Metric, bool) {
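isCheckpointed combines two signals from the node metrics: the running gauge must read 0 and the checkpoint-duration histogram must have at least one sample. The metric handling it relies on can be sketched with the standard Prometheus client libraries; the parsing helper and label-matching helper below are illustrative assumptions, not the repository's actual getNodeMetrics or findMetricByLabelMatch implementations:

package e2e

import (
	"strings"

	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
)

// parseMetricFamilies turns text-format metrics (as scraped from a metrics
// endpoint) into a map keyed by fully qualified metric name.
func parseMetricFamilies(text string) (map[string]*dto.MetricFamily, error) {
	var parser expfmt.TextParser
	return parser.TextToMetricFamilies(strings.NewReader(text))
}

// matchByLabels returns the first metric whose labels contain every key/value
// pair in want; a hypothetical stand-in for findMetricByLabelMatch.
func matchByLabels(metrics []*dto.Metric, want map[string]string) (*dto.Metric, bool) {
	for _, m := range metrics {
		matched := 0
		for _, lp := range m.Label {
			if v, ok := want[lp.GetName()]; ok && v == lp.GetValue() {
				matched++
			}
		}
		if matched == len(want) {
			return m, true
		}
	}
	return nil, false
}
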
3 changes: 0 additions & 3 deletions runc/task/service_zeropod.go
@@ -156,9 +156,6 @@ func (w *wrapper) Start(ctx context.Context, r *taskAPI.StartRequest) (*taskAPI.
return nil
})

// TODO: this is not a good idea (the 10s). A better idea is probably to
// wait whenever we try to first get the Port from the app (retry until
// the app is listening).
if err := zeropodContainer.ScheduleScaleDown(); err != nil {
return nil, err
}
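The removed TODO suggests replacing a fixed delay with a wait that retries until the application is actually listening on its port. A generic sketch of that idea (not the shim's implementation; the address and intervals are placeholders) could look like this:

package main

import (
	"fmt"
	"net"
	"time"
)

// waitForListen polls addr until a TCP connection succeeds or the timeout expires.
func waitForListen(addr string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		conn, err := net.DialTimeout("tcp", addr, time.Second)
		if err == nil {
			conn.Close() // the app accepted a connection, so it is listening
			return nil
		}
		time.Sleep(200 * time.Millisecond)
	}
	return fmt.Errorf("%s is not accepting connections after %s", addr, timeout)
}

func main() {
	// example usage with a placeholder address
	if err := waitForListen("127.0.0.1:8080", 5*time.Second); err != nil {
		fmt.Println(err)
	}
}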
