Skip to content

Commit

Permalink
test: increase wait on e2e tests
Browse files Browse the repository at this point in the history
Some of these tests fail quite often when run in GitHub Actions, so we
increase the timeouts to one minute.
  • Loading branch information
ctrox committed Jun 23, 2024
1 parent b58414d commit de1d9ac
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 25 deletions.
38 changes: 32 additions & 6 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,26 @@ func TestE2E(t *testing.T) {
defer cleanupPod()

require.Eventually(t, func() bool {
return isCheckpointed(t, client, cfg, pod)
}, time.Second*10, time.Second)
checkpointed, err := isCheckpointed(t, client, cfg, pod)
if err != nil {
t.Logf("error checking if checkpointed: %s", err)
return false
}
return checkpointed
}, time.Minute, time.Second)

stdout, stderr, err := podExec(cfg, pod, "date")
require.NoError(t, err)
t.Log(stdout, stderr)

assert.GreaterOrEqual(t, restoreCount(t, client, cfg, pod), 1, "pod should have been restored at least once")
require.Eventually(t, func() bool {
count, err := restoreCount(t, client, cfg, pod)
if err != nil {
t.Logf("error checking if restored: %s", err)
return false
}
return assert.GreaterOrEqual(t, count, 1, "pod should have been restored at least once")
}, time.Minute, time.Second)
})

t.Run("delete in restored state", func(t *testing.T) {
Expand All @@ -185,8 +197,13 @@ func TestE2E(t *testing.T) {
defer cleanupPod()

require.Eventually(t, func() bool {
return isCheckpointed(t, client, cfg, pod)
}, time.Second*10, time.Second)
checkpointed, err := isCheckpointed(t, client, cfg, pod)
if err != nil {
t.Logf("error checking if checkpointed: %s", err)
return false
}
return checkpointed
}, time.Minute, time.Second)

stdout, stderr, err := podExec(cfg, pod, "date")
require.NoError(t, err)
Expand Down Expand Up @@ -239,7 +256,16 @@ func TestE2E(t *testing.T) {
// exec into pod to ensure it has been restored at least once
require.Eventually(t, func() bool {
_, _, err := podExec(cfg, restoredPod, "date")
return err == nil && isCheckpointed(t, client, cfg, restoredPod)
if err != nil {
t.Logf("error during pod exec: %s", err)
return false
}
checkpointed, err := isCheckpointed(t, client, cfg, restoredPod)
if err != nil {
t.Logf("error checking if checkpointed: %s", err)
return false
}
return checkpointed
}, time.Minute, time.Second)

mfs := getNodeMetrics(t, client, cfg)
Expand Down
46 changes: 27 additions & 19 deletions e2e/setup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ func createServiceAndWait(t testing.TB, ctx context.Context, client client.Clien
}

return len(endpoints.Subsets[0].Addresses) == replicas
}, time.Second*30, time.Second, "waiting for service endpoints to be ready") {
}, time.Minute, time.Second, "waiting for service endpoints to be ready") {
t.Log("service did not get ready")
}

Expand Down Expand Up @@ -541,79 +541,87 @@ func podExec(cfg *rest.Config, pod *corev1.Pod, command string) (string, string,
return buf.String(), errBuf.String(), nil
}

func restoreCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) int {
func restoreCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) (int, error) {
val, err := getNodeMetric(t, client, cfg, zeropod.MetricRestoreDuration)
if err != nil {
t.Fatal(err)
return 0, err
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Fatalf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
return 0, fmt.Errorf("could not find restore duration metric that matches pod: %s/%s: %w",
pod.Name, pod.Namespace, err)
}

if metric.Histogram == nil {
t.Fatalf("found metric that is not a histogram")
return 0, fmt.Errorf("found metric that is not a histogram")
}

if metric.Histogram.SampleCount == nil {
t.Fatalf("histogram sample count is nil")
return 0, fmt.Errorf("histogram sample count is nil")
}

return int(*metric.Histogram.SampleCount)
return int(*metric.Histogram.SampleCount), nil
}

func checkpointCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) int {
func checkpointCount(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) (int, error) {
val, err := getNodeMetric(t, client, cfg, zeropod.MetricCheckPointDuration)
if err != nil {
t.Fatal(err)
return 0, err
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Fatalf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
return 0, fmt.Errorf("could not find checkpoint duration metric that matches pod: %s/%s: %w",
pod.Name, pod.Namespace, err)
}

if metric.Histogram == nil {
t.Fatalf("found metric that is not a histogram")
return 0, fmt.Errorf("found metric that is not a histogram")
}

if metric.Histogram.SampleCount == nil {
t.Fatalf("histogram sample count is nil")
return 0, fmt.Errorf("histogram sample count is nil")
}

return int(*metric.Histogram.SampleCount)
return int(*metric.Histogram.SampleCount), nil
}

func isCheckpointed(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) bool {
func isCheckpointed(t testing.TB, client client.Client, cfg *rest.Config, pod *corev1.Pod) (bool, error) {
val, err := getNodeMetric(t, client, cfg, zeropod.MetricRunning)
if err != nil {
t.Fatal(err)
return false, err
}

metric, ok := findMetricByLabelMatch(val.Metric, map[string]string{
zeropod.LabelPodName: pod.Name,
zeropod.LabelPodNamespace: pod.Namespace,
})
if !ok {
t.Fatalf("could not find running metric that matches pod: %s/%s", pod.Name, pod.Namespace)
return false, fmt.Errorf("could not find running metric that matches pod: %s/%s: %w",
pod.Name, pod.Namespace, err)
}

if metric.Gauge == nil {
t.Fatalf("found metric that is not a gauge")
return false, fmt.Errorf("found metric that is not a gauge")
}

if metric.Gauge.Value == nil {
t.Fatalf("gauge value is nil")
return false, fmt.Errorf("gauge value is nil")
}

count, err := checkpointCount(t, client, cfg, pod)
if err != nil {
return false, err
}

return *metric.Gauge.Value == 0 && checkpointCount(t, client, cfg, pod) >= 1
return *metric.Gauge.Value == 0 && count >= 1, nil
}

func findMetricByLabelMatch(metrics []*dto.Metric, labels map[string]string) (*dto.Metric, bool) {
Expand Down

0 comments on commit de1d9ac

Please sign in to comment.