Skip to content

Commit

Permalink
Fix e2e test metrics presence (#1458)
Browse files Browse the repository at this point in the history
* print metrics

Signed-off-by: Thibault Mange <[email protected]>

* set focus

Signed-off-by: Thibault Mange <[email protected]>

* refactor test

Signed-off-by: Thibault Mange <[email protected]>

* fix kind

Signed-off-by: Thibault Mange <[email protected]>

* trigger metrics test

Signed-off-by: Thibault Mange <[email protected]>

* clean

Signed-off-by: Thibault Mange <[email protected]>

---------

Signed-off-by: Thibault Mange <[email protected]>
  • Loading branch information
thibaultmg authored May 31, 2024
1 parent ca066d0 commit 810c3ff
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 110 deletions.
73 changes: 0 additions & 73 deletions tests/pkg/testdata/ignored-metric-list

This file was deleted.

122 changes: 101 additions & 21 deletions tests/pkg/tests/observability_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ package tests
import (
"context"
"fmt"
"os"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"

"github.com/stolostron/multicluster-observability-operator/tests/pkg/kustomize"
"github.com/stolostron/multicluster-observability-operator/tests/pkg/utils"
Expand Down Expand Up @@ -152,37 +154,70 @@ var _ = Describe("Observability:", func() {
}, EventuallyTimeoutMinute*10, EventuallyIntervalSecond*5).Should(Succeed())
})

It("[P2][Sev2][observability][Integration] Should have metrics which used grafana dashboard (ssli/g1)", func() {
// Ensures that the allowList is current by checking that the metrics are being collected
It("[P2][Sev2][observability][Integration] Should collect expected metrics from spokes (metrics/g0)", func() {
// Get the metrics from the deployed allowList configMap
metricList, dynamicMetricList := utils.GetDefaultMetricList(testOptions)
ignoreMetricMap := utils.GetIgnoreMetricMap()
_, etcdPodList := utils.GetPodList(
testOptions,
true,
"openshift-etcd",
"app=etcd",
)
// ignore etcd network peer metrics for SNO cluster
if etcdPodList != nil && len(etcdPodList.Items) <= 0 {
ignoreMetricMap["etcd_network_peer_received_bytes_total"] = true
ignoreMetricMap["etcd_network_peer_sent_bytes_total"] = true
allowMetricsMap := make(map[string]struct{}, len(metricList)+len(dynamicMetricList))
for _, name := range metricList {
allowMetricsMap[name] = struct{}{}
}
for _, name := range dynamicMetricList {
ignoreMetricMap[name] = true
allowMetricsMap[name] = struct{}{}
}
for _, name := range metricList {
_, ok := ignoreMetricMap[name]
if !ok {
Eventually(func() error {
res, err := utils.QueryGrafana(testOptions, name)

// Log ignored metrics that are not found in the allowlist to verify that both lists are in sync
for name := range ignoredMetrics {
if _, ok := allowMetricsMap[name]; !ok {
klog.V(1).Infof("ignored metric %s is not found in the allowlist", name)
}
}

// Ensure that expected metrics are being collected
Eventually(func() error {
for _, cluster := range clusters {
for _, name := range metricList {
if _, ok := ignoredMetrics[name]; ok {
continue
}

query := fmt.Sprintf("%s{cluster=\"%s\"}", name, cluster)
res, err := utils.QueryGrafana(testOptions, query)
if err != nil {
return fmt.Errorf("failed to get metrics %s: %v", name, err)
return fmt.Errorf("failed to get metrics %s in cluster %s: %v", name, cluster, err)
}

if len(res.Data.Result) == 0 {
return fmt.Errorf("no data found for %s", name)
return fmt.Errorf("no data found for %s in cluster %s", name, cluster)
}

return nil
}, EventuallyTimeoutMinute*2, EventuallyIntervalSecond*3).Should(Succeed())
}
}
return nil
}, EventuallyTimeoutMinute*3, EventuallyIntervalSecond*5).Should(Succeed())

// Ensure that ignored metrics are not being collected
// This is to ensure that the ignoredMetrics list is in sync with the actual metrics being collected
// Skip this check when running in a kind environment because the collected metrics differ there
if os.Getenv("IS_KIND_ENV") != trueStr {
Eventually(func() error {
for _, cluster := range clusters {
for name := range ignoredMetrics {
query := fmt.Sprintf("%s{cluster=\"%s\"}", name, cluster)
res, err := utils.QueryGrafana(testOptions, query)
if err != nil {
return fmt.Errorf("failed to get metrics %s in cluster %s: %v", name, cluster, err)
}

if len(res.Data.Result) != 0 {
return fmt.Errorf("found data for %s in cluster %s", name, cluster)
}
}
}

return nil
}, EventuallyTimeoutMinute*1, EventuallyIntervalSecond*5).Should(Succeed())
}
})

Expand All @@ -197,3 +232,48 @@ var _ = Describe("Observability:", func() {
testFailed = testFailed || CurrentGinkgoTestDescription().Failed
})
})

// ignoredMetrics is the set of metric names that the spoke metrics collection
// test does not expect to find in the e2e environment. A metric might be listed
// here because it is deprecated or because it is not relevant for the test
// environment.
//
// The test skips these names when it checks that the allowlisted metrics are
// being collected. When the IS_KIND_ENV environment variable does not mark the
// run as a kind environment, the test also queries each of these names per
// cluster and fails if any data is returned, which keeps this list in sync
// with what is actually collected.
//
// The map is used as a set: only the keys carry meaning.
var ignoredMetrics = map[string]struct{}{
"cluster:policy_governance_info:propagated_count": {},
"cluster:policy_governance_info:propagated_noncompliant_count": {},
"cnv:vmi_status_running:count": {},
"container_cpu_cfs_periods_total": {},
"container_cpu_cfs_throttled_periods_total": {},
"container_memory_cache": {},
"container_memory_rss": {},
"container_memory_swap": {},
"container_memory_working_set_bytes": {},
"coredns_forward_responses_total": {},
"csv_abnormal": {},
"etcd_mvcc_db_total_size_in_bytes": {},
"etcd_network_peer_received_bytes_total": {},
"etcd_network_peer_sent_bytes_total": {},
"etcd_object_counts": {},
"instance:node_filesystem_usage:sum": {},
"kube_node_status_allocatable_cpu_cores": {},
"kube_node_status_allocatable_memory_bytes": {},
"kube_node_status_capacity_cpu_cores": {},
"kube_node_status_capacity_pods": {},
"kube_pod_container_resource_limits": {},
"kube_pod_container_resource_limits_cpu_cores": {},
"kube_pod_container_resource_limits_memory_bytes": {},
"kube_pod_container_resource_requests": {},
"kube_pod_container_resource_requests_cpu_cores": {},
"kube_pod_container_resource_requests_memory_bytes": {},
"kubelet_running_container_count": {},
"kubelet_runtime_operations": {},
"kubevirt_hyperconverged_operator_health_status": {},
"mce_hs_addon_hosted_control_planes_status_gauge": {},
"mce_hs_addon_request_based_hcp_capacity_current_gauge": {},
"mixin_pod_workload": {},
"namespace:kube_pod_container_resource_requests_cpu_cores:sum": {},
"namespace_workload_pod:kube_pod_owner:relabel": {},
"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate": {},
"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate": {},
"policy:policy_governance_info:propagated_count": {},
"policy:policy_governance_info:propagated_noncompliant_count": {},
"policyreport_info": {},
}
16 changes: 0 additions & 16 deletions tests/pkg/utils/mco_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package utils

import (
"bufio"
"context"
"crypto/tls"
"fmt"
Expand Down Expand Up @@ -211,21 +210,6 @@ func GetDefaultMetricList(opt TestOptions) ([]string, []string) {
return allDefaultMetricName, dynamicMetricsName
}

// GetIgnoreMetricMap reads the ignored-metric-list test data file and returns
// its lines as a set of metric names: every line becomes a map key whose value
// is true.
//
// The file is opened at "../testdata/ignored-metric-list", so the path is
// resolved against the current working directory. Failures are logged rather
// than returned: if the file cannot be opened the result is an empty, non-nil
// map, and if reading stops early the result holds the lines read so far.
func GetIgnoreMetricMap() map[string]bool {
	ignored := map[string]bool{}

	file, err := os.Open("../testdata/ignored-metric-list")
	if err != nil {
		klog.Errorf("failed to open the ignored-metric-list file: %+v\n", err)
		// There is nothing to scan without a file; return the empty map.
		return ignored
	}
	// Close the file once scanning is done so the descriptor is not leaked.
	defer file.Close()

	// bufio.Scanner splits its input into lines by default.
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		ignored[scanner.Text()] = true
	}
	// Scan returns false on both EOF and read errors; surface the latter.
	if err := scanner.Err(); err != nil {
		klog.Errorf("failed to read the ignored-metric-list file: %+v\n", err)
	}
	return ignored
}

func getNameInMatch(match string) string {
r := regexp.MustCompile(`__name__="([^,]*)"`)
m := r.FindAllStringSubmatch(match, -1)
Expand Down

0 comments on commit 810c3ff

Please sign in to comment.