diff --git a/tests/Makefile b/tests/Makefile index 6b32e47d07..c76514171a 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,6 +13,8 @@ GINKGO_LABEL= GINKGO_FLAGS= NGF_VERSION= CI=false +TELEMETRY_ENDPOINT= +TELEMETRY_ENDPOINT_INSECURE= ifneq ($(GINKGO_LABEL),) override GINKGO_FLAGS += -ginkgo.label-filter "$(GINKGO_LABEL)" @@ -34,11 +36,11 @@ create-kind-cluster: ## Create a kind cluster .PHONY: build-images build-images: ## Build NGF and NGINX images - cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images + cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images .PHONY: build-images-with-plus build-images-with-plus: ## Build NGF and NGINX Plus images - cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images-with-plus + cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images-with-plus .PHONY: load-images load-images: ## Load NGF and NGINX images on configured kind cluster @@ -48,6 +50,32 @@ load-images: ## Load NGF and NGINX images on configured kind cluster load-images-with-plus: ## Load NGF and NGINX Plus images on configured kind cluster cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) load-images-with-plus +.PHONY: update-ngf-manifest +update-ngf-manifest: ## Update the NGF deployment manifest image names and imagePullPolicies + cd .. \ + && make generate-manifests HELM_TEMPLATE_COMMON_ARGS="\ + --set nginxGateway.image.repository=$(PREFIX) \ + --set nginxGateway.image.tag=$(TAG) \ + --set nginxGateway.image.pullPolicy=Never \ + --set nginx.image.repository=$(NGINX_PREFIX) \ + --set nginx.image.tag=$(TAG) \ + --set nginx.image.pullPolicy=Never" \ + && cd - + +.PHONY: update-ngf-manifest-with-plus +update-ngf-manifest-with-plus: ## Update the NGF deployment manifest image names and imagePullPolicies including nginx-plus + cd .. 
\ + && make generate-manifests HELM_TEMPLATE_COMMON_ARGS="\ + --set nginxGateway.image.repository=$(PREFIX) \ + --set nginxGateway.image.tag=$(TAG) \ + --set nginxGateway.image.pullPolicy=Never \ + --set nginx.image.repository=$(NGINX_PLUS_PREFIX) \ + --set nginx.image.tag=$(TAG) \ + --set nginx.image.pullPolicy=Never \ + --set nginx.plus=true" \ + && cd - + + test: ## Run the system tests against your default k8s cluster go test -v ./suite $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ diff --git a/tests/README.md b/tests/README.md index 07a8ea141e..5a29a3f5f6 100644 --- a/tests/README.md +++ b/tests/README.md @@ -55,24 +55,29 @@ load-images Load NGF and NGINX images on configured kind clus run-tests-on-vm Run the tests on a GCP VM setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the tests test Run the system tests against your default k8s cluster +update-ngf-manifest-with-plus Update the NGF deployment manifest image names and imagePullPolicies including nginx-plus +update-ngf-manifest Update the NGF deployment manifest image names and imagePullPolicies ``` **Note:** The following variables are configurable when running the below `make` commands: -| Variable | Default | Description | -| ------------------- | ------------------------------- | -------------------------------------------------------------- | -| TAG | edge | tag for the locally built NGF images | -| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image | -| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image | -| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image | -| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled | -| PULL_POLICY | Never | NGF image pull policy | -| GW_API_VERSION | 1.0.0 | version of Gateway API resources 
to install | -| K8S_VERSION | latest | version of k8s that the tests are run on | -| GW_SERVICE_TYPE | NodePort | type of Service that should be created | -| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service | -| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run | -| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command | +| Variable | Default | Description | +|------------------------------|---------------------------------|---------------------------------------------------------------------| +| TAG | edge | tag for the locally built NGF images | +| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image | +| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image | +| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image | +| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled | +| PULL_POLICY | Never | NGF image pull policy | +| GW_API_VERSION | 1.0.0 | version of Gateway API resources to install | +| K8S_VERSION | latest | version of k8s that the tests are run on | +| GW_SERVICE_TYPE | NodePort | type of Service that should be created | +| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service | +| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run | +| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command | +| TELEMETRY_ENDPOINT | Set in the main Makefile | The endpoint to which telemetry reports are sent | +| TELEMETRY_ENDPOINT_INSECURE | Set in the main Makefile | Controls whether TLS should be used when sending telemetry reports. 
| + ## Step 1 - Create a Kubernetes cluster @@ -126,7 +131,27 @@ Or, to build NGF with NGINX Plus enabled (NGINX Plus cert and key must exist in make build-images-with-plus load-images-with-plus TAG=$(whoami) ``` -## Step 3 - Run the tests +For the telemetry test, which requires an OTel collector, build an image with the following variables set: + +```makefile +TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317 TELEMETRY_ENDPOINT_INSECURE=true +``` + +## Step 3 - Update Manifests for a Local Run + +For NGINX OSS: + +```makefile +make update-ngf-manifest TAG=$(whoami) +``` + +For NGINX Plus: + +```makefile +make update-ngf-manifest-with-plus TAG=$(whoami) +``` + +## Step 4 - Run the tests -### 3a - Run the tests locally +### 4a - Run the tests locally @@ -140,7 +165,7 @@ Or, to run the tests with NGINX Plus enabled: make test TAG=$(whoami) PLUS_ENABLED=true ``` -### 3b - Run the tests on a GKE cluster from a GCP VM +### 4b - Run the tests on a GKE cluster from a GCP VM This step only applies if you would like to run the tests on a GKE cluster from a GCP based VM. @@ -185,6 +210,14 @@ or to pass a specific flag, e.g. run a specific test, use the GINKGO_FLAGS variable: make test TAG=$(whoami) GINKGO_FLAGS='-ginkgo.focus "writes the system info to a results file"' ``` +To run the telemetry test, which requires a specially built image (see above), run: + +```makefile +make test TAG=$(whoami) GINKGO_LABEL=telemetry +``` + +Otherwise, the test will be skipped. + If you are running the tests in GCP, add your required label/ flags to `scripts/var.env`. You can also modify the tests code for a similar outcome. 
To run a specific test, you can "focus" it by adding the `F` diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index 72c3992cb4..e62cb5db87 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -30,6 +30,7 @@ import ( "strings" "time" + apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -37,20 +38,24 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" v1 "sigs.k8s.io/gateway-api/apis/v1" ) // ResourceManager handles creating/updating/deleting Kubernetes resources. type ResourceManager struct { - K8sClient client.Client - FS embed.FS - TimeoutConfig TimeoutConfig + K8sClient client.Client + ClientGoClient kubernetes.Interface // used when k8sClient is not enough + FS embed.FS + TimeoutConfig TimeoutConfig } // ClusterInfo holds the cluster metadata type ClusterInfo struct { - K8sVersion string + K8sVersion string + // ID is the UID of kube-system namespace + ID string MemoryPerNode string GkeInstanceType string GkeZone string @@ -406,9 +411,89 @@ func (rm *ResourceManager) GetClusterInfo() (ClusterInfo, error) { ci.GkeZone = node.Labels["topology.kubernetes.io/zone"] } + var ns core.Namespace + key := types.NamespacedName{Name: "kube-system"} + + if err := rm.K8sClient.Get(ctx, key, &ns); err != nil { + return *ci, fmt.Errorf("error getting kube-system namespace: %w", err) + } + + ci.ID = string(ns.UID) + return *ci, nil } +// GetPodNames returns the names of all Pods in the specified namespace that match the given labels. 
+func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingLabels) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var podList core.PodList + if err := rm.K8sClient.List( + ctx, + &podList, + client.InNamespace(namespace), + labels, + ); err != nil { + return nil, fmt.Errorf("error getting list of Pods: %w", err) + } + + names := make([]string, 0, len(podList.Items)) + + for _, pod := range podList.Items { + names = append(names, pod.Name) + } + + return names, nil +} + +// GetPodLogs returns the logs from the specified Pod +func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + req := rm.ClientGoClient.CoreV1().Pods(namespace).GetLogs(name, opts) + + logs, err := req.Stream(ctx) + if err != nil { + return "", fmt.Errorf("error getting logs from Pod: %w", err) + } + defer logs.Close() + + buf := new(bytes.Buffer) + if _, err := buf.ReadFrom(logs); err != nil { + return "", fmt.Errorf("error reading logs from Pod: %w", err) + } + + return buf.String(), nil +} + +// GetNGFDeployment returns the NGF Deployment in the specified namespace with the given release name. 
+func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*apps.Deployment, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var deployments apps.DeploymentList + + if err := rm.K8sClient.List( + ctx, + &deployments, + client.InNamespace(namespace), + client.MatchingLabels{ + "app.kubernetes.io/instance": releaseName, + }, + ); err != nil { + return nil, fmt.Errorf("error getting list of Deployments: %w", err) + } + + if len(deployments.Items) != 1 { + return nil, fmt.Errorf("expected 1 NGF Deployment, got %d", len(deployments.Items)) + } + + deployment := deployments.Items[0] + return &deployment, nil +} + // GetReadyNGFPodNames returns the name(s) of the NGF Pod(s). func GetReadyNGFPodNames( k8sClient client.Client, diff --git a/tests/suite/manifests/telemetry/collector-values.yaml b/tests/suite/manifests/telemetry/collector-values.yaml new file mode 100644 index 0000000000..5c2f08d953 --- /dev/null +++ b/tests/suite/manifests/telemetry/collector-values.yaml @@ -0,0 +1,31 @@ +mode: deployment +replicaCount: 1 +config: + exporters: + debug: + verbosity: detailed + logging: {} + extensions: + health_check: {} + memory_ballast: + size_in_percentage: 40 + processors: + batch: {} + memory_limiter: + check_interval: 5s + limit_percentage: 80 + spike_limit_percentage: 25 + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + service: + extensions: + - health_check + pipelines: + traces: + exporters: + - debug + receivers: + - otlp diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 8d2af38b57..72f81cc9db 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -21,6 +21,7 @@ import ( k8sRuntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" ctlr "sigs.k8s.io/controller-runtime" 
"sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -100,11 +101,15 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { k8sClient, err = client.New(k8sConfig, options) Expect(err).ToNot(HaveOccurred()) + clientGoClient, err := kubernetes.NewForConfig(k8sConfig) + Expect(err).ToNot(HaveOccurred()) + timeoutConfig = framework.DefaultTimeoutConfig() resourceManager = framework.ResourceManager{ - K8sClient: k8sClient, - FS: manifests, - TimeoutConfig: timeoutConfig, + K8sClient: k8sClient, + ClientGoClient: clientGoClient, + FS: manifests, + TimeoutConfig: timeoutConfig, } clusterInfo, err = resourceManager.GetClusterInfo() @@ -197,22 +202,26 @@ func teardown() { )).To(Succeed()) } -var _ = BeforeSuite(func() { +func getDefaultSetupCfg() setupConfig { _, file, _, _ := runtime.Caller(0) fileDir := path.Join(path.Dir(file), "../") basepath := filepath.Dir(fileDir) localChartPath = filepath.Join(basepath, "deploy/helm-chart") - cfg := setupConfig{ + return setupConfig{ chartPath: localChartPath, gwAPIVersion: *gatewayAPIVersion, deploy: true, } +} + +var _ = BeforeSuite(func() { + cfg := getDefaultSetupCfg() // If we are running the upgrade test only, then skip the initial deployment. // The upgrade test will deploy its own version of NGF. suiteConfig, _ := GinkgoConfiguration() - if suiteConfig.LabelFilter == "upgrade" { + if suiteConfig.LabelFilter == "upgrade" || suiteConfig.LabelFilter == "telemetry" { cfg.deploy = false } diff --git a/tests/suite/telemetry_test.go b/tests/suite/telemetry_test.go new file mode 100644 index 0000000000..ac29eb06e0 --- /dev/null +++ b/tests/suite/telemetry_test.go @@ -0,0 +1,177 @@ +package suite + +import ( + "fmt" + "os/exec" + "strings" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + core "k8s.io/api/core/v1" + crClient "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + collectorNamespace = "collector" + collectorChartReleaseName = "otel-collector" + // FIXME(pleshakov): Find an automated way to keep the version updated here similar to dependabot. + collectorChatVersion = "0.73.1" +) + +var _ = Describe("Telemetry test with OTel collector", Label("telemetry"), func() { + var skipped bool + + BeforeEach(func() { + suiteConfig, _ := GinkgoConfiguration() + if suiteConfig.LabelFilter != "telemetry" { + skipped = true + Skip("Telemetry test should be run separately with 'telemetry' label because of its NGF installation requirements") + } + + // Because NGF reports telemetry on start, we need to install the collector first. + + // Install collector + output, err := installCollector() + Expect(err).NotTo(HaveOccurred(), string(output)) + + // Install NGF + // Note: the suite doesn't install NGF for 'telemetry' label + + setup(getDefaultSetupCfg()) + }) + + AfterEach(func() { + if skipped { + Skip("") + } + + output, err := uninstallCollector() + Expect(err).NotTo(HaveOccurred(), string(output)) + }) + + It("sends telemetry", func() { + names, err := resourceManager.GetPodNames( + collectorNamespace, + crClient.MatchingLabels{ + "app.kubernetes.io/name": "opentelemetry-collector", + }, + ) + + Expect(err).NotTo(HaveOccurred()) + Expect(names).To(HaveLen(1)) + + name := names[0] + + // We assert that all data points were sent + // For some data points, as a sanity check, we assert on sent values. 
+ + info, err := resourceManager.GetClusterInfo() + Expect(err).NotTo(HaveOccurred()) + + ngfDeployment, err := resourceManager.GetNGFDeployment(ngfNamespace, releaseName) + Expect(err).NotTo(HaveOccurred()) + + matchFirstExpectedLine := func() bool { + logs, err := resourceManager.GetPodLogs(collectorNamespace, name, &core.PodLogOptions{}) + Expect(err).NotTo(HaveOccurred()) + return strings.Contains(logs, "dataType: Str(ngf-product-telemetry)") + } + + // Wait until the collector has received the telemetry data + Eventually(matchFirstExpectedLine, "30s", "5s").Should(BeTrue()) + + logs, err := resourceManager.GetPodLogs(collectorNamespace, name, &core.PodLogOptions{}) + Expect(err).NotTo(HaveOccurred()) + + assertConsecutiveLinesInLogs( + logs, + []string{ + "ImageSource:", + "ProjectName: Str(NGF)", + "ProjectVersion:", + "ProjectArchitecture:", + fmt.Sprintf("ClusterID: Str(%s)", info.ID), + "ClusterVersion:", + "ClusterPlatform:", + fmt.Sprintf("InstallationID: Str(%s)", ngfDeployment.UID), + fmt.Sprintf("ClusterNodeCount: Int(%d)", info.NodeCount), + "FlagNames: Slice", + "FlagValues: Slice", + "GatewayCount: Int(0)", + "GatewayClassCount: Int(1)", + "HTTPRouteCount: Int(0)", + "SecretCount: Int(0)", + "ServiceCount: Int(0)", + "EndpointCount: Int(0)", + "NGFReplicaCount: Int(1)", + }, + ) + }) +}) + +func installCollector() ([]byte, error) { + repoAddArgs := []string{ + "repo", + "add", + "open-telemetry", + "https://open-telemetry.github.io/opentelemetry-helm-charts", + } + + if output, err := exec.Command("helm", repoAddArgs...).CombinedOutput(); err != nil { + return output, err + } + + args := []string{ + "install", + collectorChartReleaseName, + "open-telemetry/opentelemetry-collector", + "--create-namespace", + "--namespace", collectorNamespace, + "--version", collectorChatVersion, + "-f", "manifests/telemetry/collector-values.yaml", + "--wait", + } + + return exec.Command("helm", args...).CombinedOutput() +} + +func uninstallCollector() ([]byte, 
error) { + args := []string{ + "uninstall", collectorChartReleaseName, + "--namespace", collectorNamespace, + } + + return exec.Command("helm", args...).CombinedOutput() +} + +func assertConsecutiveLinesInLogs(logs string, expectedLines []string) { + lines := strings.Split(logs, "\n") + + // find first expected line in lines + + i := 0 + + for ; i < len(lines); i++ { + if strings.Contains(lines[i], expectedLines[0]) { + i++ + break + } + } + + if i == len(lines) { + Fail(fmt.Sprintf("Expected first line not found: %s, \n%s", expectedLines[0], logs)) + } + + linesLeft := len(lines) - i + expectedLinesLeft := len(expectedLines) - 1 + + if linesLeft < expectedLinesLeft { + format := "Not enough lines remains in the logs, expected %d, got %d\n%s" + Fail(fmt.Sprintf(format, linesLeft, expectedLinesLeft, logs)) + } + + for j := 1; j < len(expectedLines); j++ { + Expect(lines[i]).To(ContainSubstring(expectedLines[j])) + i++ + } +}