Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functional test for product telemetry #1659

Merged
merged 4 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ jobs:
AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
AZURE_BUCKET_NAME: ${{ secrets.AZURE_BUCKET_NAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_COMMUNITY }}
TELEMETRY_ENDPOINT: "" # disables sending telemetry
TELEMETRY_ENDPOINT_INSECURE: "false"

- name: Cache Artifacts
uses: actions/cache@ab5e6d0c87105b4c9c2047343972218f562e4319 # v4.0.1
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/conformance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ jobs:
with:
version: latest
args: build --snapshot --clean
env:
TELEMETRY_ENDPOINT: "" # disables sending telemetry
TELEMETRY_ENDPOINT_INSECURE: "false"

- name: Build NGF Docker Image
uses: docker/build-push-action@af5a7ed5ba88268d5278f7203fb52cd833f66d6e # v5.2.0
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/functional.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ jobs:
with:
version: latest
args: build --snapshot --clean
env:
TELEMETRY_ENDPOINT: otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317
TELEMETRY_ENDPOINT_INSECURE: "true"

- name: Build NGF Docker Image
uses: docker/build-push-action@4a13e500e55cf31b7a5d59a38ab2040ab0f42f56 # v5.1.0
Expand Down Expand Up @@ -116,9 +119,9 @@ jobs:
make load-images${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag}
working-directory: ./tests

- name: Run functional tests
- name: Run functional telemetry tests
run: |
ngf_prefix=ghcr.io/nginxinc/nginx-gateway-fabric
ngf_tag=${{ steps.ngf-meta.outputs.version }}
make test${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag}
make test${{ matrix.nginx-image == 'nginx-plus' && '-with-plus' || ''}} PREFIX=${ngf_prefix} TAG=${ngf_tag} GINKGO_LABEL=telemetry
working-directory: ./tests
8 changes: 7 additions & 1 deletion .goreleaser.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ builds:
asmflags:
- all=-trimpath={{.Env.GOPATH}}
ldflags:
- -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}} -X main.telemetryReportPeriod=24h -X main.telemetryEndpointInsecure=false
- -s -w
- -X main.version={{.Version}}
- -X main.commit={{.Commit}}
- -X main.date={{.Date}}
- -X main.telemetryReportPeriod=24h
- -X main.telemetryEndpoint={{.Env.TELEMETRY_ENDPOINT}}
- -X main.telemetryEndpointInsecure={{.Env.TELEMETRY_ENDPOINT_INSECURE}}
main: ./cmd/gateway/
binary: gateway

Expand Down
6 changes: 4 additions & 2 deletions tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ GINKGO_LABEL=
GINKGO_FLAGS=
NGF_VERSION=
CI=false
TELEMETRY_ENDPOINT=
TELEMETRY_ENDPOINT_INSECURE=
pleshakov marked this conversation as resolved.
Show resolved Hide resolved

ifneq ($(GINKGO_LABEL),)
override GINKGO_FLAGS += -ginkgo.label-filter "$(GINKGO_LABEL)"
Expand All @@ -38,11 +40,11 @@ delete-kind-cluster: ## Delete kind cluster

.PHONY: build-images
build-images: ## Build NGF and NGINX images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images

.PHONY: build-images-with-plus
build-images-with-plus: ## Build NGF and NGINX Plus images
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images-with-plus
cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) TELEMETRY_ENDPOINT=$(TELEMETRY_ENDPOINT) TELEMETRY_ENDPOINT_INSECURE=$(TELEMETRY_ENDPOINT_INSECURE) build-images-with-plus

.PHONY: load-images
load-images: ## Load NGF and NGINX images on configured kind cluster
Expand Down
46 changes: 32 additions & 14 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,23 @@ test-with-plus Runs the functional tests for NGF with NGINX Plus

**Note:** The following variables are configurable when running the below `make` commands:

| Variable | Default | Description |
| ------------------- | ------------------------------- | -------------------------------------------------------------- |
| TAG | edge | tag for the locally built NGF images |
| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image |
| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image |
| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image |
| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled |
| PULL_POLICY | Never | NGF image pull policy |
| GW_API_VERSION | 1.0.0 | version of Gateway API resources to install |
| K8S_VERSION | latest | version of k8s that the tests are run on |
| GW_SERVICE_TYPE | NodePort | type of Service that should be created |
| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service |
| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run |
| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command |
| Variable | Default | Description |
|------------------------------|---------------------------------|---------------------------------------------------------------------|
| TAG | edge | tag for the locally built NGF images |
| PREFIX | nginx-gateway-fabric | prefix for the locally built NGF image |
| NGINX_PREFIX | nginx-gateway-fabric/nginx | prefix for the locally built NGINX image |
| NGINX_PLUS_PREFIX | nginx-gateway-fabric/nginx-plus | prefix for the locally built NGINX Plus image |
| PLUS_ENABLED | false | Flag to indicate if NGINX Plus should be enabled |
| PULL_POLICY | Never | NGF image pull policy |
| GW_API_VERSION | 1.0.0 | version of Gateway API resources to install |
| K8S_VERSION | latest | version of k8s that the tests are run on |
| GW_SERVICE_TYPE | NodePort | type of Service that should be created |
| GW_SVC_GKE_INTERNAL | false | specifies if the LoadBalancer should be a GKE internal service |
| GINKGO_LABEL | "" | name of the ginkgo label that will filter the tests to run |
| GINKGO_FLAGS | "" | other ginkgo flags to pass to the go test command |
| TELEMETRY_ENDPOINT | Set in the main Makefile | The endpoint to which telemetry reports are sent |
| TELEMETRY_ENDPOINT_INSECURE | Set in the main Makefile | Controls whether TLS should be used when sending telemetry reports. |


## Step 1 - Create a Kubernetes cluster

Expand Down Expand Up @@ -137,6 +140,12 @@ Or, to build NGF with NGINX Plus enabled (NGINX Plus cert and key must exist in
make build-images-with-plus load-images-with-plus TAG=$(whoami)
```

For the telemetry test, which requires an OTel collector, build an image with the following variables set:

```makefile
TELEMETRY_ENDPOINT=otel-collector-opentelemetry-collector.collector.svc.cluster.local:4317 TELEMETRY_ENDPOINT_INSECURE=true
```

## Step 3 - Run the tests

### 3a - Run the functional tests locally
Expand All @@ -151,6 +160,15 @@ Or, to run the tests with NGINX Plus enabled:
make test TAG=$(whoami) PLUS_ENABLED=true
```

> The command above doesn't run the telemetry functional test, which requires a dedicated invocation because it uses a
> specially built image (see above) and it needs to deploy NGF differently from the rest of the functional tests.

To run the telemetry test:

```makefile
make test TAG=$(whoami) GINKGO_LABEL=telemetry
```

### 3b - Run the tests on a GKE cluster from a GCP VM

This step only applies if you are running the NFR tests, or would like to run the functional tests on a GKE cluster from a GCP based VM.
Expand Down
93 changes: 89 additions & 4 deletions tests/framework/resourcemanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,32 @@ import (
"strings"
"time"

apps "k8s.io/api/apps/v1"
core "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/controller-runtime/pkg/client"
v1 "sigs.k8s.io/gateway-api/apis/v1"
)

// ResourceManager handles creating/updating/deleting Kubernetes resources.
type ResourceManager struct {
K8sClient client.Client
FS embed.FS
TimeoutConfig TimeoutConfig
K8sClient client.Client
ClientGoClient kubernetes.Interface // used when k8sClient is not enough
FS embed.FS
TimeoutConfig TimeoutConfig
}

// ClusterInfo holds the cluster metadata
type ClusterInfo struct {
K8sVersion string
K8sVersion string
// ID is the UID of kube-system namespace
ID string
MemoryPerNode string
GkeInstanceType string
GkeZone string
Expand Down Expand Up @@ -406,9 +411,89 @@ func (rm *ResourceManager) GetClusterInfo() (ClusterInfo, error) {
ci.GkeZone = node.Labels["topology.kubernetes.io/zone"]
}

var ns core.Namespace
key := types.NamespacedName{Name: "kube-system"}

if err := rm.K8sClient.Get(ctx, key, &ns); err != nil {
return *ci, fmt.Errorf("error getting kube-system namespace: %w", err)
}

ci.ID = string(ns.UID)

return *ci, nil
}

// GetPodNames lists the Pods in the given namespace whose labels match the provided
// selector and returns their names. The list call is bounded by TimeoutConfig.GetTimeout.
// On failure it returns a nil slice and the wrapped list error.
func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingLabels) ([]string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	var pods core.PodList

	err := rm.K8sClient.List(ctx, &pods, client.InNamespace(namespace), labels)
	if err != nil {
		return nil, fmt.Errorf("error getting list of Pods: %w", err)
	}

	// One slot per Pod, filled by index; the result is non-nil even when no Pods match.
	podNames := make([]string, len(pods.Items))
	for i := range pods.Items {
		podNames[i] = pods.Items[i].Name
	}

	return podNames, nil
}

// GetPodLogs fetches the logs of the named Pod in the given namespace and returns them
// as a single string. opts is handed to the log request as-is. The request is made through
// ClientGoClient and is bounded by TimeoutConfig.GetTimeout.
func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	stream, err := rm.ClientGoClient.CoreV1().Pods(namespace).GetLogs(name, opts).Stream(ctx)
	if err != nil {
		return "", fmt.Errorf("error getting logs from Pod: %w", err)
	}
	defer stream.Close()

	// Drain the whole stream into memory before converting it to a string.
	var contents bytes.Buffer
	if _, err = contents.ReadFrom(stream); err != nil {
		return "", fmt.Errorf("error reading logs from Pod: %w", err)
	}

	return contents.String(), nil
}

// GetNGFDeployment looks up the NGF Deployment in the given namespace by matching the
// "app.kubernetes.io/instance" label against releaseName. It returns an error if the list
// call fails or if the number of matching Deployments is anything other than one.
func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*apps.Deployment, error) {
	ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout)
	defer cancel()

	selector := client.MatchingLabels{
		"app.kubernetes.io/instance": releaseName,
	}

	var found apps.DeploymentList

	err := rm.K8sClient.List(ctx, &found, client.InNamespace(namespace), selector)
	if err != nil {
		return nil, fmt.Errorf("error getting list of Deployments: %w", err)
	}

	if count := len(found.Items); count != 1 {
		return nil, fmt.Errorf("expected 1 NGF Deployment, got %d", count)
	}

	// Copy the single match so the returned pointer does not reference the list's slice.
	ngf := found.Items[0]

	return &ngf, nil
}

// GetReadyNGFPodNames returns the name(s) of the NGF Pod(s).
func GetReadyNGFPodNames(
k8sClient client.Client,
Expand Down
31 changes: 31 additions & 0 deletions tests/suite/manifests/telemetry/collector-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
mode: deployment
replicaCount: 1
config:
exporters:
debug:
verbosity: detailed
logging: {}
extensions:
health_check: {}
memory_ballast:
size_in_percentage: 40
processors:
batch: {}
memory_limiter:
check_interval: 5s
limit_percentage: 80
spike_limit_percentage: 25
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
service:
extensions:
- health_check
pipelines:
traces:
exporters:
- debug
receivers:
- otlp
24 changes: 18 additions & 6 deletions tests/suite/system_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
k8sRuntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
ctlr "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
Expand Down Expand Up @@ -103,11 +104,15 @@ func setup(cfg setupConfig, extraInstallArgs ...string) {
k8sClient, err = client.New(k8sConfig, options)
Expect(err).ToNot(HaveOccurred())

clientGoClient, err := kubernetes.NewForConfig(k8sConfig)
Expect(err).ToNot(HaveOccurred())

timeoutConfig = framework.DefaultTimeoutConfig()
resourceManager = framework.ResourceManager{
K8sClient: k8sClient,
FS: manifests,
TimeoutConfig: timeoutConfig,
K8sClient: k8sClient,
ClientGoClient: clientGoClient,
FS: manifests,
TimeoutConfig: timeoutConfig,
}

clusterInfo, err = resourceManager.GetClusterInfo()
Expand Down Expand Up @@ -210,26 +215,33 @@ func teardown(relName string) {
)).To(Succeed())
}

var _ = BeforeSuite(func() {
func getDefaultSetupCfg() setupConfig {
_, file, _, _ := runtime.Caller(0)
fileDir := path.Join(path.Dir(file), "../")
basepath := filepath.Dir(fileDir)
localChartPath = filepath.Join(basepath, "deploy/helm-chart")

cfg := setupConfig{
return setupConfig{
releaseName: releaseName,
chartPath: localChartPath,
gwAPIVersion: *gatewayAPIVersion,
deploy: true,
}
}

var _ = BeforeSuite(func() {
cfg := getDefaultSetupCfg()

labelFilter := GinkgoLabelFilter()
cfg.nfr = isNFR(labelFilter)

// Skip deployment if:
// - running upgrade test (this test will deploy its own version)
// - running longevity teardown (deployment will already exist)
if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") {
// - running telemetry test (NGF will be deployed as part of the test)
if strings.Contains(labelFilter, "upgrade") ||
strings.Contains(labelFilter, "longevity-teardown") ||
strings.Contains(labelFilter, "telemetry") {
cfg.deploy = false
}

Expand Down
Loading
Loading