diff --git a/.github/workflows/deps.yml b/.github/workflows/deps.yml index cbd82b64c9a..8612ad4068f 100644 --- a/.github/workflows/deps.yml +++ b/.github/workflows/deps.yml @@ -26,14 +26,14 @@ jobs: - id: govulncheck uses: ./.github/actions/govulncheck with: - go-version-input: 1.21.5 + go-version-input: 1.21.6 go-version-file: go.mod cache: false repo-checkout: false - id: govulncheck-tests-agent uses: ./.github/actions/govulncheck with: - go-version-input: 1.21.5 + go-version-input: 1.21.6 go-version-file: test/agent/go.mod cache: false repo-checkout: false diff --git a/Makefile b/Makefile index e18ab3b26c6..f94cf95aeda 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ # VERSION is the source revision that executables and images are built from. VERSION ?= $(shell git describe --tags --always --dirty || echo "unknown") # GOLANG_IMAGE is the building golang container image used. -GOLANG_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/golang:1.21.5-6-gcc-al2 +GOLANG_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/golang:1.21.6-7-gcc-al2 # BASE_IMAGE_CNI is the base layer image for the primary AWS VPC CNI plugin container BASE_IMAGE_CNI ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-iptables:latest.2 # BASE_IMAGE_CNI_INIT is the base layer image for the AWS VPC CNI init container diff --git a/README.md b/README.md index 5dedb76b7b1..1eafcf0e7d0 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ See [here](./docs/iam-policy.md) for required IAM policies. * `make` defaults to `make build-linux` that builds the Linux binaries. * `unit-test`, `format`,`lint` and `vet` provide ways to run the respective tests/tools and should be run before submitting a PR. -* `make docker` will create a docker container using the docker-build with the finished binaries, with a tag of `amazon/amazon-k8s-cni:latest` -* `make docker-build` uses a docker container (golang:1.16) to build the binaries. -* `make docker-unit-tests` uses a docker container (golang:1.16) to run all unit tests. +* `make docker` will create a docker container using `docker buildx` that contains the finished binaries, with a tag of `amazon/amazon-k8s-cni:latest` +* `make docker-unit-tests` uses a docker container to run all unit tests. +* builds for all build and test actions run in docker containers based on `golang:1.21.5-6-gcc-al2` unless a different `GOLANG_IMAGE` tag is passed in. ## Components diff --git a/charts/aws-vpc-cni/README.md b/charts/aws-vpc-cni/README.md index 3ef9a6dddd7..776ccb02230 100644 --- a/charts/aws-vpc-cni/README.md +++ b/charts/aws-vpc-cni/README.md @@ -122,6 +122,11 @@ for kind in daemonSet clusterRole clusterRoleBinding serviceAccount; do kubectl -n kube-system annotate --overwrite $kind aws-node meta.helm.sh/release-namespace=kube-system kubectl -n kube-system label --overwrite $kind aws-node app.kubernetes.io/managed-by=Helm done + +kubectl -n kube-system annotate --overwrite configmap amazon-vpc-cni meta.helm.sh/release-name=YOUR_HELM_RELEASE_NAME_HERE +kubectl -n kube-system annotate --overwrite configmap amazon-vpc-cni meta.helm.sh/release-namespace=kube-system +kubectl -n kube-system label --overwrite configmap amazon-vpc-cni app.kubernetes.io/managed-by=Helm + ``` ## Migrate from Helm v2 to Helm v3 diff --git a/charts/cni-metrics-helper/README.md b/charts/cni-metrics-helper/README.md index b79d9c3e06c..da05538c6bc 100644 --- a/charts/cni-metrics-helper/README.md +++ b/charts/cni-metrics-helper/README.md @@ -59,6 +59,7 @@ The following table lists the configurable parameters for this chart and their d | serviceAccount.name | The name of the ServiceAccount to use | nil | | serviceAccount.create | Specifies whether a ServiceAccount should be created | true | | serviceAccount.annotations | Specifies the annotations for ServiceAccount | {} | +| podAnnotations | Specifies the annotations for pods | {} | | revisionHistoryLimit | The number of revisions to keep | 10 | | podSecurityContext | SecurityContext to set on the pod | {} | | containerSecurityContext | SecurityContext to set on the container | {} | diff --git a/charts/cni-metrics-helper/templates/deployment.yaml b/charts/cni-metrics-helper/templates/deployment.yaml index 70f75d4e202..adadf2bf7d4 100644 --- a/charts/cni-metrics-helper/templates/deployment.yaml +++ b/charts/cni-metrics-helper/templates/deployment.yaml @@ -12,6 +12,12 @@ spec: k8s-app: cni-metrics-helper template: metadata: + {{- if .Values.podAnnotations }} + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} labels: k8s-app: cni-metrics-helper spec: diff --git a/charts/cni-metrics-helper/values.yaml b/charts/cni-metrics-helper/values.yaml index 644b2d94e6a..7a90e1c57fc 100644 --- a/charts/cni-metrics-helper/values.yaml +++ b/charts/cni-metrics-helper/values.yaml @@ -34,3 +34,5 @@ revisionHistoryLimit: 10 podSecurityContext: {} containerSecurityContext: {} + +podAnnotations: {} diff --git a/misc/10-aws.conflist b/misc/10-aws.conflist index 6304f9d887a..4a549899184 100644 --- a/misc/10-aws.conflist +++ b/misc/10-aws.conflist @@ -1,5 +1,5 @@ { - "cniVersion": "1.0.0", + "cniVersion": "0.4.0", "name": "aws-cni", "disableCheck": true, "plugins": [ diff --git a/scripts/run-static-canary.sh b/scripts/run-static-canary.sh index 59621474fd7..e70b23fb9f8 100755 --- a/scripts/run-static-canary.sh +++ b/scripts/run-static-canary.sh @@ -29,6 +29,7 @@ function run_ginkgo_test() { --ng-name-label-key="kubernetes.io/os" \ --ng-name-label-val="linux" \ --test-image-registry=$TEST_IMAGE_REGISTRY \ + --publish-cw-metrics=true \ $ENDPOINT_OPTION) } diff --git a/test/agent/Dockerfile b/test/agent/Dockerfile index 82ddfa2daf7..4b828713f6e 100644 --- a/test/agent/Dockerfile +++ b/test/agent/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/eks-distro-build-tooling/golang:1.21.4-5-gcc-al2 as builder +FROM public.ecr.aws/eks-distro-build-tooling/golang:1.21.6-7-gcc-al2 as builder WORKDIR /workspace ENV GOPROXY direct diff --git a/test/framework/options.go b/test/framework/options.go index 058bcb381eb..d99eda6c2db 100644 --- a/test/framework/options.go +++ b/test/framework/options.go @@ -48,6 +48,7 @@ type Options struct { PublicRouteTableID string NgK8SVersion string TestImageRegistry string + PublishCWMetrics bool } func (options *Options) BindFlags() { @@ -72,6 +73,7 @@ func (options *Options) BindFlags() { flag.StringVar(&options.PublicRouteTableID, "public-route-table-id", "", "Public route table ID (optional, if specified you must specify all of public/private-subnets, public-route-table-id, and availability-zones)") flag.StringVar(&options.NgK8SVersion, "ng-kubernetes-version", "1.25", `Kubernetes version for self-managed node groups (optional, default is "1.25")`) flag.StringVar(&options.TestImageRegistry, "test-image-registry", "617930562442.dkr.ecr.us-west-2.amazonaws.com", `AWS registry where the e2e test images are stored`) + flag.BoolVar(&options.PublishCWMetrics, "publish-cw-metrics", false, "Option to publish cloudwatch metrics from the test.") } func (options *Options) Validate() error { diff --git a/test/framework/resources/aws/services/cloudwatch.go b/test/framework/resources/aws/services/cloudwatch.go index 297baf9904f..9680ad1bb4f 100644 --- a/test/framework/resources/aws/services/cloudwatch.go +++ b/test/framework/resources/aws/services/cloudwatch.go @@ -21,6 +21,7 @@ import ( type CloudWatch interface { GetMetricStatistics(getMetricStatisticsInput *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) + PutMetricData(input *cloudwatch.PutMetricDataInput) (*cloudwatch.PutMetricDataOutput, error) } type defaultCloudWatch struct { @@ -36,3 +37,7 @@ func NewCloudWatch(session *session.Session) CloudWatch { func (d *defaultCloudWatch) GetMetricStatistics(getMetricStatisticsInput *cloudwatch.GetMetricStatisticsInput) (*cloudwatch.GetMetricStatisticsOutput, error) { return d.CloudWatchAPI.GetMetricStatistics(getMetricStatisticsInput) } + +func (d *defaultCloudWatch) PutMetricData(input *cloudwatch.PutMetricDataInput) (*cloudwatch.PutMetricDataOutput, error) { + return d.CloudWatchAPI.PutMetricData(input) +} diff --git a/test/framework/resources/aws/services/ec2.go b/test/framework/resources/aws/services/ec2.go index 3a5d3ff1f53..4c7f7a4d00b 100644 --- a/test/framework/resources/aws/services/ec2.go +++ b/test/framework/resources/aws/services/ec2.go @@ -47,6 +47,8 @@ type EC2 interface { DeleteKey(keyName string) error DescribeKey(keyName string) (*ec2.DescribeKeyPairsOutput, error) ModifyNetworkInterfaceSecurityGroups(securityGroupIds []*string, networkInterfaceId *string) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) + + DescribeAvailabilityZones() (*ec2.DescribeAvailabilityZonesOutput, error) } type defaultEC2 struct { @@ -67,6 +69,11 @@ func (d *defaultEC2) DescribeInstanceType(instanceType string) ([]*ec2.InstanceT return describeInstanceOp.InstanceTypes, nil } +func (d *defaultEC2) DescribeAvailabilityZones() (*ec2.DescribeAvailabilityZonesOutput, error) { + describeAvailabilityZonesInput := &ec2.DescribeAvailabilityZonesInput{} + return d.EC2API.DescribeAvailabilityZones(describeAvailabilityZonesInput) +} + func (d *defaultEC2) ModifyNetworkInterfaceSecurityGroups(securityGroupIds []*string, networkInterfaceId *string) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) { return d.EC2API.ModifyNetworkInterfaceAttribute(&ec2.ModifyNetworkInterfaceAttributeInput{ NetworkInterfaceId: networkInterfaceId, diff --git a/test/integration/cni/pod_traffic_across_az_test.go b/test/integration/cni/pod_traffic_across_az_test.go index 753744a55fe..175769d9f00 100644 --- a/test/integration/cni/pod_traffic_across_az_test.go +++ b/test/integration/cni/pod_traffic_across_az_test.go @@ -4,6 +4,9 @@ import ( "fmt" "strconv" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/cloudwatch" + "github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest" "github.com/aws/amazon-vpc-cni-k8s/test/framework/utils" "github.com/aws/amazon-vpc-cni-k8s/test/integration/common" @@ -19,6 +22,8 @@ var ( retries = 3 ) +const MetricNamespace = "NetworkingAZConnectivity" + // Tests pod networking across AZs. It similar to pod connectivity test, but launches a daemonset, so that // there is a pod on each node across AZs. It then tests connectivity between pods on different nodes across AZs. var _ = Describe("[STATIC_CANARY] test pod networking", FlakeAttempts(retries), func() { @@ -50,6 +55,9 @@ var _ = Describe("[STATIC_CANARY] test pod networking", FlakeAttempts(retries), // Map of AZ name, string to pod of testDaemonSet azToTestPod map[string]coreV1.Pod + + // Map of AZ name, string to AZ ID for the account. + azToazID map[string]string ) JustBeforeEach(func() { @@ -88,7 +96,7 @@ var _ = Describe("[STATIC_CANARY] test pod networking", FlakeAttempts(retries), Expect(err).ToNot(HaveOccurred()) - azToTestPod = GetAZtoPod(nodes) + azToTestPod, azToazID = GetAZMappings(nodes) }) JustAfterEach(func() { @@ -128,14 +136,26 @@ var _ = Describe("[STATIC_CANARY] test pod networking", FlakeAttempts(retries), }) It("Should allow TCP traffic across AZs.", func() { - CheckConnectivityBetweenPods(azToTestPod, serverPort, testerExpectedStdOut, testerExpectedStdErr, testConnectionCommandFunc) + CheckConnectivityBetweenPods(azToTestPod, azToazID, serverPort, testerExpectedStdOut, testerExpectedStdErr, testConnectionCommandFunc) }) }) }) -func GetAZtoPod(nodes coreV1.NodeList) map[string]coreV1.Pod { +// Functio to Az to Pod mapping and Az to AZ ID mapping +func GetAZMappings(nodes coreV1.NodeList) (map[string]coreV1.Pod, map[string]string) { // Map of AZ name to Pod from Daemonset running on nodes azToPod := make(map[string]coreV1.Pod) + // Map of AZ name to AZ ID + azToazID := make(map[string]string) + + describeAZOutput, err := f.CloudServices.EC2().DescribeAvailabilityZones() + + if err != nil { + // Don't fail the test if we can't describe AZs. The failure will be caught by the test + // We use describe AZs to get the AZ ID for metrics. + fmt.Println("Error while describing AZs", err) + } + for i := range nodes.Items { // node label key "topology.kubernetes.io/zone" is well known label populated by cloud controller manager // guaranteed to be present and represent the AZ name @@ -149,18 +169,21 @@ func GetAZtoPod(nodes coreV1.NodeList) map[string]coreV1.Pod { if len(interfaceToPodList.PodsOnPrimaryENI) > 0 { azToPod[azName] = interfaceToPodList.PodsOnPrimaryENI[0] } + + azToazID[azName] = *describeAZOutput.AvailabilityZones[i].ZoneId } - return azToPod + return azToPod, azToazID } -var _ = Describe("[STATIC_CANARY2] API Server Connectivity from AZs", FlakeAttempts(retries), func() { +var _ = Describe("[STATIC_CANARY] API Server Connectivity from AZs", FlakeAttempts(retries), func() { var ( err error testDaemonSet *v1.DaemonSet // Map of AZ name to Pod of testDaemonSet running on nodes - azToPod map[string]coreV1.Pod + azToPod map[string]coreV1.Pod + azToazID map[string]string ) JustBeforeEach(func() { @@ -190,7 +213,8 @@ var _ = Describe("[STATIC_CANARY2] API Server Connectivity from AZs", FlakeAttem nodes, err := f.K8sResourceManagers.NodeManager().GetNodes(f.Options.NgNameLabelKey, f.Options.NgNameLabelVal) Expect(err).ToNot(HaveOccurred()) - azToPod = GetAZtoPod(nodes) + azToPod, azToazID = GetAZMappings(nodes) + }) JustAfterEach(func() { @@ -208,21 +232,22 @@ var _ = Describe("[STATIC_CANARY2] API Server Connectivity from AZs", FlakeAttem APIServerNLBEndpoint := fmt.Sprintf("%s/api", *describeClusterOutput.Cluster.Endpoint) APIServerInternalEndpoint := "https://kubernetes.default.svc/api" - CheckAPIServerConnectivityFromPods(azToPod, APIServerInternalEndpoint) + CheckAPIServerConnectivityFromPods(azToPod, azToazID, APIServerInternalEndpoint) - CheckAPIServerConnectivityFromPods(azToPod, APIServerNLBEndpoint) + CheckAPIServerConnectivityFromPods(azToPod, azToazID, APIServerNLBEndpoint) }) }) }) -func CheckAPIServerConnectivityFromPods(azToPod map[string]coreV1.Pod, api_server_url string) { +func CheckAPIServerConnectivityFromPods(azToPod map[string]coreV1.Pod, azToazId map[string]string, api_server_url string) { // Standard paths for SA token, CA cert and API Server URL token_path := "/var/run/secrets/kubernetes.io/serviceaccount/token" cacert := "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + const MetricName = "APIServerConnectivity" for az := range azToPod { - fmt.Printf("Testing API Server %s Connectivity from AZ %s \n", api_server_url, az) + fmt.Printf("Testing API Server %s Connectivity from AZ %s AZID %s \n", api_server_url, az, azToazId[az]) sa_token := []string{"cat", token_path} token_value, _, err := RunCommandOnPod(azToPod[az], sa_token) @@ -240,19 +265,63 @@ func CheckAPIServerConnectivityFromPods(azToPod map[string]coreV1.Pod, api_serve Expect(api_server_stdout).ToNot(BeEmpty()) Expect(api_server_stdout).To(ContainSubstring("APIVersions")) fmt.Printf("API Server %s Connectivity from AZ %s was successful.\n", api_server_url, az) + + if f.Options.PublishCWMetrics { + putmetricData := cloudwatch.PutMetricDataInput{ + Namespace: aws.String(MetricNamespace), + MetricData: []*cloudwatch.MetricDatum{ + { + MetricName: aws.String(MetricName), + Unit: aws.String("Count"), + Value: aws.Float64(1), + Dimensions: []*cloudwatch.Dimension{ + { + Name: aws.String("AZID"), + Value: aws.String(azToazId[az]), + }, + }, + }, + }, + } + + _, err = f.CloudServices.CloudWatch().PutMetricData(&putmetricData) + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error while putting metric data for API Server Connectivity from %s", az)) + } } } -func CheckConnectivityBetweenPods(azToPod map[string]coreV1.Pod, port int, testerExpectedStdOut string, testerExpectedStdErr string, getTestCommandFunc func(serverPod coreV1.Pod, port int) []string) { +func CheckConnectivityBetweenPods(azToPod map[string]coreV1.Pod, azToazId map[string]string, port int, testerExpectedStdOut string, testerExpectedStdErr string, getTestCommandFunc func(serverPod coreV1.Pod, port int) []string) { + const MetricName = "InterAZConnectivity" By("checking connection on same node, primary to primary") for az1 := range azToPod { for az2 := range azToPod { if az1 != az2 { - fmt.Printf("Testing Connectivity from Pod IP1 %s (%s) to Pod IP2 %s (%s) \n", - azToPod[az1].Status.PodIP, az1, azToPod[az2].Status.PodIP, az2) + fmt.Printf("Testing Connectivity from Pod IP1 %s (%s, %s) to Pod IP2 %s (%s, %s) \n", + azToPod[az1].Status.PodIP, az1, azToazId[az1], azToPod[az2].Status.PodIP, az2, azToazId[az2]) testConnectivity(azToPod[az1], azToPod[az2], testerExpectedStdOut, testerExpectedStdErr, port, getTestCommandFunc) + + if f.Options.PublishCWMetrics { + putmetricData := cloudwatch.PutMetricDataInput{ + Namespace: aws.String(MetricNamespace), + MetricData: []*cloudwatch.MetricDatum{ + { + MetricName: aws.String(MetricName), + Unit: aws.String("Count"), + Value: aws.Float64(1), + Dimensions: []*cloudwatch.Dimension{ + { + Name: aws.String("AZID"), + Value: aws.String(azToazId[az1]), + }, + }, + }, + }, + } + _, err := f.CloudServices.CloudWatch().PutMetricData(&putmetricData) + Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("Error while putting metric data for API Server Connectivity from %s", azToazId[az1])) + } } } } diff --git a/testdata/deploy-130-pods.yaml b/testdata/deploy-130-pods.yaml index 48537f46eb5..7d6c86bc14e 100644 --- a/testdata/deploy-130-pods.yaml +++ b/testdata/deploy-130-pods.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: hello - image: registry.k8s.io/pause:latest + image: public.ecr.aws/eks-distro/kubernetes/pause:3.9 ports: - name: http containerPort: 80 diff --git a/testdata/deploy-5000-pods.yaml b/testdata/deploy-5000-pods.yaml index 5534fa5965c..a5929079aa7 100644 --- a/testdata/deploy-5000-pods.yaml +++ b/testdata/deploy-5000-pods.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: hello - image: registry.k8s.io/pause:latest + image: public.ecr.aws/eks-distro/kubernetes/pause:3.9 ports: - name: http containerPort: 80 diff --git a/testdata/deploy-730-pods.yaml b/testdata/deploy-730-pods.yaml index 0d9d1d9a832..74bdcb07917 100644 --- a/testdata/deploy-730-pods.yaml +++ b/testdata/deploy-730-pods.yaml @@ -17,7 +17,7 @@ spec: spec: containers: - name: hello - image: registry.k8s.io/pause:latest + image: public.ecr.aws/eks-distro/kubernetes/pause:3.9 ports: - name: http containerPort: 80