Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pod identity credential integration test #450

Merged
merged 5 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions environment/eksdeploymenttype/eks_deployment_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,20 @@ import "strings"
type EKSDeploymentType string

const (
DAEMON EKSDeploymentType = "DAEMON"
REPLICA EKSDeploymentType = "REPLICA"
SIDECAR EKSDeploymentType = "SIDECAR"
STATEFUL EKSDeploymentType = "STATEFUL"
DAEMON EKSDeploymentType = "DAEMON"
REPLICA EKSDeploymentType = "REPLICA"
SIDECAR EKSDeploymentType = "SIDECAR"
STATEFUL EKSDeploymentType = "STATEFUL"
PODIDENTITY EKSDeploymentType = "PODIDENTITY"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems kinda interesting to put PodIdentity here...it's not necessarily a deployment type but a credential provider. Technically we can have podidentity enabled with any of these deployment options right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. I added here just as an easy way to have a different way of running EKS tests with basics without adding another variable in the class which felt redundant.

)

var (
eksDeploymentTypes = map[string]EKSDeploymentType{
"DAEMON": DAEMON,
"REPLICA": REPLICA,
"SIDECAR": SIDECAR,
"STATEFUL": STATEFUL,
"DAEMON": DAEMON,
"REPLICA": REPLICA,
"SIDECAR": SIDECAR,
"STATEFUL": STATEFUL,
"PODIDENTITY": PODIDENTITY,
}
)

Expand Down
4 changes: 4 additions & 0 deletions generator/test_case_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ var testTypeToTestConfig = map[string][]testConfig{
testDir: "./test/efa", terraformDir: "terraform/eks/daemon/efa",
targets: map[string]map[string]struct{}{"arc": {"amd64": {}}},
},
{
testDir: "./test/metric_value_benchmark", terraformDir: "terraform/eks/daemon/credentials/pod_identity",
targets: map[string]map[string]struct{}{"arc": {"amd64": {}}},
},
},
"eks_deployment": {
{testDir: "./test/metric_value_benchmark"},
Expand Down
324 changes: 324 additions & 0 deletions terraform/eks/daemon/credentials/pod_identity/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

module "common" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just curious -- is there a way we can re-use existing terraform modules from our other eks tests? So that we aren't adding all this terraform code for every new eks test we add

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think should be possible but probably need to refactor some code. Don't think we should do it in this PR since that adds additional code that's not necessarily related to the goal of this PR.

source = "../../../../common"
}

module "basic_components" {
source = "../../../../basic_components"

region = var.region
}

locals {
aws_eks = "aws eks --region ${var.region}"
}

data "aws_eks_cluster_auth" "this" {
name = aws_eks_cluster.this.name
}

resource "aws_eks_cluster" "this" {
name = "cwagent-eks-integ-${module.common.testing_id}"
role_arn = module.basic_components.role_arn
version = var.k8s_version
vpc_config {
subnet_ids = module.basic_components.public_subnet_ids
security_group_ids = [module.basic_components.security_group]
}
}

# EKS Node Groups
resource "aws_eks_node_group" "this" {
cluster_name = aws_eks_cluster.this.name
node_group_name = "cwagent-eks-integ-node-${module.common.testing_id}"
node_role_arn = aws_iam_role.node_role.arn
subnet_ids = module.basic_components.public_subnet_ids

scaling_config {
desired_size = 1
max_size = 1
min_size = 1
}

ami_type = var.ami_type
capacity_type = "ON_DEMAND"
disk_size = 20
instance_types = [var.instance_type]

depends_on = [
aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly,
aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy,
aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy,
aws_iam_role_policy_attachment.pod_CloudWatchAgentServerPolicy
]
}

resource "aws_eks_addon" "pod_identity_addon" {
cluster_name = aws_eks_cluster.this.name
addon_name = "eks-pod-identity-agent"
depends_on = [aws_eks_node_group.this]
}

# EKS Node IAM Role
resource "aws_iam_role" "node_role" {
name = "cwagent-eks-Worker-Role-${module.common.testing_id}"

assume_role_policy = <<POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
POLICY
}

data "aws_iam_policy_document" "pod-identity-policy" {
statement {
effect = "Allow"

principals {
type = "Service"
identifiers = ["pods.eks.amazonaws.com"]
}

actions = [
"sts:AssumeRole",
"sts:TagSession"
]
}
}


resource "aws_iam_role" "pod-identity-role" {
name = "cwagent-eks-pod-identity-role-${module.common.testing_id}"
assume_role_policy = data.aws_iam_policy_document.pod-identity-policy.json
}

resource "aws_iam_role_policy_attachment" "node_AmazonEKSWorkerNodePolicy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
role = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_AmazonEKS_CNI_Policy" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
role = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "node_AmazonEC2ContainerRegistryReadOnly" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
role = aws_iam_role.node_role.name
}

resource "aws_iam_role_policy_attachment" "pod_CloudWatchAgentServerPolicy" {
policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"
role = aws_iam_role.pod-identity-role.name
}

resource "aws_eks_pod_identity_association" "association" {
cluster_name = aws_eks_cluster.this.name
namespace = "amazon-cloudwatch"
service_account = "cloudwatch-agent"
role_arn = aws_iam_role.pod-identity-role.arn
depends_on = [aws_eks_cluster.this]
}

# TODO: these security groups be created once and then reused
# EKS Cluster Security Group
resource "aws_security_group" "eks_cluster_sg" {
name = "cwagent-eks-cluster-sg-${module.common.testing_id}"
description = "Cluster communication with worker nodes"
vpc_id = module.basic_components.vpc_id
}

resource "aws_security_group_rule" "cluster_inbound" {
description = "Allow worker nodes to communicate with the cluster API Server"
from_port = 443
protocol = "tcp"
security_group_id = aws_security_group.eks_cluster_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 443
type = "ingress"
}

resource "aws_security_group_rule" "cluster_outbound" {
description = "Allow cluster API Server to communicate with the worker nodes"
from_port = 1024
protocol = "tcp"
security_group_id = aws_security_group.eks_cluster_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 65535
type = "egress"
}


# EKS Node Security Group
resource "aws_security_group" "eks_nodes_sg" {
name = "cwagent-eks-node-sg-${module.common.testing_id}"
description = "Security group for all nodes in the cluster"
vpc_id = module.basic_components.vpc_id

egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}

resource "aws_security_group_rule" "nodes_internal" {
description = "Allow nodes to communicate with each other"
from_port = 0
protocol = "-1"
security_group_id = aws_security_group.eks_nodes_sg.id
source_security_group_id = aws_security_group.eks_nodes_sg.id
to_port = 65535
type = "ingress"
}

resource "aws_security_group_rule" "nodes_cluster_inbound" {
description = "Allow worker Kubelets and pods to receive communication from the cluster control plane"
from_port = 1025
protocol = "tcp"
security_group_id = aws_security_group.eks_nodes_sg.id
source_security_group_id = aws_security_group.eks_cluster_sg.id
to_port = 65535
type = "ingress"
}

resource "null_resource" "clone_helm_chart" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we are using helm over the eks addon?

Wondering what it is this integ test is testing -- I would assume we would want to install the addon + pod identity enabled + the release artifact of the agent = verify functionality works / calls to cw work

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used helm chart for easier testing. For example, I can check out a new branch from helm chart repo with custom changes, then run terraform on my local with the said changes. Using add-on have the same effect but will be harder to test custom changes.

triggers = {
timestamp = "${timestamp()}" # Forces re-run on every apply
}
provisioner "local-exec" {
command = <<-EOT
if [ ! -d "./helm-charts" ]; then
git clone -b ${var.helm_chart_branch} https://github.com/aws-observability/helm-charts.git ./helm-charts
fi
EOT
}
}

resource "helm_release" "aws_observability" {
name = "amazon-cloudwatch-observability"
chart = "./helm-charts/charts/amazon-cloudwatch-observability"
namespace = "amazon-cloudwatch"
create_namespace = true

set {
name = "clusterName"
value = aws_eks_cluster.this.name
}

set {
name = "region"
value = "us-west-2"
}
depends_on = [
aws_eks_cluster.this,
aws_eks_node_group.this,
null_resource.clone_helm_chart,
aws_eks_addon.pod_identity_addon,
aws_eks_pod_identity_association.association,
]
}

resource "null_resource" "kubectl" {
depends_on = [
aws_eks_cluster.this,
aws_eks_node_group.this,
]
provisioner "local-exec" {
command = <<-EOT
${local.aws_eks} update-kubeconfig --name ${aws_eks_cluster.this.name}
${local.aws_eks} list-clusters --output text
${local.aws_eks} describe-cluster --name ${aws_eks_cluster.this.name} --output text
EOT
}
}

resource "null_resource" "update_image" {
depends_on = [helm_release.aws_observability, null_resource.kubectl]
triggers = {
timestamp = "${timestamp()}" # Forces re-run on every apply
}
provisioner "local-exec" {
command = <<-EOT
kubectl -n amazon-cloudwatch patch AmazonCloudWatchAgent cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${var.cwagent_image_repo}:${var.cwagent_image_tag}"}]'
kubectl set image deployment/amazon-cloudwatch-observability-controller-manager -n amazon-cloudwatch manager=public.ecr.aws/cloudwatch-agent/cloudwatch-agent-operator:latest
kubectl set image daemonset/fluent-bit -n amazon-cloudwatch fluent-bit=506463145083.dkr.ecr.us-west-2.amazonaws.com/fluent-bit-test:latest
sleep 10
EOT
}
}

resource "kubernetes_pod" "log_generator" {
depends_on = [aws_eks_node_group.this]
metadata {
name = "log-generator"
namespace = "default"
}

spec {
container {
name = "log-generator"
image = "busybox"

# Run shell script that generate a log line every second
command = ["/bin/sh", "-c"]
args = ["while true; do echo \"Log entry at $(date)\"; sleep 1; done"]
}
restart_policy = "Always"
}
}

# Get the single instance ID of the node in the node group
data "aws_instances" "eks_node" {
depends_on = [
aws_eks_node_group.this
]
filter {
name = "tag:eks:nodegroup-name"
values = [aws_eks_node_group.this.node_group_name]
}
}

# Retrieve details of the single instance to get private DNS
data "aws_instance" "eks_node_detail" {
depends_on = [
data.aws_instances.eks_node
]
instance_id = data.aws_instances.eks_node.ids[0]
}

resource "null_resource" "validator" {
depends_on = [
aws_eks_node_group.this,
aws_eks_addon.pod_identity_addon,
helm_release.aws_observability,
null_resource.update_image,
kubernetes_pod.log_generator,
]

triggers = {
always_run = timestamp()
}

provisioner "local-exec" {
command = <<-EOT
echo "Validating CloudWatch Agent and FluentBit with pod identity credential"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

technically...we are testing against cloudwatch agent, not fluentbit. Or are we pulling the latest fluentbit version in this test?

And if we are...I don't think we should block our agent release if fluentbit fails.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in new commit

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/aws/amazon-cloudwatch-agent/actions/runs/12952146544 new integration test run. random surprise that all EKS tests passing

Alt Text

cd ../../../../..
go test ./test/metric_value_benchmark -timeout 1h -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=PODIDENTITY -instanceId=${data.aws_instance.eks_node_detail.instance_id} &&
go test ./test/fluent -eksClusterName=${aws_eks_cluster.this.name} -computeType=EKS -v -eksDeploymentStrategy=DAEMON
EOT
}
}

29 changes: 29 additions & 0 deletions terraform/eks/daemon/credentials/pod_identity/providers.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: MIT

provider "aws" {
region = var.region
}

provider "kubernetes" {
exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
args = ["eks", "get-token", "--cluster-name", aws_eks_cluster.this.name]
}
host = aws_eks_cluster.this.endpoint
cluster_ca_certificate = base64decode(aws_eks_cluster.this.certificate_authority.0.data)
token = data.aws_eks_cluster_auth.this.token
}

provider "helm" {
kubernetes {
host = aws_eks_cluster.this.endpoint
cluster_ca_certificate = base64decode(aws_eks_cluster.this.certificate_authority.0.data)
exec {
api_version = "client.authentication.k8s.io/v1beta1"
args = ["eks", "get-token", "--cluster-name", aws_eks_cluster.this.name]
command = "aws"
}
}
}
Loading
Loading