From a38f75972e84a23f9b063844ebc5d610cebe1713 Mon Sep 17 00:00:00 2001 From: Rafael Fonseca Date: Mon, 12 Aug 2024 17:04:15 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9Belbv2:=20wait=20for=20LB=20active?= =?UTF-8?q?=20state=20instead=20of=20resolving=20DNS=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using DNS name resolution as a way to check the load balancer is working can cause problems that are dependent on the host running CAPA. In some systems, the DNS resolution can fail with very large TTLs cached DNS responses, causing very long provisioning times. Instead of DNS resolution, let's use the AWS API to check for the load balancer "active" state. Waiting for resolvable DNS names should be left for the clients to do. --- controllers/awscluster_controller.go | 8 ------ .../awscluster_controller_unit_test.go | 25 ------------------- controllers/helpers_test.go | 3 +++ pkg/cloud/services/elb/loadbalancer.go | 11 ++++++++ pkg/cloud/services/elb/loadbalancer_test.go | 13 ++++++++++ 5 files changed, 27 insertions(+), 33 deletions(-) diff --git a/controllers/awscluster_controller.go b/controllers/awscluster_controller.go index 13db38000a..709c7ff411 100644 --- a/controllers/awscluster_controller.go +++ b/controllers/awscluster_controller.go @@ -19,7 +19,6 @@ package controllers import ( "context" "fmt" - "net" "time" "github.com/google/go-cmp/cmp" @@ -288,13 +287,6 @@ func (r *AWSClusterReconciler) reconcileLoadBalancer(clusterScope *scope.Cluster return &retryAfterDuration, nil } - clusterScope.Debug("Looking up IP address for DNS", "dns", awsCluster.Status.Network.APIServerELB.DNSName) - if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil { - clusterScope.Error(err, "failed to get IP address for dns name", "dns", awsCluster.Status.Network.APIServerELB.DNSName) - conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "") - clusterScope.Info("Waiting on API server ELB DNS name to resolve") - return &retryAfterDuration, nil - } conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition) awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{ diff --git a/controllers/awscluster_controller_unit_test.go b/controllers/awscluster_controller_unit_test.go index e282d2bf79..be59c0a678 100644 --- a/controllers/awscluster_controller_unit_test.go +++ b/controllers/awscluster_controller_unit_test.go @@ -395,31 +395,6 @@ func TestAWSClusterReconcileOperations(t *testing.T) { g.Expect(err).To(BeNil()) expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameReason}}) }) - t.Run("Should fail AWSCluster create with LoadBalancer reconcile failure with WaitForDNSNameResolve condition as false", func(t *testing.T) { - g := NewWithT(t) - awsCluster := getAWSCluster("test", "test") - runningCluster := func() { - networkSvc.EXPECT().ReconcileNetwork().Return(nil) - sgSvc.EXPECT().ReconcileSecurityGroups().Return(nil) - ec2Svc.EXPECT().ReconcileBastion().Return(nil) - elbSvc.EXPECT().ReconcileLoadbalancers().Return(nil) - } - csClient := setup(t, &awsCluster) - defer teardown() - runningCluster() - cs, err := scope.NewClusterScope( - scope.ClusterScopeParams{ - Client: csClient, - Cluster: &clusterv1.Cluster{}, - AWSCluster: &awsCluster, - }, - ) - awsCluster.Status.Network.APIServerELB.DNSName = "test-apiserver.us-east-1.aws" - g.Expect(err).To(BeNil()) - _, err = reconciler.reconcileNormal(cs) - g.Expect(err).To(BeNil()) - expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameResolveReason}}) - }) }) }) t.Run("Reconcile delete AWSCluster", func(t *testing.T) { diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 09d4402af8..72be2f58b1 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -291,6 +291,9 @@ func mockedCreateLBV2Calls(t *testing.T, m *mocks.MockELBV2APIMockRecorder) { LoadBalancerArn: lbArn, SecurityGroups: aws.StringSlice([]string{"sg-apiserver-lb"}), })).MaxTimes(1) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: []*string{lbArn}, + })).MaxTimes(1) } func mockedDescribeTargetGroupsCall(t *testing.T, m *mocks.MockELBV2APIMockRecorder) { diff --git a/pkg/cloud/services/elb/loadbalancer.go b/pkg/cloud/services/elb/loadbalancer.go index c7beb578d4..9d02347f8b 100644 --- a/pkg/cloud/services/elb/loadbalancer.go +++ b/pkg/cloud/services/elb/loadbalancer.go @@ -118,6 +118,17 @@ func (s *Service) reconcileV2LB(lbSpec *infrav1.AWSLoadBalancerSpec) error { return err } + wReq := &elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{lb.ARN}), + } + s.scope.Debug("Waiting for LB to become active", "api-server-lb-name", lb.Name) + waitStart := time.Now() + if err := s.ELBV2Client.WaitUntilLoadBalancerAvailableWithContext(context.TODO(), wReq); err != nil { + s.scope.Error(err, "failed to wait for LB to become available", "time", time.Since(waitStart)) + return err + } + s.scope.Debug("LB reports active state", "api-server-lb-name", lb.Name, "time", time.Since(waitStart)) + // set up the type for later processing lb.LoadBalancerType = lbSpec.LoadBalancerType if lb.IsManaged(s.scope.Name()) { diff --git a/pkg/cloud/services/elb/loadbalancer_test.go b/pkg/cloud/services/elb/loadbalancer_test.go index f2b4b1dbbe..212192bf13 100644 --- a/pkg/cloud/services/elb/loadbalancer_test.go +++ b/pkg/cloud/services/elb/loadbalancer_test.go @@ -2275,6 +2275,9 @@ func TestReconcileV2LB(t *testing.T) { }, nil, ) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) }, check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) { t.Helper() @@ -2476,6 +2479,10 @@ func TestReconcileV2LB(t *testing.T) { LoadBalancerArn: aws.String(elbArn), Subnets: []*string{}, }).Return(&elbv2.SetSubnetsOutput{}, nil) + + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) }, check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) { t.Helper() @@ -2658,6 +2665,12 @@ func TestReconcileLoadbalancers(t *testing.T) { }, nil, ) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{secondElbArn}), + })).Return(nil) }, check: func(t *testing.T, firstLB *infrav1.LoadBalancer, secondLB *infrav1.LoadBalancer, err error) { t.Helper()