From 5b70b1e43f8db81ff38f2f721ed132009d5e7567 Mon Sep 17 00:00:00 2001 From: Tom Pantelis Date: Thu, 17 Oct 2024 11:51:17 -0400 Subject: [PATCH] Adjust OVN TransitSwitchIP initialization We've observed E2E failures on route agent restart that was caused by the addition of the TransitSwitchIP refactoring. On code inspection, there's a potential timing issue related to initialization of the TransitSwitchIP. The Init method is called by NonGatewayRouteHandler.Init however the NewNonGatewayRouteController is started before that in Handler.Init so, depending on timing, nonGatewayRouteCreatedOrUpdated could get invoked prior to initialization of the TransitSwitchIP, in which case it would observe an empty IP. To avoid this, move the TransitSwitchIP.Init call to Handler.Init prior to starting the NewNonGatewayRouteController. Signed-off-by: Tom Pantelis --- pkg/routeagent_driver/handlers/ovn/handler.go | 7 ++++++- pkg/routeagent_driver/handlers/ovn/handler_test.go | 1 - .../handlers/ovn/non_gateway_route_handler.go | 7 ++----- .../handlers/ovn/non_gateway_route_handler_test.go | 4 +++- pkg/routeagent_driver/main.go | 2 +- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pkg/routeagent_driver/handlers/ovn/handler.go b/pkg/routeagent_driver/handlers/ovn/handler.go index 9e9097dff..2e1de1cad 100644 --- a/pkg/routeagent_driver/handlers/ovn/handler.go +++ b/pkg/routeagent_driver/handlers/ovn/handler.go @@ -51,7 +51,7 @@ type HandlerConfig struct { DynClient dynamic.Interface WatcherConfig *watcher.Config NewOVSDBClient NewOVSDBClientFn - TransitSwitchIP TransitSwitchIPGetter + TransitSwitchIP TransitSwitchIP } type Handler struct { @@ -114,6 +114,11 @@ func (ovn *Handler) Init() error { return errors.Wrapf(err, "error getting connection handler to connect to OvnDB") } + err = ovn.TransitSwitchIP.Init(ovn.K8sClient) + if err != nil { + return errors.Wrap(err, "error initializing TransitSwitchIP") + } + gatewayRouteController, err := NewGatewayRouteController(*ovn.WatcherConfig, connectionHandler, ovn.Namespace) if err != nil { return err diff --git a/pkg/routeagent_driver/handlers/ovn/handler_test.go b/pkg/routeagent_driver/handlers/ovn/handler_test.go index 4565fb17e..fbaa0b840 100644 --- a/pkg/routeagent_driver/handlers/ovn/handler_test.go +++ b/pkg/routeagent_driver/handlers/ovn/handler_test.go @@ -102,7 +102,6 @@ var _ = Describe("Handler", func() { })) Expect(ovsdbClient.Connected()).To(BeTrue()) - Expect(transitSwitchIP.Init(t.k8sClient)).To(Succeed()) }) When("a remote Endpoint is created, updated, and deleted", func() { diff --git a/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler.go b/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler.go index 24992cee9..884bf95dc 100644 --- a/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler.go +++ b/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler.go @@ -31,29 +31,26 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" ) type NonGatewayRouteHandler struct { event.HandlerBase event.NodeHandlerBase smClient submarinerClientset.Interface - k8sClient kubernetes.Interface transitSwitchIP TransitSwitchIP } -func NewNonGatewayRouteHandler(smClient submarinerClientset.Interface, k8sClient kubernetes.Interface, transitSwitchIP TransitSwitchIP, +func NewNonGatewayRouteHandler(smClient submarinerClientset.Interface, transitSwitchIP TransitSwitchIP, ) *NonGatewayRouteHandler { return &NonGatewayRouteHandler{ smClient: smClient, - k8sClient: k8sClient, transitSwitchIP: transitSwitchIP, } } func (h *NonGatewayRouteHandler) Init() error { logger.Info("Starting NonGatewayRouteHandler") - return errors.Wrap(h.transitSwitchIP.Init(h.k8sClient), "error initializing TransitSwitchIP") + return nil } func (h *NonGatewayRouteHandler) GetName() string { diff --git a/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler_test.go b/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler_test.go index d88ab5d60..081359989 100644 --- a/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler_test.go +++ b/pkg/routeagent_driver/handlers/ovn/non_gateway_route_handler_test.go @@ -38,7 +38,9 @@ var _ = Describe("NonGatewayRouteHandler", func() { t := newTestDriver() JustBeforeEach(func() { - t.Start(ovn.NewNonGatewayRouteHandler(t.submClient, t.k8sClient, ovn.NewTransitSwitchIP())) + tsIP := ovn.NewTransitSwitchIP() + t.Start(ovn.NewNonGatewayRouteHandler(t.submClient, tsIP)) + Expect(tsIP.Init(t.k8sClient)).To(Succeed()) }) awaitNonGatewayRoute := func(ep *submarinerv1.Endpoint) { diff --git a/pkg/routeagent_driver/main.go b/pkg/routeagent_driver/main.go index 58a937034..583b44df7 100644 --- a/pkg/routeagent_driver/main.go +++ b/pkg/routeagent_driver/main.go @@ -156,7 +156,7 @@ func main() { TransitSwitchIP: transitSwitchIP, }), ovn.NewGatewayRouteHandler(smClientset), - ovn.NewNonGatewayRouteHandler(smClientset, k8sClientSet, transitSwitchIP), + ovn.NewNonGatewayRouteHandler(smClientset, transitSwitchIP), cabledriver.NewXRFMCleanupHandler(), cabledriver.NewVXLANCleanup(), mtu.NewMTUHandler(env.ClusterCidr, len(env.GlobalCidr) != 0, getTCPMssValue(localNode)),