From efeeb8809e58a71e9d688ed259afbf7589c3e642 Mon Sep 17 00:00:00 2001 From: Kirill Sibirev Date: Tue, 16 Apr 2024 15:25:47 +0200 Subject: [PATCH] Add UpdateStrategy support --- controllers/sync.go | 266 ++++++++++++++++++++++++++++++++++++--- pkg/apiproxy/ytsaurus.go | 8 +- 2 files changed, 254 insertions(+), 20 deletions(-) diff --git a/controllers/sync.go b/controllers/sync.go index 00129eed..d8d3fc85 100644 --- a/controllers/sync.go +++ b/controllers/sync.go @@ -2,6 +2,7 @@ package controllers import ( "context" + "fmt" "time" "github.com/ytsaurus/yt-k8s-operator/pkg/components" @@ -14,7 +15,7 @@ import ( apiProxy "github.com/ytsaurus/yt-k8s-operator/pkg/apiproxy" ) -func (r *YtsaurusReconciler) handleUpdatingStateFullMode( +func (r *YtsaurusReconciler) handleFullStrategy( ctx context.Context, ytsaurus *apiProxy.Ytsaurus, componentManager *ComponentManager, @@ -160,7 +161,7 @@ func (r *YtsaurusReconciler) handleUpdatingStateFullMode( return nil, nil } -func (r *YtsaurusReconciler) handleUpdatingStateLocalMode( +func (r *YtsaurusReconciler) handleStatelessStrategy( ctx context.Context, ytsaurus *apiProxy.Ytsaurus, componentManager *ComponentManager, @@ -232,6 +233,159 @@ func (r *YtsaurusReconciler) handleUpdatingStateLocalMode( return nil, nil } +func (r *YtsaurusReconciler) handleMasterOnlyStrategy( + ctx context.Context, + ytsaurus *apiProxy.Ytsaurus, + componentManager *ComponentManager, +) (*ctrl.Result, error) { + resource := ytsaurus.GetResource() + + switch resource.Status.UpdateStatus.State { + case ytv1.UpdateStateNone: + ytsaurus.LogUpdate(ctx, "Checking the possibility of updating") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStatePossibilityCheck) + return &ctrl.Result{Requeue: true}, err + + case ytv1.UpdateStatePossibilityCheck: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionHasPossibility) { + ytsaurus.LogUpdate(ctx, "Waiting for safe mode enabled") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSafeModeEnabled) 
+ return &ctrl.Result{Requeue: true}, err + } else if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionNoPossibility) { + ytsaurus.LogUpdate(ctx, "Update is impossible, need to apply previous images") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateImpossibleToStart) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateImpossibleToStart: + if !componentManager.needSync() || !ytsaurus.GetResource().Spec.EnableFullUpdate { + ytsaurus.LogUpdate(ctx, "Spec changed back or full update isn't enabled, update is canceling") + err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateCancelUpdate) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForSafeModeEnabled: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSafeModeEnabled) { + ytsaurus.LogUpdate(ctx, "Waiting for tablet cells saving") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSnapshots) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForSnapshots: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSnaphotsSaved) { + ytsaurus.LogUpdate(ctx, "Waiting for pods removal") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsRemoval) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForPodsRemoval: + if componentManager.arePodsRemoved() { + ytsaurus.LogUpdate(ctx, "Waiting for pods creation") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsCreation) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForPodsCreation: + if componentManager.allReadyOrUpdating() { + ytsaurus.LogUpdate(ctx, "All components were recreated") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForMasterExitReadOnly) + return &ctrl.Result{RequeueAfter: time.Second * 7}, err + } + + case ytv1.UpdateStateWaitingForMasterExitReadOnly: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionMasterExitedReadOnly) { + 
ytsaurus.LogUpdate(ctx, "Masters exited read-only state") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSafeModeDisabled) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForSafeModeDisabled: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSafeModeDisabled) { + ytsaurus.LogUpdate(ctx, "Finishing") + err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateUpdateFinishing) + return &ctrl.Result{Requeue: true}, err + } + } + return nil, nil +} + +func (r *YtsaurusReconciler) handleTabletNodesOnlyStrategy( + ctx context.Context, + ytsaurus *apiProxy.Ytsaurus, + componentManager *ComponentManager, +) (*ctrl.Result, error) { + resource := ytsaurus.GetResource() + + switch resource.Status.UpdateStatus.State { + case ytv1.UpdateStateNone: + ytsaurus.LogUpdate(ctx, "Checking the possibility of updating") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStatePossibilityCheck) + return &ctrl.Result{Requeue: true}, err + + case ytv1.UpdateStatePossibilityCheck: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionHasPossibility) { + ytsaurus.LogUpdate(ctx, "Waiting for safe mode enabled") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsSaving) + return &ctrl.Result{Requeue: true}, err + } else if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionNoPossibility) { + ytsaurus.LogUpdate(ctx, "Update is impossible, need to apply previous images") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateImpossibleToStart) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateImpossibleToStart: + if !componentManager.needSync() || !ytsaurus.GetResource().Spec.EnableFullUpdate { + ytsaurus.LogUpdate(ctx, "Spec changed back or full update isn't enabled, update is canceling") + err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateCancelUpdate) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForTabletCellsSaving: + if 
ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsSaved) { + ytsaurus.LogUpdate(ctx, "Waiting for tablet cells removing to start") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRemovingStart) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForTabletCellsRemovingStart: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRemovingStarted) { + ytsaurus.LogUpdate(ctx, "Waiting for tablet cells removing to finish") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRemoved) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForTabletCellsRemoved: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRemoved) { + ytsaurus.LogUpdate(ctx, "Waiting for snapshots") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsRemoval) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForPodsRemoval: + if componentManager.arePodsRemoved() { + ytsaurus.LogUpdate(ctx, "Waiting for pods creation") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsCreation) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForPodsCreation: + if componentManager.allReadyOrUpdating() { + ytsaurus.LogUpdate(ctx, "All components were recreated") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRecovery) + return &ctrl.Result{RequeueAfter: time.Second * 7}, err + } + + case ytv1.UpdateStateWaitingForTabletCellsRecovery: + if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRecovered) { + ytsaurus.LogUpdate(ctx, "Finishing") + err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateUpdateFinishing) + return &ctrl.Result{Requeue: true}, err + } + } + + return nil, nil +} + func getComponentNames(components []components.Component) []string { if components == nil { return nil @@ -243,35 +397,97 @@ func getComponentNames(components 
[]components.Component) []string { return names } +type updateMeta struct { + strategy ytv1.UpdateStrategy + componentNames []string +} + // chooseUpdateStrategy considers spec decides if operator should proceed with update or block. // Block is indicated with non-empty blockMsg. -// Component names which are chosen for update are return in names slice. -// Nil names slice means "full update". -func chooseUpdateStrategy(spec ytv1.YtsaurusSpec, needUpdate []components.Component) (names []string, blockMsg string) { +// If the update is not blocked, an updateMeta with the chosen strategy and the names of the components to update is returned. +func chooseUpdateStrategy(spec ytv1.YtsaurusSpec, needUpdate []components.Component) (meta updateMeta, blockMsg string) { isFullUpdateEnabled := spec.EnableFullUpdate + configuredStrategy := spec.UpdateStrategy masterNeedsUpdate := false tabletNodesNeedUpdate := false + statelessNeedUpdate := false + var masterNames []string + var tabletNodeNames []string + var statelessNames []string for _, comp := range needUpdate { if comp.GetType() == consts.MasterType { masterNeedsUpdate = true + masterNames = append(masterNames, comp.GetName()) continue } if comp.GetType() == consts.TabletNodeType { tabletNodesNeedUpdate = true + tabletNodeNames = append(tabletNodeNames, comp.GetName()) continue } + statelessNames = append(statelessNames, comp.GetName()) + statelessNeedUpdate = true } statefulNeedUpdate := masterNeedsUpdate || tabletNodesNeedUpdate - if statefulNeedUpdate { - if isFullUpdateEnabled { - return nil, "" - } else { - return nil, "Full update is not allowed by enableFullUpdate field, ignoring it" + allNamesNeedingUpdate := getComponentNames(needUpdate) + + // Fallback to EnableFullUpdate field. 
+ if configuredStrategy == ytv1.UpdateStrategyNone { + if statefulNeedUpdate { + if isFullUpdateEnabled { + return updateMeta{strategy: ytv1.UpdateStrategyFull, componentNames: nil}, "" + } else { + return updateMeta{strategy: "", componentNames: nil}, "Full update is not allowed by enableFullUpdate field, ignoring it" + } } + return updateMeta{strategy: ytv1.UpdateStrategyStatelessOnly, componentNames: allNamesNeedingUpdate}, "" + } + + switch configuredStrategy { + case ytv1.UpdateStrategyBlocked: + return updateMeta{}, "All updates are blocked by updateStrategy field." + case ytv1.UpdateStrategyFull: + if statefulNeedUpdate { + return updateMeta{ + strategy: ytv1.UpdateStrategyFull, + componentNames: nil, + }, "" + } else { + return updateMeta{ + strategy: ytv1.UpdateStrategyStatelessOnly, + componentNames: allNamesNeedingUpdate, + }, "" + } + case ytv1.UpdateStrategyMasterOnly: + if !masterNeedsUpdate { + return updateMeta{}, "Only Master update is allowed by updateStrategy, but it doesn't need update" + } + return updateMeta{ + strategy: ytv1.UpdateStrategyMasterOnly, + componentNames: masterNames, + }, "" + case ytv1.UpdateStrategyTabletNodesOnly: + if !tabletNodesNeedUpdate { + return updateMeta{}, "Only Tablet nodes update is allowed by updateStrategy, but they don't need update" + } + return updateMeta{ + strategy: ytv1.UpdateStrategyTabletNodesOnly, + componentNames: tabletNodeNames, + }, "" + case ytv1.UpdateStrategyStatelessOnly: + if !statelessNeedUpdate { + return updateMeta{}, "Only stateless components update is allowed by configuration, but they don't need update" + } + return updateMeta{ + strategy: ytv1.UpdateStrategyStatelessOnly, + componentNames: statelessNames, + }, "" + default: + // TODO: just validate it in hook + return updateMeta{}, fmt.Sprintf("Unexpected update strategy %s", configuredStrategy) } - return getComponentNames(needUpdate), "" } func (r *YtsaurusReconciler) Sync(ctx context.Context, resource *ytv1.Ytsaurus) (ctrl.Result, 
error) { @@ -314,23 +530,35 @@ func (r *YtsaurusReconciler) Sync(ctx context.Context, resource *ytv1.Ytsaurus) return ctrl.Result{Requeue: true}, err case componentManager.needUpdate() != nil: - componentNames, blockMsg := chooseUpdateStrategy(ytsaurus.GetResource().Spec, componentManager.needUpdate()) + meta, blockMsg := chooseUpdateStrategy(ytsaurus.GetResource().Spec, componentManager.needUpdate()) if blockMsg != "" { logger.Info(blockMsg) return ctrl.Result{}, nil } - logger.Info("Ytsaurus needs components update", "components", componentNames) - err := ytsaurus.SaveUpdatingClusterState(ctx, componentNames) - return ctrl.Result{Requeue: true}, err + logger.Info("Ytsaurus needs components update", + "components", meta.componentNames, + "strategy", meta.strategy, + ) + err = ytsaurus.SaveUpdatingClusterState(ctx, meta.strategy, meta.componentNames) + if err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{Requeue: true}, nil } case ytv1.ClusterStateUpdating: var result *ctrl.Result var err error - if ytsaurus.GetLocalUpdatingComponents() != nil { - result, err = r.handleUpdatingStateLocalMode(ctx, ytsaurus, componentManager) - } else { - result, err = r.handleUpdatingStateFullMode(ctx, ytsaurus, componentManager) + + switch ytsaurus.GetUpdateStrategy() { + case ytv1.UpdateStrategyFull: + result, err = r.handleFullStrategy(ctx, ytsaurus, componentManager) + case ytv1.UpdateStrategyStatelessOnly: + result, err = r.handleStatelessStrategy(ctx, ytsaurus, componentManager) + case ytv1.UpdateStrategyMasterOnly: + result, err = r.handleMasterOnlyStrategy(ctx, ytsaurus, componentManager) + case ytv1.UpdateStrategyTabletNodesOnly: + result, err = r.handleTabletNodesOnlyStrategy(ctx, ytsaurus, componentManager) } if result != nil { diff --git a/pkg/apiproxy/ytsaurus.go b/pkg/apiproxy/ytsaurus.go index 933f7cb3..7b828a00 100644 --- a/pkg/apiproxy/ytsaurus.go +++ b/pkg/apiproxy/ytsaurus.go @@ -58,6 +58,10 @@ func (c *Ytsaurus) GetLocalUpdatingComponents() 
[]string { return c.ytsaurus.Status.UpdateStatus.Components } +func (c *Ytsaurus) GetUpdateStrategy() ytv1.UpdateStrategy { + return c.ytsaurus.Status.UpdateStatus.Strategy +} + func (c *Ytsaurus) IsUpdateStatusConditionTrue(condition string) bool { return meta.IsStatusConditionTrue(c.ytsaurus.Status.UpdateStatus.Conditions, condition) } @@ -73,6 +77,7 @@ func (c *Ytsaurus) ClearUpdateStatus(ctx context.Context) error { c.ytsaurus.Status.UpdateStatus.TabletCellBundles = make([]ytv1.TabletCellBundleInfo, 0) c.ytsaurus.Status.UpdateStatus.MasterMonitoringPaths = make([]string, 0) c.ytsaurus.Status.UpdateStatus.Components = nil + c.ytsaurus.Status.UpdateStatus.Strategy = ytv1.UpdateStrategyNone return c.apiProxy.UpdateStatus(ctx) } @@ -82,9 +87,10 @@ func (c *Ytsaurus) LogUpdate(ctx context.Context, message string) { logger.Info(fmt.Sprintf("Ytsaurus update: %s", message)) } -func (c *Ytsaurus) SaveUpdatingClusterState(ctx context.Context, components []string) error { +func (c *Ytsaurus) SaveUpdatingClusterState(ctx context.Context, strategy ytv1.UpdateStrategy, components []string) error { logger := log.FromContext(ctx) c.ytsaurus.Status.State = ytv1.ClusterStateUpdating + c.ytsaurus.Status.UpdateStatus.Strategy = strategy c.ytsaurus.Status.UpdateStatus.Components = components if err := c.apiProxy.UpdateStatus(ctx); err != nil {