From d7bf1b52d15a2d86feb5b1e96da7635b46dd53dc Mon Sep 17 00:00:00 2001 From: Kirill Sibirev Date: Thu, 9 Jan 2025 15:35:37 +0100 Subject: [PATCH] Restart Enable chunk locations job --- api/v1/ytsaurus_types.go | 1 + controllers/sync.go | 9 +++++ controllers/ytsaurus_local_test.go | 15 +++---- pkg/components/master.go | 63 ++++++++++++++++++++++++------ pkg/consts/conditions.go | 2 + 5 files changed, 72 insertions(+), 18 deletions(-) diff --git a/api/v1/ytsaurus_types.go b/api/v1/ytsaurus_types.go index 5333844c..8a836ad0 100644 --- a/api/v1/ytsaurus_types.go +++ b/api/v1/ytsaurus_types.go @@ -710,6 +710,7 @@ const ( UpdateStateWaitingForPodsRemoval UpdateState = "WaitingForPodsRemoval" UpdateStateWaitingForPodsCreation UpdateState = "WaitingForPodsCreation" UpdateStateWaitingForMasterExitReadOnly UpdateState = "WaitingForMasterExitReadOnly" + UpdateStateWaitingForEnableRealChunkLocations UpdateState = "WaitingForEnableRealChunkLocations" UpdateStateWaitingForTabletCellsRecovery UpdateState = "WaitingForTabletCellsRecovery" UpdateStateWaitingForOpArchiveUpdatingPrepare UpdateState = "WaitingForOpArchiveUpdatingPrepare" UpdateStateWaitingForOpArchiveUpdate UpdateState = "WaitingForOpArchiveUpdate" diff --git a/controllers/sync.go b/controllers/sync.go index 0928151c..0a21a0f2 100644 --- a/controllers/sync.go +++ b/controllers/sync.go @@ -71,6 +71,15 @@ func (r *YtsaurusReconciler) handleEverything( case ytv1.UpdateStateWaitingForTabletCellsRemoved: if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRemoved) { ytsaurus.LogUpdate(ctx, "Waiting for snapshots") + err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForEnableRealChunkLocations) + return &ctrl.Result{Requeue: true}, err + } + + case ytv1.UpdateStateWaitingForEnableRealChunkLocations: + // This stage may also be added to MasterOnly flow, but it makes sense only if + // data nodes are re-registered in master after this job, so I've added it only here. 
+ if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionRealChunkLocationsEnabled) { + ytsaurus.LogUpdate(ctx, "Real chunk locations enabled") err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSnapshots) return &ctrl.Result{Requeue: true}, err } diff --git a/controllers/ytsaurus_local_test.go b/controllers/ytsaurus_local_test.go index debf5cc9..a57ca948 100644 --- a/controllers/ytsaurus_local_test.go +++ b/controllers/ytsaurus_local_test.go @@ -38,16 +38,17 @@ func prepareTest(t *testing.T, namespace string) *testutil.TestHelper { return h } -func waitClusterState(h *testutil.TestHelper, expectedState ytv1.ClusterState) { +func waitClusterState(h *testutil.TestHelper, expectedState ytv1.ClusterState, expectedObservedGeneration int64) { h.Logf("[ Wait for YTsaurus %s state ]", expectedState) testutil.FetchAndCheckEventually( h, ytsaurusName, &ytv1.Ytsaurus{}, - fmt.Sprintf("cluster state is %s", expectedState), + fmt.Sprintf("cluster state is %s, gen is %d", expectedState, expectedObservedGeneration), func(obj client.Object) bool { state := obj.(*ytv1.Ytsaurus).Status.State - return state == expectedState + observedGen := obj.(*ytv1.Ytsaurus).Status.ObservedGeneration + return state == expectedState && observedGen == expectedObservedGeneration }, ) } @@ -107,7 +108,7 @@ func TestYtsaurusFromScratch(t *testing.T) { return len(secret.Data["YT_TOKEN"]) != 0 }, ) - waitClusterState(h, ytv1.ClusterStateRunning) + waitClusterState(h, ytv1.ClusterStateRunning, ytsaurusResource.Generation) } func TestYtsaurusUpdateStatelessComponent(t *testing.T) { @@ -123,7 +124,7 @@ func TestYtsaurusUpdateStatelessComponent(t *testing.T) { ytsaurusResource.Spec.DataNodes[0].MinReadyInstanceCount = ptr.To(0) testutil.DeployObject(h, &ytsaurusResource) - waitClusterState(h, ytv1.ClusterStateRunning) + waitClusterState(h, ytv1.ClusterStateRunning, ytsaurusResource.Generation) imageUpdated := testYtsaurusImage + "-updated" ytsaurusResource.Spec.Discovery.Image = &imageUpdated 
@@ -131,7 +132,7 @@ func TestYtsaurusUpdateStatelessComponent(t *testing.T) { ytsaurusResource.Spec.EnableFullUpdate = false testutil.UpdateObject(h, &ytv1.Ytsaurus{}, &ytsaurusResource) - waitClusterState(h, ytv1.ClusterStateRunning) + waitClusterState(h, ytv1.ClusterStateRunning, ytsaurusResource.Generation) sts := appsv1.StatefulSet{} testutil.GetObject(h, "ds", &sts) @@ -151,7 +152,7 @@ func TestYtsaurusUpdateMasterBlocked(t *testing.T) { ytsaurusResource.Spec.DataNodes[0].MinReadyInstanceCount = ptr.To(0) testutil.DeployObject(h, &ytsaurusResource) - waitClusterState(h, ytv1.ClusterStateRunning) + waitClusterState(h, ytv1.ClusterStateRunning, ytsaurusResource.Generation) imageUpdated := testYtsaurusImage + "-updated" ytsaurusResource.Spec.PrimaryMasters.Image = &imageUpdated diff --git a/pkg/components/master.go b/pkg/components/master.go index 04682369..819ccbe3 100644 --- a/pkg/components/master.go +++ b/pkg/components/master.go @@ -320,12 +320,14 @@ func (m *Master) doSync(ctx context.Context, dry bool) (ComponentStatus, error) if m.ytsaurus.GetClusterState() == ytv1.ClusterStateUpdating { if m.ytsaurus.GetUpdateState() == ytv1.UpdateStateWaitingForMasterExitReadOnly { - st, err := m.exitReadOnly(ctx, dry) - return *st, err + return m.exitReadOnly(ctx, dry) } if status, err := handleUpdatingClusterState(ctx, m.ytsaurus, m, &m.localComponent, m.server, dry); status != nil { return *status, err } + if m.ytsaurus.GetUpdateState() == ytv1.UpdateStateWaitingForEnableRealChunkLocations { + return m.restartEnableRealChunksJob(ctx, dry) + } } if m.NeedSync() { @@ -339,7 +341,7 @@ func (m *Master) doSync(ctx context.Context, dry bool) (ComponentStatus, error) return WaitingStatus(SyncStatusBlocked, "pods"), err } - return m.runPostSyncJobs(ctx, dry) + return m.runInitPhaseJobs(ctx, dry) } func (m *Master) Status(ctx context.Context) (ComponentStatus, error) { @@ -374,26 +376,25 @@ func (m *Master) getHostAddressLabel() string { return defaultHostAddressLabel } -func 
(m *Master) exitReadOnly(ctx context.Context, dry bool) (*ComponentStatus, error) { +func (m *Master) exitReadOnly(ctx context.Context, dry bool) (ComponentStatus, error) { if !m.ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionMasterExitReadOnlyPrepared) { if !m.exitReadOnlyJob.isRestartPrepared() { if err := m.exitReadOnlyJob.prepareRestart(ctx, dry); err != nil { - return ptr.To(SimpleStatus(SyncStatusUpdating)), err + return SimpleStatus(SyncStatusUpdating), err } } if !dry { m.setMasterReadOnlyExitPrepared(ctx, metav1.ConditionTrue) } - return ptr.To(SimpleStatus(SyncStatusUpdating)), nil + return SimpleStatus(SyncStatusUpdating), nil } if !m.exitReadOnlyJob.IsCompleted() { if !dry { m.exitReadOnlyJob.SetInitScript(m.createExitReadOnlyScript()) } - status, err := m.exitReadOnlyJob.Sync(ctx, dry) - return &status, err + return m.exitReadOnlyJob.Sync(ctx, dry) } if !dry { @@ -405,7 +406,7 @@ func (m *Master) exitReadOnly(ctx context.Context, dry bool) (*ComponentStatus, }) m.setMasterReadOnlyExitPrepared(ctx, metav1.ConditionFalse) } - return ptr.To(SimpleStatus(SyncStatusUpdating)), nil + return SimpleStatus(SyncStatusUpdating), nil } func (m *Master) setMasterReadOnlyExitPrepared(ctx context.Context, status metav1.ConditionStatus) { @@ -417,7 +418,7 @@ func (m *Master) setMasterReadOnlyExitPrepared(ctx context.Context, status metav }) } -func (m *Master) runPostSyncJobs(ctx context.Context, dry bool) (ComponentStatus, error) { +func (m *Master) runInitPhaseJobs(ctx context.Context, dry bool) (ComponentStatus, error) { st, err := m.runMasterInitJob(ctx, dry) if err != nil { return ComponentStatus{}, err @@ -432,6 +433,7 @@ func (m *Master) runPostSyncJobs(ctx context.Context, dry bool) (ComponentStatus return st, nil } +// runMasterInitJob launches job only once in an Initialization phase. 
func (m *Master) runMasterInitJob(ctx context.Context, dry bool) (ComponentStatus, error) { if !dry { m.initJob.SetInitScript(m.createInitScript()) @@ -439,10 +441,49 @@ func (m *Master) runMasterInitJob(ctx context.Context, dry bool) (ComponentStatu return m.initJob.Sync(ctx, dry) } +// runEnableRealChunksJob launches the job in the Initialization and Updating phases. func (m *Master) runEnableRealChunksJob(ctx context.Context, dry bool) (ComponentStatus, error) { - // TODO: prepare restart, etc if !dry { m.enableRealChunksJob.SetInitScript(m.createEnableRealChunksScript()) } return m.enableRealChunksJob.Sync(ctx, dry) } + +func (m *Master) restartEnableRealChunksJob(ctx context.Context, dry bool) (ComponentStatus, error) { + if !m.ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionRealChunkLocationsEnablePrepared) { + if !dry { + if !m.enableRealChunksJob.isRestartPrepared() { + if err := m.enableRealChunksJob.prepareRestart(ctx, dry); err != nil { + return ComponentStatus{}, err + } + } + m.ytsaurus.SetUpdateStatusCondition(ctx, metav1.Condition{ + Type: consts.ConditionRealChunkLocationsEnablePrepared, + Status: metav1.ConditionTrue, + Reason: "RealChunkLocationsEnablePrepared", + Message: "Enable real chunk locations job prepared to restart", + }) + } + return WaitingStatus(SyncStatusPending, "reconciliation"), nil + } + + if !m.enableRealChunksJob.IsCompleted() { + return m.runEnableRealChunksJob(ctx, dry) + } + + if !dry { + m.ytsaurus.SetUpdateStatusCondition(ctx, metav1.Condition{ + Type: consts.ConditionRealChunkLocationsEnabled, + Status: metav1.ConditionTrue, + Reason: "RealChunkLocationsEnabled", + Message: "Enable real-chunk locations job is finished", + }) + m.ytsaurus.SetUpdateStatusCondition(ctx, metav1.Condition{ + Type: consts.ConditionRealChunkLocationsEnablePrepared, + Status: metav1.ConditionFalse, + Reason: "RealChunkLocationsEnablePrepared", + Message: "Enable real chunk locations job preparation reset after completion", + }) + } + return 
WaitingStatus(SyncStatusPending, "reconciliation"), nil +} diff --git a/pkg/consts/conditions.go b/pkg/consts/conditions.go index 377e51f2..31593c94 100644 --- a/pkg/consts/conditions.go +++ b/pkg/consts/conditions.go @@ -19,4 +19,6 @@ const ConditionYqlaUpdated = "YqlaUpdated" const ConditionYqlaPreparedForUpdating = "YqlaPreparedForUpdating" const ConditionMasterExitReadOnlyPrepared = "MasterExitReadOnlyPrepared" const ConditionMasterExitedReadOnly = "MasterExitedReadOnly" +const ConditionRealChunkLocationsEnablePrepared = "RealChunkLocationsEnablePrepared" +const ConditionRealChunkLocationsEnabled = "RealChunkLocationsEnabled" const ConditionSafeModeDisabled = "SafeModeDisabled"