From 28bc998df53a3a968fc9b165cfff810034624238 Mon Sep 17 00:00:00 2001 From: Daniil Komarevtsev Date: Mon, 13 Jan 2025 13:26:40 +0000 Subject: [PATCH 1/2] Add devices to pending cleanup only when necessary --- .../disk_registry/disk_registry_state.cpp | 86 ++++---- .../disk_registry/disk_registry_state.h | 4 + .../disk_registry/disk_registry_state_ut.cpp | 192 ++++++++++++++++++ 3 files changed, 247 insertions(+), 35 deletions(-) diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index 51714d32e53..f0768a102e7 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -749,7 +749,7 @@ void TDiskRegistryState::ProcessDirtyDevices(TVector dirtyDevices) { for (auto&& [uuid, diskId]: dirtyDevices) { if (!diskId.empty()) { - auto error = PendingCleanup.Insert(diskId, std::move(uuid)); + auto error = AddDevicesToPendingCleanup(diskId, {std::move(uuid)}); if (HasError(error)) { ReportDiskRegistryInsertToPendingCleanupFailed( TStringBuilder() @@ -1310,6 +1310,15 @@ NProto::TError TDiskRegistryState::ReplaceDeviceWithoutDiskStateUpdate( << " since it is in pending cleanup for disk " << cleaningDiskId); } + + if (IsDirtyDevice(deviceReplacementId)) { + return MakeError( + E_ARGUMENT, + TStringBuilder() + << "can't allocate specific device " + << deviceReplacementId.Quote() << " for disk " << diskId + << " since it is dirty"); + } } const ui64 logicalBlockCount = devicePtr->GetBlockSize() * devicePtr->GetBlocksCount() @@ -1414,7 +1423,7 @@ NProto::TError TDiskRegistryState::ReplaceDeviceWithoutDiskStateUpdate( UpdatePlacementGroup(db, diskId, disk, "ReplaceDevice"); UpdateAndReallocateDisk(db, diskId, disk); - error = PendingCleanup.Insert(diskId, deviceId); + error = AddDevicesToPendingCleanup(diskId, {deviceId}); if (HasError(error)) { ReportDiskRegistryInsertToPendingCleanupFailed( TStringBuilder() << "An error occurred while replacing device: " @@ -2808,10 +2817,12 @@ NProto::TError TDiskRegistryState::DeallocateDisk( for (const auto& affectedDiskId: affectedDisks) { Y_DEBUG_ABORT_UNLESS(affectedDiskId.StartsWith(diskId + "/")); - error = PendingCleanup.Insert( + error = AddDevicesToPendingCleanup( diskId, - DeallocateSimpleDisk(db, affectedDiskId, "DeallocateDisk:Replica") - ); + DeallocateSimpleDisk( + db, + affectedDiskId, + "DeallocateDisk:Replica")); if (HasError(error)) { ReportDiskRegistryInsertToPendingCleanupFailed( TStringBuilder() @@ -2827,37 +2838,15 @@ NProto::TError TDiskRegistryState::DeallocateDisk( } auto dirtyDevices = DeallocateSimpleDisk(db, diskId, *disk); - TVector devicesAllowedToBeCleaned; - devicesAllowedToBeCleaned.reserve(dirtyDevices.size()); - for (const auto& uuid: dirtyDevices) { - if (CanSecureErase(uuid)) { - devicesAllowedToBeCleaned.push_back(uuid); - } - } - - if (devicesAllowedToBeCleaned.empty()) { - // Devices are not going to be secure erased if they aren't allowed to. - // Don't create a PendingCleanup entry. - return MakeError( - S_ALREADY, - TStringBuilder() << "Disk " << diskId << " devices [" - << JoinSeq(",", dirtyDevices) - << "] are not going to be secure erased."); + auto error = AddDevicesToPendingCleanup(diskId, std::move(dirtyDevices)); + if (SUCCEEDED(error.GetCode())) { + // NOTE: We must pass S_ALREADY to the higher-level code. It is used for + // sync deallocations. + return error; } - - STORAGE_DEBUG( - "Disk %s is deallocated. Adding devices [%s] to pending cleanup", - diskId.c_str(), - JoinSeq(", ", devicesAllowedToBeCleaned).c_str()); - - auto error = - PendingCleanup.Insert(diskId, std::move(devicesAllowedToBeCleaned)); - if (HasError(error)) { ReportDiskRegistryInsertToPendingCleanupFailed( TStringBuilder() << "An error occurred while deallocating disk: " << FormatError(error)); - } - return {}; } @@ -4826,7 +4815,7 @@ void TDiskRegistryState::RemoveFinishedMigrations( DeviceList.ReleaseDevice(m.DeviceId); db.UpdateDirtyDevice(m.DeviceId, diskId); - auto error = PendingCleanup.Insert(diskId, m.DeviceId); + auto error = AddDevicesToPendingCleanup(diskId, {m.DeviceId}); if (HasError(error)) { ReportDiskRegistryInsertToPendingCleanupFailed( TStringBuilder() @@ -6774,7 +6763,7 @@ NProto::TError TDiskRegistryState::AllocateDiskReplicas( for (ui32 i = 0; i < count; ++i) { const size_t idx = masterDisk->ReplicaCount + i + 1; - auto error = PendingCleanup.Insert( + auto error = AddDevicesToPendingCleanup( masterDiskId, DeallocateSimpleDisk( db, @@ -6819,6 +6808,33 @@ NProto::TError TDiskRegistryState::AllocateDiskReplicas( return {}; } +NProto::TError TDiskRegistryState::AddDevicesToPendingCleanup( + const TString& diskId, + TVector uuids) +{ + TVector devicesAllowedToBeCleaned; + devicesAllowedToBeCleaned.reserve(uuids.size()); + for (auto& uuid: uuids) { + if (CanSecureErase(uuid)) { + devicesAllowedToBeCleaned.push_back(std::move(uuid)); + } + } + if (devicesAllowedToBeCleaned.empty()) { + // Devices are not going to be secure erased if they aren't allowed to. + // Don't create a PendingCleanup entry. + return MakeError( + S_ALREADY, + TStringBuilder() + << "Disk " << diskId << " devices [" << JoinSeq(", ", uuids) + << "] are not going to be secure erased."); + } + + STORAGE_DEBUG( + "Adding devices [%s] to pending cleanup", + JoinSeq(", ", devicesAllowedToBeCleaned).c_str()); + return PendingCleanup.Insert(diskId, std::move(devicesAllowedToBeCleaned)); +} + NProto::TError TDiskRegistryState::DeallocateDiskReplicas( TDiskRegistryDatabase& db, const TDiskId& masterDiskId, @@ -6854,7 +6870,7 @@ NProto::TError TDiskRegistryState::DeallocateDiskReplicas( for (size_t i = masterDisk->ReplicaCount; i >= newReplicaCount + 1; --i) { const auto replicaDiskId = GetReplicaDiskId(masterDiskId, i); - auto error = PendingCleanup.Insert( + auto error = AddDevicesToPendingCleanup( masterDiskId, DeallocateSimpleDisk(db, replicaDiskId, "DeallocateDiskReplicas")); if (HasError(error)) { diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h index 3b78b8786fb..249906cb29f 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h @@ -1165,6 +1165,10 @@ class TDiskRegistryState TDiskRegistryDatabase& db, const TString& diskId); + NProto::TError AddDevicesToPendingCleanup( + const TString& diskId, + TVector uuids); + /// Try to update configuration of selected device and its agent /// in the disk registry database /// @return true if the device updates successfully; otherwise, return false diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp index c52dec73eda..c6871eee036 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp @@ -11509,6 +11509,198 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) {}); } + Y_UNIT_TEST(ShouldAddToPendingCleanupOnlyWhenNecessary) + { + const auto agent1 = AgentConfig( + 1, + { + Device("dev-1", "uuid-1.1", "rack-1"), + Device("dev-1", "uuid-1.2", "rack-1"), + Device("dev-1", "uuid-1.3", "rack-1"), + Device("dev-1", "uuid-1.4", "rack-1"), + }); + + const auto agent2 = AgentConfig( + 2, + { + Device("dev-1", "uuid-2.1", "rack-2"), + Device("dev-2", "uuid-2.2", "rack-2"), + Device("dev-3", "uuid-2.3", "rack-2"), + Device("dev-4", "uuid-2.4", "rack-2"), + }); + + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + TDiskRegistryState state = + TDiskRegistryStateBuilder() + .WithKnownAgents({agent1, agent2}) + .WithDisks( + {Disk("disk-1", {"uuid-1.1", "uuid-1.2"}), + Disk("disk-2", {"uuid-2.1", "uuid-2.2"})}) + .Build(); + + auto moveDeviceToState = + [&](TString deviceId, NProto::EDeviceState deviceState) + { + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + TString affectedDisk; + const auto error = state.UpdateDeviceState( + db, + deviceId, + deviceState, + Now(), + "test", + affectedDisk); + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + }); + }; + + auto replaceDevice = + [&](TString diskId, auto fromDevId, auto toDevId, auto manual) + { + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + bool updated = false; + const auto error = state.ReplaceDevice( + db, + diskId, + fromDevId, + toDevId, + Now(), + "", // message + manual, + &updated); + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + }); + }; + + auto checkDevices = [&](TString diskId, + TVector diskDevices, + TVector dirtyDevices, + TVector autoReplacedDevices) + { + TVector devices; + const auto error = state.GetDiskDevices(diskId, devices); + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + UNIT_ASSERT_VALUES_EQUAL(diskDevices.size(), devices.size()); + for (ui32 i = 0; i < diskDevices.size(); i++) { + UNIT_ASSERT_EQUAL( + NProto::DEVICE_STATE_ONLINE, + devices[i].GetState()); + UNIT_ASSERT_VALUES_EQUAL( + diskDevices[i], + devices[i].GetDeviceUUID()); + } + + TDiskInfo info; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo(diskId, info)); + // device replacement list should be empty for non-mirrored disks + ASSERT_VECTORS_EQUAL(TVector{}, info.DeviceReplacementIds); + + auto stateDirtyDevices = state.GetDirtyDevices(); + UNIT_ASSERT_VALUES_EQUAL( + dirtyDevices.size(), + stateDirtyDevices.size()); + Sort(stateDirtyDevices, TByDeviceUUID()); + for (ui32 i = 0; i < dirtyDevices.size(); i++) { + UNIT_ASSERT_VALUES_EQUAL( + dirtyDevices[i], + stateDirtyDevices[i].GetDeviceUUID()); + } + + TVector stateAutoReplacedDevices( + state.GetAutomaticallyReplacedDevices().begin(), + state.GetAutomaticallyReplacedDevices().end()); + UNIT_ASSERT_VALUES_EQUAL( + autoReplacedDevices.size(), + stateAutoReplacedDevices.size()); + SortBy( + stateAutoReplacedDevices, + [](auto& x) { return x.DeviceId; }); + for (ui32 i = 0; i < autoReplacedDevices.size(); i++) { + UNIT_ASSERT_VALUES_EQUAL( + autoReplacedDevices[i], + stateAutoReplacedDevices[i].DeviceId); + } + }; + + // Automatic replacements shouldn't be be added to pending cleanup. + replaceDevice("disk-1", "uuid-1.1", "uuid-1.3", false /* manual */); + checkDevices( + "disk-1", + {"uuid-1.3", "uuid-1.2"}, + {"uuid-1.1"}, + {"uuid-1.1"}); + // Device is broken. The pending cleanup is empty. + UNIT_ASSERT(!state.HasPendingCleanup("disk-1")); + + // Manual replacements also does not add to pending cleanup since the + // source device state is changing to ERROR. + replaceDevice("disk-1", "uuid-1.2", "uuid-1.4", true /* manual */); + UNIT_ASSERT(!state.HasPendingCleanup("disk-1")); + checkDevices( + "disk-1", + {"uuid-1.3", "uuid-1.4"}, + {"uuid-1.1", "uuid-1.2"}, + {"uuid-1.1"}); + + moveDeviceToState("uuid-2.1", NProto::DEVICE_STATE_WARNING); + + // Start a migration. + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + const auto migrations = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(1, migrations.size()); + + UNIT_ASSERT_VALUES_EQUAL("disk-2", migrations[0].DiskId); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-2.1", + migrations[0].SourceDeviceId); + + auto r = + state.StartDeviceMigration(Now(), db, "disk-2", "uuid-2.1"); + UNIT_ASSERT_SUCCESS(r.GetError()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-2.3", + r.GetResult().GetDeviceUUID()); + }); + UNIT_ASSERT(!state.HasPendingCleanup("disk-2")); + + // Finish the migration. + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + bool updated = false; + const auto error = state.FinishDeviceMigration( + db, + "disk-2", + "uuid-2.1", + "uuid-2.3", + Now(), + &updated); + + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + UNIT_ASSERT(updated); + }); + UNIT_ASSERT(!state.HasPendingCleanup("disk-2")); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto disksToReallocate = state.GetDisksToReallocate(); + UNIT_ASSERT_VALUES_EQUAL(2, disksToReallocate.size()); + for (auto& [diskId, seqNo]: disksToReallocate) { + state.DeleteDiskToReallocate(db, diskId, seqNo); + } + }); + UNIT_ASSERT(state.HasPendingCleanup("disk-2")); + } + Y_UNIT_TEST(ShouldPreserveDeviceErrorState) { const TString errorMessage = "broken device"; From f5e79f00e0e8a9a8a6b148203be0c9d324ee3279 Mon Sep 17 00:00:00 2001 From: Daniil Komarevtsev Date: Tue, 14 Jan 2025 12:46:04 +0000 Subject: [PATCH 2/2] Review fix --- .../libs/storage/disk_registry/disk_registry_state.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index f0768a102e7..be4236d81e4 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -2839,14 +2839,14 @@ NProto::TError TDiskRegistryState::DeallocateDisk( auto dirtyDevices = DeallocateSimpleDisk(db, diskId, *disk); auto error = AddDevicesToPendingCleanup(diskId, std::move(dirtyDevices)); - if (SUCCEEDED(error.GetCode())) { + if (!HasError(error)) { // NOTE: We must pass S_ALREADY to the higher-level code. It is used for // sync deallocations. return error; } - ReportDiskRegistryInsertToPendingCleanupFailed( - TStringBuilder() << "An error occurred while deallocating disk: " - << FormatError(error)); + ReportDiskRegistryInsertToPendingCleanupFailed( + TStringBuilder() << "An error occurred while deallocating disk: " + << FormatError(error)); return {}; }