Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add devices to pending cleanup only when necessary #2843

Merged
merged 2 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 54 additions & 38 deletions cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ void TDiskRegistryState::ProcessDirtyDevices(TVector<TDirtyDevice> dirtyDevices)
{
for (auto&& [uuid, diskId]: dirtyDevices) {
if (!diskId.empty()) {
auto error = PendingCleanup.Insert(diskId, std::move(uuid));
auto error = AddDevicesToPendingCleanup(diskId, {std::move(uuid)});
if (HasError(error)) {
ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder()
Expand Down Expand Up @@ -1310,6 +1310,15 @@ NProto::TError TDiskRegistryState::ReplaceDeviceWithoutDiskStateUpdate(
<< " since it is in pending cleanup for disk "
<< cleaningDiskId);
}

if (IsDirtyDevice(deviceReplacementId)) {
return MakeError(
E_ARGUMENT,
TStringBuilder()
<< "can't allocate specific device "
<< deviceReplacementId.Quote() << " for disk " << diskId
<< " since it is dirty");
}
komarevtsev-d marked this conversation as resolved.
Show resolved Hide resolved
}

const ui64 logicalBlockCount = devicePtr->GetBlockSize() * devicePtr->GetBlocksCount()
Expand Down Expand Up @@ -1414,7 +1423,7 @@ NProto::TError TDiskRegistryState::ReplaceDeviceWithoutDiskStateUpdate(
UpdatePlacementGroup(db, diskId, disk, "ReplaceDevice");
UpdateAndReallocateDisk(db, diskId, disk);

error = PendingCleanup.Insert(diskId, deviceId);
error = AddDevicesToPendingCleanup(diskId, {deviceId});
if (HasError(error)) {
ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder() << "An error occurred while replacing device: "
Expand Down Expand Up @@ -2808,10 +2817,12 @@ NProto::TError TDiskRegistryState::DeallocateDisk(

for (const auto& affectedDiskId: affectedDisks) {
Y_DEBUG_ABORT_UNLESS(affectedDiskId.StartsWith(diskId + "/"));
error = PendingCleanup.Insert(
error = AddDevicesToPendingCleanup(
diskId,
DeallocateSimpleDisk(db, affectedDiskId, "DeallocateDisk:Replica")
);
DeallocateSimpleDisk(
db,
affectedDiskId,
"DeallocateDisk:Replica"));
if (HasError(error)) {
ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder()
Expand All @@ -2827,37 +2838,15 @@ NProto::TError TDiskRegistryState::DeallocateDisk(
}

auto dirtyDevices = DeallocateSimpleDisk(db, diskId, *disk);
TVector<TDeviceId> devicesAllowedToBeCleaned;
devicesAllowedToBeCleaned.reserve(dirtyDevices.size());
for (const auto& uuid: dirtyDevices) {
if (CanSecureErase(uuid)) {
devicesAllowedToBeCleaned.push_back(uuid);
}
}

if (devicesAllowedToBeCleaned.empty()) {
// Devices are not going to be secure erased if they aren't allowed to.
// Don't create a PendingCleanup entry.
return MakeError(
S_ALREADY,
TStringBuilder() << "Disk " << diskId << " devices ["
<< JoinSeq(",", dirtyDevices)
<< "] are not going to be secure erased.");
}

STORAGE_DEBUG(
"Disk %s is deallocated. Adding devices [%s] to pending cleanup",
diskId.c_str(),
JoinSeq(", ", devicesAllowedToBeCleaned).c_str());

auto error =
PendingCleanup.Insert(diskId, std::move(devicesAllowedToBeCleaned));
if (HasError(error)) {
ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder() << "An error occurred while deallocating disk: "
<< FormatError(error));
auto error = AddDevicesToPendingCleanup(diskId, std::move(dirtyDevices));
if (!HasError(error)) {
// NOTE: We must pass S_ALREADY to the higher-level code. It is used for
// sync deallocations.
return error;
}

ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder() << "An error occurred while deallocating disk: "
<< FormatError(error));
return {};
}

Expand Down Expand Up @@ -4826,7 +4815,7 @@ void TDiskRegistryState::RemoveFinishedMigrations(

DeviceList.ReleaseDevice(m.DeviceId);
db.UpdateDirtyDevice(m.DeviceId, diskId);
auto error = PendingCleanup.Insert(diskId, m.DeviceId);
auto error = AddDevicesToPendingCleanup(diskId, {m.DeviceId});
if (HasError(error)) {
ReportDiskRegistryInsertToPendingCleanupFailed(
TStringBuilder()
Expand Down Expand Up @@ -6774,7 +6763,7 @@ NProto::TError TDiskRegistryState::AllocateDiskReplicas(
for (ui32 i = 0; i < count; ++i) {
const size_t idx = masterDisk->ReplicaCount + i + 1;

auto error = PendingCleanup.Insert(
auto error = AddDevicesToPendingCleanup(
masterDiskId,
DeallocateSimpleDisk(
db,
Expand Down Expand Up @@ -6819,6 +6808,33 @@ NProto::TError TDiskRegistryState::AllocateDiskReplicas(
return {};
}

NProto::TError TDiskRegistryState::AddDevicesToPendingCleanup(
const TString& diskId,
TVector<TDeviceId> uuids)
{
TVector<TDeviceId> devicesAllowedToBeCleaned;
devicesAllowedToBeCleaned.reserve(uuids.size());
for (auto& uuid: uuids) {
if (CanSecureErase(uuid)) {
devicesAllowedToBeCleaned.push_back(std::move(uuid));
}
}
sharpeye marked this conversation as resolved.
Show resolved Hide resolved
if (devicesAllowedToBeCleaned.empty()) {
// Devices are not going to be secure erased if they aren't allowed to.
// Don't create a PendingCleanup entry.
return MakeError(
S_ALREADY,
TStringBuilder()
<< "Disk " << diskId << " devices [" << JoinSeq(", ", uuids)
<< "] are not going to be secure erased.");
}

STORAGE_DEBUG(
"Adding devices [%s] to pending cleanup",
JoinSeq(", ", devicesAllowedToBeCleaned).c_str());
return PendingCleanup.Insert(diskId, std::move(devicesAllowedToBeCleaned));
}

NProto::TError TDiskRegistryState::DeallocateDiskReplicas(
TDiskRegistryDatabase& db,
const TDiskId& masterDiskId,
Expand Down Expand Up @@ -6854,7 +6870,7 @@ NProto::TError TDiskRegistryState::DeallocateDiskReplicas(

for (size_t i = masterDisk->ReplicaCount; i >= newReplicaCount + 1; --i) {
const auto replicaDiskId = GetReplicaDiskId(masterDiskId, i);
auto error = PendingCleanup.Insert(
auto error = AddDevicesToPendingCleanup(
masterDiskId,
DeallocateSimpleDisk(db, replicaDiskId, "DeallocateDiskReplicas"));
if (HasError(error)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,10 @@ class TDiskRegistryState
TDiskRegistryDatabase& db,
const TString& diskId);

NProto::TError AddDevicesToPendingCleanup(
const TString& diskId,
TVector<TDeviceId> uuids);

/// Try to update configuration of selected device and its agent
/// in the disk registry database
/// @return true if the device updates successfully; otherwise, return false
Expand Down
192 changes: 192 additions & 0 deletions cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11509,6 +11509,198 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest)
{});
}

Y_UNIT_TEST(ShouldAddToPendingCleanupOnlyWhenNecessary)
{
const auto agent1 = AgentConfig(
1,
{
Device("dev-1", "uuid-1.1", "rack-1"),
Device("dev-1", "uuid-1.2", "rack-1"),
Device("dev-1", "uuid-1.3", "rack-1"),
Device("dev-1", "uuid-1.4", "rack-1"),
});

const auto agent2 = AgentConfig(
2,
{
Device("dev-1", "uuid-2.1", "rack-2"),
Device("dev-2", "uuid-2.2", "rack-2"),
Device("dev-3", "uuid-2.3", "rack-2"),
Device("dev-4", "uuid-2.4", "rack-2"),
});

TTestExecutor executor;
executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); });

TDiskRegistryState state =
TDiskRegistryStateBuilder()
.WithKnownAgents({agent1, agent2})
.WithDisks(
{Disk("disk-1", {"uuid-1.1", "uuid-1.2"}),
Disk("disk-2", {"uuid-2.1", "uuid-2.2"})})
.Build();

auto moveDeviceToState =
[&](TString deviceId, NProto::EDeviceState deviceState)
{
executor.WriteTx(
[&](TDiskRegistryDatabase db) mutable
{
TString affectedDisk;
const auto error = state.UpdateDeviceState(
db,
deviceId,
deviceState,
Now(),
"test",
affectedDisk);
UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode());
});
};

auto replaceDevice =
[&](TString diskId, auto fromDevId, auto toDevId, auto manual)
{
executor.WriteTx(
[&](TDiskRegistryDatabase db) mutable
{
bool updated = false;
const auto error = state.ReplaceDevice(
db,
diskId,
fromDevId,
toDevId,
Now(),
"", // message
manual,
&updated);
UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode());
});
};

auto checkDevices = [&](TString diskId,
TVector<TString> diskDevices,
TVector<TString> dirtyDevices,
TVector<TString> autoReplacedDevices)
{
TVector<TDeviceConfig> devices;
const auto error = state.GetDiskDevices(diskId, devices);
UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode());
UNIT_ASSERT_VALUES_EQUAL(diskDevices.size(), devices.size());
for (ui32 i = 0; i < diskDevices.size(); i++) {
UNIT_ASSERT_EQUAL(
NProto::DEVICE_STATE_ONLINE,
devices[i].GetState());
UNIT_ASSERT_VALUES_EQUAL(
diskDevices[i],
devices[i].GetDeviceUUID());
}

TDiskInfo info;
UNIT_ASSERT_SUCCESS(state.GetDiskInfo(diskId, info));
// device replacement list should be empty for non-mirrored disks
ASSERT_VECTORS_EQUAL(TVector<TString>{}, info.DeviceReplacementIds);

auto stateDirtyDevices = state.GetDirtyDevices();
UNIT_ASSERT_VALUES_EQUAL(
dirtyDevices.size(),
stateDirtyDevices.size());
Sort(stateDirtyDevices, TByDeviceUUID());
for (ui32 i = 0; i < dirtyDevices.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(
dirtyDevices[i],
stateDirtyDevices[i].GetDeviceUUID());
}

TVector<TAutomaticallyReplacedDeviceInfo> stateAutoReplacedDevices(
state.GetAutomaticallyReplacedDevices().begin(),
state.GetAutomaticallyReplacedDevices().end());
UNIT_ASSERT_VALUES_EQUAL(
autoReplacedDevices.size(),
stateAutoReplacedDevices.size());
SortBy(
stateAutoReplacedDevices,
[](auto& x) { return x.DeviceId; });
for (ui32 i = 0; i < autoReplacedDevices.size(); i++) {
UNIT_ASSERT_VALUES_EQUAL(
autoReplacedDevices[i],
stateAutoReplacedDevices[i].DeviceId);
}
};

// Automatic replacements shouldn't be be added to pending cleanup.
replaceDevice("disk-1", "uuid-1.1", "uuid-1.3", false /* manual */);
checkDevices(
"disk-1",
{"uuid-1.3", "uuid-1.2"},
{"uuid-1.1"},
{"uuid-1.1"});
// Device is broken. The pending cleanup is empty.
UNIT_ASSERT(!state.HasPendingCleanup("disk-1"));

// Manual replacements also does not add to pending cleanup since the
// source device state is changing to ERROR.
replaceDevice("disk-1", "uuid-1.2", "uuid-1.4", true /* manual */);
UNIT_ASSERT(!state.HasPendingCleanup("disk-1"));
checkDevices(
"disk-1",
{"uuid-1.3", "uuid-1.4"},
{"uuid-1.1", "uuid-1.2"},
{"uuid-1.1"});

moveDeviceToState("uuid-2.1", NProto::DEVICE_STATE_WARNING);

// Start a migration.
executor.WriteTx(
[&](TDiskRegistryDatabase db)
{
const auto migrations = state.BuildMigrationList();
UNIT_ASSERT_VALUES_EQUAL(1, migrations.size());

UNIT_ASSERT_VALUES_EQUAL("disk-2", migrations[0].DiskId);
UNIT_ASSERT_VALUES_EQUAL(
"uuid-2.1",
migrations[0].SourceDeviceId);

auto r =
state.StartDeviceMigration(Now(), db, "disk-2", "uuid-2.1");
UNIT_ASSERT_SUCCESS(r.GetError());
UNIT_ASSERT_VALUES_EQUAL(
"uuid-2.3",
r.GetResult().GetDeviceUUID());
});
UNIT_ASSERT(!state.HasPendingCleanup("disk-2"));

// Finish the migration.
executor.WriteTx(
[&](TDiskRegistryDatabase db) mutable
{
bool updated = false;
const auto error = state.FinishDeviceMigration(
db,
"disk-2",
"uuid-2.1",
"uuid-2.3",
Now(),
&updated);

UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode());
UNIT_ASSERT(updated);
});
UNIT_ASSERT(!state.HasPendingCleanup("disk-2"));

executor.WriteTx(
[&](TDiskRegistryDatabase db)
{
auto disksToReallocate = state.GetDisksToReallocate();
UNIT_ASSERT_VALUES_EQUAL(2, disksToReallocate.size());
for (auto& [diskId, seqNo]: disksToReallocate) {
state.DeleteDiskToReallocate(db, diskId, seqNo);
}
});
UNIT_ASSERT(state.HasPendingCleanup("disk-2"));
}

Y_UNIT_TEST(ShouldPreserveDeviceErrorState)
{
const TString errorMessage = "broken device";
Expand Down
Loading