Skip to content

Commit

Permalink
Merge pull request #66 from lorelei-sakai/feature/mitigate-old-recovery-journal-issues
Browse files Browse the repository at this point in the history

[VDO-5742] Mitigate old recovery journal issues
  • Loading branch information
lorelei-sakai authored Aug 10, 2024
2 parents ff2ae07 + ba71222 commit 3c4561e
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 22 deletions.
24 changes: 23 additions & 1 deletion drivers/md/dm-vdo/dm-vdo-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -2296,6 +2296,14 @@ static void handle_load_error(struct vdo_completion *completion)
return;
}

if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
(vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
vdo_log_error("Aborting load due to unsupported version");
vdo->admin.phase = LOAD_PHASE_FINISHED;
load_callback(completion);
return;
}

vdo_log_error_strerror(completion->result,
"Entering read-only mode due to load error");
vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
Expand Down Expand Up @@ -2740,6 +2748,19 @@ static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
vdo_log_info("starting device '%s'", device_name);
result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
handle_load_error, "load");
if (result == VDO_UNSUPPORTED_VERSION) {
/*
* A component version is not supported. This can happen when the
* recovery journal metadata is in an old version format. Abort the
* load without saving the state.
*/
vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
perform_admin_operation(vdo, SUSPEND_PHASE_START,
suspend_callback, suspend_callback,
"suspend");
return result;
}

if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
/*
* Something has gone very wrong. Make sure everything has drained and
Expand Down Expand Up @@ -2811,7 +2832,8 @@ static int vdo_preresume(struct dm_target *ti)

vdo_register_thread_device_id(&instance_thread, &vdo->instance);
result = vdo_preresume_registered(ti, vdo);
if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE))
if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
(result == VDO_UNSUPPORTED_VERSION))
result = -EINVAL;
vdo_unregister_thread_device_id();
return vdo_status_to_errno(result);
Expand Down
41 changes: 22 additions & 19 deletions drivers/md/dm-vdo/repair.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,17 +1201,14 @@ static bool __must_check is_valid_recovery_journal_block(const struct recovery_j
* @journal: The journal to use.
* @header: The unpacked block header to check.
* @sequence: The expected sequence number.
* @type: The expected metadata type.
*
* Return: True if the block matches.
*/
static bool __must_check is_exact_recovery_journal_block(const struct recovery_journal *journal,
const struct recovery_block_header *header,
sequence_number_t sequence,
enum vdo_metadata_type type)
sequence_number_t sequence)
{
return ((header->metadata_type == type) &&
(header->sequence_number == sequence) &&
return ((header->sequence_number == sequence) &&
(is_valid_recovery_journal_block(journal, header, true)));
}

Expand Down Expand Up @@ -1370,7 +1367,8 @@ static void extract_entries_from_block(struct repair_completion *repair,
get_recovery_journal_block_header(journal, repair->journal_data,
sequence);

if (!is_exact_recovery_journal_block(journal, &header, sequence, format)) {
if (!is_exact_recovery_journal_block(journal, &header, sequence) ||
(header.metadata_type != format)) {
/* This block is invalid, so skip it. */
return;
}
Expand Down Expand Up @@ -1556,10 +1554,13 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
sequence_number_t i, head;
bool found_entries = false;
struct recovery_journal *journal = repair->completion.vdo->recovery_journal;
struct recovery_block_header header;
enum vdo_metadata_type expected_format;

head = min(repair->block_map_head, repair->slab_journal_head);
header = get_recovery_journal_block_header(journal, repair->journal_data, head);
expected_format = header.metadata_type;
for (i = head; i <= repair->highest_tail; i++) {
struct recovery_block_header header;
journal_entry_count_t block_entries;
u8 j;

Expand All @@ -1571,19 +1572,15 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
};

header = get_recovery_journal_block_header(journal, repair->journal_data, i);
if (header.metadata_type == VDO_METADATA_RECOVERY_JOURNAL) {
/* This is an old format block, so we need to upgrade */
vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
"Recovery journal is in the old format, a read-only rebuild is required.");
vdo_enter_read_only_mode(repair->completion.vdo,
VDO_UNSUPPORTED_VERSION);
return VDO_UNSUPPORTED_VERSION;
}

if (!is_exact_recovery_journal_block(journal, &header, i,
VDO_METADATA_RECOVERY_JOURNAL_2)) {
if (!is_exact_recovery_journal_block(journal, &header, i)) {
/* A bad block header was found so this must be the end of the journal. */
break;
} else if (header.metadata_type != expected_format) {
/* There is a mix of old and new format blocks, so we need to rebuild. */
vdo_log_error_strerror(VDO_CORRUPT_JOURNAL,
"Recovery journal is in an invalid format, a read-only rebuild is required.");
vdo_enter_read_only_mode(repair->completion.vdo, VDO_CORRUPT_JOURNAL);
return VDO_CORRUPT_JOURNAL;
}

block_entries = header.entry_count;
Expand Down Expand Up @@ -1619,8 +1616,14 @@ static int parse_journal_for_recovery(struct repair_completion *repair)
break;
}

if (!found_entries)
if (!found_entries) {
return validate_heads(repair);
} else if (expected_format == VDO_METADATA_RECOVERY_JOURNAL) {
/* All journal blocks have the old format, so we need to upgrade. */
vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
"Recovery journal is in the old format. Downgrade and complete recovery, then upgrade with a clean volume");
return VDO_UNSUPPORTED_VERSION;
}

/* Set the tail to the last valid tail block, if there is one. */
if (repair->tail_recovery_point.sector_count == 0)
Expand Down
2 changes: 1 addition & 1 deletion drivers/md/dm-vdo/status-codes.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const struct error_info vdo_status_list[] = {
{ "VDO_LOCK_ERROR", "A lock is held incorrectly" },
{ "VDO_READ_ONLY", "The device is in read-only mode" },
{ "VDO_SHUTTING_DOWN", "The device is shutting down" },
{ "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" },
{ "VDO_CORRUPT_JOURNAL", "Recovery journal corrupted" },
{ "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" },
{ "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" },
{ "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" },
Expand Down
2 changes: 1 addition & 1 deletion drivers/md/dm-vdo/status-codes.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ enum vdo_status_codes {
VDO_READ_ONLY,
/* the VDO is shutting down */
VDO_SHUTTING_DOWN,
/* the recovery journal has corrupt entries */
/* the recovery journal has corrupt entries or corrupt metadata */
VDO_CORRUPT_JOURNAL,
/* exceeds maximum number of slabs supported */
VDO_TOO_MANY_SLABS,
Expand Down

0 comments on commit 3c4561e

Please sign in to comment.