Skip to content

Commit

Permalink
Detect a slow raidz child during reads
Browse files Browse the repository at this point in the history
If it's a persistent outlier, then sit out reads for a period.

Signed-off-by: Don Brady <[email protected]>
  • Loading branch information
don-brady committed Nov 20, 2024
1 parent ff3df12 commit e786f99
Show file tree
Hide file tree
Showing 6 changed files with 367 additions and 2 deletions.
9 changes: 8 additions & 1 deletion cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -7374,6 +7374,13 @@ ztest_run_zdb(uint64_t guid)
char *set_gvars_args_joined = join_strings(set_gvars_args, " ");
free(set_gvars_args);

printf("%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64,
bin,
ztest_opts.zo_verbose >= 3 ? "s" : "",
ztest_opts.zo_verbose >= 4 ? "v" : "",
set_gvars_args_joined,
ztest_opts.zo_dir,
guid);
size_t would = snprintf(zdb, len,
"%s -bcc%s%s -G -d -Y -e -y %s -p %s %"PRIu64,
bin,
Expand All @@ -7386,7 +7393,7 @@ ztest_run_zdb(uint64_t guid)

umem_free(set_gvars_args_joined, strlen(set_gvars_args_joined) + 1);

if (ztest_opts.zo_verbose >= 5)
// if (ztest_opts.zo_verbose >= 5)
(void) printf("Executing %s\n", zdb);

fp = popen(zdb, "r");
Expand Down
2 changes: 2 additions & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,8 @@ struct vdev {
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
uint64_t vdev_expansion_time; /* vdev's last expansion time */
uint64_t vdev_outlier_count; /* peer outlier events */
hrtime_t vdev_read_sit_out_expire;
list_node_t vdev_leaf_node; /* leaf vdev list */

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_raidz.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
void vdev_raidz_reflow_copy_scratch(spa_t *);
void raidz_dtl_reassessed(vdev_t *);
boolean_t vdev_skip_latency_outlier(vdev_t *, zio_flag_t);

extern const zio_vsd_ops_t vdev_raidz_vsd_ops;

Expand Down
6 changes: 6 additions & 0 deletions include/sys/vdev_raidz_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ typedef struct raidz_impl_ops {
char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
} raidz_impl_ops_t;

/*
 * Forward declarations of the row and column types. The column structure
 * holds pointers to both a raidz_col_t (rc_latency_next) and a raidz_row_t
 * (rc_raidz_row), and the row structure holds a raidz_col_t pointer
 * (rr_latency_list_start), so both names must be known before either
 * structure body is defined.
 */
typedef struct raidz_row raidz_row_t;
typedef struct raidz_col raidz_col_t;

typedef struct raidz_col {
int rc_devidx; /* child device index for I/O */
Expand All @@ -121,6 +123,9 @@ typedef struct raidz_col {
int rc_shadow_devidx; /* for double write during expansion */
int rc_shadow_error; /* for double write during expansion */
uint64_t rc_shadow_offset; /* for double write during expansion */
uint64_t rc_latency_val; /* leaf device scaled latency value */
raidz_col_t *rc_latency_next; /* sorted next column in list */
raidz_row_t *rc_raidz_row;
} raidz_col_t;

typedef struct raidz_row {
Expand All @@ -132,6 +137,7 @@ typedef struct raidz_row {
int rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
raidz_col_t *rr_latency_list_start; /* head of child latency columns */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
Expand Down
19 changes: 19 additions & 0 deletions module/zfs/vdev_draid.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,6 +1048,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,

rc->rc_devidx = vdev_draid_permute_id(vdc, base, iter, c);
rc->rc_offset = physical_offset;
rc->rc_raidz_row = rr;

if (q == 0 && i >= bc)
rc->rc_size = 0;
Expand Down Expand Up @@ -1889,6 +1890,17 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
/* Sequential rebuild must do IO at redundancy group boundary. */
IMPLY(zio->io_priority == ZIO_PRIORITY_REBUILD, rr->rr_nempty == 0);

/*
* Calculate how much parity is available for sitting out reads
*/
int parity_avail = rr->rr_firstdatacol;
for (int p = 0; p < rr->rr_firstdatacol; p++) {
raidz_col_t *rc = &rr->rr_col[p];
if (!vdev_draid_readable(vd->vdev_child[rc->rc_devidx],
rc->rc_offset)) {
parity_avail--;
}
}
/*
* Iterate over the columns in reverse order so that we hit the parity
* last. Any errors along the way will force us to read the parity.
Expand Down Expand Up @@ -1993,6 +2005,13 @@ vdev_draid_io_start_read(zio_t *zio, raidz_row_t *rr)
rc->rc_force_repair = 1;
rc->rc_allow_repair = 1;
}
} else if (parity_avail > 0 && c >= rr->rr_firstdatacol &&
rr->rr_missingdata == 0 &&
vdev_skip_latency_outlier(cvd, zio->io_flags)) {
rr->rr_missingdata++;
rc->rc_error = SET_ERROR(EAGAIN);
rc->rc_skipped = 1;
continue;
}
}

Expand Down
Loading

0 comments on commit e786f99

Please sign in to comment.