Changeset View
Changeset View
Standalone View
Standalone View
sys/contrib/openzfs/module/zfs/vdev_raidz.c
Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | |||||
{ \ | { \ | ||||
VDEV_RAIDZ_64MUL_2((x), mask); \ | VDEV_RAIDZ_64MUL_2((x), mask); \ | ||||
VDEV_RAIDZ_64MUL_2((x), mask); \ | VDEV_RAIDZ_64MUL_2((x), mask); \ | ||||
} | } | ||||
static void | static void | ||||
vdev_raidz_row_free(raidz_row_t *rr) | vdev_raidz_row_free(raidz_row_t *rr) | ||||
{ | { | ||||
int c; | for (int c = 0; c < rr->rr_cols; c++) { | ||||
raidz_col_t *rc = &rr->rr_col[c]; | |||||
for (c = 0; c < rr->rr_firstdatacol && c < rr->rr_cols; c++) { | if (rc->rc_size != 0) | ||||
abd_free(rr->rr_col[c].rc_abd); | abd_free(rc->rc_abd); | ||||
if (rc->rc_gdata != NULL) | |||||
if (rr->rr_col[c].rc_gdata != NULL) { | abd_free(rc->rc_gdata); | ||||
abd_free(rr->rr_col[c].rc_gdata); | if (rc->rc_orig_data != NULL) | ||||
zio_buf_free(rc->rc_orig_data, rc->rc_size); | |||||
} | } | ||||
if (rr->rr_col[c].rc_orig_data != NULL) { | |||||
zio_buf_free(rr->rr_col[c].rc_orig_data, | |||||
rr->rr_col[c].rc_size); | |||||
} | |||||
} | |||||
for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { | |||||
if (rr->rr_col[c].rc_size != 0) { | |||||
if (abd_is_gang(rr->rr_col[c].rc_abd)) | |||||
abd_free(rr->rr_col[c].rc_abd); | |||||
else | |||||
abd_put(rr->rr_col[c].rc_abd); | |||||
} | |||||
if (rr->rr_col[c].rc_orig_data != NULL) { | |||||
zio_buf_free(rr->rr_col[c].rc_orig_data, | |||||
rr->rr_col[c].rc_size); | |||||
} | |||||
} | |||||
if (rr->rr_abd_copy != NULL) | if (rr->rr_abd_copy != NULL) | ||||
abd_free(rr->rr_abd_copy); | abd_free(rr->rr_abd_copy); | ||||
if (rr->rr_abd_empty != NULL) | if (rr->rr_abd_empty != NULL) | ||||
abd_free(rr->rr_abd_empty); | abd_free(rr->rr_abd_empty); | ||||
kmem_free(rr, offsetof(raidz_row_t, rr_col[rr->rr_scols])); | kmem_free(rr, offsetof(raidz_row_t, rr_col[rr->rr_scols])); | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | if (rr->rr_col[0].rc_gdata == NULL) { | ||||
rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata = | rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata = | ||||
abd_alloc_sametype(rr->rr_col[x].rc_abd, | abd_alloc_sametype(rr->rr_col[x].rc_abd, | ||||
rr->rr_col[x].rc_size); | rr->rr_col[x].rc_size); | ||||
} | } | ||||
/* fill in the data columns from good_data */ | /* fill in the data columns from good_data */ | ||||
offset = 0; | offset = 0; | ||||
for (; x < rr->rr_cols; x++) { | for (; x < rr->rr_cols; x++) { | ||||
abd_put(rr->rr_col[x].rc_abd); | abd_free(rr->rr_col[x].rc_abd); | ||||
rr->rr_col[x].rc_abd = | rr->rr_col[x].rc_abd = | ||||
abd_get_offset_size((abd_t *)good_data, | abd_get_offset_size((abd_t *)good_data, | ||||
offset, rr->rr_col[x].rc_size); | offset, rr->rr_col[x].rc_size); | ||||
offset += rr->rr_col[x].rc_size; | offset += rr->rr_col[x].rc_size; | ||||
} | } | ||||
/* | /* | ||||
* Construct the parity from the good data. | * Construct the parity from the good data. | ||||
*/ | */ | ||||
vdev_raidz_generate_parity_row(rm, rr); | vdev_raidz_generate_parity_row(rm, rr); | ||||
/* restore everything back to its original state */ | /* restore everything back to its original state */ | ||||
for (x = 0; x < rr->rr_firstdatacol; x++) | for (x = 0; x < rr->rr_firstdatacol; x++) | ||||
rr->rr_col[x].rc_abd = bad_parity[x]; | rr->rr_col[x].rc_abd = bad_parity[x]; | ||||
offset = 0; | offset = 0; | ||||
for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) { | for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) { | ||||
abd_put(rr->rr_col[x].rc_abd); | abd_free(rr->rr_col[x].rc_abd); | ||||
rr->rr_col[x].rc_abd = abd_get_offset_size( | rr->rr_col[x].rc_abd = abd_get_offset_size( | ||||
rr->rr_abd_copy, offset, | rr->rr_abd_copy, offset, | ||||
rr->rr_col[x].rc_size); | rr->rr_col[x].rc_size); | ||||
offset += rr->rr_col[x].rc_size; | offset += rr->rr_col[x].rc_size; | ||||
} | } | ||||
} | } | ||||
ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL); | ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL); | ||||
good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0, | good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0, | ||||
rr->rr_col[c].rc_size); | rr->rr_col[c].rc_size); | ||||
} else { | } else { | ||||
/* adjust good_data to point at the start of our column */ | /* adjust good_data to point at the start of our column */ | ||||
offset = 0; | offset = 0; | ||||
for (x = rr->rr_firstdatacol; x < c; x++) | for (x = rr->rr_firstdatacol; x < c; x++) | ||||
offset += rr->rr_col[x].rc_size; | offset += rr->rr_col[x].rc_size; | ||||
good = abd_get_offset_size((abd_t *)good_data, offset, | good = abd_get_offset_size((abd_t *)good_data, offset, | ||||
rr->rr_col[c].rc_size); | rr->rr_col[c].rc_size); | ||||
} | } | ||||
/* we drop the ereport if it ends up that the data was good */ | /* we drop the ereport if it ends up that the data was good */ | ||||
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); | zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); | ||||
abd_put((abd_t *)good); | abd_free((abd_t *)good); | ||||
} | } | ||||
/* | /* | ||||
* Invoked indirectly by zfs_ereport_start_checksum(), called | * Invoked indirectly by zfs_ereport_start_checksum(), called | ||||
* below when our read operation fails completely. The main point | * below when our read operation fails completely. The main point | ||||
* is to keep a copy of everything we read from disk, so that at | * is to keep a copy of everything we read from disk, so that at | ||||
* vdev_raidz_cksum_finish() time we can compare it with the good data. | * vdev_raidz_cksum_finish() time we can compare it with the good data. | ||||
*/ | */ | ||||
Show All 36 Lines | for (int i = 0; i < rm->rm_nrows; i++) { | ||||
for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { | for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) { | ||||
raidz_col_t *col = &rr->rr_col[c]; | raidz_col_t *col = &rr->rr_col[c]; | ||||
abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy, | abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy, | ||||
offset, col->rc_size); | offset, col->rc_size); | ||||
abd_copy(tmp, col->rc_abd, col->rc_size); | abd_copy(tmp, col->rc_abd, col->rc_size); | ||||
abd_put(col->rc_abd); | abd_free(col->rc_abd); | ||||
col->rc_abd = tmp; | col->rc_abd = tmp; | ||||
offset += col->rc_size; | offset += col->rc_size; | ||||
} | } | ||||
ASSERT3U(offset, ==, size); | ASSERT3U(offset, ==, size); | ||||
} | } | ||||
} | } | ||||
Show All 18 Lines | vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols, | ||||
uint64_t b = zio->io_offset >> ashift; | uint64_t b = zio->io_offset >> ashift; | ||||
/* The zio's size in units of the vdev's minimum sector size. */ | /* The zio's size in units of the vdev's minimum sector size. */ | ||||
uint64_t s = zio->io_size >> ashift; | uint64_t s = zio->io_size >> ashift; | ||||
/* The first column for this stripe. */ | /* The first column for this stripe. */ | ||||
uint64_t f = b % dcols; | uint64_t f = b % dcols; | ||||
/* The starting byte offset on each child vdev. */ | /* The starting byte offset on each child vdev. */ | ||||
uint64_t o = (b / dcols) << ashift; | uint64_t o = (b / dcols) << ashift; | ||||
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; | uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; | ||||
uint64_t off = 0; | |||||
raidz_map_t *rm = | raidz_map_t *rm = | ||||
kmem_zalloc(offsetof(raidz_map_t, rm_row[1]), KM_SLEEP); | kmem_zalloc(offsetof(raidz_map_t, rm_row[1]), KM_SLEEP); | ||||
rm->rm_nrows = 1; | rm->rm_nrows = 1; | ||||
/* | /* | ||||
* "Quotient": The number of data sectors for this stripe on all but | * "Quotient": The number of data sectors for this stripe on all but | ||||
* the "big column" child vdevs that also contain "remainder" data. | * the "big column" child vdevs that also contain "remainder" data. | ||||
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines | #endif | ||||
ASSERT3U(asize, ==, tot << ashift); | ASSERT3U(asize, ==, tot << ashift); | ||||
rm->rm_nskip = roundup(tot, nparity + 1) - tot; | rm->rm_nskip = roundup(tot, nparity + 1) - tot; | ||||
rm->rm_skipstart = bc; | rm->rm_skipstart = bc; | ||||
for (c = 0; c < rr->rr_firstdatacol; c++) | for (c = 0; c < rr->rr_firstdatacol; c++) | ||||
rr->rr_col[c].rc_abd = | rr->rr_col[c].rc_abd = | ||||
abd_alloc_linear(rr->rr_col[c].rc_size, B_FALSE); | abd_alloc_linear(rr->rr_col[c].rc_size, B_FALSE); | ||||
rr->rr_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0, | for (uint64_t off = 0; c < acols; c++) { | ||||
rr->rr_col[c].rc_size); | |||||
off = rr->rr_col[c].rc_size; | |||||
for (c = c + 1; c < acols; c++) { | |||||
raidz_col_t *rc = &rr->rr_col[c]; | raidz_col_t *rc = &rr->rr_col[c]; | ||||
rc->rc_abd = abd_get_offset_size(zio->io_abd, off, rc->rc_size); | rc->rc_abd = abd_get_offset_struct(&rc->rc_abdstruct, | ||||
zio->io_abd, off, rc->rc_size); | |||||
off += rc->rc_size; | off += rc->rc_size; | ||||
} | } | ||||
/* | /* | ||||
* If all data stored spans all columns, there's a danger that parity | * If all data stored spans all columns, there's a danger that parity | ||||
* will always be on the same device and, since parity isn't read | * will always be on the same device and, since parity isn't read | ||||
* during normal operation, that device's I/O bandwidth won't be | * during normal operation, that device's I/O bandwidth won't be | ||||
* used effectively. We therefore switch the parity every 1MB. | * used effectively. We therefore switch the parity every 1MB. | ||||
▲ Show 20 Lines • Show All 2,272 Lines • Show Last 20 Lines |