Changeset View
Changeset View
Standalone View
Standalone View
sys/contrib/openzfs/module/zfs/vdev.c
Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
#include <sys/dsl_scan.h> | #include <sys/dsl_scan.h> | ||||
#include <sys/vdev_raidz.h> | #include <sys/vdev_raidz.h> | ||||
#include <sys/abd.h> | #include <sys/abd.h> | ||||
#include <sys/vdev_initialize.h> | #include <sys/vdev_initialize.h> | ||||
#include <sys/vdev_trim.h> | #include <sys/vdev_trim.h> | ||||
#include <sys/zvol.h> | #include <sys/zvol.h> | ||||
#include <sys/zfs_ratelimit.h> | #include <sys/zfs_ratelimit.h> | ||||
/*
 * One metaslab from each (normal-class) vdev is used by the ZIL. These are
 * called "embedded slog metaslabs", are referenced by vdev_log_mg, and are
 * part of the spa_embedded_log_class. The metaslab with the most free space
 * in each vdev is selected for this purpose when the pool is opened (or a
 * vdev is added). See vdev_metaslab_init().
 *
 * Log blocks can be allocated from the following locations. Each one is tried
 * in order until the allocation succeeds:
 * 1. dedicated log vdevs, aka "slog" (spa_log_class)
 * 2. embedded slog metaslabs (spa_embedded_log_class)
 * 3. other metaslabs in normal vdevs (spa_normal_class)
 *
 * zfs_embedded_slog_min_ms disables the embedded slog if there are fewer
 * than this number of metaslabs in the vdev. This ensures that we don't set
 * aside an unreasonable amount of space for the ZIL. If set to less than
 * 1 << (spa_slop_shift + 1), on small pools the usable space may be reduced
 * (by more than 1<<spa_slop_shift) due to the embedded slog metaslab.
 */
int zfs_embedded_slog_min_ms = 64;
/* default target for number of metaslabs per top-level vdev */ | /* default target for number of metaslabs per top-level vdev */ | ||||
int zfs_vdev_default_ms_count = 200; | int zfs_vdev_default_ms_count = 200; | ||||
/* minimum number of metaslabs per top-level vdev */ | /* minimum number of metaslabs per top-level vdev */ | ||||
int zfs_vdev_min_ms_count = 16; | int zfs_vdev_min_ms_count = 16; | ||||
/* practical upper limit of total metaslabs per top-level vdev */ | /* practical upper limit of total metaslabs per top-level vdev */ | ||||
int zfs_vdev_ms_count_limit = 1ULL << 17; | int zfs_vdev_ms_count_limit = 1ULL << 17; | ||||
▲ Show 20 Lines • Show All 148 Lines • ▼ Show 20 Lines | vdev_getops(const char *type) | ||||
for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) | for (opspp = vdev_ops_table; (ops = *opspp) != NULL; opspp++) | ||||
if (strcmp(ops->vdev_op_type, type) == 0) | if (strcmp(ops->vdev_op_type, type) == 0) | ||||
break; | break; | ||||
return (ops); | return (ops); | ||||
} | } | ||||
/* | |||||
* Given a vdev and a metaslab class, find which metaslab group we're | |||||
* interested in. All vdevs may belong to two different metaslab classes. | |||||
* Dedicated slog devices use only the primary metaslab group, rather than a | |||||
* separate log group. For embedded slogs, the vdev_log_mg will be non-NULL. | |||||
*/ | |||||
metaslab_group_t * | |||||
vdev_get_mg(vdev_t *vd, metaslab_class_t *mc) | |||||
{ | |||||
if (mc == spa_embedded_log_class(vd->vdev_spa) && | |||||
vd->vdev_log_mg != NULL) | |||||
return (vd->vdev_log_mg); | |||||
else | |||||
return (vd->vdev_mg); | |||||
} | |||||
/* ARGSUSED */ | /* ARGSUSED */ | ||||
void | void | ||||
vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs, | vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs, | ||||
range_seg64_t *physical_rs, range_seg64_t *remain_rs) | range_seg64_t *physical_rs, range_seg64_t *remain_rs) | ||||
{ | { | ||||
physical_rs->rs_start = logical_rs->rs_start; | physical_rs->rs_start = logical_rs->rs_start; | ||||
physical_rs->rs_end = logical_rs->rs_end; | physical_rs->rs_end = logical_rs->rs_end; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 739 Lines • ▼ Show 20 Lines | vdev_free(vdev_t *vd) | ||||
/* | /* | ||||
* Discard allocation state. | * Discard allocation state. | ||||
*/ | */ | ||||
if (vd->vdev_mg != NULL) { | if (vd->vdev_mg != NULL) { | ||||
vdev_metaslab_fini(vd); | vdev_metaslab_fini(vd); | ||||
metaslab_group_destroy(vd->vdev_mg); | metaslab_group_destroy(vd->vdev_mg); | ||||
vd->vdev_mg = NULL; | vd->vdev_mg = NULL; | ||||
} | } | ||||
if (vd->vdev_log_mg != NULL) { | |||||
ASSERT0(vd->vdev_ms_count); | |||||
metaslab_group_destroy(vd->vdev_log_mg); | |||||
vd->vdev_log_mg = NULL; | |||||
} | |||||
ASSERT0(vd->vdev_stat.vs_space); | ASSERT0(vd->vdev_stat.vs_space); | ||||
ASSERT0(vd->vdev_stat.vs_dspace); | ASSERT0(vd->vdev_stat.vs_dspace); | ||||
ASSERT0(vd->vdev_stat.vs_alloc); | ASSERT0(vd->vdev_stat.vs_alloc); | ||||
/* | /* | ||||
* Remove this vdev from its parent's child list. | * Remove this vdev from its parent's child list. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | vdev_top_transfer(vdev_t *svd, vdev_t *tvd) | ||||
svd->vdev_ms_array = 0; | svd->vdev_ms_array = 0; | ||||
svd->vdev_ms_shift = 0; | svd->vdev_ms_shift = 0; | ||||
svd->vdev_ms_count = 0; | svd->vdev_ms_count = 0; | ||||
svd->vdev_top_zap = 0; | svd->vdev_top_zap = 0; | ||||
if (tvd->vdev_mg) | if (tvd->vdev_mg) | ||||
ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); | ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg); | ||||
if (tvd->vdev_log_mg) | |||||
ASSERT3P(tvd->vdev_log_mg, ==, svd->vdev_log_mg); | |||||
tvd->vdev_mg = svd->vdev_mg; | tvd->vdev_mg = svd->vdev_mg; | ||||
tvd->vdev_log_mg = svd->vdev_log_mg; | |||||
tvd->vdev_ms = svd->vdev_ms; | tvd->vdev_ms = svd->vdev_ms; | ||||
svd->vdev_mg = NULL; | svd->vdev_mg = NULL; | ||||
svd->vdev_log_mg = NULL; | |||||
svd->vdev_ms = NULL; | svd->vdev_ms = NULL; | ||||
if (tvd->vdev_mg != NULL) | if (tvd->vdev_mg != NULL) | ||||
tvd->vdev_mg->mg_vd = tvd; | tvd->vdev_mg->mg_vd = tvd; | ||||
if (tvd->vdev_log_mg != NULL) | |||||
tvd->vdev_log_mg->mg_vd = tvd; | |||||
tvd->vdev_checkpoint_sm = svd->vdev_checkpoint_sm; | tvd->vdev_checkpoint_sm = svd->vdev_checkpoint_sm; | ||||
svd->vdev_checkpoint_sm = NULL; | svd->vdev_checkpoint_sm = NULL; | ||||
tvd->vdev_alloc_bias = svd->vdev_alloc_bias; | tvd->vdev_alloc_bias = svd->vdev_alloc_bias; | ||||
svd->vdev_alloc_bias = VDEV_BIAS_NONE; | svd->vdev_alloc_bias = VDEV_BIAS_NONE; | ||||
tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; | tvd->vdev_stat.vs_alloc = svd->vdev_stat.vs_alloc; | ||||
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines | vdev_remove_parent(vdev_t *cvd) | ||||
if (cvd == cvd->vdev_top) | if (cvd == cvd->vdev_top) | ||||
vdev_top_transfer(mvd, cvd); | vdev_top_transfer(mvd, cvd); | ||||
ASSERT(mvd->vdev_children == 0); | ASSERT(mvd->vdev_children == 0); | ||||
vdev_free(mvd); | vdev_free(mvd); | ||||
} | } | ||||
static void | void | ||||
vdev_metaslab_group_create(vdev_t *vd) | vdev_metaslab_group_create(vdev_t *vd) | ||||
{ | { | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
/* | /* | ||||
* metaslab_group_create was delayed until allocation bias was available | * metaslab_group_create was delayed until allocation bias was available | ||||
*/ | */ | ||||
if (vd->vdev_mg == NULL) { | if (vd->vdev_mg == NULL) { | ||||
Show All 17 Lines | case VDEV_BIAS_DEDUP: | ||||
break; | break; | ||||
default: | default: | ||||
mc = spa_normal_class(spa); | mc = spa_normal_class(spa); | ||||
} | } | ||||
vd->vdev_mg = metaslab_group_create(mc, vd, | vd->vdev_mg = metaslab_group_create(mc, vd, | ||||
spa->spa_alloc_count); | spa->spa_alloc_count); | ||||
if (!vd->vdev_islog) { | |||||
vd->vdev_log_mg = metaslab_group_create( | |||||
spa_embedded_log_class(spa), vd, 1); | |||||
} | |||||
/* | /* | ||||
* The spa ashift min/max only apply for the normal metaslab | * The spa ashift min/max only apply for the normal metaslab | ||||
* class. Class destination is late binding so ashift boundry | * class. Class destination is late binding so ashift boundry | ||||
* setting had to wait until now. | * setting had to wait until now. | ||||
*/ | */ | ||||
if (vd->vdev_top == vd && vd->vdev_ashift != 0 && | if (vd->vdev_top == vd && vd->vdev_ashift != 0 && | ||||
mc == spa_normal_class(spa) && vd->vdev_aux == NULL) { | mc == spa_normal_class(spa) && vd->vdev_aux == NULL) { | ||||
if (vd->vdev_ashift > spa->spa_max_ashift) | if (vd->vdev_ashift > spa->spa_max_ashift) | ||||
spa->spa_max_ashift = vd->vdev_ashift; | spa->spa_max_ashift = vd->vdev_ashift; | ||||
if (vd->vdev_ashift < spa->spa_min_ashift) | if (vd->vdev_ashift < spa->spa_min_ashift) | ||||
spa->spa_min_ashift = vd->vdev_ashift; | spa->spa_min_ashift = vd->vdev_ashift; | ||||
uint64_t min_alloc = vdev_get_min_alloc(vd); | uint64_t min_alloc = vdev_get_min_alloc(vd); | ||||
if (min_alloc < spa->spa_min_alloc) | if (min_alloc < spa->spa_min_alloc) | ||||
spa->spa_min_alloc = min_alloc; | spa->spa_min_alloc = min_alloc; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
int | int | ||||
vdev_metaslab_init(vdev_t *vd, uint64_t txg) | vdev_metaslab_init(vdev_t *vd, uint64_t txg) | ||||
{ | { | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
objset_t *mos = spa->spa_meta_objset; | |||||
uint64_t m; | |||||
uint64_t oldc = vd->vdev_ms_count; | uint64_t oldc = vd->vdev_ms_count; | ||||
uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; | uint64_t newc = vd->vdev_asize >> vd->vdev_ms_shift; | ||||
metaslab_t **mspp; | metaslab_t **mspp; | ||||
int error; | int error; | ||||
boolean_t expanding = (oldc != 0); | boolean_t expanding = (oldc != 0); | ||||
ASSERT(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER)); | ASSERT(txg == 0 || spa_config_held(spa, SCL_ALLOC, RW_WRITER)); | ||||
Show All 11 Lines | vdev_metaslab_init(vdev_t *vd, uint64_t txg) | ||||
if (expanding) { | if (expanding) { | ||||
bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); | bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); | ||||
vmem_free(vd->vdev_ms, oldc * sizeof (*mspp)); | vmem_free(vd->vdev_ms, oldc * sizeof (*mspp)); | ||||
} | } | ||||
vd->vdev_ms = mspp; | vd->vdev_ms = mspp; | ||||
vd->vdev_ms_count = newc; | vd->vdev_ms_count = newc; | ||||
for (m = oldc; m < newc; m++) { | |||||
uint64_t object = 0; | |||||
for (uint64_t m = oldc; m < newc; m++) { | |||||
uint64_t object = 0; | |||||
/* | /* | ||||
* vdev_ms_array may be 0 if we are creating the "fake" | * vdev_ms_array may be 0 if we are creating the "fake" | ||||
* metaslabs for an indirect vdev for zdb's leak detection. | * metaslabs for an indirect vdev for zdb's leak detection. | ||||
* See zdb_leak_init(). | * See zdb_leak_init(). | ||||
*/ | */ | ||||
if (txg == 0 && vd->vdev_ms_array != 0) { | if (txg == 0 && vd->vdev_ms_array != 0) { | ||||
error = dmu_read(mos, vd->vdev_ms_array, | error = dmu_read(spa->spa_meta_objset, | ||||
vd->vdev_ms_array, | |||||
m * sizeof (uint64_t), sizeof (uint64_t), &object, | m * sizeof (uint64_t), sizeof (uint64_t), &object, | ||||
DMU_READ_PREFETCH); | DMU_READ_PREFETCH); | ||||
if (error != 0) { | if (error != 0) { | ||||
vdev_dbgmsg(vd, "unable to read the metaslab " | vdev_dbgmsg(vd, "unable to read the metaslab " | ||||
"array [error=%d]", error); | "array [error=%d]", error); | ||||
return (error); | return (error); | ||||
} | } | ||||
} | } | ||||
#ifndef _KERNEL | |||||
/* | |||||
* To accommodate zdb_leak_init() fake indirect | |||||
* metaslabs, we allocate a metaslab group for | |||||
* indirect vdevs which normally don't have one. | |||||
*/ | |||||
if (vd->vdev_mg == NULL) { | |||||
ASSERT0(vdev_is_concrete(vd)); | |||||
vdev_metaslab_group_create(vd); | |||||
} | |||||
#endif | |||||
error = metaslab_init(vd->vdev_mg, m, object, txg, | error = metaslab_init(vd->vdev_mg, m, object, txg, | ||||
&(vd->vdev_ms[m])); | &(vd->vdev_ms[m])); | ||||
if (error != 0) { | if (error != 0) { | ||||
vdev_dbgmsg(vd, "metaslab_init failed [error=%d]", | vdev_dbgmsg(vd, "metaslab_init failed [error=%d]", | ||||
error); | error); | ||||
return (error); | return (error); | ||||
} | } | ||||
} | } | ||||
/* | |||||
* Find the emptiest metaslab on the vdev and mark it for use for | |||||
* embedded slog by moving it from the regular to the log metaslab | |||||
* group. | |||||
*/ | |||||
if (vd->vdev_mg->mg_class == spa_normal_class(spa) && | |||||
vd->vdev_ms_count > zfs_embedded_slog_min_ms && | |||||
avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) { | |||||
uint64_t slog_msid = 0; | |||||
uint64_t smallest = UINT64_MAX; | |||||
/* | |||||
* Note, we only search the new metaslabs, because the old | |||||
* (pre-existing) ones may be active (e.g. have non-empty | |||||
* range_tree's), and we don't move them to the new | |||||
* metaslab_t. | |||||
*/ | |||||
for (uint64_t m = oldc; m < newc; m++) { | |||||
uint64_t alloc = | |||||
space_map_allocated(vd->vdev_ms[m]->ms_sm); | |||||
if (alloc < smallest) { | |||||
slog_msid = m; | |||||
smallest = alloc; | |||||
} | |||||
} | |||||
metaslab_t *slog_ms = vd->vdev_ms[slog_msid]; | |||||
/* | |||||
* The metaslab was marked as dirty at the end of | |||||
* metaslab_init(). Remove it from the dirty list so that we | |||||
* can uninitialize and reinitialize it to the new class. | |||||
*/ | |||||
if (txg != 0) { | |||||
(void) txg_list_remove_this(&vd->vdev_ms_list, | |||||
slog_ms, txg); | |||||
} | |||||
uint64_t sm_obj = space_map_object(slog_ms->ms_sm); | |||||
metaslab_fini(slog_ms); | |||||
VERIFY0(metaslab_init(vd->vdev_log_mg, slog_msid, sm_obj, txg, | |||||
&vd->vdev_ms[slog_msid])); | |||||
} | |||||
if (txg == 0) | if (txg == 0) | ||||
spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER); | spa_config_enter(spa, SCL_ALLOC, FTAG, RW_WRITER); | ||||
/* | /* | ||||
* If the vdev is being removed we don't activate | * If the vdev is being removed we don't activate | ||||
* the metaslabs since we want to ensure that no new | * the metaslabs since we want to ensure that no new | ||||
* allocations are performed on this device. | * allocations are performed on this device. | ||||
*/ | */ | ||||
if (!expanding && !vd->vdev_removing) { | if (!expanding && !vd->vdev_removing) { | ||||
metaslab_group_activate(vd->vdev_mg); | metaslab_group_activate(vd->vdev_mg); | ||||
if (vd->vdev_log_mg != NULL) | |||||
metaslab_group_activate(vd->vdev_log_mg); | |||||
} | } | ||||
if (txg == 0) | if (txg == 0) | ||||
spa_config_exit(spa, SCL_ALLOC, FTAG); | spa_config_exit(spa, SCL_ALLOC, FTAG); | ||||
/* | /* | ||||
* Regardless whether this vdev was just added or it is being | * Regardless whether this vdev was just added or it is being | ||||
* expanded, the metaslab count has changed. Recalculate the | * expanded, the metaslab count has changed. Recalculate the | ||||
Show All 19 Lines | if (vd->vdev_checkpoint_sm != NULL) { | ||||
* this clause never executes twice. This logic is similar | * this clause never executes twice. This logic is similar | ||||
* to the one used for the vdev_ms clause below. | * to the one used for the vdev_ms clause below. | ||||
*/ | */ | ||||
vd->vdev_checkpoint_sm = NULL; | vd->vdev_checkpoint_sm = NULL; | ||||
} | } | ||||
if (vd->vdev_ms != NULL) { | if (vd->vdev_ms != NULL) { | ||||
metaslab_group_t *mg = vd->vdev_mg; | metaslab_group_t *mg = vd->vdev_mg; | ||||
metaslab_group_passivate(mg); | metaslab_group_passivate(mg); | ||||
if (vd->vdev_log_mg != NULL) { | |||||
ASSERT(!vd->vdev_islog); | |||||
metaslab_group_passivate(vd->vdev_log_mg); | |||||
} | |||||
uint64_t count = vd->vdev_ms_count; | uint64_t count = vd->vdev_ms_count; | ||||
for (uint64_t m = 0; m < count; m++) { | for (uint64_t m = 0; m < count; m++) { | ||||
metaslab_t *msp = vd->vdev_ms[m]; | metaslab_t *msp = vd->vdev_ms[m]; | ||||
if (msp != NULL) | if (msp != NULL) | ||||
metaslab_fini(msp); | metaslab_fini(msp); | ||||
} | } | ||||
vmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); | vmem_free(vd->vdev_ms, count * sizeof (metaslab_t *)); | ||||
vd->vdev_ms = NULL; | vd->vdev_ms = NULL; | ||||
vd->vdev_ms_count = 0; | vd->vdev_ms_count = 0; | ||||
for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) | for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { | ||||
ASSERT0(mg->mg_histogram[i]); | ASSERT0(mg->mg_histogram[i]); | ||||
if (vd->vdev_log_mg != NULL) | |||||
ASSERT0(vd->vdev_log_mg->mg_histogram[i]); | |||||
} | } | ||||
} | |||||
ASSERT0(vd->vdev_ms_count); | ASSERT0(vd->vdev_ms_count); | ||||
ASSERT3U(vd->vdev_pending_fastwrite, ==, 0); | ASSERT3U(vd->vdev_pending_fastwrite, ==, 0); | ||||
} | } | ||||
typedef struct vdev_probe_stats { | typedef struct vdev_probe_stats { | ||||
boolean_t vps_readable; | boolean_t vps_readable; | ||||
boolean_t vps_writeable; | boolean_t vps_writeable; | ||||
int vps_flags; | int vps_flags; | ||||
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | vdev_probe(vdev_t *vd, zio_t *zio) | ||||
if (zio == NULL) | if (zio == NULL) | ||||
return (pio); | return (pio); | ||||
zio_nowait(pio); | zio_nowait(pio); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
static void | static void | ||||
vdev_load_child(void *arg) | |||||
{ | |||||
vdev_t *vd = arg; | |||||
vd->vdev_load_error = vdev_load(vd); | |||||
} | |||||
static void | |||||
vdev_open_child(void *arg) | vdev_open_child(void *arg) | ||||
{ | { | ||||
vdev_t *vd = arg; | vdev_t *vd = arg; | ||||
vd->vdev_open_thread = curthread; | vd->vdev_open_thread = curthread; | ||||
vd->vdev_open_error = vdev_open(vd); | vd->vdev_open_error = vdev_open(vd); | ||||
vd->vdev_open_thread = NULL; | vd->vdev_open_thread = NULL; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 387 Lines • ▼ Show 20 Lines | vdev_open(vdev_t *vd) | ||||
* this would just restart the scrub we are already doing. | * this would just restart the scrub we are already doing. | ||||
*/ | */ | ||||
if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen) | if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen) | ||||
dsl_scan_assess_vdev(spa->spa_dsl_pool, vd); | dsl_scan_assess_vdev(spa->spa_dsl_pool, vd); | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | |||||
vdev_validate_child(void *arg) | |||||
{ | |||||
vdev_t *vd = arg; | |||||
vd->vdev_validate_thread = curthread; | |||||
vd->vdev_validate_error = vdev_validate(vd); | |||||
vd->vdev_validate_thread = NULL; | |||||
} | |||||
/* | /* | ||||
* Called once the vdevs are all opened, this routine validates the label | * Called once the vdevs are all opened, this routine validates the label | ||||
* contents. This needs to be done before vdev_load() so that we don't | * contents. This needs to be done before vdev_load() so that we don't | ||||
* inadvertently do repair I/Os to the wrong device. | * inadvertently do repair I/Os to the wrong device. | ||||
* | * | ||||
* This function will only return failure if one of the vdevs indicates that it | * This function will only return failure if one of the vdevs indicates that it | ||||
* has since been destroyed or exported. This is only possible if | * has since been destroyed or exported. This is only possible if | ||||
* /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state | * /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state | ||||
* will be updated but the function will return 0. | * will be updated but the function will return 0. | ||||
*/ | */ | ||||
int | int | ||||
vdev_validate(vdev_t *vd) | vdev_validate(vdev_t *vd) | ||||
{ | { | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
taskq_t *tq = NULL; | |||||
nvlist_t *label; | nvlist_t *label; | ||||
uint64_t guid = 0, aux_guid = 0, top_guid; | uint64_t guid = 0, aux_guid = 0, top_guid; | ||||
uint64_t state; | uint64_t state; | ||||
nvlist_t *nvl; | nvlist_t *nvl; | ||||
uint64_t txg; | uint64_t txg; | ||||
int children = vd->vdev_children; | |||||
if (vdev_validate_skip) | if (vdev_validate_skip) | ||||
return (0); | return (0); | ||||
for (uint64_t c = 0; c < vd->vdev_children; c++) | if (children > 0) { | ||||
if (vdev_validate(vd->vdev_child[c]) != 0) | tq = taskq_create("vdev_validate", children, minclsyspri, | ||||
children, children, TASKQ_PREPOPULATE); | |||||
} | |||||
for (uint64_t c = 0; c < children; c++) { | |||||
vdev_t *cvd = vd->vdev_child[c]; | |||||
if (tq == NULL || vdev_uses_zvols(cvd)) { | |||||
vdev_validate_child(cvd); | |||||
} else { | |||||
VERIFY(taskq_dispatch(tq, vdev_validate_child, cvd, | |||||
TQ_SLEEP) != TASKQID_INVALID); | |||||
} | |||||
} | |||||
if (tq != NULL) { | |||||
taskq_wait(tq); | |||||
taskq_destroy(tq); | |||||
} | |||||
for (int c = 0; c < children; c++) { | |||||
int error = vd->vdev_child[c]->vdev_validate_error; | |||||
if (error != 0) | |||||
return (SET_ERROR(EBADF)); | return (SET_ERROR(EBADF)); | ||||
} | |||||
/* | /* | ||||
* If the device has already failed, or was marked offline, don't do | * If the device has already failed, or was marked offline, don't do | ||||
* any further validation. Otherwise, label I/O will fail and we will | * any further validation. Otherwise, label I/O will fail and we will | ||||
* overwrite the previous state. | * overwrite the previous state. | ||||
*/ | */ | ||||
if (!vd->vdev_ops->vdev_op_leaf || !vdev_readable(vd)) | if (!vd->vdev_ops->vdev_op_leaf || !vdev_readable(vd)) | ||||
return (0); | return (0); | ||||
▲ Show 20 Lines • Show All 1,179 Lines • ▼ Show 20 Lines | vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj) | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
int | int | ||||
vdev_load(vdev_t *vd) | vdev_load(vdev_t *vd) | ||||
{ | { | ||||
int children = vd->vdev_children; | |||||
int error = 0; | int error = 0; | ||||
taskq_t *tq = NULL; | |||||
/* | /* | ||||
* It's only worthwhile to use the taskq for the root vdev, because the | |||||
* slow part is metaslab_init, and that only happens for top-level | |||||
* vdevs. | |||||
*/ | |||||
if (vd->vdev_ops == &vdev_root_ops && vd->vdev_children > 0) { | |||||
tq = taskq_create("vdev_load", children, minclsyspri, | |||||
children, children, TASKQ_PREPOPULATE); | |||||
} | |||||
/* | |||||
* Recursively load all children. | * Recursively load all children. | ||||
*/ | */ | ||||
for (int c = 0; c < vd->vdev_children; c++) { | for (int c = 0; c < vd->vdev_children; c++) { | ||||
error = vdev_load(vd->vdev_child[c]); | vdev_t *cvd = vd->vdev_child[c]; | ||||
if (error != 0) { | |||||
return (error); | if (tq == NULL || vdev_uses_zvols(cvd)) { | ||||
cvd->vdev_load_error = vdev_load(cvd); | |||||
} else { | |||||
VERIFY(taskq_dispatch(tq, vdev_load_child, | |||||
cvd, TQ_SLEEP) != TASKQID_INVALID); | |||||
} | } | ||||
} | } | ||||
if (tq != NULL) { | |||||
taskq_wait(tq); | |||||
taskq_destroy(tq); | |||||
} | |||||
for (int c = 0; c < vd->vdev_children; c++) { | |||||
int error = vd->vdev_child[c]->vdev_load_error; | |||||
if (error != 0) | |||||
return (error); | |||||
} | |||||
vdev_set_deflate_ratio(vd); | vdev_set_deflate_ratio(vd); | ||||
/* | /* | ||||
* On spa_load path, grab the allocation bias from our zap | * On spa_load path, grab the allocation bias from our zap | ||||
*/ | */ | ||||
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) { | if (vd == vd->vdev_top && vd->vdev_top_zap != 0) { | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
char bias_str[64]; | char bias_str[64]; | ||||
▲ Show 20 Lines • Show All 244 Lines • ▼ Show 20 Lines | vdev_sync_done(vdev_t *vd, uint64_t txg) | ||||
boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg)); | boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg)); | ||||
ASSERT(vdev_is_concrete(vd)); | ASSERT(vdev_is_concrete(vd)); | ||||
while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) | while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))) | ||||
!= NULL) | != NULL) | ||||
metaslab_sync_done(msp, txg); | metaslab_sync_done(msp, txg); | ||||
if (reassess) | if (reassess) { | ||||
metaslab_sync_reassess(vd->vdev_mg); | metaslab_sync_reassess(vd->vdev_mg); | ||||
if (vd->vdev_log_mg != NULL) | |||||
metaslab_sync_reassess(vd->vdev_log_mg); | |||||
} | } | ||||
} | |||||
void | void | ||||
vdev_sync(vdev_t *vd, uint64_t txg) | vdev_sync(vdev_t *vd, uint64_t txg) | ||||
{ | { | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
vdev_t *lvd; | vdev_t *lvd; | ||||
metaslab_t *msp; | metaslab_t *msp; | ||||
▲ Show 20 Lines • Show All 306 Lines • ▼ Show 20 Lines | if (!vd->vdev_offline) { | ||||
* then proceed. We check that the vdev's metaslab group | * then proceed. We check that the vdev's metaslab group | ||||
* is not NULL since it's possible that we may have just | * is not NULL since it's possible that we may have just | ||||
* added this vdev but not yet initialized its metaslabs. | * added this vdev but not yet initialized its metaslabs. | ||||
*/ | */ | ||||
if (tvd->vdev_islog && mg != NULL) { | if (tvd->vdev_islog && mg != NULL) { | ||||
/* | /* | ||||
* Prevent any future allocations. | * Prevent any future allocations. | ||||
*/ | */ | ||||
ASSERT3P(tvd->vdev_log_mg, ==, NULL); | |||||
metaslab_group_passivate(mg); | metaslab_group_passivate(mg); | ||||
(void) spa_vdev_state_exit(spa, vd, 0); | (void) spa_vdev_state_exit(spa, vd, 0); | ||||
error = spa_reset_logs(spa); | error = spa_reset_logs(spa); | ||||
/* | /* | ||||
* If the log device was successfully reset but has | * If the log device was successfully reset but has | ||||
* checkpointed data, do not offline it. | * checkpointed data, do not offline it. | ||||
▲ Show 20 Lines • Show All 384 Lines • ▼ Show 20 Lines | if (vs) { | ||||
vs->vs_physical_ashift = vd->vdev_physical_ashift; | vs->vs_physical_ashift = vd->vdev_physical_ashift; | ||||
/* | /* | ||||
* Report fragmentation and rebuild progress for top-level, | * Report fragmentation and rebuild progress for top-level, | ||||
* non-auxiliary, concrete devices. | * non-auxiliary, concrete devices. | ||||
*/ | */ | ||||
if (vd->vdev_aux == NULL && vd == vd->vdev_top && | if (vd->vdev_aux == NULL && vd == vd->vdev_top && | ||||
vdev_is_concrete(vd)) { | vdev_is_concrete(vd)) { | ||||
/* | |||||
* The vdev fragmentation rating doesn't take into | |||||
* account the embedded slog metaslab (vdev_log_mg). | |||||
* Since it's only one metaslab, it would have a tiny | |||||
* impact on the overall fragmentation. | |||||
*/ | |||||
vs->vs_fragmentation = (vd->vdev_mg != NULL) ? | vs->vs_fragmentation = (vd->vdev_mg != NULL) ? | ||||
vd->vdev_mg->mg_fragmentation : 0; | vd->vdev_mg->mg_fragmentation : 0; | ||||
} | } | ||||
} | } | ||||
vdev_get_stats_ex_impl(vd, vs, vsx); | vdev_get_stats_ex_impl(vd, vs, vsx); | ||||
mutex_exit(&vd->vdev_stat_lock); | mutex_exit(&vd->vdev_stat_lock); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 961 Lines • ▼ Show 20 Lines | |||||
ZFS_MODULE_PARAM(zfs, zfs_, scan_ignore_errors, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs, zfs_, scan_ignore_errors, INT, ZMOD_RW, | ||||
"Ignore errors during resilver/scrub"); | "Ignore errors during resilver/scrub"); | ||||
ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs_vdev, vdev_, validate_skip, INT, ZMOD_RW, | ||||
"Bypass vdev_validate()"); | "Bypass vdev_validate()"); | ||||
ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW, | ||||
"Disable cache flushes"); | "Disable cache flushes"); | ||||
ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, INT, ZMOD_RW, | |||||
"Minimum number of metaslabs required to dedicate one for log blocks"); | |||||
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift, | ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift, | ||||
param_set_min_auto_ashift, param_get_ulong, ZMOD_RW, | param_set_min_auto_ashift, param_get_ulong, ZMOD_RW, | ||||
"Minimum ashift used when creating new top-level vdevs"); | "Minimum ashift used when creating new top-level vdevs"); | ||||
ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, | ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, | ||||
param_set_max_auto_ashift, param_get_ulong, ZMOD_RW, | param_set_max_auto_ashift, param_get_ulong, ZMOD_RW, | ||||
"Maximum ashift used when optimizing for logical -> physical sector " | "Maximum ashift used when optimizing for logical -> physical sector " | ||||
"size on new top-level vdevs"); | "size on new top-level vdevs"); | ||||
/* END CSTYLED */ | /* END CSTYLED */ |