Changeset View
Changeset View
Standalone View
Standalone View
sys/contrib/openzfs/module/zfs/metaslab.c
Show First 20 Lines • Show All 516 Lines • ▼ Show 20 Lines | metaslab_class_histogram_verify(metaslab_class_t *mc) | ||||
int i; | int i; | ||||
if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) | if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) | ||||
return; | return; | ||||
mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, | mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, | ||||
KM_SLEEP); | KM_SLEEP); | ||||
mutex_enter(&mc->mc_lock); | |||||
for (int c = 0; c < rvd->vdev_children; c++) { | for (int c = 0; c < rvd->vdev_children; c++) { | ||||
vdev_t *tvd = rvd->vdev_child[c]; | vdev_t *tvd = rvd->vdev_child[c]; | ||||
metaslab_group_t *mg = tvd->vdev_mg; | metaslab_group_t *mg = vdev_get_mg(tvd, mc); | ||||
/* | /* | ||||
* Skip any holes, uninitialized top-levels, or | * Skip any holes, uninitialized top-levels, or | ||||
* vdevs that are not in this metaslab class. | * vdevs that are not in this metaslab class. | ||||
*/ | */ | ||||
if (!vdev_is_concrete(tvd) || tvd->vdev_ms_shift == 0 || | if (!vdev_is_concrete(tvd) || tvd->vdev_ms_shift == 0 || | ||||
mg->mg_class != mc) { | mg->mg_class != mc) { | ||||
continue; | continue; | ||||
} | } | ||||
IMPLY(mg == mg->mg_vd->vdev_log_mg, | |||||
mc == spa_embedded_log_class(mg->mg_vd->vdev_spa)); | |||||
for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) | for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) | ||||
mc_hist[i] += mg->mg_histogram[i]; | mc_hist[i] += mg->mg_histogram[i]; | ||||
} | } | ||||
for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) | for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) { | ||||
VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]); | VERIFY3U(mc_hist[i], ==, mc->mc_histogram[i]); | ||||
} | |||||
mutex_exit(&mc->mc_lock); | |||||
kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); | kmem_free(mc_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); | ||||
} | } | ||||
/* | /* | ||||
* Calculate the metaslab class's fragmentation metric. The metric | * Calculate the metaslab class's fragmentation metric. The metric | ||||
* is weighted based on the space contribution of each metaslab group. | * is weighted based on the space contribution of each metaslab group. | ||||
* The return value will be a number between 0 and 100 (inclusive), or | * The return value will be a number between 0 and 100 (inclusive), or | ||||
* ZFS_FRAG_INVALID if the metric has not been set. See comment above the | * ZFS_FRAG_INVALID if the metric has not been set. See comment above the | ||||
▲ Show 20 Lines • Show All 446 Lines • ▼ Show 20 Lines | metaslab_group_initialized(metaslab_group_t *mg) | ||||
vdev_stat_t *vs = &vd->vdev_stat; | vdev_stat_t *vs = &vd->vdev_stat; | ||||
return (vs->vs_space != 0 && mg->mg_activation_count > 0); | return (vs->vs_space != 0 && mg->mg_activation_count > 0); | ||||
} | } | ||||
uint64_t | uint64_t | ||||
metaslab_group_get_space(metaslab_group_t *mg) | metaslab_group_get_space(metaslab_group_t *mg) | ||||
{ | { | ||||
return ((1ULL << mg->mg_vd->vdev_ms_shift) * mg->mg_vd->vdev_ms_count); | /* | ||||
* Note that the number of nodes in mg_metaslab_tree may be one less | |||||
* than vdev_ms_count, due to the embedded log metaslab. | |||||
*/ | |||||
mutex_enter(&mg->mg_lock); | |||||
uint64_t ms_count = avl_numnodes(&mg->mg_metaslab_tree); | |||||
mutex_exit(&mg->mg_lock); | |||||
return ((1ULL << mg->mg_vd->vdev_ms_shift) * ms_count); | |||||
} | } | ||||
void | void | ||||
metaslab_group_histogram_verify(metaslab_group_t *mg) | metaslab_group_histogram_verify(metaslab_group_t *mg) | ||||
{ | { | ||||
uint64_t *mg_hist; | uint64_t *mg_hist; | ||||
vdev_t *vd = mg->mg_vd; | avl_tree_t *t = &mg->mg_metaslab_tree; | ||||
uint64_t ashift = vd->vdev_ashift; | uint64_t ashift = mg->mg_vd->vdev_ashift; | ||||
int i; | |||||
if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) | if ((zfs_flags & ZFS_DEBUG_HISTOGRAM_VERIFY) == 0) | ||||
return; | return; | ||||
mg_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, | mg_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, | ||||
KM_SLEEP); | KM_SLEEP); | ||||
ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=, | ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=, | ||||
SPACE_MAP_HISTOGRAM_SIZE + ashift); | SPACE_MAP_HISTOGRAM_SIZE + ashift); | ||||
for (int m = 0; m < vd->vdev_ms_count; m++) { | mutex_enter(&mg->mg_lock); | ||||
metaslab_t *msp = vd->vdev_ms[m]; | for (metaslab_t *msp = avl_first(t); | ||||
msp != NULL; msp = AVL_NEXT(t, msp)) { | |||||
/* skip if not active or not a member */ | VERIFY3P(msp->ms_group, ==, mg); | ||||
if (msp->ms_sm == NULL || msp->ms_group != mg) | /* skip if not active */ | ||||
if (msp->ms_sm == NULL) | |||||
continue; | continue; | ||||
for (i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) | for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) { | ||||
mg_hist[i + ashift] += | mg_hist[i + ashift] += | ||||
msp->ms_sm->sm_phys->smp_histogram[i]; | msp->ms_sm->sm_phys->smp_histogram[i]; | ||||
} | } | ||||
} | |||||
for (i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++) | for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i ++) | ||||
VERIFY3U(mg_hist[i], ==, mg->mg_histogram[i]); | VERIFY3U(mg_hist[i], ==, mg->mg_histogram[i]); | ||||
mutex_exit(&mg->mg_lock); | |||||
kmem_free(mg_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); | kmem_free(mg_hist, sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE); | ||||
} | } | ||||
static void | static void | ||||
metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp) | metaslab_group_histogram_add(metaslab_group_t *mg, metaslab_t *msp) | ||||
{ | { | ||||
metaslab_class_t *mc = mg->mg_class; | metaslab_class_t *mc = mg->mg_class; | ||||
uint64_t ashift = mg->mg_vd->vdev_ashift; | uint64_t ashift = mg->mg_vd->vdev_ashift; | ||||
ASSERT(MUTEX_HELD(&msp->ms_lock)); | ASSERT(MUTEX_HELD(&msp->ms_lock)); | ||||
if (msp->ms_sm == NULL) | if (msp->ms_sm == NULL) | ||||
return; | return; | ||||
mutex_enter(&mg->mg_lock); | mutex_enter(&mg->mg_lock); | ||||
mutex_enter(&mc->mc_lock); | |||||
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) { | for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) { | ||||
IMPLY(mg == mg->mg_vd->vdev_log_mg, | |||||
mc == spa_embedded_log_class(mg->mg_vd->vdev_spa)); | |||||
mg->mg_histogram[i + ashift] += | mg->mg_histogram[i + ashift] += | ||||
msp->ms_sm->sm_phys->smp_histogram[i]; | msp->ms_sm->sm_phys->smp_histogram[i]; | ||||
mc->mc_histogram[i + ashift] += | mc->mc_histogram[i + ashift] += | ||||
msp->ms_sm->sm_phys->smp_histogram[i]; | msp->ms_sm->sm_phys->smp_histogram[i]; | ||||
} | } | ||||
mutex_exit(&mc->mc_lock); | |||||
mutex_exit(&mg->mg_lock); | mutex_exit(&mg->mg_lock); | ||||
} | } | ||||
void | void | ||||
metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp) | metaslab_group_histogram_remove(metaslab_group_t *mg, metaslab_t *msp) | ||||
{ | { | ||||
metaslab_class_t *mc = mg->mg_class; | metaslab_class_t *mc = mg->mg_class; | ||||
uint64_t ashift = mg->mg_vd->vdev_ashift; | uint64_t ashift = mg->mg_vd->vdev_ashift; | ||||
ASSERT(MUTEX_HELD(&msp->ms_lock)); | ASSERT(MUTEX_HELD(&msp->ms_lock)); | ||||
if (msp->ms_sm == NULL) | if (msp->ms_sm == NULL) | ||||
return; | return; | ||||
mutex_enter(&mg->mg_lock); | mutex_enter(&mg->mg_lock); | ||||
mutex_enter(&mc->mc_lock); | |||||
for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) { | for (int i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) { | ||||
ASSERT3U(mg->mg_histogram[i + ashift], >=, | ASSERT3U(mg->mg_histogram[i + ashift], >=, | ||||
msp->ms_sm->sm_phys->smp_histogram[i]); | msp->ms_sm->sm_phys->smp_histogram[i]); | ||||
ASSERT3U(mc->mc_histogram[i + ashift], >=, | ASSERT3U(mc->mc_histogram[i + ashift], >=, | ||||
msp->ms_sm->sm_phys->smp_histogram[i]); | msp->ms_sm->sm_phys->smp_histogram[i]); | ||||
IMPLY(mg == mg->mg_vd->vdev_log_mg, | |||||
mc == spa_embedded_log_class(mg->mg_vd->vdev_spa)); | |||||
mg->mg_histogram[i + ashift] -= | mg->mg_histogram[i + ashift] -= | ||||
msp->ms_sm->sm_phys->smp_histogram[i]; | msp->ms_sm->sm_phys->smp_histogram[i]; | ||||
mc->mc_histogram[i + ashift] -= | mc->mc_histogram[i + ashift] -= | ||||
msp->ms_sm->sm_phys->smp_histogram[i]; | msp->ms_sm->sm_phys->smp_histogram[i]; | ||||
} | } | ||||
mutex_exit(&mc->mc_lock); | |||||
mutex_exit(&mg->mg_lock); | mutex_exit(&mg->mg_lock); | ||||
} | } | ||||
static void | static void | ||||
metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) | metaslab_group_add(metaslab_group_t *mg, metaslab_t *msp) | ||||
{ | { | ||||
ASSERT(msp->ms_group == NULL); | ASSERT(msp->ms_group == NULL); | ||||
mutex_enter(&mg->mg_lock); | mutex_enter(&mg->mg_lock); | ||||
▲ Show 20 Lines • Show All 1,641 Lines • ▼ Show 20 Lines | metaslab_fini(metaslab_t *msp) | ||||
spa_t *spa = vd->vdev_spa; | spa_t *spa = vd->vdev_spa; | ||||
metaslab_fini_flush_data(msp); | metaslab_fini_flush_data(msp); | ||||
metaslab_group_remove(mg, msp); | metaslab_group_remove(mg, msp); | ||||
mutex_enter(&msp->ms_lock); | mutex_enter(&msp->ms_lock); | ||||
VERIFY(msp->ms_group == NULL); | VERIFY(msp->ms_group == NULL); | ||||
/* | |||||
* If the range trees haven't been allocated, this metaslab hasn't | |||||
* been through metaslab_sync_done() for the first time yet, so its | |||||
* space hasn't been accounted for in its vdev and doesn't need to be | |||||
* subtracted. | |||||
*/ | |||||
if (msp->ms_freed != NULL) { | |||||
metaslab_space_update(vd, mg->mg_class, | metaslab_space_update(vd, mg->mg_class, | ||||
-metaslab_allocated_space(msp), 0, -msp->ms_size); | -metaslab_allocated_space(msp), 0, -msp->ms_size); | ||||
} | |||||
space_map_close(msp->ms_sm); | space_map_close(msp->ms_sm); | ||||
msp->ms_sm = NULL; | msp->ms_sm = NULL; | ||||
metaslab_unload(msp); | metaslab_unload(msp); | ||||
range_tree_destroy(msp->ms_allocatable); | range_tree_destroy(msp->ms_allocatable); | ||||
if (msp->ms_freed != NULL) { | |||||
range_tree_destroy(msp->ms_freeing); | range_tree_destroy(msp->ms_freeing); | ||||
range_tree_destroy(msp->ms_freed); | range_tree_destroy(msp->ms_freed); | ||||
ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=, | ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=, | ||||
metaslab_unflushed_changes_memused(msp)); | metaslab_unflushed_changes_memused(msp)); | ||||
spa->spa_unflushed_stats.sus_memused -= | spa->spa_unflushed_stats.sus_memused -= | ||||
metaslab_unflushed_changes_memused(msp); | metaslab_unflushed_changes_memused(msp); | ||||
range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); | range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL); | ||||
range_tree_destroy(msp->ms_unflushed_allocs); | range_tree_destroy(msp->ms_unflushed_allocs); | ||||
range_tree_destroy(msp->ms_checkpointing); | |||||
range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); | range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL); | ||||
range_tree_destroy(msp->ms_unflushed_frees); | range_tree_destroy(msp->ms_unflushed_frees); | ||||
for (int t = 0; t < TXG_SIZE; t++) { | for (int t = 0; t < TXG_SIZE; t++) { | ||||
range_tree_destroy(msp->ms_allocating[t]); | range_tree_destroy(msp->ms_allocating[t]); | ||||
} | } | ||||
for (int t = 0; t < TXG_DEFER_SIZE; t++) { | for (int t = 0; t < TXG_DEFER_SIZE; t++) { | ||||
range_tree_destroy(msp->ms_defer[t]); | range_tree_destroy(msp->ms_defer[t]); | ||||
} | } | ||||
} | |||||
ASSERT0(msp->ms_deferspace); | ASSERT0(msp->ms_deferspace); | ||||
range_tree_destroy(msp->ms_checkpointing); | |||||
for (int t = 0; t < TXG_SIZE; t++) | for (int t = 0; t < TXG_SIZE; t++) | ||||
ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t)); | ASSERT(!txg_list_member(&vd->vdev_ms_list, msp, t)); | ||||
range_tree_vacate(msp->ms_trim, NULL, NULL); | range_tree_vacate(msp->ms_trim, NULL, NULL); | ||||
range_tree_destroy(msp->ms_trim); | range_tree_destroy(msp->ms_trim); | ||||
mutex_exit(&msp->ms_lock); | mutex_exit(&msp->ms_lock); | ||||
cv_destroy(&msp->ms_load_cv); | cv_destroy(&msp->ms_load_cv); | ||||
▲ Show 20 Lines • Show All 2,325 Lines • ▼ Show 20 Lines | if (hintdva) { | ||||
/* | /* | ||||
* It's possible the vdev we're using as the hint no | * It's possible the vdev we're using as the hint no | ||||
* longer exists or its mg has been closed (e.g. by | * longer exists or its mg has been closed (e.g. by | ||||
* device removal). Consult the rotor when | * device removal). Consult the rotor when | ||||
* all else fails. | * all else fails. | ||||
*/ | */ | ||||
if (vd != NULL && vd->vdev_mg != NULL) { | if (vd != NULL && vd->vdev_mg != NULL) { | ||||
mg = vd->vdev_mg; | mg = vdev_get_mg(vd, mc); | ||||
if (flags & METASLAB_HINTBP_AVOID && | if (flags & METASLAB_HINTBP_AVOID && | ||||
mg->mg_next != NULL) | mg->mg_next != NULL) | ||||
mg = mg->mg_next; | mg = mg->mg_next; | ||||
} else { | } else { | ||||
mg = mca->mca_rotor; | mg = mca->mca_rotor; | ||||
} | } | ||||
} else if (d != 0) { | } else if (d != 0) { | ||||
▲ Show 20 Lines • Show All 1,129 Lines • Show Last 20 Lines |