Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c @@ -1053,6 +1053,9 @@ doca->doca_cred, tx); } +#if defined(__FreeBSD__) && defined(_KERNEL) + zvol_create_minors(dp->dp_spa, doca->doca_name); +#endif spa_history_log_internal_ds(ds, "create", tx, ""); dsl_dataset_rele(ds, FTAG); dsl_dir_rele(pdd, FTAG); @@ -1148,6 +1151,9 @@ VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds)); dsl_dataset_name(origin, namebuf); +#if defined(__FreeBSD__) && defined(_KERNEL) + zvol_create_minors(dp->dp_spa, doca->doca_clone); +#endif spa_history_log_internal_ds(ds, "clone", tx, "origin=%s (%llu)", namebuf, origin->ds_object); dsl_dataset_rele(ds, FTAG); Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c @@ -57,6 +57,9 @@ #include #include #include +#ifdef __FreeBSD__ +#include +#endif #ifdef __FreeBSD__ #undef dump_write @@ -3445,6 +3448,11 @@ drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj; } + +#if defined(__FreeBSD__) && defined(_KERNEL) + zvol_create_minors(dp->dp_spa, drc->drc_tofs); +#endif + /* * Release the hold from dmu_recv_begin. This must be done before * we return to open context, so that when we free the dataset's dnode, Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c @@ -1572,6 +1572,9 @@ dsl_props_set_sync_impl(ds->ds_prev, ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); } +#if defined(__FreeBSD__) && defined(_KERNEL) + zvol_create_minors(dp->dp_spa, name); +#endif dsl_dataset_rele(ds, FTAG); } } @@ -1646,17 +1649,6 @@ fnvlist_free(suspended); } -#ifdef __FreeBSD__ -#ifdef _KERNEL - if (error == 0) { - for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; - pair = nvlist_next_nvpair(snaps, pair)) { - char *snapname = nvpair_name(pair); - zvol_create_minors(snapname); - } - } -#endif -#endif return (error); } @@ -2535,7 +2527,7 @@ snprintf(newname, ZFS_MAX_DATASET_NAME_LEN, "%s@%s", ddrsa->ddrsa_fsname, ddrsa->ddrsa_newsnapname); zfsvfs_update_fromname(oldname, newname); - zvol_rename_minors(oldname, newname); + zvol_rename_minors(dp->dp_spa, oldname, newname); kmem_free(newname, ZFS_MAX_DATASET_NAME_LEN); kmem_free(oldname, ZFS_MAX_DATASET_NAME_LEN); #endif @@ -3087,9 +3079,6 @@ } #if defined(__FreeBSD__) && defined(_KERNEL) - /* Take the spa_namespace_lock early so zvol renames don't deadlock. */ - mutex_enter(&spa_namespace_lock); - oldname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); newname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); #endif @@ -3135,7 +3124,7 @@ #if defined(__FreeBSD__) && defined(_KERNEL) dsl_dataset_name(ds, newname); zfsvfs_update_fromname(oldname, newname); - zvol_rename_minors(oldname, newname); + zvol_rename_minors(dp->dp_spa, oldname, newname); #endif /* move any clone references */ @@ -3177,8 +3166,6 @@ } #if defined(__FreeBSD__) && defined(_KERNEL) - mutex_exit(&spa_namespace_lock); - kmem_free(newname, ZFS_MAX_DATASET_NAME_LEN); kmem_free(oldname, ZFS_MAX_DATASET_NAME_LEN); #endif Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c @@ -43,7 +43,11 @@ #include #include #include +#if defined(__FreeBSD__) && defined(_KERNEL) +#include +#endif + int dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) { @@ -489,6 +493,14 @@ if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, tx)); + +#if defined(__FreeBSD__) && defined(_KERNEL) + char dsname[ZFS_MAX_DATASET_NAME_LEN]; + + dsl_dataset_name(ds, dsname); + zvol_remove_minors(dp->dp_spa, dsname); +#endif + dsl_dir_rele(ds->ds_dir, ds); ds->ds_dir = NULL; dmu_object_free_zapified(mos, obj, tx); @@ -979,6 +991,9 @@ VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); dsl_destroy_head_sync_impl(ds, tx); +#if defined(__FreeBSD__) && defined(_KERNEL) + zvol_remove_minors(dp->dp_spa, ddha->ddha_name); +#endif dsl_dataset_rele(ds, FTAG); } Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c @@ -2093,7 +2093,7 @@ #ifdef __FreeBSD__ #ifdef _KERNEL zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname); - zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname); + zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname, ddra->ddra_newname); #endif #endif Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c @@ -32,6 +32,7 @@ * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2017 Datto Inc. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. + * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ /* @@ -1280,6 +1281,24 @@ */ trim_thread_create(spa); + /* + * This taskq is used to perform zvol-minor-related tasks + * asynchronously. This has several advantages, including easy + * resolution of various deadlocks (zfsonlinux bug #3681). + * + * The taskq must be single threaded to ensure tasks are always + * processed in the order in which they were dispatched. + * + * A taskq per pool allows one to keep the pools independent. + * This way if one pool is suspended, it will not impact another. + * + * The preferred location to dispatch a zvol minor task is a sync + * task. In this context, there is easy access to the spa_t and minimal + * error handling is required because the sync task must succeed. + */ + spa->spa_zvol_taskq = taskq_create("z_zvol", 1, minclsyspri, + 1, INT_MAX, 0); + for (size_t i = 0; i < TXG_SIZE; i++) { spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); @@ -1323,6 +1342,11 @@ spa_evicting_os_wait(spa); + if (spa->spa_zvol_taskq) { + taskq_destroy(spa->spa_zvol_taskq); + spa->spa_zvol_taskq = NULL; + } + txg_list_destroy(&spa->spa_vdev_txg_list); list_destroy(&spa->spa_config_dirty_list); @@ -4614,7 +4638,7 @@ #ifdef __FreeBSD__ #ifdef _KERNEL if (firstopen) - zvol_create_minors(spa->spa_name); + zvol_create_minors(spa, spa->spa_name); #endif #endif } @@ -5970,7 +5994,7 @@ #ifdef __FreeBSD__ #ifdef _KERNEL - zvol_create_minors(pool); + zvol_create_minors(spa, pool); #endif #endif return (0); @@ -6119,6 +6143,10 @@ spa_open_ref(spa, FTAG); mutex_exit(&spa_namespace_lock); spa_async_suspend(spa); + if (spa->spa_zvol_taskq) { + zvol_remove_minors(spa, spa_name(spa)); + taskq_wait(spa->spa_zvol_taskq); + } mutex_enter(&spa_namespace_lock); spa_close(spa, FTAG); Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h @@ -27,6 +27,7 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. + * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ #ifndef _SYS_SPA_IMPL_H @@ -398,6 +399,8 @@ uint64_t spa_lowmem_last_txg; /* txg window start */ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */ + + taskq_t *spa_zvol_taskq; /* Taskq for minor management */ uint64_t spa_multihost; /* multihost aware (mmp) */ mmp_thread_t spa_mmp; /* multihost mmp thread */ Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zvol.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ #ifndef _SYS_ZVOL_H @@ -40,9 +41,6 @@ extern int zvol_check_volblocksize(uint64_t volblocksize); extern int zvol_get_stats(objset_t *os, nvlist_t *nv); extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); -extern int zvol_create_minor(const char *); -extern int zvol_remove_minor(const char *); -extern void zvol_remove_minors(const char *); extern int zvol_set_volsize(const char *, uint64_t); #ifdef illumos @@ -72,8 +70,10 @@ #endif /* illumos */ #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -extern int zvol_create_minors(const char *name); -extern void zvol_rename_minors(const char *oldname, const char *newname); +extern void zvol_create_minors(spa_t *spa, const char *name); +extern void zvol_remove_minors(spa_t *spa, const char *name); +extern void zvol_rename_minors(spa_t *spa, const char *oldname, + const char *newname); #endif #endif /* _KERNEL */ Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -1642,8 +1642,10 @@ int error; zfs_log_history(zc); error = spa_destroy(zc->zc_name); +#ifndef __FreeBSD__ if (error == 0) zvol_remove_minors(zc->zc_name); +#endif return (error); } @@ -1694,8 +1696,10 @@ zfs_log_history(zc); error = spa_export(zc->zc_name, NULL, force, hardforce); +#ifndef __FreeBSD__ if (error == 0) zvol_remove_minors(zc->zc_name); +#endif return (error); } @@ -3395,13 +3399,23 @@ if (error == 0) { error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl); +#if defined(__FreeBSD__) && defined(_KERNEL) + /* + * Wait for ZVOL operations to settle down before destroying. + */ + if (error != 0) { + spa_t *spa; + + if (spa_open(fsname, &spa, FTAG) == 0) { + taskqueue_drain_all( + spa->spa_zvol_taskq->tq_queue); + spa_close(spa, FTAG); + } + } +#endif if (error != 0) (void) dsl_destroy_head(fsname); } -#ifdef __FreeBSD__ - if (error == 0 && type == DMU_OST_ZVOL) - zvol_create_minors(fsname); -#endif return (error); } @@ -3443,10 +3457,6 @@ if (error != 0) (void) dsl_destroy_head(fsname); } -#ifdef __FreeBSD__ - if (error == 0) - zvol_create_minors(fsname); -#endif return (error); } @@ -3738,9 +3748,6 @@ return (SET_ERROR(EXDEV)); zfs_unmount_snap(nvpair_name(pair)); -#if defined(__FreeBSD__) - zvol_remove_minors(name); -#endif } return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); @@ -3924,10 +3931,8 @@ err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy); else err = dsl_destroy_head(zc->zc_name); +#ifndef __FreeBSD__ if (ost == DMU_OST_ZVOL && err == 0) -#ifdef __FreeBSD__ - zvol_remove_minors(zc->zc_name); -#else (void) zvol_remove_minor(zc->zc_name); #endif return (err); @@ -4822,11 +4827,6 @@ } #endif -#ifdef __FreeBSD__ - if (error == 0) - zvol_create_minors(tofs); -#endif - /* * On error, restore the original props. */ @@ -6967,6 +6967,24 @@ out: nvlist_free(innvl); + +#if defined(__FreeBSD__) && defined(_KERNEL) + /* + * Wait for ZVOL changes to get applied. + * NB: taskqueue_drain_all() does less than taskq_wait(), + * but enough for what we want. + * And there is no equivalent illumos API. + */ + if (error == 0) { + spa_t *spa; + + if (spa_open(saved_poolname, &spa, FTAG) == 0) { + taskqueue_drain_all( + spa->spa_zvol_taskq->tq_queue); + spa_close(spa, FTAG); + } + } +#endif #ifdef illumos rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c @@ -30,6 +30,7 @@ * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright (c) 2016 Actifio, Inc. All rights reserved. */ /* Portions Copyright 2011 Martin Matuska */ @@ -185,6 +186,20 @@ #endif } zvol_state_t; +typedef enum { + ZVOL_ASYNC_CREATE_MINORS, + ZVOL_ASYNC_REMOVE_MINORS, + ZVOL_ASYNC_RENAME_MINORS, + ZVOL_ASYNC_MAX +} zvol_async_op_t; + +typedef struct { + zvol_async_op_t op; + char pool[ZFS_MAX_DATASET_NAME_LEN]; + char name1[ZFS_MAX_DATASET_NAME_LEN]; + char name2[ZFS_MAX_DATASET_NAME_LEN]; +} zvol_task_t; + #ifndef illumos static LIST_HEAD(, zvol_state) all_zvols; #endif @@ -607,7 +622,7 @@ /* * Create a minor node (plus a whole lot more) for the specified volume. */ -int +static int zvol_create_minor(const char *name) { zfs_soft_state_t *zs; @@ -691,7 +706,6 @@ if (error != 0 || mode == ZFS_VOLMODE_DEFAULT) mode = volmode; - DROP_GIANT(); zv->zv_volmode = mode; if (zv->zv_volmode == ZFS_VOLMODE_GEOM) { g_topology_lock(); @@ -766,7 +780,6 @@ zvol_geom_run(zv); g_topology_unlock(); } - PICKUP_GIANT(); ZFS_LOG(1, "ZVOL %s created.", name); #endif @@ -820,22 +833,6 @@ } int -zvol_remove_minor(const char *name) -{ - zvol_state_t *zv; - int rc; - - mutex_enter(&zfsdev_state_lock); - if ((zv = zvol_minor_lookup(name)) == NULL) { - mutex_exit(&zfsdev_state_lock); - return (SET_ERROR(ENXIO)); - } - rc = zvol_remove_zv(zv); - mutex_exit(&zfsdev_state_lock); - return (rc); -} - -int zvol_first_open(zvol_state_t *zv) { dmu_object_info_t doi; @@ -976,7 +973,7 @@ } void -zvol_remove_minors(const char *name) +zvol_remove_minors_impl(const char *name) { #ifdef illumos zvol_state_t *zv; @@ -1004,7 +1001,6 @@ namelen = strlen(name); - DROP_GIANT(); mutex_enter(&zfsdev_state_lock); LIST_FOREACH_SAFE(zv, &all_zvols, zv_links, tzv) { @@ -1017,7 +1013,6 @@ } mutex_exit(&zfsdev_state_lock); - PICKUP_GIANT(); #endif /* illumos */ } @@ -2920,7 +2915,7 @@ } int -zvol_create_minors(const char *name) +zvol_create_minors_impl(const char *name) { uint64_t cookie; objset_t *os; @@ -2976,7 +2971,7 @@ while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL, &cookie) == 0) { dmu_objset_rele(os, FTAG); - (void)zvol_create_minors(osname); + (void)zvol_create_minors_impl(osname); if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) { printf("ZFS WARNING: Unable to put hold on %s (error=%d).\n", name, error); @@ -3045,7 +3040,7 @@ } void -zvol_rename_minors(const char *oldname, const char *newname) +zvol_rename_minors_impl(const char *oldname, const char *newname) { char name[MAXPATHLEN]; struct g_provider *pp; @@ -3058,7 +3053,6 @@ oldnamelen = strlen(oldname); newnamelen = strlen(newname); - DROP_GIANT(); /* See comment in zvol_open(). */ if (!MUTEX_HELD(&zfsdev_state_lock)) { mutex_enter(&zfsdev_state_lock); @@ -3080,7 +3074,88 @@ if (locked) mutex_exit(&zfsdev_state_lock); - PICKUP_GIANT(); +} + +static zvol_task_t * +zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2) +{ + zvol_task_t *task; + char *delim; + + task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP); + task->op = op; + delim = strchr(name1, '/'); + strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN); + + strlcpy(task->name1, name1, MAXNAMELEN); + if (name2 != NULL) + strlcpy(task->name2, name2, MAXNAMELEN); + + return (task); +} + +static void +zvol_task_free(zvol_task_t *task) +{ + kmem_free(task, sizeof (zvol_task_t)); +} + +/* + * The worker thread function performed asynchronously. + */ +static void +zvol_task_cb(void *param) +{ + zvol_task_t *task = (zvol_task_t *)param; + + switch (task->op) { + case ZVOL_ASYNC_CREATE_MINORS: + (void) zvol_create_minors_impl(task->name1); + break; + case ZVOL_ASYNC_REMOVE_MINORS: + zvol_remove_minors_impl(task->name1); + break; + case ZVOL_ASYNC_RENAME_MINORS: + zvol_rename_minors_impl(task->name1, task->name2); + break; + default: + VERIFY(0); + break; + } + + zvol_task_free(task); +} + +static void +zvol_minors_helper(spa_t *spa, zvol_async_op_t op, const char *name1, + const char *name2) +{ + zvol_task_t *task; + + if (dataset_name_hidden(name1)) + return; + if (name2 != NULL && dataset_name_hidden(name2)) + return; + task = zvol_task_alloc(op, name1, name2); + (void)taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP); +} + +void +zvol_create_minors(spa_t *spa, const char *name) +{ + zvol_minors_helper(spa, ZVOL_ASYNC_CREATE_MINORS, name, NULL); +} + +void +zvol_remove_minors(spa_t *spa, const char *name) +{ + zvol_minors_helper(spa, ZVOL_ASYNC_REMOVE_MINORS, name, NULL); +} + +void +zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname) +{ + zvol_minors_helper(spa, ZVOL_ASYNC_RENAME_MINORS, oldname, newname); } static int