diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
index 670e31398d80..a373537dd4e0 100644
--- a/module/zfs/zfs_dir.c
+++ b/module/zfs/zfs_dir.c
@@ -1,1124 +1,1095 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sysmacros.h>
 #include <sys/resource.h>
 #include <sys/vfs.h>
 #include <sys/vnode.h>
 #include <sys/file.h>
 #include <sys/mode.h>
 #include <sys/kmem.h>
 #include <sys/uio.h>
 #include <sys/pathname.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/sunddi.h>
 #include <sys/random.h>
 #include <sys/policy.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/fs/zfs.h>
 #include "fs/fs_subr.h"
 #include <sys/zap.h>
 #include <sys/dmu.h>
 #include <sys/atomic.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/sa.h>
 #include <sys/zfs_sa.h>
 #include <sys/dnlc.h>
 #include <sys/extdirent.h>
 
 /*
  * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups
  * of names after deciding which is the appropriate lookup interface.
  */
 static int
 zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
     boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
 {
 	boolean_t conflict = B_FALSE;
 	int error;
 
 	if (zsb->z_norm) {
 		matchtype_t mt = MT_FIRST;
 		size_t bufsz = 0;
 		char *buf = NULL;
 
 		if (rpnp) {
 			buf = rpnp->pn_buf;
 			bufsz = rpnp->pn_bufsize;
 		}
 		if (exact)
 			mt = MT_EXACT;
 		/*
 		 * In the non-mixed case we only expect there would ever
 		 * be one match, but we need to use the normalizing lookup.
 		 */
 		error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1,
 		    zoid, mt, buf, bufsz, &conflict);
 	} else {
 		error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid);
 	}
 
 	/*
 	 * Allow multiple entries provided the first entry is
 	 * the object id.  Non-zpl consumers may safely make
 	 * use of the additional space.
 	 *
 	 * XXX: This should be a feature flag for compatibility
 	 */
 	if (error == EOVERFLOW)
 		error = 0;
 
 	if (zsb->z_norm && !error && deflags)
 		*deflags = conflict ? ED_CASE_CONFLICT : 0;
 
 	*zoid = ZFS_DIRENT_OBJ(*zoid);
 
 #ifdef HAVE_DNLC
 	if (error == ENOENT && update)
 		dnlc_update(ZTOI(dzp), name, DNLC_NO_VNODE);
 #endif /* HAVE_DNLC */
 
 	return (error);
 }
 
 /*
  * Lock a directory entry.  A dirlock on <dzp, name> protects that name
  * in dzp's directory zap object.  As long as you hold a dirlock, you can
  * assume two things: (1) dzp cannot be reaped, and (2) no other thread
  * can change the zap entry for (i.e. link or unlink) this name.
  *
  * Input arguments:
  *	dzp	- znode for directory
  *	name	- name of entry to lock
  *	flag	- ZNEW: if the entry already exists, fail with EEXIST.
  *		  ZEXISTS: if the entry does not exist, fail with ENOENT.
  *		  ZSHARED: allow concurrent access with other ZSHARED callers.
  *		  ZXATTR: we want dzp's xattr directory
  *		  ZCILOOK: On a mixed sensitivity file system,
  *			   this lookup should be case-insensitive.
  *		  ZCIEXACT: On a purely case-insensitive file system,
  *			    this lookup should be case-sensitive.
  *		  ZRENAMING: we are locking for renaming, force narrow locks
  *		  ZHAVELOCK: Don't grab the z_name_lock for this call. The
  *			     current thread already holds it.
  *
  * Output arguments:
  *	zpp	- pointer to the znode for the entry (NULL if there isn't one)
  *	dlpp	- pointer to the dirlock for this entry (NULL on error)
  *      direntflags - (case-insensitive lookup only)
  *		flags if multiple case-sensitive matches exist in directory
  *      realpnp     - (case-insensitive lookup only)
  *		actual name matched within the directory
  *
  * Return value: 0 on success or errno on failure.
  *
  * NOTE: Always checks for, and rejects, '.' and '..'.
  * NOTE: For case-insensitive file systems we take wide locks (see below),
  *	 but return znode pointers to a single match.
  */
 int
 zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
     int flag, int *direntflags, pathname_t *realpnp)
 {
 	zfs_sb_t	*zsb = ZTOZSB(dzp);
 	zfs_dirlock_t	*dl;
 	boolean_t	update;
 	boolean_t	exact;
 	uint64_t	zoid;
 #ifdef HAVE_DNLC
 	vnode_t		*vp = NULL;
 #endif /* HAVE_DNLC */
 	int		error = 0;
 	int		cmpflags;
 
 	*zpp = NULL;
 	*dlpp = NULL;
 
 	/*
 	 * Verify that we are not trying to lock '.', '..', or '.zfs'
 	 */
 	if ((name[0] == '.' &&
 	    (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
 	    (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
 		return (EEXIST);
 
 	/*
 	 * Case sensitivity and normalization preferences are set when
 	 * the file system is created.  These are stored in the
 	 * zsb->z_case and zsb->z_norm fields.  These choices
 	 * affect what vnodes can be cached in the DNLC, how we
 	 * perform zap lookups, and the "width" of our dirlocks.
 	 *
 	 * A normal dirlock locks a single name.  Note that with
 	 * normalization a name can be composed multiple ways, but
 	 * when normalized, these names all compare equal.  A wide
 	 * dirlock locks multiple names.  We need these when the file
 	 * system is supporting mixed-mode access.  It is sometimes
 	 * necessary to lock all case permutations of file name at
 	 * once so that simultaneous case-insensitive/case-sensitive
 	 * behaves as rationally as possible.
 	 */
 
 	/*
 	 * Decide if exact matches should be requested when performing
 	 * a zap lookup on file systems supporting case-insensitive
 	 * access.
 	 */
 	exact =
 	    ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
 	    ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
 
 	/*
 	 * Only look in or update the DNLC if we are looking for the
 	 * name on a file system that does not require normalization
 	 * or case folding.  We can also look there if we happen to be
 	 * on a non-normalizing, mixed sensitivity file system IF we
 	 * are looking for the exact name.
 	 *
 	 * Maybe can add TO-UPPERed version of name to dnlc in ci-only
 	 * case for performance improvement?
 	 */
 	update = !zsb->z_norm ||
 	    ((zsb->z_case == ZFS_CASE_MIXED) &&
 	    !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
 
 	/*
 	 * ZRENAMING indicates we are in a situation where we should
 	 * take narrow locks regardless of the file system's
 	 * preferences for normalizing and case folding.  This will
 	 * prevent us deadlocking trying to grab the same wide lock
 	 * twice if the two names happen to be case-insensitive
 	 * matches.
 	 */
 	if (flag & ZRENAMING)
 		cmpflags = 0;
 	else
 		cmpflags = zsb->z_norm;
 
 	/*
 	 * Wait until there are no locks on this name.
 	 *
 	 * Don't grab the the lock if it is already held. However, cannot
 	 * have both ZSHARED and ZHAVELOCK together.
 	 */
 	ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
 	if (!(flag & ZHAVELOCK))
 		rw_enter(&dzp->z_name_lock, RW_READER);
 
 	mutex_enter(&dzp->z_lock);
 	for (;;) {
 		if (dzp->z_unlinked) {
 			mutex_exit(&dzp->z_lock);
 			if (!(flag & ZHAVELOCK))
 				rw_exit(&dzp->z_name_lock);
 			return (ENOENT);
 		}
 		for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
 			if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
 			    U8_UNICODE_LATEST, &error) == 0) || error != 0)
 				break;
 		}
 		if (error != 0) {
 			mutex_exit(&dzp->z_lock);
 			if (!(flag & ZHAVELOCK))
 				rw_exit(&dzp->z_name_lock);
 			return (ENOENT);
 		}
 		if (dl == NULL)	{
 			/*
 			 * Allocate a new dirlock and add it to the list.
 			 */
 			dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
 			cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
 			dl->dl_name = name;
 			dl->dl_sharecnt = 0;
 			dl->dl_namelock = 0;
 			dl->dl_namesize = 0;
 			dl->dl_dzp = dzp;
 			dl->dl_next = dzp->z_dirlocks;
 			dzp->z_dirlocks = dl;
 			break;
 		}
 		if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
 			break;
 		cv_wait(&dl->dl_cv, &dzp->z_lock);
 	}
 
 	/*
 	 * If the z_name_lock was NOT held for this dirlock record it.
 	 */
 	if (flag & ZHAVELOCK)
 		dl->dl_namelock = 1;
 
 	if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
 		/*
 		 * We're the second shared reference to dl.  Make a copy of
 		 * dl_name in case the first thread goes away before we do.
 		 * Note that we initialize the new name before storing its
 		 * pointer into dl_name, because the first thread may load
 		 * dl->dl_name at any time.  He'll either see the old value,
 		 * which is his, or the new shared copy; either is OK.
 		 */
 		dl->dl_namesize = strlen(dl->dl_name) + 1;
 		name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
 		bcopy(dl->dl_name, name, dl->dl_namesize);
 		dl->dl_name = name;
 	}
 
 	mutex_exit(&dzp->z_lock);
 
 	/*
 	 * We have a dirlock on the name.  (Note that it is the dirlock,
 	 * not the dzp's z_lock, that protects the name in the zap object.)
 	 * See if there's an object by this name; if so, put a hold on it.
 	 */
 	if (flag & ZXATTR) {
 		error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
 		    sizeof (zoid));
 		if (error == 0)
 			error = (zoid == 0 ? ENOENT : 0);
 	} else {
 #ifdef HAVE_DNLC
 		if (update)
 			vp = dnlc_lookup(ZTOI(dzp), name);
 		if (vp == DNLC_NO_VNODE) {
 			iput(vp);
 			error = ENOENT;
 		} else if (vp) {
 			if (flag & ZNEW) {
 				zfs_dirent_unlock(dl);
 				iput(vp);
 				return (EEXIST);
 			}
 			*dlpp = dl;
 			*zpp = VTOZ(vp);
 			return (0);
 		} else {
 			error = zfs_match_find(zsb, dzp, name, exact,
 			    update, direntflags, realpnp, &zoid);
 		}
 #else
 		error = zfs_match_find(zsb, dzp, name, exact,
 		    update, direntflags, realpnp, &zoid);
 #endif /* HAVE_DNLC */
 	}
 	if (error) {
 		if (error != ENOENT || (flag & ZEXISTS)) {
 			zfs_dirent_unlock(dl);
 			return (error);
 		}
 	} else {
 		if (flag & ZNEW) {
 			zfs_dirent_unlock(dl);
 			return (EEXIST);
 		}
 		error = zfs_zget(zsb, zoid, zpp);
 		if (error) {
 			zfs_dirent_unlock(dl);
 			return (error);
 		}
 #ifdef HAVE_DNLC
 		if (!(flag & ZXATTR) && update)
 			dnlc_update(ZTOI(dzp), name, ZTOI(*zpp));
 #endif /* HAVE_DNLC */
 	}
 
 	*dlpp = dl;
 
 	return (0);
 }
 
 /*
  * Unlock this directory entry and wake anyone who was waiting for it.
  */
 void
 zfs_dirent_unlock(zfs_dirlock_t *dl)
 {
 	znode_t *dzp = dl->dl_dzp;
 	zfs_dirlock_t **prev_dl, *cur_dl;
 
 	mutex_enter(&dzp->z_lock);
 
 	if (!dl->dl_namelock)
 		rw_exit(&dzp->z_name_lock);
 
 	if (dl->dl_sharecnt > 1) {
 		dl->dl_sharecnt--;
 		mutex_exit(&dzp->z_lock);
 		return;
 	}
 	prev_dl = &dzp->z_dirlocks;
 	while ((cur_dl = *prev_dl) != dl)
 		prev_dl = &cur_dl->dl_next;
 	*prev_dl = dl->dl_next;
 	cv_broadcast(&dl->dl_cv);
 	mutex_exit(&dzp->z_lock);
 
 	if (dl->dl_namesize != 0)
 		kmem_free(dl->dl_name, dl->dl_namesize);
 	cv_destroy(&dl->dl_cv);
 	kmem_free(dl, sizeof (*dl));
 }
 
 /*
  * Look up an entry in a directory.
  *
  * NOTE: '.' and '..' are handled as special cases because
  *	no directory entries are actually stored for them.  If this is
  *	the root of a filesystem, then '.zfs' is also treated as a
  *	special pseudo-directory.
  */
 int
 zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
     int *deflg, pathname_t *rpnp)
 {
 	zfs_dirlock_t *dl;
 	znode_t *zp;
 	int error = 0;
 	uint64_t parent;
 
 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
 		*ipp = ZTOI(dzp);
 		igrab(*ipp);
 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
 		zfs_sb_t *zsb = ZTOZSB(dzp);
 
 		/*
 		 * If we are a snapshot mounted under .zfs, return
 		 * the inode pointer for the snapshot directory.
 		 */
 		if ((error = sa_lookup(dzp->z_sa_hdl,
 		    SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0)
 			return (error);
 
 		if (parent == dzp->z_id && zsb->z_parent != zsb) {
 			error = zfsctl_root_lookup(zsb->z_parent->z_ctldir,
 			    "snapshot", ipp, 0, kcred, NULL, NULL);
 			return (error);
 		}
 		rw_enter(&dzp->z_parent_lock, RW_READER);
 		error = zfs_zget(zsb, parent, &zp);
 		if (error == 0)
 			*ipp = ZTOI(zp);
 		rw_exit(&dzp->z_parent_lock);
 	} else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
 		*ipp = zfsctl_root(dzp);
 	} else {
 		int zf;
 
 		zf = ZEXISTS | ZSHARED;
 		if (flags & FIGNORECASE)
 			zf |= ZCILOOK;
 
 		error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
 		if (error == 0) {
 			*ipp = ZTOI(zp);
 			zfs_dirent_unlock(dl);
 			dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
 		}
 		rpnp = NULL;
 	}
 
 	if ((flags & FIGNORECASE) && rpnp && !error)
 		(void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
 
 	return (error);
 }
 
 /*
  * unlinked Set (formerly known as the "delete queue") Error Handling
  *
  * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
  * don't specify the name of the entry that we will be manipulating.  We
  * also fib and say that we won't be adding any new entries to the
  * unlinked set, even though we might (this is to lower the minimum file
  * size that can be deleted in a full filesystem).  So on the small
  * chance that the nlink list is using a fat zap (ie. has more than
  * 2000 entries), we *may* not pre-read a block that's needed.
  * Therefore it is remotely possible for some of the assertions
  * regarding the unlinked set below to fail due to i/o error.  On a
  * nondebug system, this will result in the space being leaked.
  */
 void
 zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 
 	ASSERT(zp->z_unlinked);
 	ASSERT(zp->z_links == 0);
 
 	VERIFY3U(0, ==,
 	    zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
 }
 
+/*
+ * Clean up znodes left in the unlinked set by a crash or a forced unmount:
+ * each one is pulled into core, marked z_unlinked, and released with iput().
+ */
+void
+zfs_unlinked_drain(zfs_sb_t *zsb)
+{
+	zap_cursor_t	zc;
+	zap_attribute_t zap;
+	dmu_object_info_t doi;
+	znode_t		*zp;
+	int		error;
+
+	/*
+	 * Iterate over the contents of the unlinked set.
+	 */
+	for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
+	    zap_cursor_retrieve(&zc, &zap) == 0;
+	    zap_cursor_advance(&zc)) {
+
+		/*
+		 * Check the object's type; skip it if the lookup fails.
+		 */
+
+		error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
+		if (error != 0)
+			continue;
+
+		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
+		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
+		/*
+		 * We need to re-mark these list entries for deletion,
+		 * so we pull them back into core and set zp->z_unlinked.
+		 */
+		error = zfs_zget(zsb, zap.za_first_integer, &zp);
+
+		/*
+		 * We may pick up znodes that are already marked for deletion.
+		 * This could happen during the purge of an extended attribute
+		 * directory.  All we need to do is skip over them, since they
+		 * are already in the system marked z_unlinked.
+		 */
+		if (error != 0)
+			continue;
+
+		zp->z_unlinked = B_TRUE;
+		iput(ZTOI(zp));
+	}
+	zap_cursor_fini(&zc);
+}
+
 /*
  * Delete the entire contents of a directory.  Return a count
  * of the number of entries that could not be deleted. If we encounter
  * an error, return a count of at least one so that the directory stays
  * in the unlinked set.
  *
  * NOTE: this function assumes that the directory is inactive,
  *	so there is no need to lock its entries before deletion.
  *	Also, it assumes the directory contents is *only* regular
  *	files.
  */
 static int
 zfs_purgedir(znode_t *dzp)
 {
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	znode_t		*xzp;
 	dmu_tx_t	*tx;
 	zfs_sb_t	*zsb = ZTOZSB(dzp);
 	zfs_dirlock_t	dl;
 	int skipped = 0;
 	int error;
 
 	for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
 	    zap_cursor_advance(&zc)) {
 		error = zfs_zget(zsb,
 		    ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
 		if (error) {
 			skipped += 1;
 			continue;
 		}
 
 		ASSERT(S_ISREG(ZTOI(xzp)->i_mode)||S_ISLNK(ZTOI(xzp)->i_mode));
 
 		tx = dmu_tx_create(zsb->z_os);
 		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 		dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 		dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
 		/* Is this really needed ? */
 		zfs_sa_upgrade_txholds(tx, xzp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 			iput(ZTOI(xzp));
 			skipped += 1;
 			continue;
 		}
 		bzero(&dl, sizeof (dl));
 		dl.dl_dzp = dzp;
 		dl.dl_name = zap.za_name;
 
 		error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
 		if (error)
 			skipped += 1;
 		dmu_tx_commit(tx);
 
 		iput(ZTOI(xzp));
 	}
 	zap_cursor_fini(&zc);
 	if (error != ENOENT)
 		skipped += 1;
 	return (skipped);
 }
 
-/*
- * Clean up any znodes that had no links when we either crashed or
- * (force) umounted the file system.
- */
-void
-zfs_unlinked_drain(zfs_sb_t *zsb)
-{
-	zap_cursor_t	zc;
-	zap_attribute_t zap;
-	dmu_object_info_t doi;
-	znode_t		*zp;
-	int		error;
-
-	/*
-	 * Interate over the contents of the unlinked set.
-	 */
-	for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
-	    zap_cursor_retrieve(&zc, &zap) == 0;
-	    zap_cursor_advance(&zc)) {
-
-		/*
-		 * See what kind of object we have in list
-		 */
-
-		error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
-		if (error != 0)
-			continue;
-
-		ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
-		    (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
-		/*
-		 * We need to re-mark these list entries for deletion,
-		 * so we pull them back into core and set zp->z_unlinked.
-		 */
-		error = zfs_zget(zsb, zap.za_first_integer, &zp);
-
-		/*
-		 * We may pick up znodes that are already marked for deletion.
-		 * This could happen during the purge of an extended attribute
-		 * directory.  All we need to do is skip over them, since they
-		 * are already in the system marked z_unlinked.
-		 */
-		if (error != 0)
-			continue;
-
-		zp->z_unlinked = B_TRUE;
-
-		/*
-		 * If this is an attribute directory, purge its contents.
-		 */
-		if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
-			/*
-			 * We don't need to check the return value of
-			 * zfs_purgedir here, because zfs_rmnode will just
-			 * return this xattr directory to the unlinked set
-			 * until all of its xattrs are gone.
-			 */
-			(void) zfs_purgedir(zp);
-		}
-
-		iput(ZTOI(zp));
-	}
-	zap_cursor_fini(&zc);
-}
-
 void
 zfs_rmnode(znode_t *zp)
 {
 	zfs_sb_t	*zsb = ZTOZSB(zp);
 	objset_t	*os = zsb->z_os;
 	znode_t		*xzp = NULL;
 	dmu_tx_t	*tx;
 	uint64_t	acl_obj;
 	uint64_t	xattr_obj;
-	uint64_t	count;
 	int		error;
 
 	ASSERT(zp->z_links == 0);
 	ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
 
 	/*
 	 * If this is an attribute directory, purge its contents.
 	 */
 	if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
-		error = zap_count(os, zp->z_id, &count);
-		if (error) {
-			zfs_znode_dmu_fini(zp);
-			return;
-		}
-
-		if (count > 0) {
-			taskq_t *taskq;
-
+		if (zfs_purgedir(zp) != 0) {
 			/*
-			 * There are still directory entries in this xattr
-			 * directory.  Let zfs_unlinked_drain() deal with
-			 * them to avoid deadlocking this process in the
-			 * zfs_purgedir()->zfs_zget()->ilookup() callpath
-			 * on the xattr inode's I_FREEING bit.
+			 * Some xattrs could not be removed (e.g. out of space).
+			 * Leave the directory in the unlinked set.
 			 */
-			taskq = dsl_pool_iput_taskq(dmu_objset_pool(os));
-			taskq_dispatch(taskq, (task_func_t *)
-			    zfs_unlinked_drain, zsb, TQ_SLEEP);
-
 			zfs_znode_dmu_fini(zp);
+
 			return;
 		}
 	}
 
 	/*
 	 * Free up all the data in the file.
 	 */
 	error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
 	if (error) {
 		/*
 		 * Not enough space.  Leave the file in the unlinked set.
 		 */
 		zfs_znode_dmu_fini(zp);
 		return;
 	}
 
 	/*
 	 * If the file has extended attributes, we're going to unlink
 	 * the xattr dir.
 	 */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
 	    &xattr_obj, sizeof (xattr_obj));
 	if (error == 0 && xattr_obj) {
 		error = zfs_zget(zsb, xattr_obj, &xzp);
 		ASSERT(error == 0);
 	}
 
 	acl_obj = zfs_external_acl(zp);
 
 	/*
 	 * Set up the final transaction.
 	 */
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
 	dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
 	if (xzp) {
 		dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 	if (acl_obj)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 
 	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		/*
 		 * Not enough space to delete the file.  Leave it in the
 		 * unlinked set, leaking it until the fs is remounted (at
 		 * which point we'll call zfs_unlinked_drain() to process it).
 		 */
 		dmu_tx_abort(tx);
 		zfs_znode_dmu_fini(zp);
 		goto out;
 	}
 
 	if (xzp) {
 		ASSERT(error == 0);
 		mutex_enter(&xzp->z_lock);
 		xzp->z_unlinked = B_TRUE;	/* mark xzp for deletion */
 		xzp->z_links = 0;	/* no more links to it */
 		VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
 		    &xzp->z_links, sizeof (xzp->z_links), tx));
 		mutex_exit(&xzp->z_lock);
 		zfs_unlinked_add(xzp, tx);
 	}
 
 	/* Remove this znode from the unlinked set */
 	VERIFY3U(0, ==,
 	    zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
 
 	zfs_znode_delete(zp, tx);
 
 	dmu_tx_commit(tx);
 out:
 	if (xzp)
 		iput(ZTOI(xzp));
 }
 
 static uint64_t
 zfs_dirent(znode_t *zp, uint64_t mode)
 {
 	uint64_t de = zp->z_id;
 
 	if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE)
 		de |= IFTODT(mode) << 60;
 	return (de);
 }
 
 /*
  * Link zp into dl.  Can only fail if zp has been unlinked.
  */
 int
 zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 {
 	znode_t *dzp = dl->dl_dzp;
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	uint64_t value;
 	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
 	sa_bulk_attr_t bulk[5];
 	uint64_t mtime[2], ctime[2];
 	int count = 0;
 	int error;
 
 	mutex_enter(&zp->z_lock);
 
 	if (!(flag & ZRENAMING)) {
 		if (zp->z_unlinked) {	/* no new links to unlinked zp */
 			ASSERT(!(flag & (ZNEW | ZEXISTS)));
 			mutex_exit(&zp->z_lock);
 			return (ENOENT);
 		}
 		zp->z_links++;
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
 		    &zp->z_links, sizeof (zp->z_links));
 
 	}
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
 	    &dzp->z_id, sizeof (dzp->z_id));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 
 	if (!(flag & ZNEW)) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
 		    ctime, sizeof (ctime));
 		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
 		    ctime, B_TRUE);
 	}
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 	ASSERT(error == 0);
 
 	mutex_exit(&zp->z_lock);
 
 	mutex_enter(&dzp->z_lock);
 	dzp->z_size++;
 	dzp->z_links += zp_is_dir;
 	count = 0;
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
 	    &dzp->z_size, sizeof (dzp->z_size));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
 	    &dzp->z_links, sizeof (dzp->z_links));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
 	    mtime, sizeof (mtime));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
 	    ctime, sizeof (ctime));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
 	    &dzp->z_pflags, sizeof (dzp->z_pflags));
 	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
 	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
 	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 
 	value = zfs_dirent(zp, zp->z_mode);
 	error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
 	    8, 1, &value, tx);
 	ASSERT(error == 0);
 
 	return (0);
 }
 
 static int
 zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
     int flag)
 {
 	int error;
 
 	if (ZTOZSB(zp)->z_norm) {
 		if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) &&
 		    (flag & ZCIEXACT)) ||
 		    ((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) &&
 		    !(flag & ZCILOOK)))
 			error = zap_remove_norm(ZTOZSB(zp)->z_os,
 			    dzp->z_id, dl->dl_name, MT_EXACT, tx);
 		else
 			error = zap_remove_norm(ZTOZSB(zp)->z_os,
 			    dzp->z_id, dl->dl_name, MT_FIRST, tx);
 	} else {
 		error = zap_remove(ZTOZSB(zp)->z_os,
 		    dzp->z_id, dl->dl_name, tx);
 	}
 
 	return (error);
 }
 
 /*
  * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
  * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
  * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
  * If it's non-NULL, we use it to indicate whether the znode needs deletion,
  * and it's the caller's job to do it.
  */
 int
 zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
 	boolean_t *unlinkedp)
 {
 	znode_t *dzp = dl->dl_dzp;
 	zfs_sb_t *zsb = ZTOZSB(dzp);
 	int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
 	boolean_t unlinked = B_FALSE;
 	sa_bulk_attr_t bulk[5];
 	uint64_t mtime[2], ctime[2];
 	int count = 0;
 	int error;
 
 #ifdef HAVE_DNLC
 	dnlc_remove(ZTOI(dzp), dl->dl_name);
 #endif /* HAVE_DNLC */
 
 	if (!(flag & ZRENAMING)) {
 		mutex_enter(&zp->z_lock);
 
 		if (zp_is_dir && !zfs_dirempty(zp)) {
 			mutex_exit(&zp->z_lock);
 			return (ENOTEMPTY);
 		}
 
 		/*
 		 * If we get here, we are going to try to remove the object.
 		 * First try removing the name from the directory; if that
 		 * fails, return the error.
 		 */
 		error = zfs_dropname(dl, zp, dzp, tx, flag);
 		if (error != 0) {
 			mutex_exit(&zp->z_lock);
 			return (error);
 		}
 
 		if (zp->z_links <= zp_is_dir) {
 			zfs_panic_recover("zfs: link count on %lu is %u, "
 			    "should be at least %u", zp->z_id,
 			    (int)zp->z_links, zp_is_dir + 1);
 			zp->z_links = zp_is_dir + 1;
 		}
 		if (--zp->z_links == zp_is_dir) {
 			zp->z_unlinked = B_TRUE;
 			zp->z_links = 0;
 			unlinked = B_TRUE;
 		} else {
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
 			    NULL, &ctime, sizeof (ctime));
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
 			    NULL, &zp->z_pflags, sizeof (zp->z_pflags));
 			zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
 			    B_TRUE);
 		}
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
 		    NULL, &zp->z_links, sizeof (zp->z_links));
 		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		count = 0;
 		ASSERT(error == 0);
 		mutex_exit(&zp->z_lock);
 	} else {
 		error = zfs_dropname(dl, zp, dzp, tx, flag);
 		if (error != 0)
 			return (error);
 	}
 
 	mutex_enter(&dzp->z_lock);
 	dzp->z_size--;		/* one dirent removed */
 	dzp->z_links -= zp_is_dir;	/* ".." link from zp */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
 	    NULL, &dzp->z_links, sizeof (dzp->z_links));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
 	    NULL, &dzp->z_size, sizeof (dzp->z_size));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
 	    NULL, ctime, sizeof (ctime));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
 	    NULL, mtime, sizeof (mtime));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
 	    NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
 	zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
 	error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
 	ASSERT(error == 0);
 	mutex_exit(&dzp->z_lock);
 
 	if (unlinkedp != NULL)
 		*unlinkedp = unlinked;
 	else if (unlinked)
 		zfs_unlinked_add(zp, tx);
 
 	return (0);
 }
 
 /*
  * Indicate whether the directory is empty.  Works with or without z_lock
  * held, but can only be consider a hint in the latter case.  Returns true
  * if only "." and ".." remain and there's no work in progress.
  */
 boolean_t
 zfs_dirempty(znode_t *dzp)
 {
 	return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
 }
 
 int
 zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
 {
 	zfs_sb_t *zsb = ZTOZSB(zp);
 	znode_t *xzp;
 	dmu_tx_t *tx;
 	int error;
 	zfs_acl_ids_t acl_ids;
 	boolean_t fuid_dirtied;
 #ifdef DEBUG
 	uint64_t parent;
 #endif
 
 	*xipp = NULL;
 
 	if ((error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)))
 		return (error);
 
 	if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
 	    &acl_ids)) != 0)
 		return (error);
 	if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
 		zfs_acl_ids_free(&acl_ids);
 		return (EDQUOT);
 	}
 
 top:
 	tx = dmu_tx_create(zsb->z_os);
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zsb->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zsb, tx);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		if (error == ERESTART) {
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		return (error);
 	}
 	zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zsb, tx);
 
 #ifdef DEBUG
 	error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
 	    &parent, sizeof (parent));
 	ASSERT(error == 0 && parent == zp->z_id);
 #endif
 
 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
 	    sizeof (xzp->z_id), tx));
 
 	(void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
 	    xzp, "", NULL, acl_ids.z_fuidp, vap);
 
 	zfs_acl_ids_free(&acl_ids);
 	dmu_tx_commit(tx);
 
 	*xipp = ZTOI(xzp);
 
 	return (0);
 }
 
 /*
  * Return a znode for the extended attribute directory for zp.
  * ** If the directory does not already exist, it is created **
  *
  *	IN:	zp	- znode to obtain attribute directory from
  *		cr	- credentials of caller
  *		flags	- flags from the VOP_LOOKUP call
  *
  *	OUT:	xipp	- pointer to extended attribute znode
  *
  *	RETURN:	0 on success
  *		error number on failure
  */
 int
 zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
 {
 	zfs_sb_t	*zsb = ZTOZSB(zp);
 	znode_t		*xzp;
 	zfs_dirlock_t	*dl;
 	vattr_t		va;
 	int		error;
 top:
 	error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
 	if (error)
 		return (error);
 
 	if (xzp != NULL) {
 		*xipp = ZTOI(xzp);
 		zfs_dirent_unlock(dl);
 		return (0);
 	}
 
 	if (!(flags & CREATE_XATTR_DIR)) {
 		zfs_dirent_unlock(dl);
 		return (ENOENT);
 	}
 
 	if (zfs_is_readonly(zsb)) {
 		zfs_dirent_unlock(dl);
 		return (EROFS);
 	}
 
 	/*
 	 * The ability to 'create' files in an attribute
 	 * directory comes from the write_xattr permission on the base file.
 	 *
 	 * The ability to 'search' an attribute directory requires
 	 * read_xattr permission on the base file.
 	 *
 	 * Once in a directory the ability to read/write attributes
 	 * is controlled by the permissions on the attribute file.
 	 */
 	va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
 	va.va_mode = S_IFDIR | S_ISVTX | 0777;
 	zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
 
 	va.va_dentry = NULL;
 	error = zfs_make_xattrdir(zp, &va, xipp, cr);
 	zfs_dirent_unlock(dl);
 
 	if (error == ERESTART) {
 		/* NB: we already did dmu_tx_wait() if necessary */
 		goto top;
 	}
 
 	return (error);
 }
 
 /*
  * Decide whether it is okay to remove within a sticky directory.
  *
  * In sticky directories, write access is not sufficient;
  * you can remove entries from a directory only if:
  *
  *	you own the directory,
  *	you own the entry,
  *	the entry is a plain file and you have write access,
  *	or you are privileged (checked in secpolicy...).
  *
  * The function returns 0 if remove access is granted.
  */
 int
 zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
 {
 	uid_t		uid;
 	uid_t		downer;
 	uid_t		fowner;
 	zfs_sb_t	*zsb = ZTOZSB(zdp);
 
 	if (zsb->z_replay)
 		return (0);
 
 	if ((zdp->z_mode & S_ISVTX) == 0)
 		return (0);
 
 	downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER);
 	fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER);
 
 	if ((uid = crgetuid(cr)) == downer || uid == fowner ||
 	    (S_ISDIR(ZTOI(zp)->i_mode) &&
 	    zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
 		return (0);
 	else
 		return (secpolicy_vnode_remove(cr));
 }
diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c
index 90f9055afdc5..175dca821462 100644
--- a/module/zfs/zfs_vfsops.c
+++ b/module/zfs/zfs_vfsops.c
@@ -1,1593 +1,1587 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <sys/pathname.h>
 #include <sys/vnode.h>
 #include <sys/vfs.h>
 #include <sys/vfs_opreg.h>
 #include <sys/mntent.h>
 #include <sys/mount.h>
 #include <sys/cmn_err.h>
 #include "fs/fs_subr.h"
 #include <sys/zfs_znode.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_dir.h>
 #include <sys/zil.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_deleg.h>
 #include <sys/spa.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/varargs.h>
 #include <sys/policy.h>
 #include <sys/atomic.h>
 #include <sys/mkdev.h>
 #include <sys/modctl.h>
 #include <sys/refstr.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/bootconf.h>
 #include <sys/sunddi.h>
 #include <sys/dnlc.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa_boot.h>
 #include <sys/sa.h>
 #include <sys/zpl.h>
 #include "zfs_comutil.h"
 
 
 /*ARGSUSED*/
 int
 zfs_sync(struct super_block *sb, int wait, cred_t *cr)
 {
 	zfs_sb_t *zsb = sb->s_fs_info;
 	dsl_pool_t *pool;
 
 	/*
 	 * Two cases need no work at all:
 	 *  - a panic is in progress: a compromised kernel must not write
 	 *    to the storage pool;
 	 *  - the caller does not want to wait: the sync only has to be
 	 *    initiated, and the DMU syncs out txgs frequently anyway.
 	 */
 	if (unlikely(oops_in_progress) || !wait)
 		return (0);
 
 	if (zsb == NULL) {
 		/*
 		 * No specific filesystem was given, which is what sync(1M)
 		 * does.  Wait for every pool to commit its dirty data.
 		 */
 		spa_sync_allpools();
 		return (0);
 	}
 
 	/* Sync one specific filesystem. */
 	ZFS_ENTER(zsb);
 	pool = dmu_objset_pool(zsb->z_os);
 
 	/*
 	 * A filesystem on a suspended pool (e.g. during shutdown) is
 	 * skipped, as is one that has no intent log open.
 	 */
 	if (!spa_suspended(pool->dp_spa) && zsb->z_log != NULL)
 		zil_commit(zsb->z_log, 0);
 
 	ZFS_EXIT(zsb);
 	return (0);
 }
 EXPORT_SYMBOL(zfs_sync);
 
 /*
  * Report whether MS_RDONLY is set on the Linux super block attached to
  * this filesystem.  zsb->z_sb is dereferenced unconditionally.
  */
 boolean_t
 zfs_is_readonly(zfs_sb_t *zsb)
 {
 	struct super_block *sb = zsb->z_sb;
 
 	return ((sb->s_flags & MS_RDONLY) != 0);
 }
 EXPORT_SYMBOL(zfs_is_readonly);
 
 /* "atime" property callback: cache the new value in the zfs_sb_t. */
 static void
 atime_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 
 	zsb->z_atime = newval;
 }
 
 /*
  * "xattr" property callback.  ZFS_XATTR_OFF clears ZSB_XATTR and leaves
  * z_xattr_sa untouched; any other value sets ZSB_XATTR and records
  * whether the value is ZFS_XATTR_SA.
  */
 static void
 xattr_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 
 	if (newval == ZFS_XATTR_OFF) {
 		zsb->z_flags &= ~ZSB_XATTR;
 		return;
 	}
 
 	zsb->z_flags |= ZSB_XATTR;
 	zsb->z_xattr_sa = (newval == ZFS_XATTR_SA) ? B_TRUE : B_FALSE;
 }
 
 /*
  * "recordsize" property callback.  A value is accepted only if it is a
  * power of two within [SPA_MINBLOCKSIZE, SPA_MAXBLOCKSIZE]; anything
  * else is replaced by SPA_MAXBLOCKSIZE.
  */
 static void
 blksz_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 	boolean_t usable;
 
 	usable = (newval >= SPA_MINBLOCKSIZE &&
 	    newval <= SPA_MAXBLOCKSIZE && ISP2(newval));
 
 	if (usable)
 		zsb->z_max_blksz = newval;
 	else
 		zsb->z_max_blksz = SPA_MAXBLOCKSIZE;
 }
 
 /*
  * "readonly" property callback: mirror the value into MS_RDONLY on the
  * super block.  Does nothing when no super block is attached yet.
  */
 static void
 readonly_changed_cb(void *arg, uint64_t newval)
 {
 	struct super_block *sb = ((zfs_sb_t *)arg)->z_sb;
 
 	if (sb != NULL) {
 		if (newval != 0)
 			sb->s_flags |= MS_RDONLY;
 		else
 			sb->s_flags &= ~MS_RDONLY;
 	}
 }
 
 /*
  * Callback registered for the "devices" property (see
  * zfs_register_callbacks()).  The body is empty: nothing is updated
  * here when the property changes.
  */
 static void
 devices_changed_cb(void *arg, uint64_t newval)
 {
 }
 
 /*
  * Callback registered for the "setuid" property (see
  * zfs_register_callbacks()).  The body is empty: nothing is updated
  * here when the property changes.
  */
 static void
 setuid_changed_cb(void *arg, uint64_t newval)
 {
 }
 
 /*
  * Callback registered for the "exec" property (see
  * zfs_register_callbacks()).  The body is empty: nothing is updated
  * here when the property changes.
  */
 static void
 exec_changed_cb(void *arg, uint64_t newval)
 {
 }
 
 /*
  * "nbmand" property callback: set MS_MANDLOCK on the super block when
  * the new value equals TRUE, clear it otherwise.  Does nothing when no
  * super block is attached yet.
  */
 static void
 nbmand_changed_cb(void *arg, uint64_t newval)
 {
 	struct super_block *sb = ((zfs_sb_t *)arg)->z_sb;
 
 	if (sb != NULL) {
 		if (newval == TRUE)
 			sb->s_flags |= MS_MANDLOCK;
 		else
 			sb->s_flags &= ~MS_MANDLOCK;
 	}
 }
 
 /* "snapdir" property callback: cache the new value in z_show_ctldir. */
 static void
 snapdir_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 
 	zsb->z_show_ctldir = newval;
 }
 
 /* "vscan" property callback: cache the new value in z_vscan. */
 static void
 vscan_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 
 	zsb->z_vscan = newval;
 }
 
 /* "aclinherit" property callback: cache the new value in z_acl_inherit. */
 static void
 acl_inherit_changed_cb(void *arg, uint64_t newval)
 {
 	zfs_sb_t *zsb = arg;
 
 	zsb->z_acl_inherit = newval;
 }
 
 /*
  * Register the property-change callbacks for this filesystem's dataset.
  *
  * Registration stops at the first failure; in that case every callback
  * is unregistered again and the error is returned.  Returns 0 when all
  * callbacks were registered.
  */
 int
 zfs_register_callbacks(zfs_sb_t *zsb)
 {
 	struct dsl_dataset *ds = NULL;
 	objset_t *os = zsb->z_os;
 	int error = 0;
 
 	/* Force read-only when mounted read-only or the pool is not writeable. */
 	if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os)))
 		readonly_changed_cb(zsb, B_TRUE);
 
 	/*
 	 * Register the callbacks one by one and bail out on the first
 	 * error.  Checking only the first registration would probably be
 	 * enough to detect an i/o error, but every call is checked anyway.
 	 */
 	ds = dmu_objset_ds(os);
 
 	error = dsl_prop_register(ds, "atime", atime_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "xattr", xattr_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "recordsize", blksz_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "readonly", readonly_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "devices", devices_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "setuid", setuid_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "exec", exec_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb,
 	    zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "vscan", vscan_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	error = dsl_prop_register(ds, "nbmand", nbmand_changed_cb, zsb);
 	if (error)
 		goto unregister;
 
 	return (0);
 
 unregister:
 	/*
 	 * Some of these callbacks may never have been registered.  That
 	 * is fine: the unregister call then just returns ENOMSG, which
 	 * is deliberately ignored.
 	 */
 	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
 	    zsb);
 	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb);
 	(void) dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb);
 
 	return (error);
 }
 EXPORT_SYMBOL(zfs_register_callbacks);
 
 /*
  * Extract the owning user and group ids from an object's bonus data.
  *
  * Returns ENOENT for bonus types other than DMU_OT_ZNODE / DMU_OT_SA,
  * EEXIST when 'data' is NULL (telling the dmu to keep using the same
  * ids), and 0 with *userp / *groupp filled in otherwise.
  */
 static int
 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
 {
 	znode_phys_t *phys = data;
 	uintptr_t attrs;
 	int hdrsize;
 
 	/* Only znode and SA bonus buffers are tracked. */
 	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
 		return (ENOENT);
 
 	/* No data: assume the ids are unchanged. */
 	if (data == NULL)
 		return (EEXIST);
 
 	if (bonustype == DMU_OT_ZNODE) {
 		*userp = phys->zp_uid;
 		*groupp = phys->zp_gid;
 		return (0);
 	}
 
 	ASSERT(bonustype == DMU_OT_SA);
 	hdrsize = sa_hdrsize(data);
 
 	if (hdrsize == 0) {
 		/*
 		 * Expected only for newly created files whose znode
 		 * data has not been filled in yet.
 		 */
 		*userp = 0;
 		*groupp = 0;
 		return (0);
 	}
 
 	/* The ids live at fixed offsets past the SA header. */
 	attrs = (uintptr_t)data + hdrsize;
 	*userp = *((uint64_t *)(attrs + SA_UID_OFFSET));
 	*groupp = *((uint64_t *)(attrs + SA_GID_OFFSET));
 
 	return (0);
 }
 
 /*
  * Convert a FUID string into a (domain, rid) pair.  The domain looked
  * up from the FUID index is copied into domainbuf (at most buflen
  * bytes); if no domain is found domainbuf becomes the empty string.
  */
 static void
 fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr,
     char *domainbuf, int buflen, uid_t *ridp)
 {
 	uint64_t fuid = strtonum(fuidstr, NULL);
 	const char *domain = zfs_fuid_find_by_idx(zsb, FUID_INDEX(fuid));
 
 	if (domain == NULL)
 		domainbuf[0] = '\0';
 	else
 		(void) strlcpy(domainbuf, domain, buflen);
 
 	*ridp = FUID_RID(fuid);
 }
 
 /*
  * Map a user/group quota property to the object holding its entries.
  *
  * NOTE(review): for an unrecognised type the value ENOTSUP is returned
  * through the uint64_t object-number return; the callers visible in this
  * file only test the result against 0 -- confirm this is intended.
  */
 static uint64_t
 zfs_userquota_prop_to_obj(zfs_sb_t *zsb, zfs_userquota_prop_t type)
 {
 	uint64_t obj;
 
 	switch (type) {
 	case ZFS_PROP_USERUSED:
 		obj = DMU_USERUSED_OBJECT;
 		break;
 	case ZFS_PROP_GROUPUSED:
 		obj = DMU_GROUPUSED_OBJECT;
 		break;
 	case ZFS_PROP_USERQUOTA:
 		obj = zsb->z_userquota_obj;
 		break;
 	case ZFS_PROP_GROUPQUOTA:
 		obj = zsb->z_groupquota_obj;
 		break;
 	default:
 		obj = ENOTSUP;
 		break;
 	}
 
 	return (obj);
 }
 
 /*
  * Copy as many user/group accounting records as fit into vbuf.
  *
  * On entry *cookiep is a serialized zap cursor and *bufsizep the size
  * of vbuf in bytes.  On return *bufsizep holds the number of bytes
  * written and *cookiep the cursor position to resume from.  Returns
  * ENOTSUP when the objset has no userspace accounting, otherwise 0 or
  * the error from the zap cursor (ENOENT is treated as end of list).
  */
 int
 zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type,
     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
 {
 	zfs_useracct_t *next = vbuf;
 	zap_cursor_t zc;
 	zap_attribute_t za;
 	uint64_t obj;
 	int error;
 
 	if (!dmu_objset_userspace_present(zsb->z_os))
 		return (ENOTSUP);
 
 	obj = zfs_userquota_prop_to_obj(zsb, type);
 	if (obj == 0) {
 		/* No backing object: report an empty result. */
 		*bufsizep = 0;
 		return (0);
 	}
 
 	zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep);
 	while ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
 		/* Stop before a record that would not fit in the buffer. */
 		if ((uintptr_t)next - (uintptr_t)vbuf +
 		    sizeof (zfs_useracct_t) > *bufsizep)
 			break;
 
 		fuidstr_to_sid(zsb, za.za_name,
 		    next->zu_domain, sizeof (next->zu_domain), &next->zu_rid);
 		next->zu_space = za.za_first_integer;
 		next++;
 
 		zap_cursor_advance(&zc);
 	}
 
 	/* Running off the end of the zap is the normal way to finish. */
 	if (error == ENOENT)
 		error = 0;
 
 	ASSERT3U((uintptr_t)next - (uintptr_t)vbuf, <=, *bufsizep);
 	*bufsizep = (uintptr_t)next - (uintptr_t)vbuf;
 	*cookiep = zap_cursor_serialize(&zc);
 	zap_cursor_fini(&zc);
 
 	return (error);
 }
 EXPORT_SYMBOL(zfs_userspace_many);
 
 /*
  * buf must be big enough (eg, 32 bytes)
  */
 static int
 id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid,
     char *buf, boolean_t addok)
 {
 	uint64_t fuid;
 	int domainid = 0;
 
 	/* A NULL or empty domain keeps domain index 0. */
 	if (domain != NULL && domain[0] != '\0') {
 		domainid = zfs_fuid_find_by_domain(zsb, domain, NULL, addok);
 		if (domainid == -1)
 			return (ENOENT);
 	}
 
 	/* The FUID is written to buf as lower-case hex. */
 	fuid = FUID_ENCODE(domainid, rid);
 	(void) sprintf(buf, "%llx", (longlong_t)fuid);
 
 	return (0);
 }
 
 /*
  * Look up a single user/group accounting or quota value.
  *
  * *valp is set to 0 up front and is left at 0 when there is no backing
  * object or no entry for the identity.  Returns ENOTSUP when the objset
  * has no userspace accounting, or the error from id_to_fuidstr() or
  * zap_lookup() (with ENOENT mapped to 0).
  */
 int
 zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t *valp)
 {
 	char fuidstr[32];
 	uint64_t obj;
 	int err;
 
 	*valp = 0;
 
 	if (!dmu_objset_userspace_present(zsb->z_os))
 		return (ENOTSUP);
 
 	obj = zfs_userquota_prop_to_obj(zsb, type);
 	if (obj == 0)
 		return (0);
 
 	err = id_to_fuidstr(zsb, domain, rid, fuidstr, B_FALSE);
 	if (err != 0)
 		return (err);
 
 	err = zap_lookup(zsb->z_os, obj, fuidstr, 8, 1, valp);
 
 	return (err == ENOENT ? 0 : err);
 }
 EXPORT_SYMBOL(zfs_userspace_one);
 
 /*
  * Set the user or group quota for the identity (domain, rid), or remove
  * the entry when quota is 0.
  *
  * Returns EINVAL if type is neither ZFS_PROP_USERQUOTA nor
  * ZFS_PROP_GROUPQUOTA, ENOTSUP if the filesystem version is older than
  * ZPL_VERSION_USERSPACE, an error from id_to_fuidstr() or
  * dmu_tx_assign(), or otherwise the result of the zap update/remove.
  */
 int
 zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t quota)
 {
 	char buf[32];
 	int err;
 	dmu_tx_t *tx;
 	uint64_t *objp;
 	boolean_t fuid_dirtied;
 
 	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
 		return (EINVAL);
 
 	if (zsb->z_version < ZPL_VERSION_USERSPACE)
 		return (ENOTSUP);
 
 	/* objp points at the cached quota object number for this type. */
 	objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj :
 	    &zsb->z_groupquota_obj;
 
 	/* addok is B_TRUE here, unlike the lookup in zfs_userspace_one(). */
 	err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE);
 	if (err)
 		return (err);
 	fuid_dirtied = zsb->z_fuid_dirty;
 
 	tx = dmu_tx_create(zsb->z_os);
 	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
 	if (*objp == 0) {
 		/* The new object will also be recorded in the master node. */
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 		    zfs_userquota_prop_prefixes[type]);
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zsb, tx);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
 
 	/* Create the quota object on first use, under z_lock. */
 	mutex_enter(&zsb->z_lock);
 	if (*objp == 0) {
 		*objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA,
 		    DMU_OT_NONE, 0, tx);
 		VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ,
 		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
 	}
 	mutex_exit(&zsb->z_lock);
 
 	if (quota == 0) {
 		/* Removing an entry that does not exist is not an error. */
 		err = zap_remove(zsb->z_os, *objp, buf, tx);
 		if (err == ENOENT)
 			err = 0;
 	} else {
 		err = zap_update(zsb->z_os, *objp, buf, 8, 1, &quota, tx);
 	}
 	ASSERT(err == 0);
 	if (fuid_dirtied)
 		zfs_fuid_sync(zsb, tx);
 	dmu_tx_commit(tx);
 	return (err);
 }
 EXPORT_SYMBOL(zfs_set_userquota);
 
 /*
  * Report whether the given FUID has used at least as much space as its
  * quota allows.  Returns B_FALSE when there is no quota object, during
  * replay, or when either the quota or the usage lookup fails.
  */
 boolean_t
 zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid)
 {
 	char fuidstr[32];
 	uint64_t used, quota;
 	uint64_t usedobj, quotaobj;
 
 	if (isgroup) {
 		usedobj = DMU_GROUPUSED_OBJECT;
 		quotaobj = zsb->z_groupquota_obj;
 	} else {
 		usedobj = DMU_USERUSED_OBJECT;
 		quotaobj = zsb->z_userquota_obj;
 	}
 
 	if (quotaobj == 0 || zsb->z_replay)
 		return (B_FALSE);
 
 	(void) sprintf(fuidstr, "%llx", (longlong_t)fuid);
 
 	if (zap_lookup(zsb->z_os, quotaobj, fuidstr, 8, 1, &quota) != 0)
 		return (B_FALSE);
 
 	if (zap_lookup(zsb->z_os, usedobj, fuidstr, 8, 1, &used) != 0)
 		return (B_FALSE);
 
 	return (used >= quota);
 }
 EXPORT_SYMBOL(zfs_fuid_overquota);
 
 /*
  * Report whether the owner (or, with isgroup, the group) of zp is over
  * quota.  Short-circuits to B_FALSE when no quota object exists or the
  * filesystem is replaying; otherwise defers to zfs_fuid_overquota().
  */
 boolean_t
 zfs_owner_overquota(zfs_sb_t *zsb, znode_t *zp, boolean_t isgroup)
 {
 	uint64_t fuid;
 	uint64_t quotaobj;
 
 	if (isgroup) {
 		quotaobj = zsb->z_groupquota_obj;
 		fuid = zp->z_gid;
 	} else {
 		quotaobj = zsb->z_userquota_obj;
 		fuid = zp->z_uid;
 	}
 
 	if (quotaobj == 0 || zsb->z_replay)
 		return (B_FALSE);
 
 	return (zfs_fuid_overquota(zsb, isgroup, fuid));
 }
 EXPORT_SYMBOL(zfs_owner_overquota);
 
 /*
  * Allocate a zfs_sb_t for the dataset 'osname', take ownership of its
  * objset and load the ZPL properties and master-node objects into it.
  *
  * On success *zsbp points to the new zfs_sb_t (with z_sb still NULL) and
  * 0 is returned.  If dmu_objset_own() fails the allocation is freed and
  * the error returned without touching *zsbp.  Any later failure disowns
  * the objset, frees the allocation, sets *zsbp to NULL and returns the
  * error.
  */
 int
 zfs_sb_create(const char *osname, zfs_sb_t **zsbp)
 {
 	objset_t *os;
 	zfs_sb_t *zsb;
 	uint64_t zval;
 	int i, error;
 	uint64_t sa_obj;
 
 	zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP | KM_NODEBUG);
 
 	/*
 	 * We claim to always be readonly so we can open snapshots;
 	 * other ZPL code will prevent us from writing to snapshots.
 	 */
 	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os);
 	if (error) {
 		kmem_free(zsb, sizeof (zfs_sb_t));
 		return (error);
 	}
 
 	/*
 	 * Initialize the zfs-specific filesystem structure.
 	 * Should probably make this a kmem cache, shuffle fields,
 	 * and just bzero up to z_hold_mtx[].
 	 */
 	zsb->z_sb = NULL;
 	zsb->z_parent = zsb;
 	zsb->z_max_blksz = SPA_MAXBLOCKSIZE;
 	zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
 	zsb->z_os = os;
 
 	/* Refuse a filesystem version the pool version cannot support. */
 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version);
 	if (error) {
 		goto out;
 	} else if (zsb->z_version >
 	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
 		(void) printk("Can't mount a version %lld file system "
 		    "on a version %lld pool\n. Pool must be upgraded to mount "
 		    "this file system.", (u_longlong_t)zsb->z_version,
 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
 		error = ENOTSUP;
 		goto out;
 	}
 	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
 		goto out;
 	zsb->z_norm = (int)zval;
 
 	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
 		goto out;
 	zsb->z_utf8 = (zval != 0);
 
 	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
 		goto out;
 	zsb->z_case = (uint_t)zval;
 
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
 	 */
 	if (zsb->z_case == ZFS_CASE_INSENSITIVE ||
 	    zsb->z_case == ZFS_CASE_MIXED)
 		zsb->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os);
 	zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os);
 
 	if (zsb->z_use_sa) {
 		/* should either have both of these objects or none */
 		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
 		    &sa_obj);
 		if (error)
 			goto out;
 
 		/*
 		 * A failed xattr property lookup is not treated as fatal
 		 * here; 'error' is overwritten by sa_setup() below.
 		 */
 		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval);
 		if ((error == 0) && (zval == ZFS_XATTR_SA))
 			zsb->z_xattr_sa = B_TRUE;
 	} else {
 		/*
 		 * Pre SA versions file systems should never touch
 		 * either the attribute registration or layout objects.
 		 */
 		sa_obj = 0;
 	}
 
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
 	    &zsb->z_attr_table);
 	if (error)
 		goto out;
 
 	if (zsb->z_version >= ZPL_VERSION_SA)
 		sa_register_update_callback(os, zfs_sa_upgrade);
 
 	/* The root object and the unlinked set are mandatory. */
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
 	    &zsb->z_root);
 	if (error)
 		goto out;
 	ASSERT(zsb->z_root != 0);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
 	    &zsb->z_unlinkedobj);
 	if (error)
 		goto out;
 
 	/*
 	 * The remaining master-node entries are optional: ENOENT leaves
 	 * the corresponding field at zero (the structure was zalloc'ed).
 	 */
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
 	    8, 1, &zsb->z_userquota_obj);
 	if (error && error != ENOENT)
 		goto out;
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
 	    8, 1, &zsb->z_groupquota_obj);
 	if (error && error != ENOENT)
 		goto out;
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
 	    &zsb->z_fuid_obj);
 	if (error && error != ENOENT)
 		goto out;
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
 	    &zsb->z_shares_dir);
 	if (error && error != ENOENT)
 		goto out;
 
 	/* Locks and lists; torn down again by zfs_sb_free(). */
 	mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zsb->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 	rrw_init(&zsb->z_teardown_lock);
 	rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
 	avl_create(&zsb->z_ctldir_snaps, snapentry_compare,
 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
 	mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	*zsbp = zsb;
 	return (0);
 
 out:
 	/* No locks have been initialised yet, so a plain free suffices. */
 	dmu_objset_disown(os, zsb);
 	*zsbp = NULL;
 	kmem_free(zsb, sizeof (zfs_sb_t));
 	return (error);
 }
 EXPORT_SYMBOL(zfs_sb_create);
 
 /*
  * Finish bringing a zfs_sb_t online: register the property callbacks,
  * point the objset's user pointer at the zsb, open the ZIL and, when
  * 'mounting' is set, drain the unlinked set (read-write mounts only) and
  * replay or destroy the intent log.
  *
  * Returns the error from zfs_register_callbacks(), otherwise 0.
  */
 int
 zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting)
 {
 	int error;
 
 	error = zfs_register_callbacks(zsb);
 	if (error)
 		return (error);
 
 	/*
 	 * Set the objset user_ptr to track its zsb.
 	 */
 	mutex_enter(&zsb->z_os->os_user_ptr_lock);
 	dmu_objset_set_user(zsb->z_os, zsb);
 	mutex_exit(&zsb->z_os->os_user_ptr_lock);
 
 	zsb->z_log = zil_open(zsb->z_os, zfs_get_data);
 
 	/*
 	 * If we are not mounting (ie: online recv), then we don't
 	 * have to worry about replaying the log as we blocked all
 	 * operations out since we closed the ZIL.
 	 */
 	if (mounting) {
 		boolean_t readonly;
 
 		/*
 		 * During replay we remove the read only flag to
 		 * allow replays to succeed.
 		 */
 		readonly = zfs_is_readonly(zsb);
 		if (readonly != 0)
 			readonly_changed_cb(zsb, B_FALSE);
 		else
 			zfs_unlinked_drain(zsb);
 
 		/*
 		 * Parse and replay the intent log.
 		 *
 		 * Because of ziltest, this must be done after
 		 * zfs_unlinked_drain().  (Further note: ziltest
 		 * doesn't use readonly mounts, where
 		 * zfs_unlinked_drain() isn't called.)  This is because
 		 * ziltest causes spa_sync() to think it's committed,
 		 * but actually it is not, so the intent log contains
 		 * many txg's worth of changes.
 		 *
 		 * In particular, if object N is in the unlinked set in
 		 * the last txg to actually sync, then it could be
 		 * actually freed in a later txg and then reallocated
 		 * in a yet later txg.  This would write a "create
 		 * object N" record to the intent log.  Normally, this
 		 * would be fine because the spa_sync() would have
 		 * written out the fact that object N is free, before
 		 * we could write the "create object N" intent log
 		 * record.
 		 *
 		 * But when we are in ziltest mode, we advance the "open
 		 * txg" without actually spa_sync()-ing the changes to
 		 * disk.  So we would see that object N is still
 		 * allocated and in the unlinked set, and there is an
 		 * intent log record saying to allocate it.
 		 */
 		if (spa_writeable(dmu_objset_spa(zsb->z_os))) {
 			if (zil_replay_disable) {
 				/* Replay is disabled: discard the log. */
 				zil_destroy(zsb->z_log, B_FALSE);
 			} else {
 				/* z_replay is set only while zil_replay() runs. */
 				zsb->z_replay = B_TRUE;
 				zil_replay(zsb->z_os, zsb,
 				    zfs_replay_vector);
 				zsb->z_replay = B_FALSE;
 			}
 		}
 
 		/* restore readonly bit */
 		if (readonly != 0)
 			readonly_changed_cb(zsb, B_TRUE);
 	}
 
 	return (0);
 }
 EXPORT_SYMBOL(zfs_sb_setup);
 
 /*
  * Release a zfs_sb_t: destroy the FUID state, every lock, list and tree
  * embedded in the structure, then free the structure itself.
  */
 void
 zfs_sb_free(zfs_sb_t *zsb)
 {
 	int idx;
 
 	zfs_fuid_destroy(zsb);
 
 	mutex_destroy(&zsb->z_znodes_lock);
 	mutex_destroy(&zsb->z_lock);
 	list_destroy(&zsb->z_all_znodes);
 
 	rrw_destroy(&zsb->z_teardown_lock);
 	rw_destroy(&zsb->z_teardown_inactive_lock);
 	rw_destroy(&zsb->z_fuid_lock);
 
 	idx = 0;
 	while (idx != ZFS_OBJ_MTX_SZ) {
 		mutex_destroy(&zsb->z_hold_mtx[idx]);
 		idx++;
 	}
 
 	mutex_destroy(&zsb->z_ctldir_lock);
 	avl_destroy(&zsb->z_ctldir_snaps);
 
 	kmem_free(zsb, sizeof (zfs_sb_t));
 }
 EXPORT_SYMBOL(zfs_sb_free);
 
 /*
  * Recompute z_use_fuids and z_use_sa from the filesystem version and
  * the objset.
  */
 static void
 zfs_set_fuid_feature(zfs_sb_t *zsb)
 {
 	boolean_t fuids = USE_FUIDS(zsb->z_version, zsb->z_os);
 	boolean_t sa = USE_SA(zsb->z_version, zsb->z_os);
 
 	zsb->z_use_fuids = fuids;
 	zsb->z_use_sa = sa;
 }
 
 /*
  * Unregister every property callback set up by
  * zfs_register_callbacks().  Nothing is done for snapshots.  Each
  * unregistration is required to succeed (VERIFY).
  */
 void
 zfs_unregister_callbacks(zfs_sb_t *zsb)
 {
 	objset_t *os = zsb->z_os;
 	struct dsl_dataset *ds;
 
 	if (dmu_objset_is_snapshot(os))
 		return;
 
 	ds = dmu_objset_ds(os);
 
 	VERIFY(0 == dsl_prop_unregister(ds, "atime",
 	    atime_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "xattr",
 	    xattr_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "recordsize",
 	    blksz_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "readonly",
 	    readonly_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "devices",
 	    devices_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "setuid",
 	    setuid_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "exec",
 	    exec_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "snapdir",
 	    snapdir_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "aclinherit",
 	    acl_inherit_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "vscan",
 	    vscan_changed_cb, zsb));
 	VERIFY(0 == dsl_prop_unregister(ds, "nbmand",
 	    nbmand_changed_cb, zsb));
 }
 EXPORT_SYMBOL(zfs_unregister_callbacks);
 
 #ifdef HAVE_MLSLABEL
 /*
  * zfs_check_global_label:
  *	Check that the hex label string is appropriate for the dataset
  *	being mounted into the global_zone proper.
  *
  *	Return an error if the hex label string is not default or
  *	admin_low/admin_high.  For admin_low labels, the corresponding
  *	dataset must be readonly.
  */
 int
 zfs_check_global_label(const char *dsname, const char *hexsl)
 {
 	uint64_t rdonly;
 
 	/* The default and admin_high labels are always acceptable. */
 	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0 ||
 	    strcasecmp(hexsl, ADMIN_HIGH) == 0)
 		return (0);
 
 	/* Anything other than admin_low is rejected outright. */
 	if (strcasecmp(hexsl, ADMIN_LOW) != 0)
 		return (EACCES);
 
 	/* admin_low is allowed only on a readonly dataset. */
 	if (dsl_prop_get_integer(dsname,
 	    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL) != 0)
 		return (EACCES);
 
 	return (rdonly ? 0 : EACCES);
 }
 EXPORT_SYMBOL(zfs_check_global_label);
 #endif /* HAVE_MLSLABEL */
 
 /*
  * Fill in a kstatfs structure for the filesystem owning 'dentry'.
  * Always returns 0 once the filesystem has been entered.
  */
 int
 zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
 {
 	zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
 	uint64_t guid;
 	uint32_t shift;
 
 	ZFS_ENTER(zsb);
 
 	dmu_objset_space(zsb->z_os,
 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
 	guid = dmu_objset_fsid_guid(zsb->z_os);
 
 	/*
 	 * The pool really uses several block sizes.  Solaris reports the
 	 * smallest supported block size as frsize and the filesystem's
 	 * maximum block size as bsize, but Linux consumers often treat
 	 * the two interchangeably, so both are reported as the maximum
 	 * block size here.
 	 */
 	statp->f_bsize = zsb->z_max_blksz;
 	statp->f_frsize = zsb->z_max_blksz;
 	shift = fls(statp->f_bsize) - 1;
 
 	/*
 	 * Block counts are expressed in units of f_bsize.  There is no
 	 * root reservation, so f_bavail equals f_bfree.
 	 */
 	statp->f_bfree = availbytes >> shift;
 	statp->f_bavail = statp->f_bfree;
 	statp->f_blocks = (refdbytes + availbytes) >> shift;
 
 	/*
 	 * ZFS does not preallocate files, so report the maximum that
 	 * could still fit: f_ffree is the smaller of the available
 	 * object count and the available space divided into dnodes, and
 	 * f_files is that plus the objects already in use.
 	 */
 	statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT);
 	statp->f_files = statp->f_ffree + usedobjs;
 
 	statp->f_type = ZFS_SUPER_MAGIC;
 	statp->f_namelen = ZFS_MAXNAMELEN;
 	statp->f_fsid.val[0] = (uint32_t)guid;
 	statp->f_fsid.val[1] = (uint32_t)(guid >> 32);
 
 	/* Nothing useful to put into the spare area; zero it. */
 	bzero(statp->f_spare, sizeof (statp->f_spare));
 
 	ZFS_EXIT(zsb);
 	return (0);
 }
 EXPORT_SYMBOL(zfs_statvfs);
 
 /*
  * Look up the root znode of the filesystem and hand back its inode in
  * *ipp.  *ipp is left untouched when zfs_zget() fails; the zfs_zget()
  * error is returned.
  */
 int
 zfs_root(zfs_sb_t *zsb, struct inode **ipp)
 {
 	znode_t *rootzp;
 	int error;
 
 	ZFS_ENTER(zsb);
 
 	error = zfs_zget(zsb, zsb->z_root, &rootzp);
 	if (error != 0) {
 		ZFS_EXIT(zsb);
 		return (error);
 	}
 
 	*ipp = ZTOI(rootzp);
 
 	ZFS_EXIT(zsb);
 	return (0);
 }
 EXPORT_SYMBOL(zfs_root);
 
 #ifdef HAVE_SHRINK
 int
 zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
 {
 	zfs_sb_t *zsb = sb->s_fs_info;
 	struct shrinker *shrinker = &sb->s_shrink;
 	struct shrink_control sc = {
 		.nr_to_scan = nr_to_scan,
 		.gfp_mask = GFP_KERNEL,
 	};
 
 	ZFS_ENTER(zsb);
 	*objects = (*shrinker->shrink)(shrinker, &sc);
 	ZFS_EXIT(zsb);
 
 	return (0);
 }
 EXPORT_SYMBOL(zfs_sb_prune);
 #endif /* HAVE_SHRINK */
 
 /*
  * Teardown the zfs_sb_t::z_os.
  *
  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
  * and 'z_teardown_inactive_lock' held.
  *
  * Returns EIO (with both locks released) when not unmounting and the
  * filesystem was already unmounted or has no objset; otherwise 0.
  */
 int
 zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
 {
 	znode_t	*zp;
 
 	rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);
 
 	if (!unmounting) {
 		/*
 		 * We purge the parent filesystem's super block as the
 		 * parent filesystem and all of its snapshots have their
 		 * inode's super block set to the parent's filesystem's
 		 * super block.  Note,  'z_parent' is self referential
 		 * for non-snapshots.
 		 */
 		shrink_dcache_sb(zsb->z_parent->z_sb);
 		(void) spl_invalidate_inodes(zsb->z_parent->z_sb, 0);
 	}
 
-	/*
-	 * Drain the iput_taskq to ensure all active references to the
-	 * zfs_sb_t have been handled only then can it be safely destroyed.
-	 */
-	taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os)));
-
 	/*
 	 * Close the zil. NB: Can't close the zil while zfs_inactive
 	 * threads are blocked as zil_close can call zfs_inactive.
 	 */
 	if (zsb->z_log) {
 		zil_close(zsb->z_log);
 		zsb->z_log = NULL;
 	}
 
 	rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER);
 
 	/*
 	 * If we are not unmounting (ie: online recv) and someone already
 	 * unmounted this file system while we were doing the switcheroo,
 	 * or a reopen of z_os failed then just bail out now.
 	 */
 	if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) {
 		rw_exit(&zsb->z_teardown_inactive_lock);
 		rrw_exit(&zsb->z_teardown_lock, FTAG);
 		return (EIO);
 	}
 
 	/*
 	 * At this point there are no vops active, and any new vops will
 	 * fail with EIO since we have z_teardown_lock for writer (only
 	 * relevant for forced unmount).
 	 *
 	 * Release all holds on dbufs.
 	 */
 	mutex_enter(&zsb->z_znodes_lock);
 	for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
 	    zp = list_next(&zsb->z_all_znodes, zp))
 		if (zp->z_sa_hdl) {
 			ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0);
 			zfs_znode_dmu_fini(zp);
 		}
 	mutex_exit(&zsb->z_znodes_lock);
 
 	/*
 	 * If we are unmounting, set the unmounted flag and let new vops
 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
 	 * other vops will fail with EIO.
 	 */
 	if (unmounting) {
 		zsb->z_unmounted = B_TRUE;
 		rrw_exit(&zsb->z_teardown_lock, FTAG);
 		rw_exit(&zsb->z_teardown_inactive_lock);
 	}
 
 	/*
 	 * z_os will be NULL if there was an error in attempting to reopen
 	 * zsb, so just return as the properties had already been
 	 * unregistered and cached data had been evicted before.
 	 */
 	if (zsb->z_os == NULL)
 		return (0);
 
 	/*
 	 * Unregister properties.
 	 */
 	zfs_unregister_callbacks(zsb);
 
 	/*
 	 * Evict cached data.  A dirty dataset on a writable filesystem is
 	 * synced out first.
 	 */
 	if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
 	    !zfs_is_readonly(zsb))
 		txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
 	(void) dmu_objset_evict_dbufs(zsb->z_os);
 
 	return (0);
 }
 EXPORT_SYMBOL(zfs_sb_teardown);
 
 #if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER)
 /*
  * Atomic counter initialised to zero, defined only when HAVE_BDI is set
  * and HAVE_BDI_SETUP_AND_REGISTER is not.
  * NOTE(review): presumably consumed by a compatibility implementation of
  * bdi_setup_and_register() to generate unique BDI names -- confirm.
  */
 atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
 #endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */
 
 int
 zfs_domount(struct super_block *sb, void *data, int silent)
 {
 	zpl_mount_data_t *zmd = data;
 	const char *osname = zmd->z_osname;
 	zfs_sb_t *zsb;
 	struct inode *root_inode;
 	uint64_t recordsize;
 	int error;
 
 	error = zfs_sb_create(osname, &zsb);
 	if (error)
 		return (error);
 
 	if ((error = dsl_prop_get_integer(osname, "recordsize",
 	    &recordsize, NULL)))
 		goto out;
 
 	zsb->z_sb = sb;
 	sb->s_fs_info = zsb;
 	sb->s_magic = ZFS_SUPER_MAGIC;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_time_gran = 1;
 	sb->s_blocksize = recordsize;
 	sb->s_blocksize_bits = ilog2(recordsize);
 
 #ifdef HAVE_BDI
 	/*
 	 * 2.6.32 API change,
 	 * Added backing_device_info (BDI) per super block interfaces.  A BDI
 	 * must be configured when using a non-device backed filesystem for
 	 * proper writeback.  This is not required for older pdflush kernels.
 	 *
 	 * NOTE: Linux read-ahead is disabled in favor of zfs read-ahead.
 	 */
 	zsb->z_bdi.ra_pages = 0;
 	sb->s_bdi = &zsb->z_bdi;
 
 	error = -bdi_setup_and_register(&zsb->z_bdi, "zfs", BDI_CAP_MAP_COPY);
 	if (error)
 		goto out;
 #endif /* HAVE_BDI */
 
 	/* Set callback operations for the file system. */
 	sb->s_op = &zpl_super_operations;
 	sb->s_xattr = zpl_xattr_handlers;
 	sb->s_export_op = &zpl_export_operations;
 
 	/* Set features for file system. */
 	zfs_set_fuid_feature(zsb);
 
 	if (dmu_objset_is_snapshot(zsb->z_os)) {
 		uint64_t pval;
 
 		atime_changed_cb(zsb, B_FALSE);
 		readonly_changed_cb(zsb, B_TRUE);
 		if ((error = dsl_prop_get_integer(osname,"xattr",&pval,NULL)))
 			goto out;
 		xattr_changed_cb(zsb, pval);
 		zsb->z_issnap = B_TRUE;
 		zsb->z_os->os_sync = ZFS_SYNC_DISABLED;
 
 		mutex_enter(&zsb->z_os->os_user_ptr_lock);
 		dmu_objset_set_user(zsb->z_os, zsb);
 		mutex_exit(&zsb->z_os->os_user_ptr_lock);
 	} else {
 		error = zfs_sb_setup(zsb, B_TRUE);
 	}
 
 	/* Allocate a root inode for the filesystem. */
 	error = zfs_root(zsb, &root_inode);
 	if (error) {
 		(void) zfs_umount(sb);
 		goto out;
 	}
 
 	/* Allocate a root dentry for the filesystem */
 	sb->s_root = d_make_root(root_inode);
 	if (sb->s_root == NULL) {
 		(void) zfs_umount(sb);
 		error = ENOMEM;
 		goto out;
 	}
 
 	if (!zsb->z_issnap)
 		zfsctl_create(zsb);
 out:
 	if (error) {
 		dmu_objset_disown(zsb->z_os, zsb);
 		zfs_sb_free(zsb);
 	}
 
 	return (error);
 }
 EXPORT_SYMBOL(zfs_domount);
 
 /*
  * Called when an unmount is requested and certain sanity checks have
  * already passed.  At this point no dentries or inodes have been reclaimed
  * from their respective caches.  We drop the extra reference on the .zfs
  * control directory to allow everything to be reclaimed.  All snapshots
  * must already have been unmounted to reach this point.
  */
 void
 zfs_preumount(struct super_block *sb)
 {
 	zfs_sb_t *zsb = sb->s_fs_info;
 
 	/*
 	 * Nothing to drop when the super block carries no zfs state or
 	 * that state has no .zfs control directory attached.
 	 */
 	if (zsb == NULL || zsb->z_ctldir == NULL)
 		return;
 
 	zfsctl_destroy(zsb);
 }
 EXPORT_SYMBOL(zfs_preumount);
 
 /*
  * Called once all other unmount related tear down has occurred.
  * It is our responsibility to release any remaining infrastructure.
  */
 /*ARGSUSED*/
 int
 zfs_umount(struct super_block *sb)
 {
 	zfs_sb_t *zsb = sb->s_fs_info;
 	objset_t *objset;
 
 	/* A failed teardown is not tolerated on the unmount path. */
 	VERIFY(0 == zfs_sb_teardown(zsb, B_TRUE));
 
 	/* Sample z_os only once the teardown above has completed. */
 	objset = zsb->z_os;
 
 #ifdef HAVE_BDI
 	bdi_destroy(sb->s_bdi);
 #endif /* HAVE_BDI */
 
 	/*
 	 * When an earlier attempt to reopen zsb failed, z_os is NULL and
 	 * there is no objset left to release.
 	 */
 	if (objset != NULL) {
 		/* Clear the objset's user pointer under its lock ... */
 		mutex_enter(&objset->os_user_ptr_lock);
 		dmu_objset_set_user(objset, NULL);
 		mutex_exit(&objset->os_user_ptr_lock);
 
 		/* ... and only then give up ownership of the objset. */
 		dmu_objset_disown(objset, zsb);
 	}
 
 	zfs_sb_free(zsb);
 	return (0);
 }
 EXPORT_SYMBOL(zfs_umount);
 
 /*
  * Remount entry point.  No ZFS specific option processing is performed
  * yet: 'sb', 'flags' and 'data' are neither read nor modified.
  *
  * Always returns 0.
  */
 int
 zfs_remount(struct super_block *sb, int *flags, char *data)
 {
 	/*
 	 * All namespace flags (MNT_*) and super block flags (MS_*) will
 	 * be handled by the Linux VFS.  Only handle custom options here.
 	 */
 	return (0);
 }
 EXPORT_SYMBOL(zfs_remount);
 
 /*
  * Assemble a uint64_t from 'len' bytes stored least significant byte first.
  */
 static uint64_t
 zfs_fid_le_decode(const uint8_t *bytes, size_t len)
 {
 	uint64_t val = 0;
 	size_t i;
 
 	for (i = 0; i < len; i++)
 		val |= ((uint64_t)bytes[i]) << (8 * i);
 
 	return (val);
 }
 
 /*
  * Translate a file identifier into a referenced inode.
  *
  *	sb	- super block the identifier was presented to
  *	ipp	- set to NULL on entry; on success receives the inode
  *	fidp	- identifier of length SHORT_FID_LEN or LONG_FID_LEN.  A long
  *		  identifier additionally names an objset, which replaces the
  *		  file system the object is looked up in.
  *
  * Returns 0 on success.  EINVAL is returned for an unsupported fid_len,
  * when the named objset cannot be found, when a .zfs object is requested
  * from a file system without a control directory, and when the object is
  * unlinked or its generation does not match the identifier.  Errors from
  * zfs_zget() and sa_lookup() are passed through, and ZFS_ENTER() may
  * return on its own.
  */
 int
 zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 {
 	zfs_sb_t	*zsb = sb->s_fs_info;
 	zfid_short_t	*zfid;
 	znode_t		*zp;
 	uint64_t	object;
 	uint64_t	fid_gen;
 	uint64_t	gen_mask;
 	uint64_t	zp_gen;
 	int		err;
 
 	*ipp = NULL;
 
 	ZFS_ENTER(zsb);
 
 	if (fidp->fid_len == LONG_FID_LEN) {
 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
 		uint64_t	objsetid;
 
 		/*
 		 * Only the objset id takes part in the lookup below; the
 		 * zf_setgen field of the identifier is not consulted.
 		 */
 		objsetid = zfs_fid_le_decode(zlfid->zf_setid,
 		    sizeof (zlfid->zf_setid));
 
 		ZFS_EXIT(zsb);
 
 		err = zfsctl_lookup_objset(sb, objsetid, &zsb);
 		if (err)
 			return (EINVAL);
 
 		/* From here on 'zsb' is the file system of the named objset */
 		ZFS_ENTER(zsb);
 	}
 
 	if (fidp->fid_len != SHORT_FID_LEN && fidp->fid_len != LONG_FID_LEN) {
 		ZFS_EXIT(zsb);
 		return (EINVAL);
 	}
 
 	zfid = (zfid_short_t *)fidp;
 	object = zfs_fid_le_decode(zfid->zf_object, sizeof (zfid->zf_object));
 	fid_gen = zfs_fid_le_decode(zfid->zf_gen, sizeof (zfid->zf_gen));
 
 	/* Only as many generation bits as the identifier can carry compare */
 	gen_mask = -1ULL >> (64 - 8 * sizeof (zfid->zf_gen));
 
 	/* A zero fid_gen means we are in the .zfs control directories */
 	if (fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
 		/*
 		 * z_ctldir can be NULL (presumably for the file system of a
 		 * snapshot objset selected above -- verify); reject the
 		 * identifier instead of dereferencing a NULL inode.
 		 */
 		if (zsb->z_ctldir == NULL) {
 			ZFS_EXIT(zsb);
 			return (EINVAL);
 		}
 
 		*ipp = zsb->z_ctldir;
 		if (object == ZFSCTL_INO_SNAPDIR) {
 			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
 			    0, kcred, NULL, NULL) == 0);
 		} else {
 			igrab(*ipp);
 		}
 		ZFS_EXIT(zsb);
 		return (0);
 	}
 
 	dprintf("getting %llu [%llu mask %llx]\n",
 	    (unsigned long long)object, (unsigned long long)fid_gen,
 	    (unsigned long long)gen_mask);
 	if ((err = zfs_zget(zsb, object, &zp))) {
 		ZFS_EXIT(zsb);
 		return (err);
 	}
 
 	/* Without the stored generation the identifier cannot be validated */
 	err = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen,
 	    sizeof (zp_gen));
 	if (err) {
 		iput(ZTOI(zp));
 		ZFS_EXIT(zsb);
 		return (err);
 	}
 
 	/* A stored generation that masks to zero is compared as 1 */
 	zp_gen &= gen_mask;
 	if (zp_gen == 0)
 		zp_gen = 1;
 	if (zp->z_unlinked || zp_gen != fid_gen) {
 		dprintf("znode gen (%llu) != fid gen (%llu)\n",
 		    (unsigned long long)zp_gen, (unsigned long long)fid_gen);
 		iput(ZTOI(zp));
 		ZFS_EXIT(zsb);
 		return (EINVAL);
 	}
 
 	*ipp = ZTOI(zp);
 	if (*ipp)
 		zfs_inode_update(ITOZ(*ipp));
 
 	ZFS_EXIT(zsb);
 	return (0);
 }
 EXPORT_SYMBOL(zfs_vget);
 
 /*
  * Block out VOPs and close zfs_sb_t::z_os
  *
  * Note, if successful, then we return with the 'z_teardown_lock' and
  * 'z_teardown_inactive_lock' write held.
  */
 int
 zfs_suspend_fs(zfs_sb_t *zsb)
 {
 	int error = zfs_sb_teardown(zsb, B_FALSE);
 
 	/* Ownership of the objset is only given up after a clean teardown. */
 	if (error == 0)
 		dmu_objset_disown(zsb->z_os, zsb);
 
 	return (error);
 }
 EXPORT_SYMBOL(zfs_suspend_fs);
 
 /*
  * Reopen zfs_sb_t::z_os and release VOPs.
  *
  * The caller must hold 'z_teardown_lock' and 'z_teardown_inactive_lock'
  * for writing; both are dropped before returning, on success and on
  * failure alike.
  *
  *	zsb	- file system state whose objset is to be owned again
  *	osname	- name of the objset to own
  *
  * Returns 0 on success.  On failure the error is returned after the file
  * system has been handed to zfs_umount(); that call ends in
  * zfs_sb_free(zsb), so 'zsb' must not be used by the caller afterwards.
  */
 int
 zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
 {
 	int err;
 
 	ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock));
 	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));
 
 	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os);
 	if (err) {
 		zsb->z_os = NULL;
 	} else {
 		znode_t *zp;
 		uint64_t sa_obj = 0;
 
 		/*
 		 * A failed lookup of the SA master node is only acceptable
 		 * on versions that predate SA; sa_obj then stays 0.  On an
 		 * SA capable version the failure must reach the caller, so
 		 * it is kept in 'err' for the bail path to act on.
 		 */
 		err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
 		    ZFS_SA_ATTRS, 8, 1, &sa_obj);
 
 		if (err && zsb->z_version >= ZPL_VERSION_SA)
 			goto bail;
 
 		if ((err = sa_setup(zsb->z_os, sa_obj,
 		    zfs_attr_table,  ZPL_END, &zsb->z_attr_table)) != 0)
 			goto bail;
 
 		VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
 
 		/*
 		 * Attempt to re-establish all the active znodes with
 		 * their dbufs.  If a zfs_rezget() fails, then we'll let
 		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
 		 * when they try to use their znode.
 		 */
 		mutex_enter(&zsb->z_znodes_lock);
 		for (zp = list_head(&zsb->z_all_znodes); zp;
 		    zp = list_next(&zsb->z_all_znodes, zp)) {
 			(void) zfs_rezget(zp);
 		}
 		mutex_exit(&zsb->z_znodes_lock);
 	}
 
 bail:
 	/* release the VOPs */
 	rw_exit(&zsb->z_teardown_inactive_lock);
 	rrw_exit(&zsb->z_teardown_lock, FTAG);
 
 	if (err) {
 		/*
 		 * Since we couldn't reopen zfs_sb_t::z_os, force
 		 * unmount this file system.
 		 */
 		(void) zfs_umount(zsb->z_sb);
 	}
 	return (err);
 }
 EXPORT_SYMBOL(zfs_resume_fs);
 
 /*
  * Raise the ZPL version recorded in the master node to 'newvers'.
  *
  *	zsb	- file system to upgrade
  *	newvers	- requested ZPL version
  *
  * Returns 0 on success, EINVAL when 'newvers' is outside
  * [ZPL_VERSION_INITIAL, ZPL_VERSION] or lower than the current version,
  * ENOTSUP when the pool's SPA version is older than the one 'newvers'
  * maps to, or the error from dmu_tx_assign() / zap_update().
  */
 int
 zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
 {
 	objset_t *os = zsb->z_os;
 	dmu_tx_t *tx;
 	int error;
 
 	/* Unknown versions and downgrades are both refused. */
 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION ||
 	    newvers < zsb->z_version)
 		return (EINVAL);
 
 	if (zfs_spa_version_map(newvers) >
 	    spa_version(dmu_objset_spa(zsb->z_os)))
 		return (ENOTSUP);
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
 
 	/* Crossing into SA needs room for the new SA master node as well. */
 	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 		    ZFS_SA_ATTRS);
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	}
 
 	if ((error = dmu_tx_assign(tx, TXG_WAIT)) != 0) {
 		dmu_tx_abort(tx);
 		return (error);
 	}
 
 	/* Once assigned, the transaction is committed even on failure. */
 	if ((error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 	    8, 1, &newvers, tx)) != 0) {
 		dmu_tx_commit(tx);
 		return (error);
 	}
 
 	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
 		uint64_t sa_master;
 
 		ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=,
 		    SPA_VERSION_SA);
 
 		/* Create the SA master node and record it in the master node */
 		sa_master = zap_create(os, DMU_OT_SA_MASTER_NODE,
 		    DMU_OT_NONE, 0, tx);
 		error = zap_add(os, MASTER_NODE_OBJ,
 		    ZFS_SA_ATTRS, 8, 1, &sa_master, tx);
 		ASSERT3U(error, ==, 0);
 
 		VERIFY(0 == sa_set_sa_object(os, sa_master));
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
 
 	/* The log entry still reports the old version: z_version is not yet updated */
 	spa_history_log_internal(LOG_DS_UPGRADE,
 	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
 	    zsb->z_version, newvers, dmu_objset_id(os));
 
 	dmu_tx_commit(tx);
 
 	zsb->z_version = newvers;
 	if (zsb->z_version >= ZPL_VERSION_FUID)
 		zfs_set_fuid_feature(zsb);
 
 	return (0);
 }
 EXPORT_SYMBOL(zfs_set_version);
 
 /*
  * Fetch a property value kept in the master node of an objset.
  *
  *	os	- objset to read from; NULL selects the built-in default
  *	prop	- property to look up
  *	value	- receives the stored value or the default
  *
  * Returns 0 when a stored or default value was written to 'value',
  * ENOENT when nothing is stored and 'prop' has no default here, and
  * otherwise the error reported by zap_lookup().
  */
 int
 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 {
 	const char *pname;
 	int error;
 
 	/* The version property is stored under its own name. */
 	pname = (prop == ZFS_PROP_VERSION) ?
 	    ZPL_VERSION_STR : zfs_prop_to_name(prop);
 
 	error = (os != NULL) ?
 	    zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value) : ENOENT;
 
 	/* Success and every failure other than "not stored" end here. */
 	if (error != ENOENT)
 		return (error);
 
 	/* Nothing stored: hand out the default where one is defined. */
 	switch (prop) {
 	case ZFS_PROP_VERSION:
 		*value = ZPL_VERSION;
 		return (0);
 	case ZFS_PROP_NORMALIZE:
 	case ZFS_PROP_UTF8ONLY:
 		*value = 0;
 		return (0);
 	case ZFS_PROP_CASE:
 		*value = ZFS_CASE_SENSITIVE;
 		return (0);
 	default:
 		return (ENOENT);
 	}
 }
 EXPORT_SYMBOL(zfs_get_zplprop);
 
 /*
  * One-time setup of the ZPL layer: initialize the .zfs control directory
  * and znode subsystems, register the space delta callback for objsets of
  * type DMU_OST_ZFS, register the file system type and add the ARC prune
  * callback.
  */
 void
 zfs_init(void)
 {
 	zfsctl_init();
 	zfs_znode_init();
 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
 	/* NOTE(review): the result of register_filesystem() is not checked. */
 	register_filesystem(&zpl_fs_type);
 	/*
 	 * NOTE(review): the return value is discarded and zfs_fini() makes
 	 * no call that removes this callback -- confirm this is intended.
 	 */
 	(void) arc_add_prune_callback(zpl_prune_sbs, NULL);
 }
 
 /*
  * Counterpart of zfs_init(): unregister the file system type first, then
  * shut down the znode and .zfs control directory subsystems, i.e. in the
  * reverse order of their initialization.
  */
 void
 zfs_fini(void)
 {
 	unregister_filesystem(&zpl_fs_type);
 	zfs_znode_fini();
 	zfsctl_fini();
 }