Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c (revision 332526) @@ -1,2003 +1,2010 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] + * Copyright 2017 RackTop Systems. */ #include #include #include #include #include #include #include #include #include #include #include #include #include static kmem_cache_t *dnode_cache; /* * Define DNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */ #ifdef DEBUG #define DNODE_STATS #endif /* DEBUG */ #ifdef DNODE_STATS #define DNODE_STAT_ADD(stat) ((stat)++) #else #define DNODE_STAT_ADD(stat) /* nothing */ #endif /* DNODE_STATS */ static dnode_phys_t dnode_phys_zero; int zfs_default_bs = SPA_MINBLOCKSHIFT; int zfs_default_ibs = DN_MAX_INDBLKSHIFT; SYSCTL_DECL(_vfs_zfs); SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN, &zfs_default_bs, 0, "Default dnode block shift"); SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN, &zfs_default_ibs, 0, "Default dnode indirect block shift"); #ifdef illumos +#ifdef _KERNEL static kmem_cbrc_t dnode_move(void *, void *, size_t, void *); +#endif /* _KERNEL */ #endif static int dbuf_compare(const void *x1, const void *x2) { const dmu_buf_impl_t *d1 = x1; const dmu_buf_impl_t *d2 = x2; if (d1->db_level < d2->db_level) { return (-1); } if (d1->db_level > d2->db_level) { return (1); } if (d1->db_blkid < d2->db_blkid) { return (-1); } if (d1->db_blkid > d2->db_blkid) { return (1); } if (d1->db_state == DB_SEARCH) { ASSERT3S(d2->db_state, !=, DB_SEARCH); return (-1); } else if (d2->db_state == DB_SEARCH) { ASSERT3S(d1->db_state, !=, DB_SEARCH); return (1); } if ((uintptr_t)d1 < (uintptr_t)d2) { return (-1); } if ((uintptr_t)d1 > (uintptr_t)d2) { return (1); } return (0); } /* ARGSUSED */ static int dnode_cons(void *arg, void *unused, int kmflag) { dnode_t *dn = arg; int i; rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL); mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL); cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL); /* * Every dbuf has a reference, and dropping a tracked reference is * O(number of references), so don't track dn_holds. 
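A standalone sketch of the three-level ordering that dbuf_compare() above establishes for the per-dnode AVL tree (db_level first, then db_blkid, then pointer identity as the final tiebreaker). It uses a simplified struct instead of dmu_buf_impl_t and omits the DB_SEARCH sentinel handling; illustrative only, not part of this revision.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for dmu_buf_impl_t: only the sort keys. */
struct fake_dbuf {
	uint8_t  level;   /* indirection level (db_level) */
	uint64_t blkid;   /* block id within that level (db_blkid) */
};

/* Order by level, then blkid, then address, mirroring dbuf_compare(). */
static int
fake_dbuf_compare(const void *x1, const void *x2)
{
	const struct fake_dbuf *d1 = x1;
	const struct fake_dbuf *d2 = x2;

	if (d1->level != d2->level)
		return (d1->level < d2->level ? -1 : 1);
	if (d1->blkid != d2->blkid)
		return (d1->blkid < d2->blkid ? -1 : 1);
	if ((uintptr_t)d1 != (uintptr_t)d2)
		return ((uintptr_t)d1 < (uintptr_t)d2 ? -1 : 1);
	return (0);
}

int
main(void)
{
	struct fake_dbuf a = { 0, 7 }, b = { 1, 0 };

	/* A level-0 dbuf sorts before any level-1 dbuf, regardless of blkid. */
	printf("compare(a, b) = %d\n", fake_dbuf_compare(&a, &b));
	return (0);
}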
*/ refcount_create_untracked(&dn->dn_holds); refcount_create(&dn->dn_tx_holds); list_link_init(&dn->dn_link); bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr)); bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels)); bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift)); bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype)); bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk)); bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen)); bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_link_init(&dn->dn_dirty_link[i]); dn->dn_free_ranges[i] = NULL; list_create(&dn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); } dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; dn->dn_dirtyctx = 0; dn->dn_dirtyctx_firstset = NULL; dn->dn_bonus = NULL; dn->dn_have_spill = B_FALSE; dn->dn_zio = NULL; dn->dn_oldused = 0; dn->dn_oldflags = 0; dn->dn_olduid = 0; dn->dn_oldgid = 0; dn->dn_newuid = 0; dn->dn_newgid = 0; dn->dn_id_flags = 0; dn->dn_dbufs_count = 0; avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); dn->dn_moved = 0; POINTER_INVALIDATE(&dn->dn_objset); return (0); } /* ARGSUSED */ static void dnode_dest(void *arg, void *unused) { int i; dnode_t *dn = arg; rw_destroy(&dn->dn_struct_rwlock); mutex_destroy(&dn->dn_mtx); mutex_destroy(&dn->dn_dbufs_mtx); cv_destroy(&dn->dn_notxholds); refcount_destroy(&dn->dn_holds); refcount_destroy(&dn->dn_tx_holds); ASSERT(!list_link_active(&dn->dn_link)); for (i = 0; i < TXG_SIZE; i++) { ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); list_destroy(&dn->dn_dirty_records[i]); ASSERT0(dn->dn_next_nblkptr[i]); ASSERT0(dn->dn_next_nlevels[i]); ASSERT0(dn->dn_next_indblkshift[i]); ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_blksz[i]); } ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_free_txg); ASSERT0(dn->dn_assigned_txg); ASSERT0(dn->dn_dirtyctx); ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL); ASSERT3P(dn->dn_bonus, ==, NULL); ASSERT(!dn->dn_have_spill); ASSERT3P(dn->dn_zio, ==, NULL); ASSERT0(dn->dn_oldused); ASSERT0(dn->dn_oldflags); ASSERT0(dn->dn_olduid); ASSERT0(dn->dn_oldgid); ASSERT0(dn->dn_newuid); ASSERT0(dn->dn_newgid); ASSERT0(dn->dn_id_flags); ASSERT0(dn->dn_dbufs_count); avl_destroy(&dn->dn_dbufs); } void dnode_init(void) { ASSERT(dnode_cache == NULL); dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), 0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0); +#ifdef _KERNEL kmem_cache_set_move(dnode_cache, dnode_move); +#endif /* _KERNEL */ } void dnode_fini(void) { kmem_cache_destroy(dnode_cache); dnode_cache = NULL; } #ifdef ZFS_DEBUG void dnode_verify(dnode_t *dn) { int drop_struct_lock = FALSE; ASSERT(dn->dn_phys); ASSERT(dn->dn_objset); ASSERT(dn->dn_handle->dnh_dnode == dn); ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY)) return; if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { rw_enter(&dn->dn_struct_rwlock, RW_READER); drop_struct_lock = TRUE; } if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) { int i; ASSERT3U(dn->dn_indblkshift, >=, 0); ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT); if (dn->dn_datablkshift) { ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT); ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT); ASSERT3U(1<dn_datablkshift, 
==, dn->dn_datablksz); } ASSERT3U(dn->dn_nlevels, <=, 30); ASSERT(DMU_OT_IS_VALID(dn->dn_type)); ASSERT3U(dn->dn_nblkptr, >=, 1); ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); ASSERT3U(dn->dn_datablksz, ==, dn->dn_datablkszsec << SPA_MINBLOCKSHIFT); ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0); ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) + dn->dn_bonuslen, <=, DN_MAX_BONUSLEN); for (i = 0; i < TXG_SIZE; i++) { ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels); } } if (dn->dn_phys->dn_type != DMU_OT_NONE) ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels); ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL); if (dn->dn_dbuf != NULL) { ASSERT3P(dn->dn_phys, ==, (dnode_phys_t *)dn->dn_dbuf->db.db_data + (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT))); } if (drop_struct_lock) rw_exit(&dn->dn_struct_rwlock); } #endif void dnode_byteswap(dnode_phys_t *dnp) { uint64_t *buf64 = (void*)&dnp->dn_blkptr; int i; if (dnp->dn_type == DMU_OT_NONE) { bzero(dnp, sizeof (dnode_phys_t)); return; } dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec); dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen); dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid); dnp->dn_used = BSWAP_64(dnp->dn_used); /* * dn_nblkptr is only one byte, so it's OK to read it in either * byte order. We can't read dn_bouslen. */ ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT); ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR); for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++) buf64[i] = BSWAP_64(buf64[i]); /* * OK to check dn_bonuslen for zero, because it won't matter if * we have the wrong byte order. This is necessary because the * dnode dnode is smaller than a regular dnode. */ if (dnp->dn_bonuslen != 0) { /* * Note that the bonus length calculated here may be * longer than the actual bonus buffer. This is because * we always put the bonus buffer after the last block * pointer (instead of packing it against the end of the * dnode buffer). 
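A small arithmetic sketch of how the bonus area is located after the last block pointer, as the comment above describes and as dnode_byteswap()/dnode_setbonuslen() compute it. The constants are local stand-ins assumed to match the classic 512-byte dnode layout (sizeof (blkptr_t) == 128, DN_MAX_BONUSLEN == 320); illustrative only, not part of this change.

#include <stdio.h>

/* Stand-in constants (assumed, classic 512-byte dnode layout). */
#define FAKE_BLKPTR_SIZE	128	/* sizeof (blkptr_t) */
#define FAKE_DN_MAX_BONUSLEN	320	/* DN_MAX_BONUSLEN */

int
main(void)
{
	/* The bonus buffer starts right after the last block pointer. */
	for (int nblkptr = 1; nblkptr <= 3; nblkptr++) {
		int off = (nblkptr - 1) * FAKE_BLKPTR_SIZE;
		int len = FAKE_DN_MAX_BONUSLEN - off;
		printf("nblkptr=%d  bonus offset=%d  max bonus len=%d\n",
		    nblkptr, off, len);
	}
	return (0);
}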
*/ int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t); size_t len = DN_MAX_BONUSLEN - off; ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype)); dmu_object_byteswap_t byteswap = DMU_OT_BYTESWAP(dnp->dn_bonustype); dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len); } /* Swap SPILL block if we have one */ if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t)); } void dnode_buf_byteswap(void *vbuf, size_t size) { dnode_phys_t *buf = vbuf; int i; ASSERT3U(sizeof (dnode_phys_t), ==, (1<>= DNODE_SHIFT; for (i = 0; i < size; i++) { dnode_byteswap(buf); buf++; } } void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx) { ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); dnode_setdirty(dn, tx); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); ASSERT3U(newsize, <=, DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t)); dn->dn_bonuslen = newsize; if (newsize == 0) dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN; else dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; rw_exit(&dn->dn_struct_rwlock); } void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx) { ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); dnode_setdirty(dn, tx); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dn->dn_bonustype = newtype; dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype; rw_exit(&dn->dn_struct_rwlock); } void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx) { ASSERT3U(refcount_count(&dn->dn_holds), >=, 1); ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); dnode_setdirty(dn, tx); dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK; dn->dn_have_spill = B_FALSE; } static void dnode_setdblksz(dnode_t *dn, int size) { ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE)); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(size, >=, SPA_MINBLOCKSIZE); ASSERT3U(size >> SPA_MINBLOCKSHIFT, <, 1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8)); dn->dn_datablksz = size; dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT; dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0; } static dnode_t * dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, uint64_t object, dnode_handle_t *dnh) { dnode_t *dn; dn = kmem_cache_alloc(dnode_cache, KM_SLEEP); +#ifdef _KERNEL ASSERT(!POINTER_IS_VALID(dn->dn_objset)); +#endif /* _KERNEL */ dn->dn_moved = 0; /* * Defer setting dn_objset until the dnode is ready to be a candidate * for the dnode_move() callback. */ dn->dn_object = object; dn->dn_dbuf = db; dn->dn_handle = dnh; dn->dn_phys = dnp; if (dnp->dn_datablkszsec) { dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); } else { dn->dn_datablksz = 0; dn->dn_datablkszsec = 0; dn->dn_datablkshift = 0; } dn->dn_indblkshift = dnp->dn_indblkshift; dn->dn_nlevels = dnp->dn_nlevels; dn->dn_type = dnp->dn_type; dn->dn_nblkptr = dnp->dn_nblkptr; dn->dn_checksum = dnp->dn_checksum; dn->dn_compress = dnp->dn_compress; dn->dn_bonustype = dnp->dn_bonustype; dn->dn_bonuslen = dnp->dn_bonuslen; dn->dn_maxblkid = dnp->dn_maxblkid; dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0); dn->dn_id_flags = 0; dmu_zfetch_init(&dn->dn_zfetch, dn); ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); mutex_enter(&os->os_lock); if (dnh->dnh_dnode != NULL) { /* Lost the allocation race. */ mutex_exit(&os->os_lock); kmem_cache_free(dnode_cache, dn); return (dnh->dnh_dnode); } /* * Exclude special dnodes from os_dnodes so an empty os_dnodes * signifies that the special dnodes have no references from * their children (the entries in os_dnodes). 
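dnode_setdblksz() above derives dn_datablkshift only for power-of-two block sizes, via highbit64(size - 1). A hedged, portable sketch of that derivation follows, with a simple loop standing in for the illumos highbit64(); illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Highest set bit, 1-indexed; 0 for input 0 (loop-based stand-in for highbit64()). */
static int
highbit64_sketch(uint64_t x)
{
	int h = 0;

	while (x != 0) {
		h++;
		x >>= 1;
	}
	return (h);
}

int
main(void)
{
	uint64_t sizes[] = { 512, 4096, 131072, 6144 /* not a power of two */ };

	for (int i = 0; i < 4; i++) {
		uint64_t size = sizes[i];
		int ispow2 = (size & (size - 1)) == 0;
		/* Mirrors: dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0 */
		int shift = ispow2 ? highbit64_sketch(size - 1) : 0;
		printf("size=%llu shift=%d\n", (unsigned long long)size, shift);
	}
	return (0);
}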
This allows * dnode_destroy() to easily determine if the last child has * been removed and then complete eviction of the objset. */ if (!DMU_OBJECT_IS_SPECIAL(object)) list_insert_head(&os->os_dnodes, dn); membar_producer(); /* * Everything else must be valid before assigning dn_objset * makes the dnode eligible for dnode_move(). */ dn->dn_objset = os; dnh->dnh_dnode = dn; mutex_exit(&os->os_lock); arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER); return (dn); } /* * Caller must be holding the dnode handle, which is released upon return. */ static void dnode_destroy(dnode_t *dn) { objset_t *os = dn->dn_objset; boolean_t complete_os_eviction = B_FALSE; ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0); mutex_enter(&os->os_lock); POINTER_INVALIDATE(&dn->dn_objset); if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { list_remove(&os->os_dnodes, dn); complete_os_eviction = list_is_empty(&os->os_dnodes) && list_link_active(&os->os_evicting_node); } mutex_exit(&os->os_lock); /* the dnode can no longer move, so we can release the handle */ zrl_remove(&dn->dn_handle->dnh_zrlock); dn->dn_allocated_txg = 0; dn->dn_free_txg = 0; dn->dn_assigned_txg = 0; dn->dn_dirtyctx = 0; if (dn->dn_dirtyctx_firstset != NULL) { kmem_free(dn->dn_dirtyctx_firstset, 1); dn->dn_dirtyctx_firstset = NULL; } if (dn->dn_bonus != NULL) { mutex_enter(&dn->dn_bonus->db_mtx); dbuf_destroy(dn->dn_bonus); dn->dn_bonus = NULL; } dn->dn_zio = NULL; dn->dn_have_spill = B_FALSE; dn->dn_oldused = 0; dn->dn_oldflags = 0; dn->dn_olduid = 0; dn->dn_oldgid = 0; dn->dn_newuid = 0; dn->dn_newgid = 0; dn->dn_id_flags = 0; dmu_zfetch_fini(&dn->dn_zfetch); kmem_cache_free(dnode_cache, dn); arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER); if (complete_os_eviction) dmu_objset_evict_done(os); } void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { int i; ASSERT3U(blocksize, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); if (blocksize == 0) blocksize = 1 << zfs_default_bs; else blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE); if (ibs == 0) ibs = zfs_default_ibs; ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT); dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset, dn->dn_object, tx->tx_txg, blocksize, ibs); ASSERT(dn->dn_type == DMU_OT_NONE); ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE); ASSERT(ot != DMU_OT_NONE); ASSERT(DMU_OT_IS_VALID(ot)); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype == DMU_OT_SA && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); ASSERT(dn->dn_type == DMU_OT_NONE); ASSERT0(dn->dn_maxblkid); ASSERT0(dn->dn_allocated_txg); ASSERT0(dn->dn_assigned_txg); ASSERT(refcount_is_zero(&dn->dn_tx_holds)); ASSERT3U(refcount_count(&dn->dn_holds), <=, 1); ASSERT(avl_is_empty(&dn->dn_dbufs)); for (i = 0; i < TXG_SIZE; i++) { ASSERT0(dn->dn_next_nblkptr[i]); ASSERT0(dn->dn_next_nlevels[i]); ASSERT0(dn->dn_next_indblkshift[i]); ASSERT0(dn->dn_next_bonuslen[i]); ASSERT0(dn->dn_next_bonustype[i]); ASSERT0(dn->dn_rm_spillblk[i]); ASSERT0(dn->dn_next_blksz[i]); ASSERT(!list_link_active(&dn->dn_dirty_link[i])); ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL); ASSERT3P(dn->dn_free_ranges[i], ==, NULL); } dn->dn_type = ot; dnode_setdblksz(dn, blocksize); dn->dn_indblkshift = ibs; dn->dn_nlevels = 1; if (bonustype == DMU_OT_SA) /* Maximize 
bonus space for SA */ dn->dn_nblkptr = 1; else dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); dn->dn_bonustype = bonustype; dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; if (dn->dn_dirtyctx_firstset) { kmem_free(dn->dn_dirtyctx_firstset, 1); dn->dn_dirtyctx_firstset = NULL; } dn->dn_allocated_txg = tx->tx_txg; dn->dn_id_flags = 0; dnode_setdirty(dn, tx); dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs; dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen; dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype; dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz; } void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) { int nblkptr; ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE); ASSERT3U(blocksize, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); ASSERT0(blocksize % SPA_MINBLOCKSIZE); ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); ASSERT(tx->tx_txg != 0); ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) || (bonustype != DMU_OT_NONE && bonuslen != 0) || (bonustype == DMU_OT_SA && bonuslen == 0)); ASSERT(DMU_OT_IS_VALID(bonustype)); ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN); /* clean up any unreferenced dbufs */ dnode_evict_dbufs(dn); dn->dn_id_flags = 0; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); dnode_setdirty(dn, tx); if (dn->dn_datablksz != blocksize) { /* change blocksize */ ASSERT(dn->dn_maxblkid == 0 && (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) || dnode_block_freed(dn, 0))); dnode_setdblksz(dn, blocksize); dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize; } if (dn->dn_bonuslen != bonuslen) dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen; if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */ nblkptr = 1; else nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); if (dn->dn_bonustype != bonustype) dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype; if (dn->dn_nblkptr != nblkptr) dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr; if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { dbuf_rm_spill(dn, tx); dnode_rm_spill(dn, tx); } rw_exit(&dn->dn_struct_rwlock); /* change type */ dn->dn_type = ot; /* change bonus size and type */ mutex_enter(&dn->dn_mtx); dn->dn_bonustype = bonustype; dn->dn_bonuslen = bonuslen; dn->dn_nblkptr = nblkptr; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); /* fix up the bonus db_size */ if (dn->dn_bonus) { dn->dn_bonus->db.db_size = DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t); ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size); } dn->dn_allocated_txg = tx->tx_txg; mutex_exit(&dn->dn_mtx); } #ifdef DNODE_STATS static struct { uint64_t dms_dnode_invalid; uint64_t dms_dnode_recheck1; uint64_t dms_dnode_recheck2; uint64_t dms_dnode_special; uint64_t dms_dnode_handle; uint64_t dms_dnode_rwlock; uint64_t dms_dnode_active; } dnode_move_stats; #endif /* DNODE_STATS */ +#ifdef _KERNEL static void dnode_move_impl(dnode_t *odn, dnode_t *ndn) { int i; ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock)); ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx)); ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx)); ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock)); /* Copy fields. 
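Both dnode_allocate() and dnode_reallocate() above size dn_nblkptr from the requested bonus length: one block pointer is always kept, and each additional 128-byte slot left free by the bonus buffer yields another. A hedged arithmetic sketch (SPA_BLKPTRSHIFT assumed to be 7, DN_MAX_BONUSLEN assumed to be 320) follows; illustrative only.

#include <stdio.h>

#define FAKE_SPA_BLKPTRSHIFT	7	/* log2(sizeof (blkptr_t)), assumed */
#define FAKE_DN_MAX_BONUSLEN	320	/* assumed classic value */

int
main(void)
{
	int bonuslens[] = { 0, 64, 192, 320 };

	for (int i = 0; i < 4; i++) {
		int bonuslen = bonuslens[i];
		/* Mirrors: 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT) */
		int nblkptr = 1 +
		    ((FAKE_DN_MAX_BONUSLEN - bonuslen) >> FAKE_SPA_BLKPTRSHIFT);
		printf("bonuslen=%3d -> nblkptr=%d\n", bonuslen, nblkptr);
	}
	return (0);
}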
*/ ndn->dn_objset = odn->dn_objset; ndn->dn_object = odn->dn_object; ndn->dn_dbuf = odn->dn_dbuf; ndn->dn_handle = odn->dn_handle; ndn->dn_phys = odn->dn_phys; ndn->dn_type = odn->dn_type; ndn->dn_bonuslen = odn->dn_bonuslen; ndn->dn_bonustype = odn->dn_bonustype; ndn->dn_nblkptr = odn->dn_nblkptr; ndn->dn_checksum = odn->dn_checksum; ndn->dn_compress = odn->dn_compress; ndn->dn_nlevels = odn->dn_nlevels; ndn->dn_indblkshift = odn->dn_indblkshift; ndn->dn_datablkshift = odn->dn_datablkshift; ndn->dn_datablkszsec = odn->dn_datablkszsec; ndn->dn_datablksz = odn->dn_datablksz; ndn->dn_maxblkid = odn->dn_maxblkid; bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0], sizeof (odn->dn_next_nblkptr)); bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0], sizeof (odn->dn_next_nlevels)); bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0], sizeof (odn->dn_next_indblkshift)); bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0], sizeof (odn->dn_next_bonustype)); bcopy(&odn->dn_rm_spillblk[0], &ndn->dn_rm_spillblk[0], sizeof (odn->dn_rm_spillblk)); bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0], sizeof (odn->dn_next_bonuslen)); bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0], sizeof (odn->dn_next_blksz)); for (i = 0; i < TXG_SIZE; i++) { list_move_tail(&ndn->dn_dirty_records[i], &odn->dn_dirty_records[i]); } bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0], sizeof (odn->dn_free_ranges)); ndn->dn_allocated_txg = odn->dn_allocated_txg; ndn->dn_free_txg = odn->dn_free_txg; ndn->dn_assigned_txg = odn->dn_assigned_txg; ndn->dn_dirtyctx = odn->dn_dirtyctx; ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset; ASSERT(refcount_count(&odn->dn_tx_holds) == 0); refcount_transfer(&ndn->dn_holds, &odn->dn_holds); ASSERT(avl_is_empty(&ndn->dn_dbufs)); avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs); ndn->dn_dbufs_count = odn->dn_dbufs_count; ndn->dn_bonus = odn->dn_bonus; ndn->dn_have_spill = odn->dn_have_spill; ndn->dn_zio = odn->dn_zio; ndn->dn_oldused = odn->dn_oldused; ndn->dn_oldflags = odn->dn_oldflags; ndn->dn_olduid = odn->dn_olduid; ndn->dn_oldgid = odn->dn_oldgid; ndn->dn_newuid = odn->dn_newuid; ndn->dn_newgid = odn->dn_newgid; ndn->dn_id_flags = odn->dn_id_flags; dmu_zfetch_init(&ndn->dn_zfetch, NULL); list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream); ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode; /* * Update back pointers. Updating the handle fixes the back pointer of * every descendant dbuf as well as the bonus dbuf. */ ASSERT(ndn->dn_handle->dnh_dnode == odn); ndn->dn_handle->dnh_dnode = ndn; if (ndn->dn_zfetch.zf_dnode == odn) { ndn->dn_zfetch.zf_dnode = ndn; } /* * Invalidate the original dnode by clearing all of its back pointers. */ odn->dn_dbuf = NULL; odn->dn_handle = NULL; avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); odn->dn_dbufs_count = 0; odn->dn_bonus = NULL; odn->dn_zfetch.zf_dnode = NULL; /* * Set the low bit of the objset pointer to ensure that dnode_move() * recognizes the dnode as invalid in any subsequent callback. */ POINTER_INVALIDATE(&odn->dn_objset); /* * Satisfy the destructor. 
*/ for (i = 0; i < TXG_SIZE; i++) { list_create(&odn->dn_dirty_records[i], sizeof (dbuf_dirty_record_t), offsetof(dbuf_dirty_record_t, dr_dirty_node)); odn->dn_free_ranges[i] = NULL; odn->dn_next_nlevels[i] = 0; odn->dn_next_indblkshift[i] = 0; odn->dn_next_bonustype[i] = 0; odn->dn_rm_spillblk[i] = 0; odn->dn_next_bonuslen[i] = 0; odn->dn_next_blksz[i] = 0; } odn->dn_allocated_txg = 0; odn->dn_free_txg = 0; odn->dn_assigned_txg = 0; odn->dn_dirtyctx = 0; odn->dn_dirtyctx_firstset = NULL; odn->dn_have_spill = B_FALSE; odn->dn_zio = NULL; odn->dn_oldused = 0; odn->dn_oldflags = 0; odn->dn_olduid = 0; odn->dn_oldgid = 0; odn->dn_newuid = 0; odn->dn_newgid = 0; odn->dn_id_flags = 0; /* * Mark the dnode. */ ndn->dn_moved = 1; odn->dn_moved = (uint8_t)-1; } #ifdef illumos -#ifdef _KERNEL /*ARGSUSED*/ static kmem_cbrc_t dnode_move(void *buf, void *newbuf, size_t size, void *arg) { dnode_t *odn = buf, *ndn = newbuf; objset_t *os; int64_t refcount; uint32_t dbufs; /* * The dnode is on the objset's list of known dnodes if the objset * pointer is valid. We set the low bit of the objset pointer when * freeing the dnode to invalidate it, and the memory patterns written * by kmem (baddcafe and deadbeef) set at least one of the two low bits. * A newly created dnode sets the objset pointer last of all to indicate * that the dnode is known and in a valid state to be moved by this * function. */ os = odn->dn_objset; if (!POINTER_IS_VALID(os)) { DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * Ensure that the objset does not go away during the move. */ rw_enter(&os_lock, RW_WRITER); if (os != odn->dn_objset) { rw_exit(&os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1); return (KMEM_CBRC_DONT_KNOW); } /* * If the dnode is still valid, then so is the objset. We know that no * valid objset can be freed while we hold os_lock, so we can safely * ensure that the objset remains in use. */ mutex_enter(&os->os_lock); /* * Recheck the objset pointer in case the dnode was removed just before * acquiring the lock. */ if (os != odn->dn_objset) { mutex_exit(&os->os_lock); rw_exit(&os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2); return (KMEM_CBRC_DONT_KNOW); } /* * At this point we know that as long as we hold os->os_lock, the dnode * cannot be freed and fields within the dnode can be safely accessed. * The objset listing this dnode cannot go away as long as this dnode is * on its list. */ rw_exit(&os_lock); if (DMU_OBJECT_IS_SPECIAL(odn->dn_object)) { mutex_exit(&os->os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special); return (KMEM_CBRC_NO); } ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */ /* * Lock the dnode handle to prevent the dnode from obtaining any new * holds. This also prevents the descendant dbufs and the bonus dbuf * from accessing the dnode, so that we can discount their holds. The * handle is safe to access because we know that while the dnode cannot * go away, neither can its handle. Once we hold dnh_zrlock, we can * safely move any dnode referenced only by dbufs. */ if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) { mutex_exit(&os->os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle); return (KMEM_CBRC_LATER); } /* * Ensure a consistent view of the dnode's holds and the dnode's dbufs. * We need to guarantee that there is a hold for every dbuf in order to * determine whether the dnode is actively referenced. Falsely matching * a dbuf to an active hold would lead to an unsafe move. 
It's possible * that a thread already having an active dnode hold is about to add a * dbuf, and we can't compare hold and dbuf counts while the add is in * progress. */ if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) { zrl_exit(&odn->dn_handle->dnh_zrlock); mutex_exit(&os->os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock); return (KMEM_CBRC_LATER); } /* * A dbuf may be removed (evicted) without an active dnode hold. In that * case, the dbuf count is decremented under the handle lock before the * dbuf's hold is released. This order ensures that if we count the hold * after the dbuf is removed but before its hold is released, we will * treat the unmatched hold as active and exit safely. If we count the * hold before the dbuf is removed, the hold is discounted, and the * removal is blocked until the move completes. */ refcount = refcount_count(&odn->dn_holds); ASSERT(refcount >= 0); dbufs = odn->dn_dbufs_count; /* We can't have more dbufs than dnode holds. */ ASSERT3U(dbufs, <=, refcount); DTRACE_PROBE3(dnode__move, dnode_t *, odn, int64_t, refcount, uint32_t, dbufs); if (refcount > dbufs) { rw_exit(&odn->dn_struct_rwlock); zrl_exit(&odn->dn_handle->dnh_zrlock); mutex_exit(&os->os_lock); DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active); return (KMEM_CBRC_LATER); } rw_exit(&odn->dn_struct_rwlock); /* * At this point we know that anyone with a hold on the dnode is not * actively referencing it. The dnode is known and in a valid state to * move. We're holding the locks needed to execute the critical section. */ dnode_move_impl(odn, ndn); list_link_replace(&odn->dn_link, &ndn->dn_link); /* If the dnode was safe to move, the refcount cannot have changed. */ ASSERT(refcount == refcount_count(&ndn->dn_holds)); ASSERT(dbufs == ndn->dn_dbufs_count); zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */ mutex_exit(&os->os_lock); return (KMEM_CBRC_YES); } -#endif /* _KERNEL */ #endif /* illumos */ +#endif /* _KERNEL */ void dnode_special_close(dnode_handle_t *dnh) { dnode_t *dn = dnh->dnh_dnode; /* * Wait for final references to the dnode to clear. This can * only happen if the arc is asyncronously evicting state that * has a hold on this dnode while we are trying to evict this * dnode. */ while (refcount_count(&dn->dn_holds) > 0) delay(1); ASSERT(dn->dn_dbuf == NULL || dmu_buf_get_user(&dn->dn_dbuf->db) == NULL); zrl_add(&dnh->dnh_zrlock); dnode_destroy(dn); /* implicit zrl_remove() */ zrl_destroy(&dnh->dnh_zrlock); dnh->dnh_dnode = NULL; } void dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object, dnode_handle_t *dnh) { dnode_t *dn; dn = dnode_create(os, dnp, NULL, object, dnh); zrl_init(&dnh->dnh_zrlock); DNODE_VERIFY(dn); } static void dnode_buf_evict_async(void *dbu) { dnode_children_t *children_dnodes = dbu; int i; for (i = 0; i < children_dnodes->dnc_count; i++) { dnode_handle_t *dnh = &children_dnodes->dnc_children[i]; dnode_t *dn; /* * The dnode handle lock guards against the dnode moving to * another valid address, so there is no need here to guard * against changes to or from NULL. */ if (dnh->dnh_dnode == NULL) { zrl_destroy(&dnh->dnh_zrlock); continue; } zrl_add(&dnh->dnh_zrlock); dn = dnh->dnh_dnode; /* * If there are holds on this dnode, then there should * be holds on the dnode's containing dbuf as well; thus * it wouldn't be eligible for eviction and this function * would not have been called. 
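dnode_move() above only proceeds when every hold on the dnode is accounted for by a child dbuf (refcount == dbufs); any extra hold implies an active user and the relocation is deferred. A tiny decision-only sketch of that comparison, with locally defined result names standing in for the kmem_cbrc_t values, is below; illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the kmem move-callback results used by dnode_move(). */
typedef enum { CBRC_YES, CBRC_LATER } cbrc_sketch_t;

/*
 * If every hold is matched by a child dbuf, nothing can be actively
 * dereferencing the dnode and it is safe to relocate; otherwise defer.
 */
static cbrc_sketch_t
move_decision(int64_t holds, uint32_t dbufs)
{
	return (holds > (int64_t)dbufs ? CBRC_LATER : CBRC_YES);
}

int
main(void)
{
	printf("holds=3 dbufs=3 -> %s\n",
	    move_decision(3, 3) == CBRC_YES ? "move now" : "try later");
	printf("holds=4 dbufs=3 -> %s\n",
	    move_decision(4, 3) == CBRC_YES ? "move now" : "try later");
	return (0);
}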
*/ ASSERT(refcount_is_zero(&dn->dn_holds)); ASSERT(refcount_is_zero(&dn->dn_tx_holds)); dnode_destroy(dn); /* implicit zrl_remove() */ zrl_destroy(&dnh->dnh_zrlock); dnh->dnh_dnode = NULL; } kmem_free(children_dnodes, sizeof (dnode_children_t) + children_dnodes->dnc_count * sizeof (dnode_handle_t)); } /* * errors: * EINVAL - invalid object number. * EIO - i/o error. * succeeds even for free dnodes. */ int dnode_hold_impl(objset_t *os, uint64_t object, int flag, void *tag, dnode_t **dnp) { int epb, idx, err; int drop_struct_lock = FALSE; int type; uint64_t blk; dnode_t *mdn, *dn; dmu_buf_impl_t *db; dnode_children_t *children_dnodes; dnode_handle_t *dnh; /* * If you are holding the spa config lock as writer, you shouldn't * be asking the DMU to do *anything* unless it's the root pool * which may require us to read from the root filesystem while * holding some (not all) of the locks as writer. */ ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 || (spa_is_root(os->os_spa) && spa_config_held(os->os_spa, SCL_STATE, RW_WRITER))); if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) { dn = (object == DMU_USERUSED_OBJECT) ? DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os); if (dn == NULL) return (SET_ERROR(ENOENT)); type = dn->dn_type; if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) return (SET_ERROR(ENOENT)); if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE) return (SET_ERROR(EEXIST)); DNODE_VERIFY(dn); (void) refcount_add(&dn->dn_holds, tag); *dnp = dn; return (0); } if (object == 0 || object >= DN_MAX_OBJECT) return (SET_ERROR(EINVAL)); mdn = DMU_META_DNODE(os); ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT); DNODE_VERIFY(mdn); if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) { rw_enter(&mdn->dn_struct_rwlock, RW_READER); drop_struct_lock = TRUE; } blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t)); db = dbuf_hold(mdn, blk, FTAG); if (drop_struct_lock) rw_exit(&mdn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); err = dbuf_read(db, NULL, DB_RF_CANFAIL); if (err) { dbuf_rele(db, FTAG); return (err); } ASSERT3U(db->db.db_size, >=, 1<db.db_size >> DNODE_SHIFT; idx = object & (epb-1); ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE); children_dnodes = dmu_buf_get_user(&db->db); if (children_dnodes == NULL) { int i; dnode_children_t *winner; children_dnodes = kmem_zalloc(sizeof (dnode_children_t) + epb * sizeof (dnode_handle_t), KM_SLEEP); children_dnodes->dnc_count = epb; dnh = &children_dnodes->dnc_children[0]; for (i = 0; i < epb; i++) { zrl_init(&dnh[i].dnh_zrlock); } dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL, dnode_buf_evict_async, NULL); winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu); if (winner != NULL) { for (i = 0; i < epb; i++) { zrl_destroy(&dnh[i].dnh_zrlock); } kmem_free(children_dnodes, sizeof (dnode_children_t) + epb * sizeof (dnode_handle_t)); children_dnodes = winner; } } ASSERT(children_dnodes->dnc_count == epb); dnh = &children_dnodes->dnc_children[idx]; zrl_add(&dnh->dnh_zrlock); dn = dnh->dnh_dnode; if (dn == NULL) { dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx; dn = dnode_create(os, phys, db, object, dnh); } mutex_enter(&dn->dn_mtx); type = dn->dn_type; if (dn->dn_free_txg || ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) || ((flag & DNODE_MUST_BE_FREE) && (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) { mutex_exit(&dn->dn_mtx); zrl_remove(&dnh->dnh_zrlock); dbuf_rele(db, FTAG); return (type == DMU_OT_NONE ? 
ENOENT : EEXIST); } if (refcount_add(&dn->dn_holds, tag) == 1) dbuf_add_ref(db, dnh); mutex_exit(&dn->dn_mtx); /* Now we can rely on the hold to prevent the dnode from moving. */ zrl_remove(&dnh->dnh_zrlock); DNODE_VERIFY(dn); ASSERT3P(dn->dn_dbuf, ==, db); ASSERT3U(dn->dn_object, ==, object); dbuf_rele(db, FTAG); *dnp = dn; return (0); } /* * Return held dnode if the object is allocated, NULL if not. */ int dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp) { return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp)); } /* * Can only add a reference if there is already at least one * reference on the dnode. Returns FALSE if unable to add a * new reference. */ boolean_t dnode_add_ref(dnode_t *dn, void *tag) { mutex_enter(&dn->dn_mtx); if (refcount_is_zero(&dn->dn_holds)) { mutex_exit(&dn->dn_mtx); return (FALSE); } VERIFY(1 < refcount_add(&dn->dn_holds, tag)); mutex_exit(&dn->dn_mtx); return (TRUE); } void dnode_rele(dnode_t *dn, void *tag) { mutex_enter(&dn->dn_mtx); dnode_rele_and_unlock(dn, tag); } void dnode_rele_and_unlock(dnode_t *dn, void *tag) { uint64_t refs; /* Get while the hold prevents the dnode from moving. */ dmu_buf_impl_t *db = dn->dn_dbuf; dnode_handle_t *dnh = dn->dn_handle; refs = refcount_remove(&dn->dn_holds, tag); mutex_exit(&dn->dn_mtx); /* * It's unsafe to release the last hold on a dnode by dnode_rele() or * indirectly by dbuf_rele() while relying on the dnode handle to * prevent the dnode from moving, since releasing the last hold could * result in the dnode's parent dbuf evicting its dnode handles. For * that reason anyone calling dnode_rele() or dbuf_rele() without some * other direct or indirect hold on the dnode must first drop the dnode * handle. */ ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread); /* NOTE: the DNODE_DNODE does not have a dn_dbuf */ if (refs == 0 && db != NULL) { /* * Another thread could add a hold to the dnode handle in * dnode_hold_impl() while holding the parent dbuf. Since the * hold on the parent dbuf prevents the handle from being * destroyed, the hold on the handle is OK. We can't yet assert * that the handle has zero references, but that will be * asserted anyway when the handle gets destroyed. */ dbuf_rele(db, dnh); } } void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx) { objset_t *os = dn->dn_objset; uint64_t txg = tx->tx_txg; if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { dsl_dataset_dirty(os->os_dsl_dataset, tx); return; } DNODE_VERIFY(dn); #ifdef ZFS_DEBUG mutex_enter(&dn->dn_mtx); ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg); ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg); mutex_exit(&dn->dn_mtx); #endif /* * Determine old uid/gid when necessary */ dmu_objset_userquota_get_ids(dn, B_TRUE, tx); multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK]; multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn); /* * If we are already marked dirty, we're done. */ if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) { multilist_sublist_unlock(mls); return; } ASSERT(!refcount_is_zero(&dn->dn_holds) || !avl_is_empty(&dn->dn_dbufs)); ASSERT(dn->dn_datablksz != 0); ASSERT0(dn->dn_next_bonuslen[txg&TXG_MASK]); ASSERT0(dn->dn_next_blksz[txg&TXG_MASK]); ASSERT0(dn->dn_next_bonustype[txg&TXG_MASK]); dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n", dn->dn_object, txg); multilist_sublist_insert_head(mls, dn); multilist_sublist_unlock(mls); /* * The dnode maintains a hold on its containing dbuf as * long as there are holds on it. 
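dnode_setdirty() and the dn_next_* bookkeeping above key every per-transaction-group array by txg & TXG_MASK, i.e. the txg number modulo the number of in-flight txgs. A tiny indexing sketch (TXG_SIZE assumed to be 4, so TXG_MASK is 3) is below; illustrative only.

#include <stdint.h>
#include <stdio.h>

#define SK_TXG_SIZE	4			/* in-flight txgs (assumed) */
#define SK_TXG_MASK	(SK_TXG_SIZE - 1)

int
main(void)
{
	uint64_t next_blksz[SK_TXG_SIZE] = { 0 };

	/* Successive txgs reuse the same four slots, round-robin. */
	for (uint64_t txg = 20; txg < 26; txg++) {
		int slot = txg & SK_TXG_MASK;
		next_blksz[slot] = 512 * txg;	/* pretend per-txg state */
		printf("txg=%llu -> slot %d (blksz=%llu)\n",
		    (unsigned long long)txg, slot,
		    (unsigned long long)next_blksz[slot]);
	}
	return (0);
}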
Each instantiated child * dbuf maintains a hold on the dnode. When the last child * drops its hold, the dnode will drop its hold on the * containing dbuf. We add a "dirty hold" here so that the * dnode will hang around after we finish processing its * children. */ VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg)); (void) dbuf_dirty(dn->dn_dbuf, tx); dsl_dataset_dirty(os->os_dsl_dataset, tx); } void dnode_free(dnode_t *dn, dmu_tx_t *tx) { mutex_enter(&dn->dn_mtx); if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) { mutex_exit(&dn->dn_mtx); return; } dn->dn_free_txg = tx->tx_txg; mutex_exit(&dn->dn_mtx); dnode_setdirty(dn, tx); } /* * Try to change the block size for the indicated dnode. This can only * succeed if there are no blocks allocated or dirty beyond first block */ int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx) { dmu_buf_impl_t *db; int err; ASSERT3U(size, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset))); if (size == 0) size = SPA_MINBLOCKSIZE; else size = P2ROUNDUP(size, SPA_MINBLOCKSIZE); if (ibs == dn->dn_indblkshift) ibs = 0; if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0) return (0); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); /* Check for any allocated blocks beyond the first */ if (dn->dn_maxblkid != 0) goto fail; mutex_enter(&dn->dn_dbufs_mtx); for (db = avl_first(&dn->dn_dbufs); db != NULL; db = AVL_NEXT(&dn->dn_dbufs, db)) { if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID && db->db_blkid != DMU_SPILL_BLKID) { mutex_exit(&dn->dn_dbufs_mtx); goto fail; } } mutex_exit(&dn->dn_dbufs_mtx); if (ibs && dn->dn_nlevels != 1) goto fail; /* resize the old block */ err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db); if (err == 0) dbuf_new_size(db, size, tx); else if (err != ENOENT) goto fail; dnode_setdblksz(dn, size); dnode_setdirty(dn, tx); dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size; if (ibs) { dn->dn_indblkshift = ibs; dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs; } /* rele after we have fixed the blocksize in the dnode */ if (db) dbuf_rele(db, FTAG); rw_exit(&dn->dn_struct_rwlock); return (0); fail: rw_exit(&dn->dn_struct_rwlock); return (SET_ERROR(ENOTSUP)); } /* read-holding callers must not rely on the lock being continuously held */ void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read) { uint64_t txgoff = tx->tx_txg & TXG_MASK; int epbs, new_nlevels; uint64_t sz; ASSERT(blkid != DMU_BONUS_BLKID); ASSERT(have_read ? RW_READ_HELD(&dn->dn_struct_rwlock) : RW_WRITE_HELD(&dn->dn_struct_rwlock)); /* * if we have a read-lock, check to see if we need to do any work * before upgrading to a write-lock. */ if (have_read) { if (blkid <= dn->dn_maxblkid) return; if (!rw_tryupgrade(&dn->dn_struct_rwlock)) { rw_exit(&dn->dn_struct_rwlock); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); } } if (blkid <= dn->dn_maxblkid) goto out; dn->dn_maxblkid = blkid; /* * Compute the number of levels necessary to support the new maxblkid. 
*/ new_nlevels = 1; epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; for (sz = dn->dn_nblkptr; sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs) new_nlevels++; if (new_nlevels > dn->dn_nlevels) { int old_nlevels = dn->dn_nlevels; dmu_buf_impl_t *db; list_t *list; dbuf_dirty_record_t *new, *dr, *dr_next; dn->dn_nlevels = new_nlevels; ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]); dn->dn_next_nlevels[txgoff] = new_nlevels; /* dirty the left indirects */ db = dbuf_hold_level(dn, old_nlevels, 0, FTAG); ASSERT(db != NULL); new = dbuf_dirty(db, tx); dbuf_rele(db, FTAG); /* transfer the dirty records to the new indirect */ mutex_enter(&dn->dn_mtx); mutex_enter(&new->dt.di.dr_mtx); list = &dn->dn_dirty_records[txgoff]; for (dr = list_head(list); dr; dr = dr_next) { dr_next = list_next(&dn->dn_dirty_records[txgoff], dr); if (dr->dr_dbuf->db_level != new_nlevels-1 && dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID && dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) { ASSERT(dr->dr_dbuf->db_level == old_nlevels-1); list_remove(&dn->dn_dirty_records[txgoff], dr); list_insert_tail(&new->dt.di.dr_children, dr); dr->dr_parent = new; } } mutex_exit(&new->dt.di.dr_mtx); mutex_exit(&dn->dn_mtx); } out: if (have_read) rw_downgrade(&dn->dn_struct_rwlock); } static void dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx) { dmu_buf_impl_t *db = dbuf_hold_level(dn, 1, l1blkid, FTAG); if (db != NULL) { dmu_buf_will_dirty(&db->db, tx); dbuf_rele(db, FTAG); } } void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) { dmu_buf_impl_t *db; uint64_t blkoff, blkid, nblks; int blksz, blkshift, head, tail; int trunc = FALSE; int epbs; rw_enter(&dn->dn_struct_rwlock, RW_WRITER); blksz = dn->dn_datablksz; blkshift = dn->dn_datablkshift; epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; if (len == DMU_OBJECT_END) { len = UINT64_MAX - off; trunc = TRUE; } /* * First, block align the region to free: */ if (ISP2(blksz)) { head = P2NPHASE(off, blksz); blkoff = P2PHASE(off, blksz); if ((off >> blkshift) > dn->dn_maxblkid) goto out; } else { ASSERT(dn->dn_maxblkid == 0); if (off == 0 && len >= blksz) { /* * Freeing the whole block; fast-track this request. * Note that we won't dirty any indirect blocks, * which is fine because we will be freeing the entire * file and thus all indirect blocks will be freed * by free_children(). */ blkid = 0; nblks = 1; goto done; } else if (off >= blksz) { /* Freeing past end-of-data */ goto out; } else { /* Freeing part of the block. 
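dnode_new_blkid() above grows the indirection depth until the block-pointer fan-out covers the new maxblkid: starting from dn_nblkptr direct pointers, each extra level multiplies coverage by 2^epbs. A standalone sketch of that loop, with assumed example values and without the overflow guard of the original, is below; illustrative only.

#include <stdint.h>
#include <stdio.h>

/* How many levels are needed so that the tree spans block id 'blkid'. */
static int
levels_needed(uint64_t blkid, uint64_t nblkptr, int epbs)
{
	int nlevels = 1;
	uint64_t sz;

	/* Mirrors the loop in dnode_new_blkid(): widen by 2^epbs per level. */
	for (sz = nblkptr; sz <= blkid; sz <<= epbs)
		nlevels++;
	return (nlevels);
}

int
main(void)
{
	/* Assumed example: 3 direct blkptrs, 128K indirects => epbs = 17 - 7 = 10. */
	printf("blkid=2       -> %d levels\n", levels_needed(2, 3, 10));
	printf("blkid=5000    -> %d levels\n", levels_needed(5000, 3, 10));
	printf("blkid=5000000 -> %d levels\n", levels_needed(5000000, 3, 10));
	return (0);
}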
*/ head = blksz - off; ASSERT3U(head, >, 0); } blkoff = off; } /* zero out any partial block data at the start of the range */ if (head) { ASSERT3U(blkoff + head, ==, blksz); if (len < head) head = len; if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off), TRUE, FALSE, FTAG, &db) == 0) { caddr_t data; /* don't dirty if it isn't on disk and isn't dirty */ if (db->db_last_dirty || (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { rw_exit(&dn->dn_struct_rwlock); dmu_buf_will_dirty(&db->db, tx); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); data = db->db.db_data; bzero(data + blkoff, head); } dbuf_rele(db, FTAG); } off += head; len -= head; } /* If the range was less than one block, we're done */ if (len == 0) goto out; /* If the remaining range is past end of file, we're done */ if ((off >> blkshift) > dn->dn_maxblkid) goto out; ASSERT(ISP2(blksz)); if (trunc) tail = 0; else tail = P2PHASE(len, blksz); ASSERT0(P2PHASE(off, blksz)); /* zero out any partial block data at the end of the range */ if (tail) { if (len < tail) tail = len; if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len), TRUE, FALSE, FTAG, &db) == 0) { /* don't dirty if not on disk and not dirty */ if (db->db_last_dirty || (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) { rw_exit(&dn->dn_struct_rwlock); dmu_buf_will_dirty(&db->db, tx); rw_enter(&dn->dn_struct_rwlock, RW_WRITER); bzero(db->db.db_data, tail); } dbuf_rele(db, FTAG); } len -= tail; } /* If the range did not include a full block, we are done */ if (len == 0) goto out; ASSERT(IS_P2ALIGNED(off, blksz)); ASSERT(trunc || IS_P2ALIGNED(len, blksz)); blkid = off >> blkshift; nblks = len >> blkshift; if (trunc) nblks += 1; /* * Dirty all the indirect blocks in this range. Note that only * the first and last indirect blocks can actually be written * (if they were partially freed) -- they must be dirtied, even if * they do not exist on disk yet. The interior blocks will * be freed by free_children(), so they will not actually be written. * Even though these interior blocks will not be written, we * dirty them for two reasons: * * - It ensures that the indirect blocks remain in memory until * syncing context. (They have already been prefetched by * dmu_tx_hold_free(), so we don't have to worry about reading * them serially here.) * * - The dirty space accounting will put pressure on the txg sync * mechanism to begin syncing, and to delay transactions if there * is a large amount of freeing. Even though these indirect * blocks will not be written, we could need to write the same * amount of space if we copy the freed BPs into deadlists. */ if (dn->dn_nlevels > 1) { uint64_t first, last; first = blkid >> epbs; dnode_dirty_l1(dn, first, tx); if (trunc) last = dn->dn_maxblkid >> epbs; else last = (blkid + nblks - 1) >> epbs; if (last != first) dnode_dirty_l1(dn, last, tx); int shift = dn->dn_datablkshift + dn->dn_indblkshift - SPA_BLKPTRSHIFT; for (uint64_t i = first + 1; i < last; i++) { /* * Set i to the blockid of the next non-hole * level-1 indirect block at or after i. Note * that dnode_next_offset() operates in terms of * level-0-equivalent bytes. */ uint64_t ibyte = i << shift; int err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0); i = ibyte >> shift; if (i >= last) break; /* * Normally we should not see an error, either * from dnode_next_offset() or dbuf_hold_level() * (except for ESRCH from dnode_next_offset). 
* If there is an i/o error, then when we read * this block in syncing context, it will use * ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according * to the "failmode" property. dnode_next_offset() * doesn't have a flag to indicate MUSTSUCCEED. */ if (err != 0) break; dnode_dirty_l1(dn, i, tx); } } done: /* * Add this range to the dnode range list. * We will finish up this free operation in the syncing phase. */ mutex_enter(&dn->dn_mtx); int txgoff = tx->tx_txg & TXG_MASK; if (dn->dn_free_ranges[txgoff] == NULL) { dn->dn_free_ranges[txgoff] = range_tree_create(NULL, NULL); } range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks); range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks); dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n", blkid, nblks, tx->tx_txg); mutex_exit(&dn->dn_mtx); dbuf_free_range(dn, blkid, blkid + nblks - 1, tx); dnode_setdirty(dn, tx); out: rw_exit(&dn->dn_struct_rwlock); } static boolean_t dnode_spill_freed(dnode_t *dn) { int i; mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK) break; } mutex_exit(&dn->dn_mtx); return (i < TXG_SIZE); } /* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */ uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid) { void *dp = spa_get_dsl(dn->dn_objset->os_spa); int i; if (blkid == DMU_BONUS_BLKID) return (FALSE); /* * If we're in the process of opening the pool, dp will not be * set yet, but there shouldn't be anything dirty. */ if (dp == NULL) return (FALSE); if (dn->dn_free_txg) return (TRUE); if (blkid == DMU_SPILL_BLKID) return (dnode_spill_freed(dn)); mutex_enter(&dn->dn_mtx); for (i = 0; i < TXG_SIZE; i++) { if (dn->dn_free_ranges[i] != NULL && range_tree_contains(dn->dn_free_ranges[i], blkid, 1)) break; } mutex_exit(&dn->dn_mtx); return (i < TXG_SIZE); } /* call from syncing context when we actually write/free space for this dnode */ void dnode_diduse_space(dnode_t *dn, int64_t delta) { uint64_t space; dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n", dn, dn->dn_phys, (u_longlong_t)dn->dn_phys->dn_used, (longlong_t)delta); mutex_enter(&dn->dn_mtx); space = DN_USED_BYTES(dn->dn_phys); if (delta > 0) { ASSERT3U(space + delta, >=, space); /* no overflow */ } else { ASSERT3U(space, >=, -delta); /* no underflow */ } space += delta; if (spa_version(dn->dn_objset->os_spa) < SPA_VERSION_DNODE_BYTES) { ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0); ASSERT0(P2PHASE(space, 1<dn_phys->dn_used = space >> DEV_BSHIFT; } else { dn->dn_phys->dn_used = space; dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES; } mutex_exit(&dn->dn_mtx); } /* * Scans a block at the indicated "level" looking for a hole or data, * depending on 'flags'. * * If level > 0, then we are scanning an indirect block looking at its * pointers. If level == 0, then we are looking at a block of dnodes. * * If we don't find what we are looking for in the block, we return ESRCH. * Otherwise, return with *offset pointing to the beginning (if searching * forwards) or end (if searching backwards) of the range covered by the * block pointer we matched on (or dnode). * * The basic search algorithm used below by dnode_next_offset() is to * use this function to search up the block tree (widen the search) until * we find something (i.e., we don't return ESRCH) and then search back * down the tree (narrow the search) until we reach our original search * level. 
*/ static int dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset, int lvl, uint64_t blkfill, uint64_t txg) { dmu_buf_impl_t *db = NULL; void *data = NULL; uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; uint64_t epb = 1ULL << epbs; uint64_t minfill, maxfill; boolean_t hole; int i, inc, error, span; dprintf("probing object %llu offset %llx level %d of %u\n", dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels); hole = ((flags & DNODE_FIND_HOLE) != 0); inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1; ASSERT(txg == 0 || !hole); if (lvl == dn->dn_phys->dn_nlevels) { error = 0; epb = dn->dn_phys->dn_nblkptr; data = dn->dn_phys->dn_blkptr; } else { uint64_t blkid = dbuf_whichblock(dn, lvl, *offset); error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db); if (error) { if (error != ENOENT) return (error); if (hole) return (0); /* * This can only happen when we are searching up * the block tree for data. We don't really need to * adjust the offset, as we will just end up looking * at the pointer to this block in its parent, and its * going to be unallocated, so we will skip over it. */ return (SET_ERROR(ESRCH)); } error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT); if (error) { dbuf_rele(db, FTAG); return (error); } data = db->db.db_data; } if (db != NULL && txg != 0 && (db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg || BP_IS_HOLE(db->db_blkptr))) { /* * This can only happen when we are searching up the tree * and these conditions mean that we need to keep climbing. */ error = SET_ERROR(ESRCH); } else if (lvl == 0) { dnode_phys_t *dnp = data; span = DNODE_SHIFT; ASSERT(dn->dn_type == DMU_OT_DNODE); for (i = (*offset >> span) & (blkfill - 1); i >= 0 && i < blkfill; i += inc) { if ((dnp[i].dn_type == DMU_OT_NONE) == hole) break; *offset += (1ULL << span) * inc; } if (i < 0 || i == blkfill) error = SET_ERROR(ESRCH); } else { blkptr_t *bp = data; uint64_t start = *offset; span = (lvl - 1) * epbs + dn->dn_datablkshift; minfill = 0; maxfill = blkfill << ((lvl - 1) * epbs); if (hole) maxfill--; else minfill++; *offset = *offset >> span; for (i = BF64_GET(*offset, 0, epbs); i >= 0 && i < epb; i += inc) { if (BP_GET_FILL(&bp[i]) >= minfill && BP_GET_FILL(&bp[i]) <= maxfill && (hole || bp[i].blk_birth > txg)) break; if (inc > 0 || *offset > 0) *offset += inc; } *offset = *offset << span; if (inc < 0) { /* traversing backwards; position offset at the end */ ASSERT3U(*offset, <=, start); *offset = MIN(*offset + (1ULL << span) - 1, start); } else if (*offset < start) { *offset = start; } if (i < 0 || i >= epb) error = SET_ERROR(ESRCH); } if (db) dbuf_rele(db, FTAG); return (error); } /* * Find the next hole, data, or sparse region at or after *offset. * The value 'blkfill' tells us how many items we expect to find * in an L0 data block; this value is 1 for normal objects, * DNODES_PER_BLOCK for the meta dnode, and some fraction of * DNODES_PER_BLOCK when searching for sparse regions thereof. * * Examples: * * dnode_next_offset(dn, flags, offset, 1, 1, 0); * Finds the next/previous hole/data in a file. * Used in dmu_offset_next(). * * dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg); * Finds the next free/allocated dnode an objset's meta-dnode. * Only finds objects that have new contents since txg (ie. * bonus buffer changes and content removal are ignored). * Used in dmu_object_next(). * * dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0); * Finds the next L2 meta-dnode bp that's at most 1/4 full. 
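The widen-then-narrow search described above (climb levels until a per-level probe stops returning ESRCH, then walk back down so the offset is refined at the requested level) can be sketched generically as below, with a hypothetical probe callback standing in for dnode_next_offset_level(); illustrative only, not part of this change.

#include <stdint.h>
#include <stdio.h>

#define SKETCH_ESRCH	3	/* "not found at this level" */

/* Hypothetical per-level probe, in the role of dnode_next_offset_level(). */
typedef int (*probe_fn_t)(int lvl, uint64_t *offset, void *arg);

/*
 * Search upward from minlvl until a level reports a match (or a hard error),
 * then search back down so *offset is refined at the original level.
 */
static int
widen_then_narrow(probe_fn_t probe, void *arg, int minlvl, int maxlvl,
    uint64_t *offset)
{
	int lvl, error = SKETCH_ESRCH;

	for (lvl = minlvl; lvl <= maxlvl; lvl++) {
		error = probe(lvl, offset, arg);
		if (error != SKETCH_ESRCH)
			break;
	}
	while (error == 0 && --lvl >= minlvl)
		error = probe(lvl, offset, arg);
	return (error);
}

/*
 * Toy probe: a single "match" lives at offset 200.  A probe at level lvl can
 * only see 10^lvl units ahead of *offset; on success it moves *offset to the
 * start of the matched region for that level.
 */
static int
toy_probe(int lvl, uint64_t *offset, void *arg)
{
	const uint64_t target = 200;
	uint64_t reach = 1;

	(void)arg;
	for (int i = 0; i < lvl; i++)
		reach *= 10;
	if (*offset > target || target - *offset >= reach)
		return (SKETCH_ESRCH);
	*offset = target - (target % reach);
	return (0);
}

int
main(void)
{
	uint64_t off = 150;
	int err = widen_then_narrow(toy_probe, NULL, 1, 3, &off);

	printf("err=%d offset=%llu\n", err, (unsigned long long)off);
	return (0);
}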
* Used in dmu_object_alloc(). */ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset, int minlvl, uint64_t blkfill, uint64_t txg) { uint64_t initial_offset = *offset; int lvl, maxlvl; int error = 0; if (!(flags & DNODE_FIND_HAVELOCK)) rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_phys->dn_nlevels == 0) { error = SET_ERROR(ESRCH); goto out; } if (dn->dn_datablkshift == 0) { if (*offset < dn->dn_datablksz) { if (flags & DNODE_FIND_HOLE) *offset = dn->dn_datablksz; } else { error = SET_ERROR(ESRCH); } goto out; } maxlvl = dn->dn_phys->dn_nlevels; for (lvl = minlvl; lvl <= maxlvl; lvl++) { error = dnode_next_offset_level(dn, flags, offset, lvl, blkfill, txg); if (error != ESRCH) break; } while (error == 0 && --lvl >= minlvl) { error = dnode_next_offset_level(dn, flags, offset, lvl, blkfill, txg); } /* * There's always a "virtual hole" at the end of the object, even * if all BP's which physically exist are non-holes. */ if ((flags & DNODE_FIND_HOLE) && error == ESRCH && txg == 0 && minlvl == 1 && blkfill == 1 && !(flags & DNODE_FIND_BACKWARDS)) { error = 0; } if (error == 0 && (flags & DNODE_FIND_BACKWARDS ? initial_offset < *offset : initial_offset > *offset)) error = SET_ERROR(ESRCH); out: if (!(flags & DNODE_FIND_HAVELOCK)) rw_exit(&dn->dn_struct_rwlock); return (error); } Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/acl.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/acl.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/acl.h (revision 332526) @@ -1,312 +1,313 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2014 Garrett D'Amore * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2017 RackTop Systems. */ #ifndef _SYS_ACL_H #define _SYS_ACL_H #include #include #if defined(_KERNEL) /* * When compiling OpenSolaris kernel code, this file is included instead of the * FreeBSD one. Include the original sys/acl.h as well. */ #undef _SYS_ACL_H #include_next #define _SYS_ACL_H #endif /* _KERNEL */ #ifdef __cplusplus extern "C" { #endif #define MAX_ACL_ENTRIES (1024) /* max entries of each type */ typedef struct { int a_type; /* the type of ACL entry */ uid_t a_id; /* the entry in -uid or gid */ o_mode_t a_perm; /* the permission field */ } aclent_t; typedef struct ace { uid_t a_who; /* uid or gid */ uint32_t a_access_mask; /* read,write,... */ uint16_t a_flags; /* see below */ uint16_t a_type; /* allow or deny */ } ace_t; #ifndef _KERNEL typedef struct acl_info acl_t; #endif /* * The following are Defined types for an aclent_t. 
*/ #define USER_OBJ (0x01) /* object owner */ #define USER (0x02) /* additional users */ #define GROUP_OBJ (0x04) /* owning group of the object */ #define GROUP (0x08) /* additional groups */ #define CLASS_OBJ (0x10) /* file group class and mask entry */ #define OTHER_OBJ (0x20) /* other entry for the object */ #define ACL_DEFAULT (0x1000) /* default flag */ /* default object owner */ #define DEF_USER_OBJ (ACL_DEFAULT | USER_OBJ) /* default additional users */ #define DEF_USER (ACL_DEFAULT | USER) /* default owning group */ #define DEF_GROUP_OBJ (ACL_DEFAULT | GROUP_OBJ) /* default additional groups */ #define DEF_GROUP (ACL_DEFAULT | GROUP) /* default mask entry */ #define DEF_CLASS_OBJ (ACL_DEFAULT | CLASS_OBJ) /* default other entry */ #define DEF_OTHER_OBJ (ACL_DEFAULT | OTHER_OBJ) /* * The following are defined for ace_t. */ #define ACE_READ_DATA 0x00000001 #define ACE_LIST_DIRECTORY 0x00000001 #define ACE_WRITE_DATA 0x00000002 #define ACE_ADD_FILE 0x00000002 #define ACE_APPEND_DATA 0x00000004 #define ACE_ADD_SUBDIRECTORY 0x00000004 #define ACE_READ_NAMED_ATTRS 0x00000008 #define ACE_WRITE_NAMED_ATTRS 0x00000010 #define ACE_EXECUTE 0x00000020 #define ACE_DELETE_CHILD 0x00000040 #define ACE_READ_ATTRIBUTES 0x00000080 #define ACE_WRITE_ATTRIBUTES 0x00000100 #define ACE_DELETE 0x00010000 #define ACE_READ_ACL 0x00020000 #define ACE_WRITE_ACL 0x00040000 #define ACE_WRITE_OWNER 0x00080000 #define ACE_SYNCHRONIZE 0x00100000 #define ACE_FILE_INHERIT_ACE 0x0001 #define ACE_DIRECTORY_INHERIT_ACE 0x0002 #define ACE_NO_PROPAGATE_INHERIT_ACE 0x0004 #define ACE_INHERIT_ONLY_ACE 0x0008 #define ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x0010 #define ACE_FAILED_ACCESS_ACE_FLAG 0x0020 #define ACE_IDENTIFIER_GROUP 0x0040 #define ACE_INHERITED_ACE 0x0080 #define ACE_OWNER 0x1000 #define ACE_GROUP 0x2000 #define ACE_EVERYONE 0x4000 #define ACE_ACCESS_ALLOWED_ACE_TYPE 0x0000 #define ACE_ACCESS_DENIED_ACE_TYPE 0x0001 #define ACE_SYSTEM_AUDIT_ACE_TYPE 0x0002 #define ACE_SYSTEM_ALARM_ACE_TYPE 0x0003 #define ACL_AUTO_INHERIT 0x0001 #define ACL_PROTECTED 0x0002 #define ACL_DEFAULTED 0x0004 #define ACL_FLAGS_ALL (ACL_AUTO_INHERIT|ACL_PROTECTED| \ ACL_DEFAULTED) -#ifdef _KERNEL +#if defined(_KERNEL) || defined(_FAKE_KERNEL) /* * These are only applicable in a CIFS context. */ #define ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE 0x04 #define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 #define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 #define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 #define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 #define ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE 0x09 #define ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE 0x0A #define ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE 0x0B #define ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE 0x0C #define ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE 0x0D #define ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE 0x0E #define ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE 0x0F #define ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE 0x10 #define ACE_ALL_TYPES 0x001F typedef struct ace_object { uid_t a_who; /* uid or gid */ uint32_t a_access_mask; /* read,write,... 
*/ uint16_t a_flags; /* see below */ uint16_t a_type; /* allow or deny */ uint8_t a_obj_type[16]; /* obj type */ uint8_t a_inherit_obj_type[16]; /* inherit obj */ } ace_object_t; #endif #define ACE_ALL_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \ ACE_WRITE_OWNER|ACE_SYNCHRONIZE) #define ACE_ALL_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA| \ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS|ACE_WRITE_ACL| \ ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD) #define ACE_READ_PERMS (ACE_READ_DATA|ACE_READ_ACL|ACE_READ_ATTRIBUTES| \ ACE_READ_NAMED_ATTRS) #define ACE_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES| \ ACE_WRITE_NAMED_ATTRS) #define ACE_MODIFY_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \ ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \ ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_SYNCHRONIZE) /* * The following flags are supported by both NFSv4 ACLs and ace_t. */ #define ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \ ACE_DIRECTORY_INHERIT_ACE | \ ACE_NO_PROPAGATE_INHERIT_ACE | \ ACE_INHERIT_ONLY_ACE | \ ACE_INHERITED_ACE | \ ACE_IDENTIFIER_GROUP) #define ACE_TYPE_FLAGS (ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \ ACE_IDENTIFIER_GROUP) #define ACE_INHERIT_FLAGS (ACE_FILE_INHERIT_ACE| ACL_INHERITED_ACE| \ ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE) /* cmd args to acl(2) for aclent_t */ #define GETACL 1 #define SETACL 2 #define GETACLCNT 3 /* cmd's to manipulate ace acls. */ #define ACE_GETACL 4 #define ACE_SETACL 5 #define ACE_GETACLCNT 6 /* minimal acl entries from GETACLCNT */ #define MIN_ACL_ENTRIES 4 #if !defined(_KERNEL) /* acl check errors */ #define GRP_ERROR 1 #define USER_ERROR 2 #define OTHER_ERROR 3 #define CLASS_ERROR 4 #define DUPLICATE_ERROR 5 #define MISS_ERROR 6 #define MEM_ERROR 7 #define ENTRY_ERROR 8 /* * similar to ufs_acl.h: changed to char type for user commands (tar, cpio) * Attribute types */ #define UFSD_FREE ('0') /* Free entry */ #define UFSD_ACL ('1') /* Access Control Lists */ #define UFSD_DFACL ('2') /* reserved for future use */ #define ACE_ACL ('3') /* ace_t style acls */ /* * flag to [f]acl_get() * controls whether a trivial acl should be returned. 
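As an illustration of how the ace_t layout and the ACE_* bits above fit together, the sketch below fills in the three abstract entries (owner@, group@, everyone@) of a simple allow ACL. The particular permission mix is arbitrary, and treating a_who as not meaningful for abstract entries is an assumption of this sketch rather than something this header states.

/*
 * Illustrative only: build a minimal three-entry allow ACL using the ace_t
 * structure and ACE_* constants defined above.
 */
static void
build_simple_acl(ace_t aces[3])
{
        /* owner@: read and write */
        aces[0].a_who = (uid_t)-1;      /* assumed unused for abstract entries */
        aces[0].a_access_mask = ACE_READ_DATA | ACE_WRITE_DATA;
        aces[0].a_flags = ACE_OWNER;
        aces[0].a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;

        /* group@: read only */
        aces[1].a_who = (uid_t)-1;
        aces[1].a_access_mask = ACE_READ_DATA;
        aces[1].a_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP;
        aces[1].a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;

        /* everyone@: read only */
        aces[2].a_who = (uid_t)-1;
        aces[2].a_access_mask = ACE_READ_DATA;
        aces[2].a_flags = ACE_EVERYONE;
        aces[2].a_type = ACE_ACCESS_ALLOWED_ACE_TYPE;
}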
*/ #define ACL_NO_TRIVIAL 0x2 /* * Flags to control acl_totext() */ #define ACL_APPEND_ID 0x1 /* append uid/gid to user/group entries */ #define ACL_COMPACT_FMT 0x2 /* build ACL in ls -V format */ #define ACL_NORESOLVE 0x4 /* don't do name service lookups */ #define ACL_SID_FMT 0x8 /* use usersid/groupsid when appropriate */ /* * Legacy aclcheck errors for aclent_t ACLs */ #define EACL_GRP_ERROR GRP_ERROR #define EACL_USER_ERROR USER_ERROR #define EACL_OTHER_ERROR OTHER_ERROR #define EACL_CLASS_ERROR CLASS_ERROR #define EACL_DUPLICATE_ERROR DUPLICATE_ERROR #define EACL_MISS_ERROR MISS_ERROR #define EACL_MEM_ERROR MEM_ERROR #define EACL_ENTRY_ERROR ENTRY_ERROR #define EACL_INHERIT_ERROR 9 /* invalid inherit flags */ #define EACL_FLAGS_ERROR 10 /* unknown flag value */ #define EACL_PERM_MASK_ERROR 11 /* unknown permission */ #define EACL_COUNT_ERROR 12 /* invalid acl count */ #define EACL_INVALID_SLOT 13 /* invalid acl slot */ #define EACL_NO_ACL_ENTRY 14 /* Entry doesn't exist */ #define EACL_DIFF_TYPE 15 /* acls aren't same type */ #define EACL_INVALID_USER_GROUP 16 /* need user/group name */ #define EACL_INVALID_STR 17 /* invalid acl string */ #define EACL_FIELD_NOT_BLANK 18 /* can't have blank field */ #define EACL_INVALID_ACCESS_TYPE 19 /* invalid access type */ #define EACL_UNKNOWN_DATA 20 /* Unrecognized data in ACL */ #define EACL_MISSING_FIELDS 21 /* missing fields in acl */ #define EACL_INHERIT_NOTDIR 22 /* Need dir for inheritance */ extern int aclcheck(aclent_t *, int, int *); extern int acltomode(aclent_t *, int, mode_t *); extern int aclfrommode(aclent_t *, int, mode_t *); extern int aclsort(int, int, aclent_t *); extern char *acltotext(aclent_t *, int); extern aclent_t *aclfromtext(char *, int *); extern void acl_free(acl_t *); extern int acl_get(const char *, int, acl_t **); extern int facl_get(int, int, acl_t **); extern int acl_set(const char *, acl_t *acl); extern int facl_set(int, acl_t *acl); extern int acl_strip(const char *, uid_t, gid_t, mode_t); extern int acl_trivial(const char *); extern char *acl_totext(acl_t *, int); extern int acl_fromtext(const char *, acl_t **); extern int acl_check(acl_t *, int); #else /* !defined(_KERNEL) */ extern void ksort(caddr_t, int, int, int (*)(void *, void *)); extern int cmp2acls(void *, void *); #endif /* !defined(_KERNEL) */ extern int acl(const char *path, int cmd, int cnt, void *buf); extern int facl(int fd, int cmd, int cnt, void *buf); #ifdef __cplusplus } #endif #endif /* _SYS_ACL_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/bitmap.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/bitmap.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/bitmap.h (revision 332526) @@ -1,197 +1,198 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
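The userland prototypes above (acl_get(), acl_totext(), acl_free() and the ACL_* text flags) are enough for a small diagnostic tool. A hedged example follows; it assumes a Solaris/illumos-style environment where this header is the one reached as <sys/acl.h> and where these routines are provided by libsec, neither of which holds for a stock FreeBSD userland.

#include <stdio.h>
#include <stdlib.h>
#include <sys/acl.h>

int
main(int argc, char *argv[])
{
        acl_t *aclp;
        char *text;

        if (argc != 2) {
                fprintf(stderr, "usage: %s path\n", argv[0]);
                return (1);
        }
        /* Second argument 0: return the ACL even if it is trivial. */
        if (acl_get(argv[1], 0, &aclp) != 0) {
                perror("acl_get");
                return (1);
        }
        /* ACL_COMPACT_FMT renders the ACL the way "ls -V" would. */
        text = acl_totext(aclp, ACL_COMPACT_FMT);
        if (text != NULL) {
                printf("%s\n", text);
                free(text);
        }
        acl_free(aclp);
        return (0);
}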
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2014 by Delphix. All rights reserved. + * Copyright 2017 RackTop Systems. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ #ifndef _SYS_BITMAP_H #define _SYS_BITMAP_H #ifdef __cplusplus extern "C" { #endif #include #if defined(__GNUC__) && defined(_ASM_INLINES) && \ (defined(__i386) || defined(__amd64)) #include #endif /* * Operations on bitmaps of arbitrary size * A bitmap is a vector of 1 or more ulong_t's. * The user of the package is responsible for range checks and keeping * track of sizes. */ #ifdef _LP64 #define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */ #define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */ #else #define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */ #endif #define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */ #define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */ #ifdef _LP64 #define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per ulong_t */ #define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */ #define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */ #else #define BT_ULMAXMASK 0xffffffff #endif /* * bitmap is a ulong_t *, bitindex an index_t * * The macros BT_WIM and BT_BIW internal; there is no need * for users of this package to use them. */ /* * word in map */ #define BT_WIM(bitmap, bitindex) \ ((bitmap)[(bitindex) >> BT_ULSHIFT]) /* * bit in word */ #define BT_BIW(bitindex) \ (1UL << ((bitindex) & BT_ULMASK)) #ifdef _LP64 #define BT_WIM32(bitmap, bitindex) \ ((bitmap)[(bitindex) >> BT_ULSHIFT32]) #define BT_BIW32(bitindex) \ (1UL << ((bitindex) & BT_ULMASK32)) #endif /* * These are public macros * * BT_BITOUL == n bits to n ulong_t's */ #define BT_BITOUL(nbits) \ (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL) #define BT_SIZEOFMAP(nbits) \ (BT_BITOUL(nbits) * sizeof (ulong_t)) #define BT_TEST(bitmap, bitindex) \ ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0) #define BT_SET(bitmap, bitindex) \ { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } #define BT_CLEAR(bitmap, bitindex) \ { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } #ifdef _LP64 #define BT_BITOUL32(nbits) \ (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32) #define BT_SIZEOFMAP32(nbits) \ (BT_BITOUL32(nbits) * sizeof (uint_t)) #define BT_TEST32(bitmap, bitindex) \ ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0) #define BT_SET32(bitmap, bitindex) \ { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); } #define BT_CLEAR32(bitmap, bitindex) \ { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); } #endif /* _LP64 */ /* * BIT_ONLYONESET is a private macro not designed for bitmaps of * arbitrary size. u must be an unsigned integer/long. It returns * true if one and only one bit is set in u. */ #define BIT_ONLYONESET(u) \ ((((u) == 0) ? 
0 : ((u) & ((u) - 1)) == 0)) -#if defined(_KERNEL) && !defined(_ASM) +#if (defined(_KERNEL) || defined(_FAKE_KERNEL)) && !defined(_ASM) #include /* * return next available bit index from map with specified number of bits */ extern index_t bt_availbit(ulong_t *bitmap, size_t nbits); /* * find the highest order bit that is on, and is within or below * the word specified by wx */ extern int bt_gethighbit(ulong_t *mapp, int wx); extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2, size_t end_pos); /* * Find highest and lowest one bit set. * Returns bit number + 1 of bit that is set, otherwise returns 0. * Low order bit is 0, high order bit is 31. */ extern int highbit(ulong_t); extern int highbit64(uint64_t); extern int lowbit(ulong_t); extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop); extern void bt_copy(ulong_t *, ulong_t *, ulong_t); /* * find the parity */ extern int odd_parity(ulong_t); /* * Atomically set/clear bits * Atomic exclusive operations will set "result" to "-1" * if the bit is already set/cleared. "result" will be set * to 0 otherwise. */ #define BT_ATOMIC_SET(bitmap, bitindex) \ { atomic_or_ulong(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); } #define BT_ATOMIC_CLEAR(bitmap, bitindex) \ { atomic_and_ulong(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); } #define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \ { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \ (bitindex) % BT_NBIPUL); } #define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \ { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \ (bitindex) % BT_NBIPUL); } /* * Extracts bits between index h (high, inclusive) and l (low, exclusive) from * u, which must be an unsigned integer. */ #define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU)) -#endif /* _KERNEL && !_ASM */ +#endif /* (_KERNEL || _FAKE_KERNEL) && !_ASM */ #ifdef __cplusplus } #endif #endif /* _SYS_BITMAP_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpupart.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpupart.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpupart.h (revision 332526) @@ -1,157 +1,158 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 RackTop Systems. */ #ifndef _SYS_CPUPART_H #define _SYS_CPUPART_H #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif -#ifdef _KERNEL +#if defined(_KERNEL) || defined(_FAKE_KERNEL) typedef int cpupartid_t; /* * Special partition id. 
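The change above widens the kernel-only region of this header from _KERNEL to _KERNEL || _FAKE_KERNEL, so the bt_*() declarations, the BT_ATOMIC_* macros and BITX() also become visible to fake-kernel builds (presumably userland consumers compiled with -D_FAKE_KERNEL; that motivation is inferred from the guard change, not stated here). The public BT_* macros were already unconditional, and the small demonstration below uses only those. It assumes an environment where this compat header is reachable as <sys/bitmap.h> and where ulong_t is defined.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/bitmap.h>

#define DEMO_NBITS      200

int
main(void)
{
        /* BT_BITOUL() rounds the bit count up to whole ulong_t words. */
        ulong_t map[BT_BITOUL(DEMO_NBITS)];

        memset(map, 0, BT_SIZEOFMAP(DEMO_NBITS));

        BT_SET(map, 3);
        BT_SET(map, 130);
        BT_CLEAR(map, 3);

        printf("bit 3: %d, bit 130: %d, bit 131: %d\n",
            BT_TEST(map, 3), BT_TEST(map, 130), BT_TEST(map, 131));
        return (0);
}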
*/ #define CP_DEFAULT 0 /* * Flags for cpupart_list() */ #define CP_ALL 0 /* return all cpu partitions */ #define CP_NONEMPTY 1 /* return only non-empty ones */ typedef struct cpupart { disp_t cp_kp_queue; /* partition-wide kpreempt queue */ cpupartid_t cp_id; /* partition ID */ int cp_ncpus; /* number of online processors */ struct cpupart *cp_next; /* next partition in list */ struct cpupart *cp_prev; /* previous partition in list */ struct cpu *cp_cpulist; /* processor list */ struct kstat *cp_kstat; /* per-partition statistics */ /* * cp_nrunnable and cp_nrunning are used to calculate load average. */ uint_t cp_nrunnable; /* current # of runnable threads */ uint_t cp_nrunning; /* current # of running threads */ /* * cp_updates, cp_nrunnable_cum, cp_nwaiting_cum, and cp_hp_avenrun * are used to generate kstat information on an as-needed basis. */ uint64_t cp_updates; /* number of statistics updates */ uint64_t cp_nrunnable_cum; /* cum. # of runnable threads */ uint64_t cp_nwaiting_cum; /* cum. # of waiting threads */ struct loadavg_s cp_loadavg; /* cpupart loadavg */ klgrpset_t cp_lgrpset; /* set of lgroups on which this */ /* partition has cpus */ lpl_t *cp_lgrploads; /* table of load averages for this */ /* partition, indexed by lgrp ID */ int cp_nlgrploads; /* size of cp_lgrploads table */ uint64_t cp_hp_avenrun[3]; /* high-precision load average */ uint_t cp_attr; /* bitmask of attributes */ lgrp_gen_t cp_gen; /* generation number */ lgrp_id_t cp_lgrp_hint; /* last home lgroup chosen */ bitset_t cp_cmt_pgs; /* CMT PGs represented */ bitset_t cp_haltset; /* halted CPUs */ } cpupart_t; typedef struct cpupart_kstat { kstat_named_t cpk_updates; /* number of updates */ kstat_named_t cpk_runnable; /* cum # of runnable threads */ kstat_named_t cpk_waiting; /* cum # waiting for I/O */ kstat_named_t cpk_ncpus; /* current # of CPUs */ kstat_named_t cpk_avenrun_1min; /* 1-minute load average */ kstat_named_t cpk_avenrun_5min; /* 5-minute load average */ kstat_named_t cpk_avenrun_15min; /* 15-minute load average */ } cpupart_kstat_t; /* * Macro to obtain the maximum run priority for the global queue associated * with given cpu partition. */ #define CP_MAXRUNPRI(cp) ((cp)->cp_kp_queue.disp_maxrunpri) /* * This macro is used to determine if the given thread must surrender * CPU to higher priority runnable threads on one of its dispatch queues. * This should really be defined in but it is not because * including there would cause recursive includes. */ #define DISP_MUST_SURRENDER(t) \ ((DISP_MAXRUNPRI(t) > DISP_PRIO(t)) || \ (CP_MAXRUNPRI(t->t_cpupart) > DISP_PRIO(t))) extern cpupart_t cp_default; extern cpupart_t *cp_list_head; extern uint_t cp_numparts; extern uint_t cp_numparts_nonempty; /* * Each partition contains a bitset that indicates which CPUs are halted and * which ones are running. Given the growing number of CPUs in current and * future platforms, it's important to fanout each CPU within its partition's * haltset to prevent contention due to false sharing. The fanout factor * is platform specific, and declared accordingly. 
*/ extern uint_t cp_haltset_fanout; extern void cpupart_initialize_default(); extern cpupart_t *cpupart_find(psetid_t); extern int cpupart_create(psetid_t *); extern int cpupart_destroy(psetid_t); extern psetid_t cpupart_query_cpu(cpu_t *); extern int cpupart_attach_cpu(psetid_t, cpu_t *, int); extern int cpupart_get_cpus(psetid_t *, processorid_t *, uint_t *); extern int cpupart_bind_thread(kthread_id_t, psetid_t, int, void *, void *); extern void cpupart_kpqalloc(pri_t); extern int cpupart_get_loadavg(psetid_t, int *, int); extern uint_t cpupart_list(psetid_t *, uint_t, int); extern int cpupart_setattr(psetid_t, uint_t); extern int cpupart_getattr(psetid_t, uint_t *); -#endif /* _KERNEL */ +#endif /* _KERNEL || _FAKE_KERNEL */ #ifdef __cplusplus } #endif #endif /* _SYS_CPUPART_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/cpuvar.h (revision 332526) @@ -1,828 +1,830 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Igor Kozhukhov . + * Copyright 2017 RackTop Systems. */ #ifndef _SYS_CPUVAR_H #define _SYS_CPUVAR_H #include #include /* has cpu_stat_t definition */ #include #include +#include /* has kcpc_ctx_t definition */ #include #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) #include #endif #include #include #include #include #include #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) && \ (defined(__i386) || defined(__amd64)) #include #endif #ifdef __cplusplus extern "C" { #endif struct squeue_set_s; #define CPU_CACHE_COHERENCE_SIZE 64 /* * For fast event tracing. */ struct ftrace_record; typedef struct ftrace_data { int ftd_state; /* ftrace flags */ kmutex_t ftd_unused; /* ftrace buffer lock, unused */ struct ftrace_record *ftd_cur; /* current record */ struct ftrace_record *ftd_first; /* first record */ struct ftrace_record *ftd_last; /* last record */ } ftrace_data_t; struct cyc_cpu; struct nvlist; /* * Per-CPU data. * * Be careful adding new members: if they are not the same in all modules (e.g. * change size depending on a #define), CTF uniquification can fail to work * properly. Furthermore, this is transitive in that it applies recursively to * all types pointed to by cpu_t. 
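A kernel-side sketch of consuming the partition list declared above follows. The circular cp_next linkage and the use of cpu_lock to stabilize the walk are assumptions carried over from common illumos usage; this header does not spell out the locking rule.

/*
 * Sketch: report how many online CPUs each partition currently owns.
 */
static void
cpupart_report(void)
{
        cpupart_t *cp;

        mutex_enter(&cpu_lock);
        cp = cp_list_head;
        do {
                cmn_err(CE_CONT, "partition %d: %d online CPUs\n",
                    (int)cp->cp_id, cp->cp_ncpus);
                cp = cp->cp_next;
        } while (cp != cp_list_head);
        mutex_exit(&cpu_lock);
}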
*/ typedef struct cpu { processorid_t cpu_id; /* CPU number */ processorid_t cpu_seqid; /* sequential CPU id (0..ncpus-1) */ volatile cpu_flag_t cpu_flags; /* flags indicating CPU state */ struct cpu *cpu_self; /* pointer to itself */ kthread_t *cpu_thread; /* current thread */ kthread_t *cpu_idle_thread; /* idle thread for this CPU */ kthread_t *cpu_pause_thread; /* pause thread for this CPU */ klwp_id_t cpu_lwp; /* current lwp (if any) */ klwp_id_t cpu_fpowner; /* currently loaded fpu owner */ struct cpupart *cpu_part; /* partition with this CPU */ struct lgrp_ld *cpu_lpl; /* pointer to this cpu's load */ int cpu_cache_offset; /* see kmem.c for details */ /* * Links to other CPUs. It is safe to walk these lists if * one of the following is true: * - cpu_lock held * - preemption disabled via kpreempt_disable * - PIL >= DISP_LEVEL * - acting thread is an interrupt thread * - all other CPUs are paused */ struct cpu *cpu_next; /* next existing CPU */ struct cpu *cpu_prev; /* prev existing CPU */ struct cpu *cpu_next_onln; /* next online (enabled) CPU */ struct cpu *cpu_prev_onln; /* prev online (enabled) CPU */ struct cpu *cpu_next_part; /* next CPU in partition */ struct cpu *cpu_prev_part; /* prev CPU in partition */ struct cpu *cpu_next_lgrp; /* next CPU in latency group */ struct cpu *cpu_prev_lgrp; /* prev CPU in latency group */ struct cpu *cpu_next_lpl; /* next CPU in lgrp partition */ struct cpu *cpu_prev_lpl; struct cpu_pg *cpu_pg; /* cpu's processor groups */ void *cpu_reserved[4]; /* reserved for future use */ /* * Scheduling variables. */ disp_t *cpu_disp; /* dispatch queue data */ /* * Note that cpu_disp is set before the CPU is added to the system * and is never modified. Hence, no additional locking is needed * beyond what's necessary to access the cpu_t structure. */ char cpu_runrun; /* scheduling flag - set to preempt */ char cpu_kprunrun; /* force kernel preemption */ pri_t cpu_chosen_level; /* priority at which cpu */ /* was chosen for scheduling */ kthread_t *cpu_dispthread; /* thread selected for dispatch */ disp_lock_t cpu_thread_lock; /* dispatcher lock on current thread */ uint8_t cpu_disp_flags; /* flags used by dispatcher */ /* * The following field is updated when ever the cpu_dispthread * changes. Also in places, where the current thread(cpu_dispthread) * priority changes. This is used in disp_lowpri_cpu() */ pri_t cpu_dispatch_pri; /* priority of cpu_dispthread */ clock_t cpu_last_swtch; /* last time switched to new thread */ /* * Interrupt data. */ caddr_t cpu_intr_stack; /* interrupt stack */ kthread_t *cpu_intr_thread; /* interrupt thread list */ uint_t cpu_intr_actv; /* interrupt levels active (bitmask) */ int cpu_base_spl; /* priority for highest rupt active */ /* * Statistics. */ cpu_stats_t cpu_stats; /* per-CPU statistics */ struct kstat *cpu_info_kstat; /* kstat for cpu info */ uintptr_t cpu_profile_pc; /* kernel PC in profile interrupt */ uintptr_t cpu_profile_upc; /* user PC in profile interrupt */ uintptr_t cpu_profile_pil; /* PIL when profile interrupted */ ftrace_data_t cpu_ftrace; /* per cpu ftrace data */ clock_t cpu_deadman_counter; /* used by deadman() */ uint_t cpu_deadman_countdown; /* used by deadman() */ kmutex_t cpu_cpc_ctxlock; /* protects context for idle thread */ kcpc_ctx_t *cpu_cpc_ctx; /* performance counter context */ /* * Configuration information for the processor_info system call. 
*/ processor_info_t cpu_type_info; /* config info */ time_t cpu_state_begin; /* when CPU entered current state */ char cpu_cpr_flags; /* CPR related info */ struct cyc_cpu *cpu_cyclic; /* per cpu cyclic subsystem data */ struct squeue_set_s *cpu_squeue_set; /* per cpu squeue set */ struct nvlist *cpu_props; /* pool-related properties */ krwlock_t cpu_ft_lock; /* DTrace: fasttrap lock */ uintptr_t cpu_dtrace_caller; /* DTrace: caller, if any */ hrtime_t cpu_dtrace_chillmark; /* DTrace: chill mark time */ hrtime_t cpu_dtrace_chilled; /* DTrace: total chill time */ volatile uint16_t cpu_mstate; /* cpu microstate */ volatile uint16_t cpu_mstate_gen; /* generation counter */ volatile hrtime_t cpu_mstate_start; /* cpu microstate start time */ volatile hrtime_t cpu_acct[NCMSTATES]; /* cpu microstate data */ hrtime_t cpu_intracct[NCMSTATES]; /* interrupt mstate data */ hrtime_t cpu_waitrq; /* cpu run-queue wait time */ struct loadavg_s cpu_loadavg; /* loadavg info for this cpu */ char *cpu_idstr; /* for printing and debugging */ char *cpu_brandstr; /* for printing */ /* * Sum of all device interrupt weights that are currently directed at * this cpu. Cleared at start of interrupt redistribution. */ int32_t cpu_intr_weight; void *cpu_vm_data; struct cpu_physid *cpu_physid; /* physical associations */ uint64_t cpu_curr_clock; /* current clock freq in Hz */ char *cpu_supp_freqs; /* supported freqs in Hz */ uintptr_t cpu_cpcprofile_pc; /* kernel PC in cpc interrupt */ uintptr_t cpu_cpcprofile_upc; /* user PC in cpc interrupt */ /* * Interrupt load factor used by dispatcher & softcall */ hrtime_t cpu_intrlast; /* total interrupt time (nsec) */ int cpu_intrload; /* interrupt load factor (0-99%) */ uint_t cpu_rotor; /* for cheap pseudo-random numbers */ struct cu_cpu_info *cpu_cu_info; /* capacity & util. info */ /* * cpu_generation is updated whenever CPU goes on-line or off-line. * Updates to cpu_generation are protected by cpu_lock. * * See CPU_NEW_GENERATION() macro below. */ volatile uint_t cpu_generation; /* tracking on/off-line */ /* * New members must be added /before/ this member, as the CTF tools * rely on this being the last field before cpu_m, so they can * correctly calculate the offset when synthetically adding the cpu_m * member in objects that do not have it. This fixup is required for * uniquification to work correctly. */ uintptr_t cpu_m_pad; #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) struct machcpu cpu_m; /* per architecture info */ #endif } cpu_t; /* * The cpu_core structure consists of per-CPU state available in any context. * On some architectures, this may mean that the page(s) containing the * NCPU-sized array of cpu_core structures must be locked in the TLB -- it * is up to the platform to assure that this is performed properly. Note that * the structure is sized to avoid false sharing. */ #define CPUC_SIZE (sizeof (uint16_t) + sizeof (uint8_t) + \ sizeof (uintptr_t) + sizeof (kmutex_t)) #define CPUC_PADSIZE CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE typedef struct cpu_core { uint16_t cpuc_dtrace_flags; /* DTrace flags */ uint8_t cpuc_dcpc_intr_state; /* DCPC provider intr state */ uint8_t cpuc_pad[CPUC_PADSIZE]; /* padding */ uintptr_t cpuc_dtrace_illval; /* DTrace illegal value */ kmutex_t cpuc_pid_lock; /* DTrace pid provider lock */ } cpu_core_t; #ifdef _KERNEL extern cpu_core_t cpu_core[]; #endif /* _KERNEL */ /* * CPU_ON_INTR() macro. Returns non-zero if currently on interrupt stack. * Note that this isn't a test for a high PIL. 
For example, cpu_intr_actv * does not get updated when we go through sys_trap from TL>0 at high PIL. * getpil() should be used instead to check for PIL levels. */ #define CPU_ON_INTR(cpup) ((cpup)->cpu_intr_actv >> (LOCK_LEVEL + 1)) /* * Check to see if an interrupt thread might be active at a given ipl. * If so return true. * We must be conservative--it is ok to give a false yes, but a false no * will cause disaster. (But if the situation changes after we check it is * ok--the caller is trying to ensure that an interrupt routine has been * exited). * This is used when trying to remove an interrupt handler from an autovector * list in avintr.c. */ #define INTR_ACTIVE(cpup, level) \ ((level) <= LOCK_LEVEL ? \ ((cpup)->cpu_intr_actv & (1 << (level))) : (CPU_ON_INTR(cpup))) /* * CPU_PSEUDO_RANDOM() returns a per CPU value that changes each time one * looks at it. It's meant as a cheap mechanism to be incorporated in routines * wanting to avoid biasing, but where true randomness isn't needed (just * something that changes). */ #define CPU_PSEUDO_RANDOM() (CPU->cpu_rotor++) #if defined(_KERNEL) || defined(_KMEMUSER) #define INTR_STACK_SIZE MAX(DEFAULTSTKSZ, PAGESIZE) /* MEMBERS PROTECTED BY "atomicity": cpu_flags */ /* * Flags in the CPU structure. * * These are protected by cpu_lock (except during creation). * * Offlined-CPUs have three stages of being offline: * * CPU_ENABLE indicates that the CPU is participating in I/O interrupts * that can be directed at a number of different CPUs. If CPU_ENABLE * is off, the CPU will not be given interrupts that can be sent elsewhere, * but will still get interrupts from devices associated with that CPU only, * and from other CPUs. * * CPU_OFFLINE indicates that the dispatcher should not allow any threads * other than interrupt threads to run on that CPU. A CPU will not have * CPU_OFFLINE set if there are any bound threads (besides interrupts). * * CPU_QUIESCED is set if p_offline was able to completely turn idle the * CPU and it will not have to run interrupt threads. In this case it'll * stay in the idle loop until CPU_QUIESCED is turned off. * * CPU_FROZEN is used only by CPR to mark CPUs that have been successfully * suspended (in the suspend path), or have yet to be resumed (in the resume * case). * * On some platforms CPUs can be individually powered off. * The following flags are set for powered off CPUs: CPU_QUIESCED, * CPU_OFFLINE, and CPU_POWEROFF. The following flags are cleared: * CPU_RUNNING, CPU_READY, CPU_EXISTS, and CPU_ENABLE. */ #define CPU_RUNNING 0x001 /* CPU running */ #define CPU_READY 0x002 /* CPU ready for cross-calls */ #define CPU_QUIESCED 0x004 /* CPU will stay in idle */ #define CPU_EXISTS 0x008 /* CPU is configured */ #define CPU_ENABLE 0x010 /* CPU enabled for interrupts */ #define CPU_OFFLINE 0x020 /* CPU offline via p_online */ #define CPU_POWEROFF 0x040 /* CPU is powered off */ #define CPU_FROZEN 0x080 /* CPU is frozen via CPR suspend */ #define CPU_SPARE 0x100 /* CPU offline available for use */ #define CPU_FAULTED 0x200 /* CPU offline diagnosed faulty */ #define FMT_CPU_FLAGS \ "\20\12fault\11spare\10frozen" \ "\7poweroff\6offline\5enable\4exist\3quiesced\2ready\1run" #define CPU_ACTIVE(cpu) (((cpu)->cpu_flags & CPU_OFFLINE) == 0) /* * Flags for cpu_offline(), cpu_faulted(), and cpu_spare(). */ #define CPU_FORCED 0x0001 /* Force CPU offline */ /* * DTrace flags. 
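The flag bits and FMT_CPU_FLAGS string above are designed for the kernel printf "%b" conversion; the sketch below shows that pattern together with a CPU_ACTIVE() check. Whether a particular environment's cmn_err() supports %b is an assumption of this sketch.

/*
 * Sketch: decode a CPU's flag word for diagnostic output.
 */
static void
cpu_show_flags(cpu_t *cp)
{
        if (!CPU_ACTIVE(cp)) {
                /* Offline CPUs run only interrupt and bound threads. */
                cmn_err(CE_NOTE, "cpu %d is not accepting new threads",
                    cp->cpu_id);
        }
        cmn_err(CE_CONT, "cpu %d flags: %b\n",
            cp->cpu_id, cp->cpu_flags, FMT_CPU_FLAGS);
}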
*/ #define CPU_DTRACE_NOFAULT 0x0001 /* Don't fault */ #define CPU_DTRACE_DROP 0x0002 /* Drop this ECB */ #define CPU_DTRACE_BADADDR 0x0004 /* DTrace fault: bad address */ #define CPU_DTRACE_BADALIGN 0x0008 /* DTrace fault: bad alignment */ #define CPU_DTRACE_DIVZERO 0x0010 /* DTrace fault: divide by zero */ #define CPU_DTRACE_ILLOP 0x0020 /* DTrace fault: illegal operation */ #define CPU_DTRACE_NOSCRATCH 0x0040 /* DTrace fault: out of scratch */ #define CPU_DTRACE_KPRIV 0x0080 /* DTrace fault: bad kernel access */ #define CPU_DTRACE_UPRIV 0x0100 /* DTrace fault: bad user access */ #define CPU_DTRACE_TUPOFLOW 0x0200 /* DTrace fault: tuple stack overflow */ #if defined(__sparc) #define CPU_DTRACE_FAKERESTORE 0x0400 /* pid provider hint to getreg */ #endif #define CPU_DTRACE_ENTRY 0x0800 /* pid provider hint to ustack() */ #define CPU_DTRACE_BADSTACK 0x1000 /* DTrace fault: bad stack */ #define CPU_DTRACE_FAULT (CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \ CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \ CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \ CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \ CPU_DTRACE_BADSTACK) #define CPU_DTRACE_ERROR (CPU_DTRACE_FAULT | CPU_DTRACE_DROP) /* * Dispatcher flags * These flags must be changed only by the current CPU. */ #define CPU_DISP_DONTSTEAL 0x01 /* CPU undergoing context swtch */ #define CPU_DISP_HALTED 0x02 /* CPU halted waiting for interrupt */ #endif /* _KERNEL || _KMEMUSER */ #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP) /* * Macros for manipulating sets of CPUs as a bitmap. Note that this * bitmap may vary in size depending on the maximum CPU id a specific * platform supports. This may be different than the number of CPUs * the platform supports, since CPU ids can be sparse. We define two * sets of macros; one for platforms where the maximum CPU id is less * than the number of bits in a single word (32 in a 32-bit kernel, * 64 in a 64-bit kernel), and one for platforms that require bitmaps * of more than one word. */ #define CPUSET_WORDS BT_BITOUL(NCPU) #define CPUSET_NOTINSET ((uint_t)-1) #if CPUSET_WORDS > 1 typedef struct cpuset { ulong_t cpub[CPUSET_WORDS]; } cpuset_t; /* * Private functions for manipulating cpusets that do not fit in a * single word. These should not be used directly; instead the * CPUSET_* macros should be used so the code will be portable * across different definitions of NCPU. */ extern void cpuset_all(cpuset_t *); extern void cpuset_all_but(cpuset_t *, uint_t); extern int cpuset_isnull(cpuset_t *); extern int cpuset_cmp(cpuset_t *, cpuset_t *); extern void cpuset_only(cpuset_t *, uint_t); extern uint_t cpuset_find(cpuset_t *); extern void cpuset_bounds(cpuset_t *, uint_t *, uint_t *); #define CPUSET_ALL(set) cpuset_all(&(set)) #define CPUSET_ALL_BUT(set, cpu) cpuset_all_but(&(set), cpu) #define CPUSET_ONLY(set, cpu) cpuset_only(&(set), cpu) #define CPU_IN_SET(set, cpu) BT_TEST((set).cpub, cpu) #define CPUSET_ADD(set, cpu) BT_SET((set).cpub, cpu) #define CPUSET_DEL(set, cpu) BT_CLEAR((set).cpub, cpu) #define CPUSET_ISNULL(set) cpuset_isnull(&(set)) #define CPUSET_ISEQUAL(set1, set2) cpuset_cmp(&(set1), &(set2)) /* * Find one CPU in the cpuset. * Sets "cpu" to the id of the found CPU, or CPUSET_NOTINSET if no cpu * could be found. (i.e. empty set) */ #define CPUSET_FIND(set, cpu) { \ cpu = cpuset_find(&(set)); \ } /* * Determine the smallest and largest CPU id in the set. Returns * CPUSET_NOTINSET in smallest and largest when set is empty. 
*/ #define CPUSET_BOUNDS(set, smallest, largest) { \ cpuset_bounds(&(set), &(smallest), &(largest)); \ } /* * Atomic cpuset operations * These are safe to use for concurrent cpuset manipulations. * "xdel" and "xadd" are exclusive operations, that set "result" to "0" * if the add or del was successful, or "-1" if not successful. * (e.g. attempting to add a cpu to a cpuset that's already there, or * deleting a cpu that's not in the cpuset) */ #define CPUSET_ATOMIC_DEL(set, cpu) BT_ATOMIC_CLEAR((set).cpub, (cpu)) #define CPUSET_ATOMIC_ADD(set, cpu) BT_ATOMIC_SET((set).cpub, (cpu)) #define CPUSET_ATOMIC_XADD(set, cpu, result) \ BT_ATOMIC_SET_EXCL((set).cpub, cpu, result) #define CPUSET_ATOMIC_XDEL(set, cpu, result) \ BT_ATOMIC_CLEAR_EXCL((set).cpub, cpu, result) #define CPUSET_OR(set1, set2) { \ int _i; \ for (_i = 0; _i < CPUSET_WORDS; _i++) \ (set1).cpub[_i] |= (set2).cpub[_i]; \ } #define CPUSET_XOR(set1, set2) { \ int _i; \ for (_i = 0; _i < CPUSET_WORDS; _i++) \ (set1).cpub[_i] ^= (set2).cpub[_i]; \ } #define CPUSET_AND(set1, set2) { \ int _i; \ for (_i = 0; _i < CPUSET_WORDS; _i++) \ (set1).cpub[_i] &= (set2).cpub[_i]; \ } #define CPUSET_ZERO(set) { \ int _i; \ for (_i = 0; _i < CPUSET_WORDS; _i++) \ (set).cpub[_i] = 0; \ } #elif CPUSET_WORDS == 1 typedef ulong_t cpuset_t; /* a set of CPUs */ #define CPUSET(cpu) (1UL << (cpu)) #define CPUSET_ALL(set) ((void)((set) = ~0UL)) #define CPUSET_ALL_BUT(set, cpu) ((void)((set) = ~CPUSET(cpu))) #define CPUSET_ONLY(set, cpu) ((void)((set) = CPUSET(cpu))) #define CPU_IN_SET(set, cpu) ((set) & CPUSET(cpu)) #define CPUSET_ADD(set, cpu) ((void)((set) |= CPUSET(cpu))) #define CPUSET_DEL(set, cpu) ((void)((set) &= ~CPUSET(cpu))) #define CPUSET_ISNULL(set) ((set) == 0) #define CPUSET_ISEQUAL(set1, set2) ((set1) == (set2)) #define CPUSET_OR(set1, set2) ((void)((set1) |= (set2))) #define CPUSET_XOR(set1, set2) ((void)((set1) ^= (set2))) #define CPUSET_AND(set1, set2) ((void)((set1) &= (set2))) #define CPUSET_ZERO(set) ((void)((set) = 0)) #define CPUSET_FIND(set, cpu) { \ cpu = (uint_t)(lowbit(set) - 1); \ } #define CPUSET_BOUNDS(set, smallest, largest) { \ smallest = (uint_t)(lowbit(set) - 1); \ largest = (uint_t)(highbit(set) - 1); \ } #define CPUSET_ATOMIC_DEL(set, cpu) atomic_and_ulong(&(set), ~CPUSET(cpu)) #define CPUSET_ATOMIC_ADD(set, cpu) atomic_or_ulong(&(set), CPUSET(cpu)) #define CPUSET_ATOMIC_XADD(set, cpu, result) \ { result = atomic_set_long_excl(&(set), (cpu)); } #define CPUSET_ATOMIC_XDEL(set, cpu, result) \ { result = atomic_clear_long_excl(&(set), (cpu)); } #else /* CPUSET_WORDS <= 0 */ #error NCPU is undefined or invalid #endif /* CPUSET_WORDS */ extern cpuset_t cpu_seqid_inuse; #endif /* (_KERNEL || _KMEMUSER) && _MACHDEP */ #define CPU_CPR_OFFLINE 0x0 #define CPU_CPR_ONLINE 0x1 #define CPU_CPR_IS_OFFLINE(cpu) (((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) == 0) #define CPU_CPR_IS_ONLINE(cpu) ((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) #define CPU_SET_CPR_FLAGS(cpu, flag) ((cpu)->cpu_cpr_flags |= flag) #if defined(_KERNEL) || defined(_KMEMUSER) extern struct cpu *cpu[]; /* indexed by CPU number */ extern struct cpu **cpu_seq; /* indexed by sequential CPU id */ extern cpu_t *cpu_list; /* list of CPUs */ extern cpu_t *cpu_active; /* list of active CPUs */ extern int ncpus; /* number of CPUs present */ extern int ncpus_online; /* number of CPUs not quiesced */ extern int max_ncpus; /* max present before ncpus is known */ extern int boot_max_ncpus; /* like max_ncpus but for real */ extern int boot_ncpus; /* # cpus present @ boot */ extern 
processorid_t max_cpuid; /* maximum CPU number */ extern struct cpu *cpu_inmotion; /* offline or partition move target */ extern cpu_t *clock_cpu_list; extern processorid_t max_cpu_seqid_ever; /* maximum seqid ever given */ #if defined(__i386) || defined(__amd64) extern struct cpu *curcpup(void); #define CPU (curcpup()) /* Pointer to current CPU */ #else #define CPU (curthread->t_cpu) /* Pointer to current CPU */ #endif /* * CPU_CURRENT indicates to thread_affinity_set to use CPU->cpu_id * as the target and to grab cpu_lock instead of requiring the caller * to grab it. */ #define CPU_CURRENT -3 /* * Per-CPU statistics * * cpu_stats_t contains numerous system and VM-related statistics, in the form * of gauges or monotonically-increasing event occurrence counts. */ #define CPU_STATS_ENTER_K() kpreempt_disable() #define CPU_STATS_EXIT_K() kpreempt_enable() #define CPU_STATS_ADD_K(class, stat, amount) \ { kpreempt_disable(); /* keep from switching CPUs */\ CPU_STATS_ADDQ(CPU, class, stat, amount); \ kpreempt_enable(); \ } #define CPU_STATS_ADDQ(cp, class, stat, amount) { \ extern void __dtrace_probe___cpu_##class##info_##stat(uint_t, \ uint64_t *, cpu_t *); \ uint64_t *stataddr = &((cp)->cpu_stats.class.stat); \ __dtrace_probe___cpu_##class##info_##stat((amount), \ stataddr, cp); \ *(stataddr) += (amount); \ } #define CPU_STATS(cp, stat) \ ((cp)->cpu_stats.stat) /* * Increment CPU generation value. * This macro should be called whenever CPU goes on-line or off-line. * Updates to cpu_generation should be protected by cpu_lock. */ #define CPU_NEW_GENERATION(cp) ((cp)->cpu_generation++) #endif /* _KERNEL || _KMEMUSER */ /* - * CPU support routines. + * CPU support routines (not for genassym.c) */ -#if defined(_KERNEL) && defined(__STDC__) /* not for genassym.c */ +#if (defined(_KERNEL) || defined(_FAKE_KERNEL)) && defined(__STDC__) struct zone; void cpu_list_init(cpu_t *); void cpu_add_unit(cpu_t *); void cpu_del_unit(int cpuid); void cpu_add_active(cpu_t *); void cpu_kstat_init(cpu_t *); void cpu_visibility_add(cpu_t *, struct zone *); void cpu_visibility_remove(cpu_t *, struct zone *); void cpu_visibility_configure(cpu_t *, struct zone *); void cpu_visibility_unconfigure(cpu_t *, struct zone *); void cpu_visibility_online(cpu_t *, struct zone *); void cpu_visibility_offline(cpu_t *, struct zone *); void cpu_create_intrstat(cpu_t *); void cpu_delete_intrstat(cpu_t *); int cpu_kstat_intrstat_update(kstat_t *, int); void cpu_intr_swtch_enter(kthread_t *); void cpu_intr_swtch_exit(kthread_t *); void mbox_lock_init(void); /* initialize cross-call locks */ void mbox_init(int cpun); /* initialize cross-calls */ void poke_cpu(int cpun); /* interrupt another CPU (to preempt) */ /* * values for safe_list. Pause state that CPUs are in. 
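Because all manipulation goes through the CPUSET_* macros defined earlier in this header, code like the sketch below is indifferent to whether cpuset_t is a single ulong_t or an array of them. It only compiles in contexts that satisfy the (_KERNEL || _KMEMUSER) && _MACHDEP guard around those macros.

/*
 * Sketch: basic set construction and lookup through the CPUSET_* API.
 */
static void
cpuset_demo(void)
{
        cpuset_t set;
        uint_t cpu;

        CPUSET_ZERO(set);
        CPUSET_ADD(set, 1);
        CPUSET_ADD(set, 5);

        if (CPU_IN_SET(set, 5))
                CPUSET_DEL(set, 5);

        /* The set still contains CPU 1, so a valid id comes back. */
        CPUSET_FIND(set, cpu);
        ASSERT(cpu != CPUSET_NOTINSET);
}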
*/ #define PAUSE_IDLE 0 /* normal state */ #define PAUSE_READY 1 /* paused thread ready to spl */ #define PAUSE_WAIT 2 /* paused thread is spl-ed high */ #define PAUSE_DIE 3 /* tell pause thread to leave */ #define PAUSE_DEAD 4 /* pause thread has left */ void mach_cpu_pause(volatile char *); void pause_cpus(cpu_t *off_cp, void *(*func)(void *)); void start_cpus(void); int cpus_paused(void); void cpu_pause_init(void); cpu_t *cpu_get(processorid_t cpun); /* get the CPU struct associated */ int cpu_online(cpu_t *cp); /* take cpu online */ int cpu_offline(cpu_t *cp, int flags); /* take cpu offline */ int cpu_spare(cpu_t *cp, int flags); /* take cpu to spare */ int cpu_faulted(cpu_t *cp, int flags); /* take cpu to faulted */ int cpu_poweron(cpu_t *cp); /* take powered-off cpu to offline */ int cpu_poweroff(cpu_t *cp); /* take offline cpu to powered-off */ cpu_t *cpu_intr_next(cpu_t *cp); /* get next online CPU taking intrs */ int cpu_intr_count(cpu_t *cp); /* count # of CPUs handling intrs */ int cpu_intr_on(cpu_t *cp); /* CPU taking I/O interrupts? */ void cpu_intr_enable(cpu_t *cp); /* enable I/O interrupts */ int cpu_intr_disable(cpu_t *cp); /* disable I/O interrupts */ void cpu_intr_alloc(cpu_t *cp, int n); /* allocate interrupt threads */ /* * Routines for checking CPU states. */ int cpu_is_online(cpu_t *); /* check if CPU is online */ int cpu_is_nointr(cpu_t *); /* check if CPU can service intrs */ int cpu_is_active(cpu_t *); /* check if CPU can run threads */ int cpu_is_offline(cpu_t *); /* check if CPU is offline */ int cpu_is_poweredoff(cpu_t *); /* check if CPU is powered off */ int cpu_flagged_online(cpu_flag_t); /* flags show CPU is online */ int cpu_flagged_nointr(cpu_flag_t); /* flags show CPU not handling intrs */ int cpu_flagged_active(cpu_flag_t); /* flags show CPU scheduling threads */ int cpu_flagged_offline(cpu_flag_t); /* flags show CPU is offline */ int cpu_flagged_poweredoff(cpu_flag_t); /* flags show CPU is powered off */ /* * The processor_info(2) state of a CPU is a simplified representation suitable * for use by an application program. Kernel subsystems should utilize the * internal per-CPU state as given by the cpu_flags member of the cpu structure, * as this information may include platform- or architecture-specific state * critical to a subsystem's disposition of a particular CPU. 
*/ void cpu_set_state(cpu_t *); /* record/timestamp current state */ int cpu_get_state(cpu_t *); /* get current cpu state */ const char *cpu_get_state_str(cpu_t *); /* get current cpu state as string */ void cpu_set_curr_clock(uint64_t); /* indicate the current CPU's freq */ void cpu_set_supp_freqs(cpu_t *, const char *); /* set the CPU supported */ /* frequencies */ int cpu_configure(int); int cpu_unconfigure(int); void cpu_destroy_bound_threads(cpu_t *cp); extern int cpu_bind_thread(kthread_t *tp, processorid_t bind, processorid_t *obind, int *error); extern int cpu_unbind(processorid_t cpu_id, boolean_t force); extern void thread_affinity_set(kthread_t *t, int cpu_id); extern void thread_affinity_clear(kthread_t *t); extern void affinity_set(int cpu_id); extern void affinity_clear(void); extern void init_cpu_mstate(struct cpu *, int); extern void term_cpu_mstate(struct cpu *); extern void new_cpu_mstate(int, hrtime_t); extern void get_cpu_mstate(struct cpu *, hrtime_t *); extern void thread_nomigrate(void); extern void thread_allowmigrate(void); extern void weakbinding_stop(void); extern void weakbinding_start(void); /* * The following routines affect the CPUs participation in interrupt processing, * if that is applicable on the architecture. This only affects interrupts * which aren't directed at the processor (not cross calls). * * cpu_disable_intr returns non-zero if interrupts were previously enabled. */ int cpu_disable_intr(struct cpu *cp); /* stop issuing interrupts to cpu */ void cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */ /* * The mutex cpu_lock protects cpu_flags for all CPUs, as well as the ncpus * and ncpus_online counts. */ extern kmutex_t cpu_lock; /* lock protecting CPU data */ /* * CPU state change events * * Various subsystems need to know when CPUs change their state. They get this * information by registering CPU state change callbacks using * register_cpu_setup_func(). Whenever any CPU changes its state, the callback * function is called. The callback function is passed three arguments: * * Event, described by cpu_setup_t * CPU ID * Transparent pointer passed when registering the callback * * The callback function is called with cpu_lock held. The return value from the * callback function is usually ignored, except for CPU_CONFIG and CPU_UNCONFIG * events. For these two events, non-zero return value indicates a failure and * prevents successful completion of the operation. * * New events may be added in the future. Callback functions should ignore any * events that they do not understand. 
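Combining the cpu_list linkage with the state query routines declared just above gives a simple reporting loop. Holding cpu_lock is one of the conditions the cpu_t comment earlier lists as making a list walk safe; the circular cpu_next linkage is assumed from common usage rather than stated here.

/*
 * Sketch: log the processor_info-style state of every existing CPU.
 */
static void
cpu_report_states(void)
{
        cpu_t *cp;

        mutex_enter(&cpu_lock);
        cp = cpu_list;
        do {
                cmn_err(CE_CONT, "cpu %d: %s\n",
                    cp->cpu_id, cpu_get_state_str(cp));
                cp = cp->cpu_next;
        } while (cp != cpu_list);
        mutex_exit(&cpu_lock);
}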
* * The following events provide notification callbacks: * * CPU_INIT A new CPU is started and added to the list of active CPUs * This event is only used during boot * * CPU_CONFIG A newly inserted CPU is prepared for starting running code * This event is called by DR code * * CPU_UNCONFIG CPU has been powered off and needs cleanup * This event is called by DR code * * CPU_ON CPU is enabled but does not run anything yet * * CPU_INTR_ON CPU is enabled and has interrupts enabled * * CPU_OFF CPU is going offline but can still run threads * * CPU_CPUPART_OUT CPU is going to move out of its partition * * CPU_CPUPART_IN CPU is going to move to a new partition * * CPU_SETUP CPU is set up during boot and can run threads */ typedef enum { CPU_INIT, CPU_CONFIG, CPU_UNCONFIG, CPU_ON, CPU_OFF, CPU_CPUPART_IN, CPU_CPUPART_OUT, CPU_SETUP, CPU_INTR_ON } cpu_setup_t; typedef int cpu_setup_func_t(cpu_setup_t, int, void *); /* * Routines used to register interest in cpu's being added to or removed * from the system. */ extern void register_cpu_setup_func(cpu_setup_func_t *, void *); extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *); extern void cpu_state_change_notify(int, cpu_setup_t); /* * Call specified function on the given CPU */ typedef void (*cpu_call_func_t)(uintptr_t, uintptr_t); extern void cpu_call(cpu_t *, cpu_call_func_t, uintptr_t, uintptr_t); /* * Create various strings that describe the given CPU for the * processor_info system call and configuration-related kstats. */ #define CPU_IDSTRLEN 100 extern void init_cpu_info(struct cpu *); extern void populate_idstr(struct cpu *); extern void cpu_vm_data_init(struct cpu *); extern void cpu_vm_data_destroy(struct cpu *); -#endif /* _KERNEL */ +#endif /* _KERNEL || _FAKE_KERNEL */ #ifdef __cplusplus } #endif #endif /* _SYS_CPUVAR_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fm/util.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fm/util.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fm/util.h (revision 332526) @@ -1,101 +1,102 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 RackTop Systems. */ #ifndef _SYS_FM_UTIL_H #define _SYS_FM_UTIL_H #ifdef __cplusplus extern "C" { #endif #include #include /* * Shared user/kernel definitions for class length, error channel name, * and kernel event publisher string. */ #define FM_MAX_CLASS 100 #define FM_ERROR_CHAN "com.sun:fm:error" #define FM_PUB "fm" /* * ereport dump device transport support * * Ereports are written out to the dump device at a proscribed offset from the * end, similar to in-transit log messages. 
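A sketch of a state-change callback built on the declarations above. The callback contract (three arguments, invoked with cpu_lock held, a non-zero return vetoing CPU_CONFIG/CPU_UNCONFIG) comes from the comment above; taking cpu_lock around registration is an assumption based on typical usage rather than something this header requires explicitly.

static int
example_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
        ASSERT(MUTEX_HELD(&cpu_lock));

        switch (what) {
        case CPU_CONFIG:
                /* Returning non-zero here would veto the configuration. */
                break;
        case CPU_UNCONFIG:
                /* Release any per-CPU state tied to cpuid. */
                break;
        default:
                /* Ignore events this callback does not understand. */
                break;
        }
        return (0);
}

static void
example_cpu_setup_register(void *arg)
{
        mutex_enter(&cpu_lock);
        register_cpu_setup_func(example_cpu_setup, arg);
        mutex_exit(&cpu_lock);
}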
The ereports are represented as a * erpt_dump_t header followed by ed_size bytes of packed native nvlist data. * * NOTE: All of these constants and the header must be defined so they have the * same representation for *both* 32-bit and 64-bit producers and consumers. */ #define ERPT_MAGIC 0xf00d4eddU #define ERPT_MAX_ERRS 16 #define ERPT_DATA_SZ (6 * 1024) #define ERPT_EVCH_MAX 256 #define ERPT_HIWAT 64 typedef struct erpt_dump { uint32_t ed_magic; /* ERPT_MAGIC or zero to indicate end */ uint32_t ed_chksum; /* checksum32() of packed nvlist data */ uint32_t ed_size; /* ereport (nvl) fixed buf size */ uint32_t ed_pad; /* reserved for future use */ hrtime_t ed_hrt_nsec; /* hrtime of this ereport */ hrtime_t ed_hrt_base; /* hrtime sample corresponding to ed_tod_base */ struct { uint64_t sec; /* seconds since gettimeofday() Epoch */ uint64_t nsec; /* nanoseconds past ed_tod_base.sec */ } ed_tod_base; } erpt_dump_t; -#ifdef _KERNEL +#if defined(_KERNEL) || defined(_FAKE_KERNEL) #include #define FM_STK_DEPTH 20 /* maximum stack depth */ #define FM_SYM_SZ 64 /* maximum symbol size */ #define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */ #define FM_EREPORT_PAYLOAD_NAME_STACK "stack" extern errorq_t *ereport_errorq; extern void *ereport_dumpbuf; extern size_t ereport_dumplen; extern void fm_init(void); extern void fm_nvprint(nvlist_t *); #define fm_panic panic extern void fm_banner(void); extern void fm_ereport_dump(void); extern void fm_ereport_post(nvlist_t *, int); extern int is_fm_panic(); -#endif /* _KERNEL */ +#endif /* _KERNEL || _FAKE_KERNEL */ #ifdef __cplusplus } #endif #endif /* _SYS_FM_UTIL_H */ Index: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h =================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h (revision 332525) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/vnode.h (revision 332526) @@ -1,426 +1,427 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 RackTop Systems. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* * University Copyright- Copyright (c) 1982, 1986, 1988 * The Regents of the University of California * All Rights Reserved * * University Acknowledgment- Portions of this document are derived from * software developed by the University of California, Berkeley, and its * contributors. 
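A consumer-side sketch of scanning dumped ereports as laid out above: each record is an erpt_dump_t header followed by ed_size bytes of packed nvlist data, and a zero ed_magic terminates the sequence. That records sit back to back and are adequately aligned, and how the raw bytes are read off the dump device, are assumptions of this sketch; checksum verification and nvlist unpacking are left out.

/*
 * Sketch: count the ereport records present in a raw dump buffer.
 */
static size_t
count_dumped_ereports(const char *buf, size_t len)
{
        size_t off = 0, count = 0;

        while (off + sizeof (erpt_dump_t) <= len) {
                const erpt_dump_t *ed = (const erpt_dump_t *)(buf + off);

                if (ed->ed_magic == 0)
                        break;          /* end marker */
                if (ed->ed_magic != ERPT_MAGIC)
                        break;          /* unexpected data, stop scanning */

                count++;
                off += sizeof (erpt_dump_t) + ed->ed_size;
        }
        return (count);
}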
*/ #ifndef _SYS_VNODE_H #define _SYS_VNODE_H #include_next #define IS_DEVVP(vp) \ ((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO) #define V_XATTRDIR 0x0000 /* attribute unnamed directory */ #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ /* * Structure of all optional attributes. */ typedef struct xoptattr { timestruc_t xoa_createtime; /* Create time of file */ uint8_t xoa_archive; uint8_t xoa_system; uint8_t xoa_readonly; uint8_t xoa_hidden; uint8_t xoa_nounlink; uint8_t xoa_immutable; uint8_t xoa_appendonly; uint8_t xoa_nodump; uint8_t xoa_opaque; uint8_t xoa_av_quarantined; uint8_t xoa_av_modified; uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; uint8_t xoa_reparse; uint64_t xoa_generation; uint8_t xoa_offline; uint8_t xoa_sparse; } xoptattr_t; /* * The xvattr structure is really a variable length structure that * is made up of: * - The classic vattr_t (xva_vattr) * - a 32 bit quantity (xva_mapsize) that specifies the size of the * attribute bitmaps in 32 bit words. * - A pointer to the returned attribute bitmap (needed because the * previous element, the requested attribute bitmap) is variable lenth. * - The requested attribute bitmap, which is an array of 32 bit words. * Callers use the XVA_SET_REQ() macro to set the bits corresponding to * the attributes that are being requested. * - The returned attribute bitmap, which is an array of 32 bit words. * File systems that support optional attributes use the XVA_SET_RTN() * macro to set the bits corresponding to the attributes that are being * returned. * - The xoptattr_t structure which contains the attribute values * * xva_mapsize determines how many words in the attribute bitmaps. * Immediately following the attribute bitmaps is the xoptattr_t. * xva_getxoptattr() is used to get the pointer to the xoptattr_t * section. */ #define XVA_MAPSIZE 3 /* Size of attr bitmaps */ #define XVA_MAGIC 0x78766174 /* Magic # for verification */ /* * The xvattr structure is an extensible structure which permits optional * attributes to be requested/returned. File systems may or may not support * optional attributes. They do so at their own discretion but if they do * support optional attributes, they must register the VFSFT_XVATTR feature * so that the optional attributes can be set/retrived. * * The fields of the xvattr structure are: * * xva_vattr - The first element of an xvattr is a legacy vattr structure * which includes the common attributes. If AT_XVATTR is set in the va_mask * then the entire structure is treated as an xvattr. If AT_XVATTR is not * set, then only the xva_vattr structure can be used. * * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification. * * xva_mapsize - Size of requested and returned attribute bitmaps. * * xva_rtnattrmapp - Pointer to xva_rtnattrmap[]. We need this since the * size of the array before it, xva_reqattrmap[], could change which means * the location of xva_rtnattrmap[] could change. This will allow unbundled * file systems to find the location of xva_rtnattrmap[] when the sizes change. * * xva_reqattrmap[] - Array of requested attributes. Attributes are * represented by a specific bit in a specific element of the attribute * map array. Callers set the bits corresponding to the attributes * that the caller wants to get/set. * * xva_rtnattrmap[] - Array of attributes that the file system was able to * process. Not all file systems support all optional attributes. This map * informs the caller which attributes the underlying file system was able * to set/get. 
(Same structure as the requested attributes array in terms * of each attribute corresponding to specific bits and array elements.) * * xva_xoptattrs - Structure containing values of optional attributes. * These values are only valid if the corresponding bits in xva_reqattrmap * are set and the underlying file system supports those attributes. */ typedef struct xvattr { vattr_t xva_vattr; /* Embedded vattr structure */ uint32_t xva_magic; /* Magic Number */ uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ xoptattr_t xva_xoptattrs; /* Optional attributes */ } xvattr_t; /* * Attributes of interest to the caller of setattr or getattr. */ #define AT_TYPE 0x00001 #define AT_MODE 0x00002 #define AT_UID 0x00004 #define AT_GID 0x00008 #define AT_FSID 0x00010 #define AT_NODEID 0x00020 #define AT_NLINK 0x00040 #define AT_SIZE 0x00080 #define AT_ATIME 0x00100 #define AT_MTIME 0x00200 #define AT_CTIME 0x00400 #define AT_RDEV 0x00800 #define AT_BLKSIZE 0x01000 #define AT_NBLOCKS 0x02000 /* 0x04000 */ /* unused */ #define AT_SEQ 0x08000 /* * If AT_XVATTR is set then there are additional bits to process in * the xvattr_t's attribute bitmap. If this is not set then the bitmap * MUST be ignored. Note that this bit must be set/cleared explicitly. * That is, setting AT_ALL will NOT set AT_XVATTR. */ #define AT_XVATTR 0x10000 #define AT_ALL (AT_TYPE|AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\ AT_NLINK|AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|\ AT_RDEV|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ) #define AT_STAT (AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|AT_NLINK|\ AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|AT_RDEV|AT_TYPE) #define AT_TIMES (AT_ATIME|AT_MTIME|AT_CTIME) #define AT_NOSET (AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|AT_TYPE|\ AT_BLKSIZE|AT_NBLOCKS|AT_SEQ) /* * Attribute bits used in the extensible attribute's (xva's) attribute * bitmaps. Note that the bitmaps are made up of a variable length number * of 32-bit words. The convention is to use XAT{n}_{attrname} where "n" * is the element in the bitmap (starting at 1). This convention is for * the convenience of the maintainer to keep track of which element each * attribute belongs to. * * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY. CONSUMERS * MUST USE THE XAT_* DEFINES. 
*/ #define XAT0_INDEX 0LL /* Index into bitmap for XAT0 attrs */ #define XAT0_CREATETIME 0x00000001 /* Create time of file */ #define XAT0_ARCHIVE 0x00000002 /* Archive */ #define XAT0_SYSTEM 0x00000004 /* System */ #define XAT0_READONLY 0x00000008 /* Readonly */ #define XAT0_HIDDEN 0x00000010 /* Hidden */ #define XAT0_NOUNLINK 0x00000020 /* Nounlink */ #define XAT0_IMMUTABLE 0x00000040 /* immutable */ #define XAT0_APPENDONLY 0x00000080 /* appendonly */ #define XAT0_NODUMP 0x00000100 /* nodump */ #define XAT0_OPAQUE 0x00000200 /* opaque */ #define XAT0_AV_QUARANTINED 0x00000400 /* anti-virus quarantine */ #define XAT0_AV_MODIFIED 0x00000800 /* anti-virus modified */ #define XAT0_AV_SCANSTAMP 0x00001000 /* anti-virus scanstamp */ #define XAT0_REPARSE 0x00002000 /* FS reparse point */ #define XAT0_GEN 0x00004000 /* object generation number */ #define XAT0_OFFLINE 0x00008000 /* offline */ #define XAT0_SPARSE 0x00010000 /* sparse */ #define XAT0_ALL_ATTRS (XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \ XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \ XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED| XAT0_AV_MODIFIED| \ XAT0_AV_SCANSTAMP|XAT0_REPARSE|XATO_GEN|XAT0_OFFLINE|XAT0_SPARSE) /* Support for XAT_* optional attributes */ #define XVA_MASK 0xffffffff /* Used to mask off 32 bits */ #define XVA_SHFT 32 /* Used to shift index */ /* * Used to pry out the index and attribute bits from the XAT_* attributes * defined below. Note that we're masking things down to 32 bits then * casting to uint32_t. */ #define XVA_INDEX(attr) ((uint32_t)(((attr) >> XVA_SHFT) & XVA_MASK)) #define XVA_ATTRBIT(attr) ((uint32_t)((attr) & XVA_MASK)) /* * The following defines present a "flat namespace" so that consumers don't * need to keep track of which element belongs to which bitmap entry. * * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER */ #define XAT_CREATETIME ((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME) #define XAT_ARCHIVE ((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE) #define XAT_SYSTEM ((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM) #define XAT_READONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY) #define XAT_HIDDEN ((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN) #define XAT_NOUNLINK ((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK) #define XAT_IMMUTABLE ((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE) #define XAT_APPENDONLY ((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY) #define XAT_NODUMP ((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP) #define XAT_OPAQUE ((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE) #define XAT_AV_QUARANTINED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED) #define XAT_AV_MODIFIED ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED) #define XAT_AV_SCANSTAMP ((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP) #define XAT_REPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE) #define XAT_GEN ((XAT0_INDEX << XVA_SHFT) | XAT0_GEN) #define XAT_OFFLINE ((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE) #define XAT_SPARSE ((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE) /* * The returned attribute map array (xva_rtnattrmap[]) is located past the * requested attribute map array (xva_reqattrmap[]). Its location changes * when the array sizes change. We use a separate pointer in a known location * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[]. This is * set in xva_init() */ #define XVA_RTNATTRMAP(xvap) ((xvap)->xva_rtnattrmapp) /* * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap * of requested attributes (xva_reqattrmap[]). 
/*
 * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 */
#define	XVA_SET_REQ(xvap, attr)	{				\
	ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
	ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
	(xvap)->xva_reqattrmap[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr); \
}

/*
 * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap
 * of requested attributes (xva_reqattrmap[]).
 */
#define	XVA_CLR_REQ(xvap, attr)	{				\
	ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
	ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
	(xvap)->xva_reqattrmap[XVA_INDEX(attr)] &= ~XVA_ATTRBIT(attr); \
}

/*
 * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap
 * of returned attributes (xva_rtnattrmap[]).
 */
#define	XVA_SET_RTN(xvap, attr)	{				\
	ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
	ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
	(XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |= XVA_ATTRBIT(attr); \
}

/*
 * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[])
 * to see if the corresponding attribute bit is set.  If so, returns non-zero.
 */
#define	XVA_ISSET_REQ(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask | AT_XVATTR) &&			\
		((xvap)->xva_magic == XVA_MAGIC) &&			\
		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
	((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)

/*
 * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[])
 * to see if the corresponding attribute bit is set.  If so, returns non-zero.
 */
#define	XVA_ISSET_RTN(xvap, attr)					\
	((((xvap)->xva_vattr.va_mask | AT_XVATTR) &&			\
		((xvap)->xva_magic == XVA_MAGIC) &&			\
		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
	((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)

#define	MODEMASK	07777		/* mode bits plus permission bits */
#define	PERMMASK	00777		/* permission bits */

/*
 * VOP_ACCESS flags
 */
#define	V_ACE_MASK	0x1	/* mask represents NFSv4 ACE permissions */

/*
 * Flags for vnode operations.
 */
enum rm		{ RMFILE, RMDIRECTORY };	/* rm or rmdir (remove) */
enum create	{ CRCREAT, CRMKNOD, CRMKDIR };	/* reason for create */

/*
 * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations
 */
typedef struct vsecattr {
	uint_t		vsa_mask;	/* See below */
	int		vsa_aclcnt;	/* ACL entry count */
	void		*vsa_aclentp;	/* pointer to ACL entries */
	int		vsa_dfaclcnt;	/* default ACL entry count */
	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
	uint_t		vsa_aclflags;	/* ACE ACL flags */
} vsecattr_t;

/* vsa_mask values */
#define	VSA_ACL			0x0001
#define	VSA_ACLCNT		0x0002
#define	VSA_DFACL		0x0004
#define	VSA_DFACLCNT		0x0008
#define	VSA_ACE			0x0010
#define	VSA_ACECNT		0x0020
#define	VSA_ACE_ALLTYPES	0x0040
#define	VSA_ACE_ACLFLAGS	0x0080	/* get/set ACE ACL flags */
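/*
 * Illustrative sketch (not part of the interface): asking a file system for
 * the number of NFSv4 ACEs on a vnode.  The VOP_GETSECATTR() call shape and
 * the "vp"/"cr" arguments follow the illumos convention and are assumptions
 * here; the ACE count coming back in vsa_aclcnt matches the ZFS behavior,
 * but other file systems may differ.
 */
#if 0
	vsecattr_t vsec;
	int acecnt = 0;

	bzero(&vsec, sizeof (vsec));
	vsec.vsa_mask = VSA_ACECNT;		/* count only, no ACE data */
	if (VOP_GETSECATTR(vp, &vsec, 0, cr, NULL) == 0)
		acecnt = vsec.vsa_aclcnt;
#endif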
/*
 * Structure used by various vnode operations to determine
 * the context (pid, host, identity) of a caller.
 *
 * The cc_caller_id is used to identify one or more callers who invoke
 * operations, possibly on behalf of others.  For example, the NFS
 * server could have its own cc_caller_id which can be detected by
 * vnode/vfs operations or (FEM) monitors on those operations.  New
 * caller IDs are generated by fs_new_caller_id().
 */
typedef struct caller_context {
	pid_t		cc_pid;		/* Process ID of the caller */
	int		cc_sysid;	/* System ID, used for remote calls */
	u_longlong_t	cc_caller_id;	/* Identifier for (set of) caller(s) */
	ulong_t		cc_flags;
} caller_context_t;

struct taskq;

/*
 * Flags for VOP_LOOKUP
 *
 * FIGNORECASE and FSEARCH (defined in file.h) are also possible.
 */
#define	LOOKUP_DIR		0x01	/* want parent dir vp */
#define	LOOKUP_XATTR		0x02	/* look up extended attr dir */
#define	CREATE_XATTR_DIR	0x04	/* Create extended attr dir */
#define	LOOKUP_HAVE_SYSATTR_DIR	0x08	/* Already created virtual GFS dir */

/*
 * Flags for VOP_READDIR
 */
#define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
#define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */

/*
 * Public vnode manipulation functions.
 */
#ifdef	_KERNEL

void	vn_rele_async(struct vnode *vp, struct taskq *taskq);

/*
 * Extensible vnode attribute (xva) routines:
 * xva_init() initializes an xvattr_t (zero struct, init mapsize, set AT_XVATTR)
 * xva_getxoptattr() returns a pointer to the xoptattr_t section of xvattr_t
 */
void		xva_init(xvattr_t *);
xoptattr_t	*xva_getxoptattr(xvattr_t *);	/* Get ptr to xoptattr_t */

#define	VN_RELE_ASYNC(vp, taskq)	{ \
	vn_rele_async(vp, taskq); \
}

#endif	/* _KERNEL */

/*
 * Flags to VOP_SETATTR/VOP_GETATTR.
 */
#define	ATTR_UTIME	0x01	/* non-default utime(2) request */
#define	ATTR_EXEC	0x02	/* invocation from exec(2) */
#define	ATTR_COMM	0x04	/* yield common vp attributes */
#define	ATTR_HINT	0x08	/* information returned will be `hint' */
#define	ATTR_REAL	0x10	/* yield attributes of the real vp */
#define	ATTR_NOACLCHECK	0x20	/* Don't check ACL when checking permissions */
#define	ATTR_TRIGGER	0x40	/* Mount first if vnode is a trigger mount */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_VNODE_H */

Index: stable/11
===================================================================
--- stable/11	(revision 332525)
+++ stable/11	(revision 332526)

Property changes on: stable/11
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r329755
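/*
 * Illustrative sketch (not part of the change above): a complete xvattr_t
 * round trip through the xva_init(), XVA_SET_REQ(), XVA_ISSET_RTN() and
 * xva_getxoptattr() interfaces declared in sys/vnode.h.  The VOP_GETATTR()
 * call shape and the "vp"/"cr" arguments follow the illumos convention and
 * are assumptions here.
 */
#if 0
	xvattr_t xva;
	xoptattr_t *xoap;

	xva_init(&xva);				/* zeroes struct, sets AT_XVATTR */
	XVA_SET_REQ(&xva, XAT_READONLY);	/* request one optional attribute */

	if (VOP_GETATTR(vp, &xva.xva_vattr, 0, cr, NULL) == 0 &&
	    XVA_ISSET_RTN(&xva, XAT_READONLY) &&
	    (xoap = xva_getxoptattr(&xva)) != NULL) {
		/* xoap->xoa_readonly now holds the returned value */
	}
#endif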