Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(revision 337168)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c	(revision 337169)
@@ -1,241 +1,251 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 * Copyright 2014 HybridCluster. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/zap.h>
#include <sys/zfeature.h>

uint64_t
-dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
+dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
    uint64_t object;
    uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
        (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
    dnode_t *dn = NULL;

    mutex_enter(&os->os_obj_lock);
    for (;;) {
        object = os->os_obj_next;
        /*
         * Each time we polish off an L1 bp worth of dnodes (2^12
         * objects), move to another L1 bp that's still reasonably
         * sparse (at most 1/4 full).  Look from the beginning at most
         * once per txg, but after that keep looking from here.
         * os_scan_dnodes is set during txg sync if enough objects
         * have been freed since the previous rescan to justify
         * backfilling again.  If we can't find a suitable block, just
         * keep going from here.
         *
         * Note that dmu_traverse depends on the behavior that we use
         * multiple blocks of the dnode object before going back to
         * reuse objects.  Any change to this algorithm should preserve
         * that property or find another solution to the issues
         * described in traverse_visitbp.
         */
        if (P2PHASE(object, L1_dnode_count) == 0) {
            uint64_t offset;
            int error;
            if (os->os_rescan_dnodes) {
                offset = 0;
                os->os_rescan_dnodes = B_FALSE;
            } else {
                offset = object << DNODE_SHIFT;
            }
            error = dnode_next_offset(DMU_META_DNODE(os),
                DNODE_FIND_HOLE,
                &offset, 2, DNODES_PER_BLOCK >> 2, 0);
            if (error == 0)
                object = offset >> DNODE_SHIFT;
        }
        os->os_obj_next = ++object;

        /*
         * XXX We should check for an i/o error here and return
         * up to our caller.  Actually we should pre-read it in
         * dmu_tx_assign(), but there is currently no mechanism
         * to do so.
         */
        (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
            FTAG, &dn);
        if (dn)
            break;

        if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
            os->os_obj_next = object - 1;
    }

-   dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
+   dnode_allocate(dn, ot, blocksize, indirect_blockshift,
+       bonustype, bonuslen, tx);
    mutex_exit(&os->os_obj_lock);

    dmu_tx_add_new_object(tx, dn);
    dnode_rele(dn, FTAG);

    return (object);
+}
+
+uint64_t
+dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+   return (dmu_object_alloc_ibs(os, ot, blocksize, 0,
+       bonustype, bonuslen, tx));
}

int
dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
    dnode_t *dn;
    int err;

    if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
        return (SET_ERROR(EBADF));

    err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
    if (err)
        return (err);
    dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
    dmu_tx_add_new_object(tx, dn);

    dnode_rele(dn, FTAG);

    return (0);
}

int
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
    dnode_t *dn;
    int err;

    if (object == DMU_META_DNODE_OBJECT)
        return (SET_ERROR(EBADF));

    err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
        FTAG, &dn);
    if (err)
        return (err);

    dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);

    dnode_rele(dn, FTAG);
    return (err);
}

int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
    dnode_t *dn;
    int err;

    ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));

    err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
        FTAG, &dn);
    if (err)
        return (err);

    ASSERT(dn->dn_type != DMU_OT_NONE);
    dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
    dnode_free(dn, tx);
    dnode_rele(dn, FTAG);

    return (0);
}

/*
 * Return (in *objectp) the next object which is allocated (or a hole)
 * after *object, taking into account only objects that may have been modified
 * after the specified txg.
 */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
    uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
    int error;

    error = dnode_next_offset(DMU_META_DNODE(os),
        (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);

    *objectp = offset >> DNODE_SHIFT;

    return (error);
}

/*
 * Turn this object from old_type into DMU_OTN_ZAP_METADATA, and bump the
 * refcount on SPA_FEATURE_EXTENSIBLE_DATASET.
 *
 * Only for use from syncing context, on MOS objects.
 */
void
dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type,
    dmu_tx_t *tx)
{
    dnode_t *dn;

    ASSERT(dmu_tx_is_syncing(tx));

    VERIFY0(dnode_hold(mos, object, FTAG, &dn));
    if (dn->dn_type == DMU_OTN_ZAP_METADATA) {
        dnode_rele(dn, FTAG);
        return;
    }
    ASSERT3U(dn->dn_type, ==, old_type);
    ASSERT0(dn->dn_maxblkid);

    /*
     * We must initialize the ZAP data before changing the type,
     * so that concurrent calls to *_is_zapified() can determine if
     * the object has been completely zapified by checking the type.
     */
    mzap_create_impl(mos, object, 0, 0, tx);

    dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type =
        DMU_OTN_ZAP_METADATA;
    dnode_setdirty(dn, tx);
    dnode_rele(dn, FTAG);

    spa_feature_incr(dmu_objset_spa(mos),
        SPA_FEATURE_EXTENSIBLE_DATASET, tx);
}

void
dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
    dnode_t *dn;
    dmu_object_type_t t;

    ASSERT(dmu_tx_is_syncing(tx));

    VERIFY0(dnode_hold(mos, object, FTAG, &dn));
    t = dn->dn_type;
    dnode_rele(dn, FTAG);

    if (t == DMU_OTN_ZAP_METADATA) {
        spa_feature_decr(dmu_objset_spa(mos),
            SPA_FEATURE_EXTENSIBLE_DATASET, tx);
    }
    VERIFY0(dmu_object_free(mos, object, tx));
}
Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c	(revision 337168)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c	(revision 337169)
@@ -1,1089 +1,1101 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/dsl_pool.h>
#include <sys/sysctl.h>
#include <sys/zio.h>
#include <sys/space_map.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>

SYSCTL_DECL(_vfs_zfs);

/*
 * Note on space map block size:
 *
 * The data for a given space map can be kept on blocks of any size.
 * Larger blocks entail fewer I/O operations, but they also cause the
 * DMU to keep more data in-core, and also to waste more I/O bandwidth
 * when only a few blocks have changed since the last transaction group.
 */

/*
 * Enabled whenever we want to stress test the use of double-word
 * space map entries.
 */
boolean_t zfs_force_some_double_word_sm_entries = B_FALSE;

+/*
+ * Override the default indirect block size of 128K, instead using 16K for
+ * spacemaps (2^14 bytes).  This dramatically reduces write inflation since
+ * appending to a spacemap typically has to write one data block (4KB) and one
+ * or two indirect blocks (16K-32K, rather than 128K).
+ */
+int space_map_ibs = 14;
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
+    &space_map_ibs, 0, "Space map indirect block shift");
+
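/*
 * Illustrative sketch (editorial, not part of this change): the write
 * inflation arithmetic behind the comment above.  Appending one entry to a
 * spacemap dirties the tail data block plus the indirect block(s) covering
 * it, so the indirect block size dominates the cost.  The helper name is
 * hypothetical.
 */
static inline uint64_t
sm_append_write_estimate(uint64_t data_blksz, int indirect_blockshift,
    int indirect_levels)
{
    /* one data block, plus one indirect block per indirect level */
    return (data_blksz + (uint64_t)indirect_levels *
        (1ULL << indirect_blockshift));
}
/*
 * With one indirect level: sm_append_write_estimate(4096, 17, 1) is
 * 4K + 128K, while sm_append_write_estimate(4096, 14, 1) is 4K + 16K.
 */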
boolean_t
sm_entry_is_debug(uint64_t e)
{
    return (SM_PREFIX_DECODE(e) == SM_DEBUG_PREFIX);
}

boolean_t
sm_entry_is_single_word(uint64_t e)
{
    uint8_t prefix = SM_PREFIX_DECODE(e);
    return (prefix != SM_DEBUG_PREFIX && prefix != SM2_PREFIX);
}

boolean_t
sm_entry_is_double_word(uint64_t e)
{
    return (SM_PREFIX_DECODE(e) == SM2_PREFIX);
}

/*
 * Iterate through the space map, invoking the callback on each (non-debug)
 * space map entry.
 */
int
space_map_iterate(space_map_t *sm, sm_cb_t callback, void *arg)
{
    uint64_t sm_len = space_map_length(sm);
    ASSERT3U(sm->sm_blksz, !=, 0);

    dmu_prefetch(sm->sm_os, space_map_object(sm), 0, 0, sm_len,
        ZIO_PRIORITY_SYNC_READ);

    uint64_t blksz = sm->sm_blksz;
    int error = 0;
    for (uint64_t block_base = 0; block_base < sm_len && error == 0;
        block_base += blksz) {
        dmu_buf_t *db;
        error = dmu_buf_hold(sm->sm_os, space_map_object(sm),
            block_base, FTAG, &db, DMU_READ_PREFETCH);
        if (error != 0)
            return (error);

        uint64_t *block_start = db->db_data;
        uint64_t block_length = MIN(sm_len - block_base, blksz);
        uint64_t *block_end = block_start +
            (block_length / sizeof (uint64_t));

        VERIFY0(P2PHASE(block_length, sizeof (uint64_t)));
        VERIFY3U(block_length, !=, 0);
        ASSERT3U(blksz, ==, db->db_size);

        for (uint64_t *block_cursor = block_start;
            block_cursor < block_end && error == 0; block_cursor++) {
            uint64_t e = *block_cursor;

            if (sm_entry_is_debug(e)) /* Skip debug entries */
                continue;

            uint64_t raw_offset, raw_run, vdev_id;
            maptype_t type;
            if (sm_entry_is_single_word(e)) {
                type = SM_TYPE_DECODE(e);
                vdev_id = SM_NO_VDEVID;
                raw_offset = SM_OFFSET_DECODE(e);
                raw_run = SM_RUN_DECODE(e);
            } else {
                /* it is a two-word entry */
                ASSERT(sm_entry_is_double_word(e));
                raw_run = SM2_RUN_DECODE(e);
                vdev_id = SM2_VDEV_DECODE(e);

                /* move on to the second word */
                block_cursor++;
                e = *block_cursor;
                VERIFY3P(block_cursor, <=, block_end);

                type = SM2_TYPE_DECODE(e);
                raw_offset = SM2_OFFSET_DECODE(e);
            }

            uint64_t entry_offset = (raw_offset << sm->sm_shift) +
                sm->sm_start;
            uint64_t entry_run = raw_run << sm->sm_shift;

            VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift));
            VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift));
            ASSERT3U(entry_offset, >=, sm->sm_start);
            ASSERT3U(entry_offset, <, sm->sm_start + sm->sm_size);
            ASSERT3U(entry_run, <=, sm->sm_size);
            ASSERT3U(entry_offset + entry_run, <=,
                sm->sm_start + sm->sm_size);

            space_map_entry_t sme = {
                .sme_type = type,
                .sme_vdev = vdev_id,
                .sme_offset = entry_offset,
                .sme_run = entry_run
            };
            error = callback(&sme, arg);
        }
        dmu_buf_rele(db, FTAG);
    }
    return (error);
}
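/*
 * Illustrative sketch (editorial, not part of this change): a minimal
 * sm_cb_t consumer for space_map_iterate() above, tallying the bytes the
 * map records as allocated.  The callback name and cookie use are
 * hypothetical.
 */
static int
sm_count_alloc_cb(space_map_entry_t *sme, void *arg)
{
    uint64_t *allocated = arg;

    if (sme->sme_type == SM_ALLOC)
        *allocated += sme->sme_run;
    return (0);	/* a nonzero return would stop the iteration */
}
/*
 * Usage: uint64_t bytes = 0;
 *        error = space_map_iterate(sm, sm_count_alloc_cb, &bytes);
 */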
/*
 * Reads the entries from the last block of the space map into
 * buf in reverse order. Populates nwords with number of words
 * in the last block.
 *
 * Refer to block comment within space_map_incremental_destroy()
 * to understand why this function is needed.
 */
static int
space_map_reversed_last_block_entries(space_map_t *sm, uint64_t *buf,
    uint64_t bufsz, uint64_t *nwords)
{
    int error = 0;
    dmu_buf_t *db;

    /*
     * Find the offset of the last word in the space map and use
     * that to read the last block of the space map with
     * dmu_buf_hold().
     */
    uint64_t last_word_offset =
        sm->sm_phys->smp_objsize - sizeof (uint64_t);
    error = dmu_buf_hold(sm->sm_os, space_map_object(sm), last_word_offset,
        FTAG, &db, DMU_READ_NO_PREFETCH);
    if (error != 0)
        return (error);

    ASSERT3U(sm->sm_object, ==, db->db_object);
    ASSERT3U(sm->sm_blksz, ==, db->db_size);
    ASSERT3U(bufsz, >=, db->db_size);
    ASSERT(nwords != NULL);

    uint64_t *words = db->db_data;
    *nwords =
        (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);

    ASSERT3U(*nwords, <=, bufsz / sizeof (uint64_t));

    uint64_t n = *nwords;
    uint64_t j = n - 1;
    for (uint64_t i = 0; i < n; i++) {
        uint64_t entry = words[i];
        if (sm_entry_is_double_word(entry)) {
            /*
             * Since we are populating the buffer backwards
             * we have to be extra careful and add the two
             * words of the double-word entry in the right
             * order.
             */
            ASSERT3U(j, >, 0);
            buf[j - 1] = entry;

            i++;
            ASSERT3U(i, <, n);
            entry = words[i];
            buf[j] = entry;
            j -= 2;
        } else {
            ASSERT(sm_entry_is_debug(entry) ||
                sm_entry_is_single_word(entry));
            buf[j] = entry;
            j--;
        }
    }

    /*
     * Assert that we wrote backwards all the
     * way to the beginning of the buffer.
     */
    ASSERT3S(j, ==, -1);

    dmu_buf_rele(db, FTAG);
    return (error);
}

/*
 * Note: This function performs destructive actions - specifically
 * it deletes entries from the end of the space map. Thus, callers
 * should ensure that they are holding the appropriate locks for
 * the space map that they provide.
 */
int
space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
    dmu_tx_t *tx)
{
    uint64_t bufsz = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
    uint64_t *buf = zio_buf_alloc(bufsz);

    dmu_buf_will_dirty(sm->sm_dbuf, tx);

    /*
     * Ideally we would want to iterate from the beginning of the
     * space map to the end in incremental steps. The issue with this
     * approach is that we don't have any field on-disk that points
     * us where to start between each step. We could try zeroing out
     * entries that we've destroyed, but this doesn't work either as
     * an entry that is 0 is a valid one (ALLOC for range [0x0:0x200]).
     *
     * As a result, we destroy its entries incrementally starting from
     * the end after applying the callback to each of them.
     *
     * The problem with this approach is that we cannot literally
     * iterate through the words in the space map backwards as we
     * can't distinguish two-word space map entries from their second
     * word. Thus we do the following:
     *
     * 1] We get all the entries from the last block of the space map
     *    and put them into a buffer in reverse order. This way the
     *    last entry comes first in the buffer, the second to last is
     *    second, etc.
     * 2] We iterate through the entries in the buffer and we apply
     *    the callback to each one. As we move from entry to entry we
     *    decrease the size of the space map, effectively deleting
     *    each entry.
     * 3] If there are no more entries in the space map or the callback
     *    returns a value other than 0, we stop iterating over the
     *    space map. If there are entries remaining and the callback
     *    returned 0, we go back to step [1].
     */
    int error = 0;
    while (space_map_length(sm) > 0 && error == 0) {
        uint64_t nwords = 0;
        error = space_map_reversed_last_block_entries(sm, buf, bufsz,
            &nwords);
        if (error != 0)
            break;

        ASSERT3U(nwords, <=, bufsz / sizeof (uint64_t));

        for (uint64_t i = 0; i < nwords; i++) {
            uint64_t e = buf[i];

            if (sm_entry_is_debug(e)) {
                sm->sm_phys->smp_objsize -= sizeof (uint64_t);
                space_map_update(sm);
                continue;
            }

            int words = 1;
            uint64_t raw_offset, raw_run, vdev_id;
            maptype_t type;
            if (sm_entry_is_single_word(e)) {
                type = SM_TYPE_DECODE(e);
                vdev_id = SM_NO_VDEVID;
                raw_offset = SM_OFFSET_DECODE(e);
                raw_run = SM_RUN_DECODE(e);
            } else {
                ASSERT(sm_entry_is_double_word(e));
                words = 2;

                raw_run = SM2_RUN_DECODE(e);
                vdev_id = SM2_VDEV_DECODE(e);

                /* move to the second word */
                i++;
                e = buf[i];

                ASSERT3P(i, <=, nwords);

                type = SM2_TYPE_DECODE(e);
                raw_offset = SM2_OFFSET_DECODE(e);
            }

            uint64_t entry_offset =
                (raw_offset << sm->sm_shift) + sm->sm_start;
            uint64_t entry_run = raw_run << sm->sm_shift;

            VERIFY0(P2PHASE(entry_offset, 1ULL << sm->sm_shift));
            VERIFY0(P2PHASE(entry_run, 1ULL << sm->sm_shift));
            VERIFY3U(entry_offset, >=, sm->sm_start);
            VERIFY3U(entry_offset, <, sm->sm_start + sm->sm_size);
            VERIFY3U(entry_run, <=, sm->sm_size);
            VERIFY3U(entry_offset + entry_run, <=,
                sm->sm_start + sm->sm_size);

            space_map_entry_t sme = {
                .sme_type = type,
                .sme_vdev = vdev_id,
                .sme_offset = entry_offset,
                .sme_run = entry_run
            };
            error = callback(&sme, arg);
            if (error != 0)
                break;

            if (type == SM_ALLOC)
                sm->sm_phys->smp_alloc -= entry_run;
            else
                sm->sm_phys->smp_alloc += entry_run;
            sm->sm_phys->smp_objsize -= words * sizeof (uint64_t);
            space_map_update(sm);
        }
    }

    if (space_map_length(sm) == 0) {
        ASSERT0(error);
        ASSERT0(sm->sm_phys->smp_objsize);
        ASSERT0(sm->sm_alloc);
    }

    zio_buf_free(buf, bufsz);
    return (error);
}

typedef struct space_map_load_arg {
    space_map_t *smla_sm;
    range_tree_t *smla_rt;
    maptype_t   smla_type;
} space_map_load_arg_t;

static int
space_map_load_callback(space_map_entry_t *sme, void *arg)
{
    space_map_load_arg_t *smla = arg;
    if (sme->sme_type == smla->smla_type) {
        VERIFY3U(range_tree_space(smla->smla_rt) + sme->sme_run, <=,
            smla->smla_sm->sm_size);
        range_tree_add(smla->smla_rt, sme->sme_offset, sme->sme_run);
    } else {
        range_tree_remove(smla->smla_rt, sme->sme_offset, sme->sme_run);
    }

    return (0);
}

/*
 * Load the space map from disk into the specified range tree. Segments of
 * maptype are added to the range tree, other segment types are removed.
 */
int
space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
{
    uint64_t space;
    int err;
    space_map_load_arg_t smla;

    VERIFY0(range_tree_space(rt));
    space = space_map_allocated(sm);

    if (maptype == SM_FREE) {
        range_tree_add(rt, sm->sm_start, sm->sm_size);
        space = sm->sm_size - space;
    }

    smla.smla_rt = rt;
    smla.smla_sm = sm;
    smla.smla_type = maptype;
    err = space_map_iterate(sm, space_map_load_callback, &smla);

    if (err == 0) {
        VERIFY3U(range_tree_space(rt), ==, space);
    } else {
        range_tree_vacate(rt, NULL, NULL);
    }

    return (err);
}

void
space_map_histogram_clear(space_map_t *sm)
{
    if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
        return;

    bzero(sm->sm_phys->smp_histogram, sizeof (sm->sm_phys->smp_histogram));
}

boolean_t
space_map_histogram_verify(space_map_t *sm, range_tree_t *rt)
{
    /*
     * Verify that the in-core range tree does not have any
     * ranges smaller than our sm_shift size.
     */
    for (int i = 0; i < sm->sm_shift; i++) {
        if (rt->rt_histogram[i] != 0)
            return (B_FALSE);
    }
    return (B_TRUE);
}
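/*
 * Illustrative sketch (editorial, not part of this change): the bucket
 * folding that space_map_histogram_add() below performs, isolated as one
 * expression.  The helper name is hypothetical.
 */
static inline uint64_t
sm_histogram_contrib(uint64_t count, int rt_bucket, int sm_idx, int sm_shift)
{
    /* mirrors: rt_histogram[i] << (i - idx - sm->sm_shift) */
    return (count << (rt_bucket - sm_idx - sm_shift));
}
/*
 * Example from the comment below: with sm_shift = 9 and the space map
 * index saturated at 31, sm_histogram_contrib(5, 44, 31, 9) == 5 << 4,
 * i.e. five 16TB segments are counted as eighty 1TB segments.
 */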
void
space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
{
    int idx = 0;

    ASSERT(dmu_tx_is_syncing(tx));
    VERIFY3U(space_map_object(sm), !=, 0);

    if (sm->sm_dbuf->db_size != sizeof (space_map_phys_t))
        return;

    dmu_buf_will_dirty(sm->sm_dbuf, tx);

    ASSERT(space_map_histogram_verify(sm, rt));
    /*
     * Transfer the content of the range tree histogram to the space
     * map histogram. The space map histogram contains 32 buckets ranging
     * from 2^sm_shift to 2^(32+sm_shift-1). The range tree,
     * however, can represent ranges from 2^0 to 2^63. Since the space
     * map only cares about allocatable blocks (minimum of sm_shift) we
     * can safely ignore all ranges in the range tree smaller than sm_shift.
     */
    for (int i = sm->sm_shift; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {

        /*
         * Since the largest histogram bucket in the space map is
         * 2^(32+sm_shift-1), we need to normalize the values in
         * the range tree for any bucket larger than that size. For
         * example given an sm_shift of 9, ranges larger than 2^40
         * would get normalized as if they were 1TB ranges. Assume
         * the range tree had a count of 5 in the 2^44 (16TB) bucket,
         * the calculation below would normalize this to 5 * 2^4 (16).
         */
        ASSERT3U(i, >=, idx + sm->sm_shift);
        sm->sm_phys->smp_histogram[idx] +=
            rt->rt_histogram[i] << (i - idx - sm->sm_shift);

        /*
         * Increment the space map's index as long as we haven't
         * reached the maximum bucket size. Accumulate all ranges
         * larger than the max bucket size into the last bucket.
         */
        if (idx < SPACE_MAP_HISTOGRAM_SIZE - 1) {
            ASSERT3U(idx + sm->sm_shift, ==, i);
            idx++;
            ASSERT3U(idx, <, SPACE_MAP_HISTOGRAM_SIZE);
        }
    }
}

static void
space_map_write_intro_debug(space_map_t *sm, maptype_t maptype, dmu_tx_t *tx)
{
    dmu_buf_will_dirty(sm->sm_dbuf, tx);

    uint64_t dentry = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
        SM_DEBUG_ACTION_ENCODE(maptype) |
        SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(tx->tx_pool->dp_spa)) |
        SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));

    dmu_write(sm->sm_os, space_map_object(sm), sm->sm_phys->smp_objsize,
        sizeof (dentry), &dentry, tx);

    sm->sm_phys->smp_objsize += sizeof (dentry);
}

/*
 * Writes one or more entries given a segment.
 *
 * Note: The function may release the dbuf from the pointer initially
 * passed to it, and return a different dbuf. Also, the space map's
 * dbuf must be dirty for the changes in sm_phys to take effect.
 */
static void
space_map_write_seg(space_map_t *sm, range_seg_t *rs, maptype_t maptype,
    uint64_t vdev_id, uint8_t words, dmu_buf_t **dbp, void *tag, dmu_tx_t *tx)
{
    ASSERT3U(words, !=, 0);
    ASSERT3U(words, <=, 2);

    /* ensure the vdev_id can be represented by the space map */
    ASSERT3U(vdev_id, <=, SM_NO_VDEVID);

    /*
     * if this is a single word entry, ensure that no vdev was
     * specified.
     */
    IMPLY(words == 1, vdev_id == SM_NO_VDEVID);

    dmu_buf_t *db = *dbp;
    ASSERT3U(db->db_size, ==, sm->sm_blksz);

    uint64_t *block_base = db->db_data;
    uint64_t *block_end = block_base + (sm->sm_blksz / sizeof (uint64_t));
    uint64_t *block_cursor = block_base +
        (sm->sm_phys->smp_objsize - db->db_offset) / sizeof (uint64_t);

    ASSERT3P(block_cursor, <=, block_end);

    uint64_t size = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
    uint64_t start = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
    uint64_t run_max = (words == 2) ?
        SM2_RUN_MAX : SM_RUN_MAX;

    ASSERT3U(rs->rs_start, >=, sm->sm_start);
    ASSERT3U(rs->rs_start, <, sm->sm_start + sm->sm_size);
    ASSERT3U(rs->rs_end - rs->rs_start, <=, sm->sm_size);
    ASSERT3U(rs->rs_end, <=, sm->sm_start + sm->sm_size);

    while (size != 0) {
        ASSERT3P(block_cursor, <=, block_end);

        /*
         * If we are at the end of this block, flush it and start
         * writing again from the beginning.
         */
        if (block_cursor == block_end) {
            dmu_buf_rele(db, tag);

            uint64_t next_word_offset = sm->sm_phys->smp_objsize;
            VERIFY0(dmu_buf_hold(sm->sm_os,
                space_map_object(sm), next_word_offset,
                tag, &db, DMU_READ_PREFETCH));
            dmu_buf_will_dirty(db, tx);

            /* update caller's dbuf */
            *dbp = db;

            ASSERT3U(db->db_size, ==, sm->sm_blksz);

            block_base = db->db_data;
            block_cursor = block_base;
            block_end = block_base +
                (db->db_size / sizeof (uint64_t));
        }

        /*
         * If we are writing a two-word entry and we only have one
         * word left on this block, just pad it with an empty debug
         * entry and write the two-word entry in the next block.
         */
        uint64_t *next_entry = block_cursor + 1;
        if (next_entry == block_end && words > 1) {
            ASSERT3U(words, ==, 2);
            *block_cursor = SM_PREFIX_ENCODE(SM_DEBUG_PREFIX) |
                SM_DEBUG_ACTION_ENCODE(0) |
                SM_DEBUG_SYNCPASS_ENCODE(0) |
                SM_DEBUG_TXG_ENCODE(0);
            block_cursor++;
            sm->sm_phys->smp_objsize += sizeof (uint64_t);
            ASSERT3P(block_cursor, ==, block_end);
            continue;
        }

        uint64_t run_len = MIN(size, run_max);
        switch (words) {
        case 1:
            *block_cursor = SM_OFFSET_ENCODE(start) |
                SM_TYPE_ENCODE(maptype) |
                SM_RUN_ENCODE(run_len);
            block_cursor++;
            break;
        case 2:
            /* write the first word of the entry */
            *block_cursor = SM_PREFIX_ENCODE(SM2_PREFIX) |
                SM2_RUN_ENCODE(run_len) |
                SM2_VDEV_ENCODE(vdev_id);
            block_cursor++;

            /* move on to the second word of the entry */
            ASSERT3P(block_cursor, <, block_end);
            *block_cursor = SM2_TYPE_ENCODE(maptype) |
                SM2_OFFSET_ENCODE(start);
            block_cursor++;
            break;
        default:
            panic("%d-word space map entries are not supported",
                words);
            break;
        }
        sm->sm_phys->smp_objsize += words * sizeof (uint64_t);

        start += run_len;
        size -= run_len;
    }
    ASSERT0(size);
}
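/*
 * Illustrative sketch (editorial, not part of this change): how a reader
 * pairs the words that space_map_write_seg() above emits.  Debug entries
 * must be filtered out by the caller first, as space_map_iterate() does.
 * Offsets and runs come out raw, i.e. still scaled by sm_shift.  The
 * function name is hypothetical; the macros are the ones used in this file.
 */
static int
sm_decode_entry(const uint64_t *words, space_map_entry_t *sme)
{
    if (sm_entry_is_single_word(words[0])) {
        sme->sme_type = SM_TYPE_DECODE(words[0]);
        sme->sme_vdev = SM_NO_VDEVID;
        sme->sme_offset = SM_OFFSET_DECODE(words[0]);
        sme->sme_run = SM_RUN_DECODE(words[0]);
        return (1);		/* consumed one word */
    }
    ASSERT(sm_entry_is_double_word(words[0]));
    sme->sme_run = SM2_RUN_DECODE(words[0]);
    sme->sme_vdev = SM2_VDEV_DECODE(words[0]);
    sme->sme_type = SM2_TYPE_DECODE(words[1]);
    sme->sme_offset = SM2_OFFSET_DECODE(words[1]);
    return (2);			/* consumed two words */
}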
/*
 * Note: The space map's dbuf must be dirty for the changes in sm_phys to
 * take effect.
 */
static void
space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
    uint64_t vdev_id, dmu_tx_t *tx)
{
    spa_t *spa = tx->tx_pool->dp_spa;
    dmu_buf_t *db;

    space_map_write_intro_debug(sm, maptype, tx);

#ifdef DEBUG
    /*
     * We do this right after we write the intro debug entry
     * because the estimate does not take it into account.
     */
    uint64_t initial_objsize = sm->sm_phys->smp_objsize;
    uint64_t estimated_growth =
        space_map_estimate_optimal_size(sm, rt, SM_NO_VDEVID);
    uint64_t estimated_final_objsize = initial_objsize + estimated_growth;
#endif

    /*
     * Find the offset right after the last word in the space map
     * and use that to get a hold of the last block, so we can
     * start appending to it.
     */
    uint64_t next_word_offset = sm->sm_phys->smp_objsize;
    VERIFY0(dmu_buf_hold(sm->sm_os, space_map_object(sm),
        next_word_offset, FTAG, &db, DMU_READ_PREFETCH));
    ASSERT3U(db->db_size, ==, sm->sm_blksz);

    dmu_buf_will_dirty(db, tx);

    avl_tree_t *t = &rt->rt_root;
    for (range_seg_t *rs = avl_first(t); rs != NULL; rs = AVL_NEXT(t, rs)) {
        uint64_t offset = (rs->rs_start - sm->sm_start) >> sm->sm_shift;
        uint64_t length = (rs->rs_end - rs->rs_start) >> sm->sm_shift;
        uint8_t words = 1;

        /*
         * We only write two-word entries when both of the following
         * are true:
         *
         * [1] The feature is enabled.
         * [2] The offset or run is too big for a single-word entry,
-        *     or the vdev_id is set (meaning not equal to
-        *     SM_NO_VDEVID).
+        *	or the vdev_id is set (meaning not equal to
+        *	SM_NO_VDEVID).
         *
         * Note that for purposes of testing we've added the case that
         * we write two-word entries occasionally when the feature is
         * enabled and zfs_force_some_double_word_sm_entries has been
         * set.
         */
        if (spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_V2) &&
            (offset >= (1ULL << SM_OFFSET_BITS) ||
            length > SM_RUN_MAX ||
            vdev_id != SM_NO_VDEVID ||
            (zfs_force_some_double_word_sm_entries &&
            spa_get_random(100) == 0)))
            words = 2;

        space_map_write_seg(sm, rs, maptype, vdev_id, words, &db,
            FTAG, tx);
    }

    dmu_buf_rele(db, FTAG);

#ifdef DEBUG
    /*
     * We expect our estimation to be based on the worst case
     * scenario [see comment in space_map_estimate_optimal_size()].
     * Therefore we expect the actual objsize to be equal or less
     * than whatever we estimated it to be.
     */
    ASSERT3U(estimated_final_objsize, >=, sm->sm_phys->smp_objsize);
#endif
}

/*
 * Note: This function manipulates the state of the given space map but
 * does not hold any locks implicitly. Thus the caller is responsible
 * for synchronizing writes to the space map.
 */
void
space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
    uint64_t vdev_id, dmu_tx_t *tx)
{
    objset_t *os = sm->sm_os;

    ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
    VERIFY3U(space_map_object(sm), !=, 0);

    dmu_buf_will_dirty(sm->sm_dbuf, tx);

    /*
     * This field is no longer necessary since the in-core space map
     * now contains the object number but is maintained for backwards
     * compatibility.
     */
    sm->sm_phys->smp_object = sm->sm_object;

    if (range_tree_is_empty(rt)) {
        VERIFY3U(sm->sm_object, ==, sm->sm_phys->smp_object);
        return;
    }

    if (maptype == SM_ALLOC)
        sm->sm_phys->smp_alloc += range_tree_space(rt);
    else
        sm->sm_phys->smp_alloc -= range_tree_space(rt);

    uint64_t nodes = avl_numnodes(&rt->rt_root);
    uint64_t rt_space = range_tree_space(rt);

    space_map_write_impl(sm, rt, maptype, vdev_id, tx);

    /*
     * Ensure that the space_map's accounting wasn't changed
     * while we were in the middle of writing it out.
     */
    VERIFY3U(nodes, ==, avl_numnodes(&rt->rt_root));
    VERIFY3U(range_tree_space(rt), ==, rt_space);
}
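/*
 * Illustrative sketch (editorial, not part of this change): the usual
 * syncing-context pattern around space_map_write(), condensed from callers
 * like metaslab_sync().  The function and variable names are hypothetical.
 */
static void
sm_sync_frees_example(space_map_t *sm, range_tree_t *frees, dmu_tx_t *tx)
{
    ASSERT(dmu_tx_is_syncing(tx));
    /* persist this txg's freed ranges, then drain the in-core tree */
    space_map_write(sm, frees, SM_FREE, SM_NO_VDEVID, tx);
    range_tree_vacate(frees, NULL, NULL);
}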
static int
space_map_open_impl(space_map_t *sm)
{
    int error;
    u_longlong_t blocks;

    error = dmu_bonus_hold(sm->sm_os, sm->sm_object, sm, &sm->sm_dbuf);
    if (error)
        return (error);

    dmu_object_size_from_db(sm->sm_dbuf, &sm->sm_blksz, &blocks);
    sm->sm_phys = sm->sm_dbuf->db_data;
    return (0);
}

int
space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
    uint64_t start, uint64_t size, uint8_t shift)
{
    space_map_t *sm;
    int error;

    ASSERT(*smp == NULL);
    ASSERT(os != NULL);
    ASSERT(object != 0);

    sm = kmem_zalloc(sizeof (space_map_t), KM_SLEEP);

    sm->sm_start = start;
    sm->sm_size = size;
    sm->sm_shift = shift;
    sm->sm_os = os;
    sm->sm_object = object;

    error = space_map_open_impl(sm);
    if (error != 0) {
        space_map_close(sm);
        return (error);
    }
    *smp = sm;

    return (0);
}

void
space_map_close(space_map_t *sm)
{
    if (sm == NULL)
        return;

    if (sm->sm_dbuf != NULL)
        dmu_buf_rele(sm->sm_dbuf, sm);
    sm->sm_dbuf = NULL;
    sm->sm_phys = NULL;

    kmem_free(sm, sizeof (*sm));
}

void
space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx)
{
    objset_t *os = sm->sm_os;
    spa_t *spa = dmu_objset_spa(os);
    dmu_object_info_t doi;

    ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
    ASSERT(dmu_tx_is_syncing(tx));
    VERIFY3U(dmu_tx_get_txg(tx), <=, spa_final_dirty_txg(spa));

    dmu_object_info_from_db(sm->sm_dbuf, &doi);

    /*
     * If the space map has the wrong bonus size (because
     * SPA_FEATURE_SPACEMAP_HISTOGRAM has recently been enabled), or
     * the wrong block size (because space_map_blksz has changed),
     * free and re-allocate its object with the updated sizes.
     *
     * Otherwise, just truncate the current object.
     */
    if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
        doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
-       doi.doi_data_block_size != blocksize) {
+       doi.doi_data_block_size != blocksize ||
+       doi.doi_metadata_block_size != 1 << space_map_ibs) {
        zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
            "object[%llu]: old bonus %u, old blocksz %u",
            dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
            doi.doi_bonus_size, doi.doi_data_block_size);

        space_map_free(sm, tx);
        dmu_buf_rele(sm->sm_dbuf, sm);

        sm->sm_object = space_map_alloc(sm->sm_os, blocksize, tx);
        VERIFY0(space_map_open_impl(sm));
    } else {
        VERIFY0(dmu_free_range(os, space_map_object(sm), 0, -1ULL, tx));

        /*
         * If the spacemap is reallocated, its histogram
         * will be reset. Do the same in the common case so that
         * bugs related to the uncommon case do not go unnoticed.
         */
        bzero(sm->sm_phys->smp_histogram,
            sizeof (sm->sm_phys->smp_histogram));
    }

    dmu_buf_will_dirty(sm->sm_dbuf, tx);
    sm->sm_phys->smp_objsize = 0;
    sm->sm_phys->smp_alloc = 0;
}

/*
 * Update the in-core space_map allocation and length values.
 */
void
space_map_update(space_map_t *sm)
{
    if (sm == NULL)
        return;

    sm->sm_alloc = sm->sm_phys->smp_alloc;
    sm->sm_length = sm->sm_phys->smp_objsize;
}

uint64_t
space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
{
    spa_t *spa = dmu_objset_spa(os);
    uint64_t object;
    int bonuslen;

    if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
        spa_feature_incr(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
        bonuslen = sizeof (space_map_phys_t);
        ASSERT3U(bonuslen, <=, dmu_bonus_max());
    } else {
        bonuslen = SPACE_MAP_SIZE_V0;
    }

-   object = dmu_object_alloc(os, DMU_OT_SPACE_MAP, blocksize,
-       DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
+   object = dmu_object_alloc_ibs(os, DMU_OT_SPACE_MAP, blocksize,
+       space_map_ibs, DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);

    return (object);
}

void
space_map_free_obj(objset_t *os, uint64_t smobj, dmu_tx_t *tx)
{
    spa_t *spa = dmu_objset_spa(os);
    if (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
        dmu_object_info_t doi;

        VERIFY0(dmu_object_info(os, smobj, &doi));
        if (doi.doi_bonus_size != SPACE_MAP_SIZE_V0) {
            spa_feature_decr(spa,
                SPA_FEATURE_SPACEMAP_HISTOGRAM, tx);
        }
    }

    VERIFY0(dmu_object_free(os, smobj, tx));
}

void
space_map_free(space_map_t *sm, dmu_tx_t *tx)
{
    if (sm == NULL)
        return;

    space_map_free_obj(sm->sm_os, space_map_object(sm), tx);
    sm->sm_object = 0;
}

/*
 * Given a range tree, it makes a worst-case estimate of how much
 * space the tree's segments would take if they were written to
 * the given space map.
 */
uint64_t
space_map_estimate_optimal_size(space_map_t *sm, range_tree_t *rt,
    uint64_t vdev_id)
{
    spa_t *spa = dmu_objset_spa(sm->sm_os);
    uint64_t shift = sm->sm_shift;
    uint64_t *histogram = rt->rt_histogram;
    uint64_t entries_for_seg = 0;

    /*
     * In order to get a quick estimate of the optimal size that this
     * range tree would have on-disk as a space map, we iterate through
     * its histogram buckets instead of iterating through its nodes.
     *
     * Note that this is a highest-bound/worst-case estimate for the
     * following reasons:
     *
     * 1] We assume that we always add a debug padding for each block
     *    we write and we also assume that we start at the last word
     *    of a block attempting to write a two-word entry.
     * 2] Rounding up errors due to the way segments are distributed
     *    in the buckets of the range tree's histogram.
     * 3] The activation of zfs_force_some_double_word_sm_entries
     *    (tunable) when testing.
     *
     * = Math and Rounding Errors =
     *
     * rt_histogram[i] bucket of a range tree represents the number
     * of entries in [2^i, (2^(i+1))-1] of that range_tree. Given
     * that, we want to divide the buckets into groups: Buckets that
     * can be represented using a single-word entry, ones that can
     * be represented with a double-word entry, and ones that can
     * only be represented with multiple two-word entries.
     *
     * [Note that if the new encoding feature is not enabled there
     * are only two groups: single-word entry buckets and multiple
     * single-word entry buckets. The information below assumes
     * two-word entries enabled, but it can easily be applied when
     * the feature is not enabled]
     *
     * To find the highest bucket that can be represented with a
     * single-word entry we look at the maximum run that such entry
     * can have, which is 2^(SM_RUN_BITS + sm_shift) [remember that
     * the run of a space map entry is shifted by sm_shift, thus we
     * add it to the exponent]. This way, excluding the value of the
     * maximum run that can be represented by a single-word entry,
     * all runs that are smaller exist in buckets 0 to
     * SM_RUN_BITS + shift - 1.
     *
     * To find the highest bucket that can be represented with a
     * double-word entry, we follow the same approach. Finally, any
     * bucket higher than that are represented with multiple two-word
     * entries. To be more specific, if the highest bucket whose
     * segments can be represented with a single two-word entry is X,
     * then bucket X+1 will need 2 two-word entries for each of its
     * segments, X+2 will need 4, X+3 will need 8, ...etc.
     *
     * With all of the above we make our estimation based on bucket
     * groups. There is a rounding error though. As we mentioned in
     * the example with the one-word entry, the maximum run that can
     * be represented in a one-word entry 2^(SM_RUN_BITS + shift) is
     * not part of bucket SM_RUN_BITS + shift - 1. Thus, segments of
     * that length fall into the next bucket (and bucket group) where
     * we start counting two-word entries and this is one more reason
     * why the estimated size may end up being bigger than the actual
     * size written.
     */
    uint64_t size = 0;
    uint64_t idx = 0;

    if (!spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2) ||
        (vdev_id == SM_NO_VDEVID && sm->sm_size < SM_OFFSET_MAX)) {

        /*
         * If we are trying to force some double word entries just
         * assume the worst-case of every single word entry being
         * written as a double word entry.
         */
        uint64_t entry_size =
            (spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2) &&
            zfs_force_some_double_word_sm_entries) ?
            (2 * sizeof (uint64_t)) : sizeof (uint64_t);

        uint64_t single_entry_max_bucket = SM_RUN_BITS + shift - 1;
        for (; idx <= single_entry_max_bucket; idx++)
            size += histogram[idx] * entry_size;

        if (!spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2)) {
            for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) {
                ASSERT3U(idx, >=, single_entry_max_bucket);
                entries_for_seg =
                    1ULL << (idx - single_entry_max_bucket);
                size += histogram[idx] *
                    entries_for_seg * entry_size;
            }
            return (size);
        }
    }

    ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_V2));

    uint64_t double_entry_max_bucket = SM2_RUN_BITS + shift - 1;
    for (; idx <= double_entry_max_bucket; idx++)
        size += histogram[idx] * 2 * sizeof (uint64_t);

    for (; idx < RANGE_TREE_HISTOGRAM_SIZE; idx++) {
        ASSERT3U(idx, >=, double_entry_max_bucket);
        entries_for_seg = 1ULL << (idx - double_entry_max_bucket);
        size += histogram[idx] *
            entries_for_seg * 2 * sizeof (uint64_t);
    }

    /*
     * Assume the worst case where we start with the padding at the end
     * of the current block and we add an extra padding entry at the end
     * of all subsequent blocks.
     */
    size += ((size / sm->sm_blksz) + 1) * sizeof (uint64_t);

    return (size);
}

uint64_t
space_map_object(space_map_t *sm)
{
    return (sm != NULL ? sm->sm_object : 0);
}

/*
 * Returns the already synced, on-disk allocated space.
 */
uint64_t
space_map_allocated(space_map_t *sm)
{
    return (sm != NULL ? sm->sm_alloc : 0);
}

/*
 * Returns the already synced, on-disk length.
 */
uint64_t
space_map_length(space_map_t *sm)
{
    return (sm != NULL ? sm->sm_length : 0);
}

/*
 * Returns the allocated space that is currently syncing.
 */
int64_t
space_map_alloc_delta(space_map_t *sm)
{
    if (sm == NULL)
        return (0);
    ASSERT(sm->sm_dbuf != NULL);
    return (sm->sm_phys->smp_alloc - space_map_allocated(sm));
}
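/*
 * Illustrative sketch (editorial, not part of this change): the bucket
 * group boundary that space_map_estimate_optimal_size() above derives.
 * Buckets up to SM2_RUN_BITS + shift - 1 fit in one two-word entry; each
 * bucket past that doubles the number of two-word entries per segment.
 * The helper name is hypothetical.
 */
static inline uint64_t
sm_two_word_entries_for_bucket(int rt_bucket, int shift)
{
    int double_entry_max_bucket = SM2_RUN_BITS + shift - 1;

    if (rt_bucket <= double_entry_max_bucket)
        return (1);
    return (1ULL << (rt_bucket - double_entry_max_bucket));
}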
Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(revision 337168)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h	(revision 337169)
@@ -1,990 +1,993 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 * Copyright 2013 DEY Storage Systems, Inc.
 * Copyright 2014 HybridCluster. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright 2013 Saso Kiselkov. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 */

/* Portions Copyright 2010 Robert Milkowski */

#ifndef _SYS_DMU_H
#define _SYS_DMU_H

/*
 * This file describes the interface that the DMU provides for its
 * consumers.
 *
 * The DMU also interacts with the SPA.  That interface is described in
 * dmu_spa.h.
 */

#include <sys/zfs_context.h>
#include <sys/cred.h>
#include <sys/fs/zfs.h>
#include <sys/zio_compress.h>
#include <sys/zio_priority.h>

#ifdef __cplusplus
extern "C" {
#endif

struct uio;
struct xuio;
struct page;
struct vnode;
struct spa;
struct zilog;
struct zio;
struct blkptr;
struct zap_cursor;
struct dsl_dataset;
struct dsl_pool;
struct dnode;
struct drr_begin;
struct drr_end;
struct zbookmark_phys;
struct spa;
struct nvlist;
struct arc_buf;
struct zio_prop;
struct sa_handle;
struct file;

typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
typedef struct dnode dnode_t;

typedef enum dmu_object_byteswap {
    DMU_BSWAP_UINT8,
    DMU_BSWAP_UINT16,
    DMU_BSWAP_UINT32,
    DMU_BSWAP_UINT64,
    DMU_BSWAP_ZAP,
    DMU_BSWAP_DNODE,
    DMU_BSWAP_OBJSET,
    DMU_BSWAP_ZNODE,
    DMU_BSWAP_OLDACL,
    DMU_BSWAP_ACL,
    /*
     * Allocating a new byteswap type number makes the on-disk format
     * incompatible with any other format that uses the same number.
     *
     * Data can usually be structured to work with one of the
     * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
     */
    DMU_BSWAP_NUMFUNCS
} dmu_object_byteswap_t;

#define	DMU_OT_NEWTYPE 0x80
#define	DMU_OT_METADATA 0x40
#define	DMU_OT_BYTESWAP_MASK 0x3f

/*
 * Defines a uint8_t object type. Object types specify if the data
 * in the object is metadata (boolean) and how to byteswap the data
 * (dmu_object_byteswap_t). All of the types created by this method
 * are cached in the dbuf metadata cache.
 */
#define	DMU_OT(byteswap, metadata) \
	(DMU_OT_NEWTYPE | \
	((metadata) ? DMU_OT_METADATA : 0) | \
	((byteswap) & DMU_OT_BYTESWAP_MASK))
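/*
 * Illustrative sketch (editorial, not part of this header): DMU_OT()
 * expanded for one of the DMU_OTN_* names defined below.
 *
 *	DMU_OT(DMU_BSWAP_ZAP, B_TRUE)
 *	    == DMU_OT_NEWTYPE | DMU_OT_METADATA | DMU_BSWAP_ZAP
 *	    == 0x80 | 0x40 | 0x04
 *	    == 0xc4 (DMU_OTN_ZAP_METADATA)
 *
 * The compile-time check below is a hypothetical addition that encodes
 * this expansion.
 */
typedef char dmu_ot_example_check[
    DMU_OT(DMU_BSWAP_ZAP, B_TRUE) == 0xc4 ? 1 : -1];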
#define	DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
	((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
	(ot) < DMU_OT_NUMTYPES)

#define	DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
	((ot) & DMU_OT_METADATA) : \
	dmu_ot[(ot)].ot_metadata)

#define	DMU_OT_IS_METADATA_CACHED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
	B_TRUE : dmu_ot[(ot)].ot_dbuf_metadata_cache)

/*
 * These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
 * have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
 * is repurposed for embedded BPs.
 */
#define	DMU_OT_HAS_FILL(ot) \
	((ot) == DMU_OT_DNODE || (ot) == DMU_OT_OBJSET)

#define	DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
	((ot) & DMU_OT_BYTESWAP_MASK) : \
	dmu_ot[(ot)].ot_byteswap)

typedef enum dmu_object_type {
    DMU_OT_NONE,
    /* general: */
    DMU_OT_OBJECT_DIRECTORY,	/* ZAP */
    DMU_OT_OBJECT_ARRAY,		/* UINT64 */
    DMU_OT_PACKED_NVLIST,		/* UINT8 (XDR by nvlist_pack/unpack) */
    DMU_OT_PACKED_NVLIST_SIZE,	/* UINT64 */
    DMU_OT_BPOBJ,			/* UINT64 */
    DMU_OT_BPOBJ_HDR,		/* UINT64 */
    /* spa: */
    DMU_OT_SPACE_MAP_HEADER,	/* UINT64 */
    DMU_OT_SPACE_MAP,		/* UINT64 */
    /* zil: */
    DMU_OT_INTENT_LOG,		/* UINT64 */
    /* dmu: */
    DMU_OT_DNODE,			/* DNODE */
    DMU_OT_OBJSET,			/* OBJSET */
    /* dsl: */
    DMU_OT_DSL_DIR,			/* UINT64 */
    DMU_OT_DSL_DIR_CHILD_MAP,	/* ZAP */
    DMU_OT_DSL_DS_SNAP_MAP,		/* ZAP */
    DMU_OT_DSL_PROPS,		/* ZAP */
    DMU_OT_DSL_DATASET,		/* UINT64 */
    /* zpl: */
    DMU_OT_ZNODE,			/* ZNODE */
    DMU_OT_OLDACL,			/* Old ACL */
    DMU_OT_PLAIN_FILE_CONTENTS,	/* UINT8 */
    DMU_OT_DIRECTORY_CONTENTS,	/* ZAP */
    DMU_OT_MASTER_NODE,		/* ZAP */
    DMU_OT_UNLINKED_SET,		/* ZAP */
    /* zvol: */
    DMU_OT_ZVOL,			/* UINT8 */
    DMU_OT_ZVOL_PROP,		/* ZAP */
    /* other; for testing only! */
    DMU_OT_PLAIN_OTHER,		/* UINT8 */
    DMU_OT_UINT64_OTHER,		/* UINT64 */
    DMU_OT_ZAP_OTHER,		/* ZAP */
    /* new object types: */
    DMU_OT_ERROR_LOG,		/* ZAP */
    DMU_OT_SPA_HISTORY,		/* UINT8 */
    DMU_OT_SPA_HISTORY_OFFSETS,	/* spa_his_phys_t */
    DMU_OT_POOL_PROPS,		/* ZAP */
    DMU_OT_DSL_PERMS,		/* ZAP */
    DMU_OT_ACL,			/* ACL */
    DMU_OT_SYSACL,			/* SYSACL */
    DMU_OT_FUID,			/* FUID table (Packed NVLIST UINT8) */
    DMU_OT_FUID_SIZE,		/* FUID table size UINT64 */
    DMU_OT_NEXT_CLONES,		/* ZAP */
    DMU_OT_SCAN_QUEUE,		/* ZAP */
    DMU_OT_USERGROUP_USED,		/* ZAP */
    DMU_OT_USERGROUP_QUOTA,		/* ZAP */
    DMU_OT_USERREFS,		/* ZAP */
    DMU_OT_DDT_ZAP,			/* ZAP */
    DMU_OT_DDT_STATS,		/* ZAP */
    DMU_OT_SA,			/* System attr */
    DMU_OT_SA_MASTER_NODE,		/* ZAP */
    DMU_OT_SA_ATTR_REGISTRATION,	/* ZAP */
    DMU_OT_SA_ATTR_LAYOUTS,		/* ZAP */
    DMU_OT_SCAN_XLATE,		/* ZAP */
    DMU_OT_DEDUP,			/* fake dedup BP from ddt_bp_create() */
    DMU_OT_DEADLIST,		/* ZAP */
    DMU_OT_DEADLIST_HDR,		/* UINT64 */
    DMU_OT_DSL_CLONES,		/* ZAP */
    DMU_OT_BPOBJ_SUBOBJ,		/* UINT64 */
    /*
     * Do not allocate new object types here. Doing so makes the on-disk
     * format incompatible with any other format that uses the same object
     * type number.
     *
     * When creating an object which does not have one of the above types
     * use the DMU_OTN_* type with the correct byteswap and metadata
     * values.
     *
     * The DMU_OTN_* types do not have entries in the dmu_ot table,
     * use the DMU_OT_IS_METADATA() and DMU_OT_BYTESWAP() macros instead
     * of indexing into dmu_ot directly (this works for both DMU_OT_* types
     * and DMU_OTN_* types).
     */
    DMU_OT_NUMTYPES,

    /*
     * Names for valid types declared with DMU_OT().
     */
    DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
    DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
    DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
    DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
    DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
    DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
    DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
    DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
    DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
    DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;

/*
 * These flags are intended to be used to specify the "txg_how"
 * parameter when calling the dmu_tx_assign() function. See the comment
 * above dmu_tx_assign() for more details on the meaning of these flags.
 */
#define	TXG_NOWAIT	(0ULL)
#define	TXG_WAIT	(1ULL<<0)
#define	TXG_NOTHROTTLE	(1ULL<<1)

void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
void byteswap_uint8_array(void *buf, size_t size);
void zap_byteswap(void *buf, size_t size);
void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);

#define	DS_FIND_SNAPSHOTS	(1<<0)
#define	DS_FIND_CHILDREN	(1<<1)
#define	DS_FIND_SERIALIZE	(1<<2)

/*
 * The maximum number of bytes that can be accessed as part of one
 * operation, including metadata.
 */
#define	DMU_MAX_ACCESS (32 * 1024 * 1024)	/* 32MB */
#define	DMU_MAX_DELETEBLKCNT (20480)	/* ~5MB of indirect blocks */

#define	DMU_USERUSED_OBJECT	(-1ULL)
#define	DMU_GROUPUSED_OBJECT	(-2ULL)

/*
 * artificial blkids for bonus buffer and spill blocks
 */
#define	DMU_BONUS_BLKID		(-1ULL)
#define	DMU_SPILL_BLKID		(-2ULL)
/*
 * Public routines to create, destroy, open, and close objsets.
 */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
    boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);

void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
    void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
    struct nvlist *snaps);
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
    struct nvlist *errlist);
int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
int dmu_objset_snapshot_tmp(const char *, const char *, int);
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags);
void dmu_objset_byteswap(void *buf, size_t size);
int dsl_dataset_rename_snapshot(const char *fsname,
    const char *oldsnapname, const char *newsnapname, boolean_t recursive);
int dmu_objset_remap_indirects(const char *fsname);

typedef struct dmu_buf {
    uint64_t db_object;		/* object that this buffer is part of */
    uint64_t db_offset;		/* byte offset in this object */
    uint64_t db_size;		/* size of buffer in bytes */
    void *db_data;			/* data in buffer */
} dmu_buf_t;

/*
 * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 */
#define	DMU_POOL_DIRECTORY_OBJECT	1
#define	DMU_POOL_CONFIG			"config"
#define	DMU_POOL_FEATURES_FOR_WRITE	"features_for_write"
#define	DMU_POOL_FEATURES_FOR_READ	"features_for_read"
#define	DMU_POOL_FEATURE_DESCRIPTIONS	"feature_descriptions"
#define	DMU_POOL_FEATURE_ENABLED_TXG	"feature_enabled_txg"
#define	DMU_POOL_ROOT_DATASET		"root_dataset"
#define	DMU_POOL_SYNC_BPOBJ		"sync_bplist"
#define	DMU_POOL_ERRLOG_SCRUB		"errlog_scrub"
#define	DMU_POOL_ERRLOG_LAST		"errlog_last"
#define	DMU_POOL_SPARES			"spares"
#define	DMU_POOL_DEFLATE		"deflate"
#define	DMU_POOL_HISTORY		"history"
#define	DMU_POOL_PROPS			"pool_props"
#define	DMU_POOL_L2CACHE		"l2cache"
#define	DMU_POOL_TMP_USERREFS		"tmp_userrefs"
#define	DMU_POOL_DDT			"DDT-%s-%s-%s"
#define	DMU_POOL_DDT_STATS		"DDT-statistics"
#define	DMU_POOL_CREATION_VERSION	"creation_version"
#define	DMU_POOL_SCAN			"scan"
#define	DMU_POOL_FREE_BPOBJ		"free_bpobj"
#define	DMU_POOL_BPTREE_OBJ		"bptree_obj"
#define	DMU_POOL_EMPTY_BPOBJ		"empty_bpobj"
#define	DMU_POOL_CHECKSUM_SALT		"org.illumos:checksum_salt"
#define	DMU_POOL_VDEV_ZAP_MAP		"com.delphix:vdev_zap_map"
#define	DMU_POOL_REMOVING		"com.delphix:removing"
#define	DMU_POOL_OBSOLETE_BPOBJ		"com.delphix:obsolete_bpobj"
#define	DMU_POOL_CONDENSING_INDIRECT	"com.delphix:condensing_indirect"
#define	DMU_POOL_ZPOOL_CHECKPOINT	"com.delphix:zpool_checkpoint"

/*
 * Allocate an object from this objset.  The range of object numbers
 * available is (0, DN_MAX_OBJECT).  Object 0 is the meta-dnode.
 *
 * The transaction must be assigned to a txg.  The newly allocated
 * object will be "held" in the transaction (ie. you can modify the
 * newly allocated object in this transaction).
 *
 * dmu_object_alloc() chooses an object and returns it in *objectp.
 *
 * dmu_object_claim() allocates a specific object number.  If that
 * number is already allocated, it fails and returns EEXIST.
 *
 * Return 0 on success, or ENOSPC or EEXIST as specified above.
 */
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+uint64_t dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot,
+    int blocksize, int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
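/*
 * Illustrative sketch (editorial, not part of this header): allocating an
 * object with 16K (1 << 14) indirect blocks via the new entry point, the
 * way space_map_alloc() in space_map.c now does.  The function name and
 * the choice of object type are hypothetical; tx must already be assigned
 * to a txg.
 */
static inline uint64_t
alloc_object_with_small_ibs(objset_t *os, dmu_tx_t *tx)
{
	return (dmu_object_alloc_ibs(os, DMU_OT_UINT64_OTHER, 4096,
	    14, DMU_OT_NONE, 0, tx));
}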
/*
 * Free an object from this objset.
 *
 * The object's data will be freed as well (ie. you don't need to call
 * dmu_free(object, 0, -1, tx)).
 *
 * The object need not be held in the transaction.
 *
 * If there are any holds on this object's buffers (via dmu_buf_hold()),
 * or tx holds on the object (via dmu_tx_hold_object()), you can not
 * free it; it fails and returns EBUSY.
 *
 * If the object is not allocated, it fails and returns ENOENT.
 *
 * Return 0 on success, or EBUSY or ENOENT as specified above.
 */
int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx);

/*
 * Find the next allocated or free object.
 *
 * The objectp parameter is in-out.  It will be updated to be the next
 * object which is allocated.  Ignore objects which have not been
 * modified since txg.
 *
 * XXX Can only be called on an objset with no dirty data.
 *
 * Returns 0 on success, or ENOENT if there are no more objects.
 */
int dmu_object_next(objset_t *os, uint64_t *objectp,
    boolean_t hole, uint64_t txg);

/*
 * Set the data blocksize for an object.
 *
 * The object cannot have any blocks allocated beyond the first.  If
 * the first block is allocated already, the new size must be greater
 * than the current block size.  If these conditions are not met,
 * ENOTSUP will be returned.
 *
 * Returns 0 on success, or EBUSY if there are any holds on the object
 * contents, or ENOTSUP as described above.
 */
int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size,
    int ibs, dmu_tx_t *tx);

/*
 * Set the checksum property on a dnode.  The new checksum algorithm will
 * apply to all newly written blocks; existing blocks will not be affected.
 */
void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
    dmu_tx_t *tx);

/*
 * Set the compress property on a dnode.  The new compression algorithm will
 * apply to all newly written blocks; existing blocks will not be affected.
 */
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
    dmu_tx_t *tx);

int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg);

void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
    void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
    int compressed_size, int byteorder, dmu_tx_t *tx);

/*
 * Decide how to write a block: checksum, compression, number of copies, etc.
 */
#define	WP_NOFILL	0x1
#define	WP_DMU_SYNC	0x2
#define	WP_SPILL	0x4

void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
    struct zio_prop *zp);
/*
 * The bonus data is accessed more or less like a regular buffer.
 * You must dmu_bonus_hold() to get the buffer, which will give you a
 * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus
 * data.  As with any normal buffer, you must call dmu_buf_will_dirty()
 * before modifying it, and the
 * object must be held in an assigned transaction before calling
 * dmu_buf_will_dirty.  You may use dmu_buf_set_user() on the bonus
 * buffer as well.  You must release your hold with dmu_buf_rele().
 *
 * Returns ENOENT, EIO, or 0.
 */
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
dmu_object_type_t dmu_get_bonustype(dmu_buf_t *);
int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);

/*
 * Special spill buffer support used by "SA" framework
 */

int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
    void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);

/*
 * Obtain the DMU buffer from the specified object which contains the
 * specified offset.  dmu_buf_hold() puts a "hold" on the buffer, so
 * that it will remain in memory.  You must release the hold with
 * dmu_buf_rele().  You mustn't access the dmu_buf_t after releasing your
 * hold.  You must have a hold on any dmu_buf_t* you pass to the DMU.
 *
 * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill
 * on the returned buffer before reading or writing the buffer's
 * db_data.  The comments for those routines describe what particular
 * operations are valid after calling them.
 *
 * The object number must be a valid, allocated object number.
 */
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
    void *tag, dmu_buf_t **, int flags);
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
    void *tag, dmu_buf_t **dbp, int flags);
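/*
 * Illustrative sketch (editorial, not part of this header): the
 * hold/access/release discipline the comment above describes, reading the
 * first word of an object's first block.  The function name is
 * hypothetical; DMU_READ_PREFETCH is the flag used elsewhere in this
 * change.
 */
static inline int
peek_first_word(objset_t *os, uint64_t object, uint64_t *valp)
{
	dmu_buf_t *db;
	int err;

	err = dmu_buf_hold(os, object, 0, FTAG, &db, DMU_READ_PREFETCH);
	if (err != 0)
		return (err);
	*valp = *(uint64_t *)db->db_data;	/* valid only while held */
	dmu_buf_rele(db, FTAG);
	return (0);
}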
/*
 * Add a reference to a dmu buffer that has already been held via
 * dmu_buf_hold() in the current context.
 */
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);

/*
 * Attempt to add a reference to a dmu buffer that is in an unknown state,
 * using a pointer that may have been invalidated by eviction processing.
 * The request will succeed if the passed in dbuf still represents the
 * same os/object/blkid, is ineligible for eviction, and has at least
 * one hold by a user other than the syncer.
 */
boolean_t dmu_buf_try_add_ref(dmu_buf_t *, objset_t *os, uint64_t object,
    uint64_t blkid, void *tag);

void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);

/*
 * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
 * range of an object.  A pointer to an array of dmu_buf_t*'s is
 * returned (in *dbpp).
 *
 * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
 * frees the array.  The hold on the array of buffers MUST be released
 * with dmu_buf_rele_array.  You can NOT release the hold on each buffer
 * individually with dmu_buf_rele.
 */
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
    uint64_t length, boolean_t read, void *tag,
    int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);

typedef void dmu_buf_evict_func_t(void *user_ptr);

/*
 * A DMU buffer user object may be associated with a dbuf for the
 * duration of its lifetime.  This allows the user of a dbuf (client)
 * to attach private data to a dbuf (e.g. in-core only data such as a
 * dnode_children_t, zap_t, or zap_leaf_t) and be optionally notified
 * when that dbuf has been evicted.  Clients typically respond to the
 * eviction notification by freeing their private data, thus ensuring
 * the same lifetime for both dbuf and private data.
 *
 * The mapping from a dmu_buf_user_t to any client private data is the
 * client's responsibility.  All current consumers of the API with private
 * data embed a dmu_buf_user_t as the first member of the structure for
 * their private data.  This allows conversions between the two types
 * with a simple cast.  Since the DMU buf user API never needs access
 * to the private data, other strategies can be employed if necessary
 * or convenient for the client (e.g. using container_of() to do the
 * conversion for private data that cannot have the dmu_buf_user_t as
 * its first member).
 *
 * Eviction callbacks are executed without the dbuf mutex held or any
 * other type of mechanism to guarantee that the dbuf is still available.
 * For this reason, users must assume the dbuf has already been freed
 * and not reference the dbuf from the callback context.
 *
 * Users requesting "immediate eviction" are notified as soon as the dbuf
 * is only referenced by dirty records (dirties == holds).  Otherwise the
 * notification occurs after eviction processing for the dbuf begins.
 */
typedef struct dmu_buf_user {
    /*
     * Asynchronous user eviction callback state.
     */
    taskq_ent_t	dbu_tqent;

    /*
     * This instance's eviction function pointers.
     *
     * dbu_evict_func_sync is called synchronously and then
     * dbu_evict_func_async is executed asynchronously on a taskq.
     */
    dmu_buf_evict_func_t *dbu_evict_func_sync;
    dmu_buf_evict_func_t *dbu_evict_func_async;
#ifdef ZFS_DEBUG
    /*
     * Pointer to user's dbuf pointer.
	 * NULL for clients that do not associate a dbuf with their user data.
	 *
	 * The dbuf pointer is cleared upon eviction so as to catch
	 * use-after-evict bugs in clients.
	 */
	dmu_buf_t **dbu_clear_on_evict_dbufp;
#endif
} dmu_buf_user_t;

/*
 * Initialize the given dmu_buf_user_t instance with the eviction function
 * evict_func, to be called when the user is evicted.
 *
 * NOTE: This function should only be called once on a given dmu_buf_user_t.
 * To allow enforcement of this, dbu must already be zeroed on entry.
 */
/*ARGSUSED*/
inline void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
    dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
{
	ASSERT(dbu->dbu_evict_func_sync == NULL);
	ASSERT(dbu->dbu_evict_func_async == NULL);

	/* must have at least one evict func */
	IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
	dbu->dbu_evict_func_sync = evict_func_sync;
	dbu->dbu_evict_func_async = evict_func_async;
#ifdef ZFS_DEBUG
	dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
#endif
}

/*
 * Attach user data to a dbuf and mark it for normal (when the dbuf's
 * data is cleared or its reference count goes to zero) eviction processing.
 *
 * Returns NULL on success, or the existing user if another user currently
 * owns the buffer.
 */
void *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);

/*
 * Attach user data to a dbuf and mark it for immediate (its dirty and
 * reference counts are equal) eviction processing.
 *
 * Returns NULL on success, or the existing user if another user currently
 * owns the buffer.
 */
void *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);

/*
 * Replace the current user of a dbuf.
 *
 * If given the current user of a dbuf, replaces the dbuf's user with
 * "new_user" and returns the user data pointer that was replaced.
 * Otherwise returns the current, and unmodified, dbuf user pointer.
 */
void *dmu_buf_replace_user(dmu_buf_t *db, dmu_buf_user_t *old_user,
    dmu_buf_user_t *new_user);

/*
 * Remove the specified user data for a DMU buffer.
 *
 * Returns the user that was removed on success, or the current user if
 * another user currently owns the buffer.
 */
void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);

/*
 * Returns the user data (dmu_buf_user_t *) associated with this dbuf.
 */
void *dmu_buf_get_user(dmu_buf_t *db);

objset_t *dmu_buf_get_objset(dmu_buf_t *db);
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
void dmu_buf_dnode_exit(dmu_buf_t *db);

/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);

/*
 * Returns the blkptr associated with this dbuf, or NULL if not set.
 */
struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);

/*
 * Indicate that you are going to modify the buffer's data (db_data).
 *
 * The transaction (tx) must be assigned to a txg (i.e. you've called
 * dmu_tx_assign()). The buffer's object must be held in the tx
 * (i.e. you've called dmu_tx_hold_object(tx, db->db_object)).
 */
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);

/*
 * You must create a transaction, then hold the objects which you will
 * (or might) modify as part of this transaction. Then you must assign
 * the transaction to a transaction group. Once the transaction has
 * been assigned, you can modify buffers which belong to held objects as
 * part of this transaction. You can't modify buffers before the
 * transaction has been assigned; you can't modify buffers which don't
 * belong to objects which this transaction holds; you can't hold
 * objects once the transaction has been assigned.
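 *
 * A minimal sketch of that lifecycle (an editorial example, not part of
 * the original comment; error handling abbreviated):
 *
 *	dmu_tx_t *tx = dmu_tx_create(os);
 *	dmu_tx_hold_write(tx, object, off, len);
 *	if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
 *		dmu_tx_abort(tx);
 *		return;
 *	}
 *	dmu_write(os, object, off, len, buf, tx);
 *	dmu_tx_commit(tx);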
 * You may hold an object which you are going to free (with
 * dmu_object_free()), but you don't have to.
 *
 * You can abort the transaction before it has been assigned.
 *
 * Note that you may hold buffers (with dmu_buf_hold) at any time,
 * regardless of transaction state.
 */

#define	DMU_NEW_OBJECT	(-1ULL)
#define	DMU_OBJECT_END	(-1ULL)

dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
    int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
    uint64_t len);
void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
    uint64_t len);
void dmu_tx_hold_remap_l1indirect(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
void dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add,
    const char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_bonus_by_dnode(dmu_tx_t *tx, dnode_t *dn);
void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_mark_netfree(dmu_tx_t *tx);

/*
 * To register a commit callback, dmu_tx_callback_register() must be called.
 *
 * dcb_data is a pointer to caller private data that is passed on as a
 * callback parameter. The caller is responsible for properly allocating and
 * freeing it.
 *
 * When registering a callback, the transaction must be already created, but
 * it cannot be committed or aborted. It can be assigned to a txg or not.
 *
 * The callback will be called after the transaction has been safely written
 * to stable storage and will also be called if the dmu_tx is aborted.
 * If there is any error which prevents the transaction from being committed to
 * disk, the callback will be called with a value of error != 0.
 */
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);

void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
    void *dcb_data);

/*
 * Free up the data blocks for a defined range of a file. If size is
 * -1, the range from offset to end-of-file is freed.
 */
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, dmu_tx_t *tx);
int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size);
int dmu_free_long_object(objset_t *os, uint64_t object);

/*
 * Convenience functions.
 *
 * Canfail routines will return 0 on success, or an errno if there is a
 * nonrecoverable I/O error.
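 *
 * For example (an illustrative, editorial sketch; "tx" is assumed to be
 * an already-assigned transaction):
 *
 *	char buf[512];
 *	if (dmu_read(os, object, 0, sizeof (buf), buf,
 *	    DMU_READ_PREFETCH) == 0) {
 *		... use buf ...
 *	}
 *	dmu_write(os, object, 0, sizeof (buf), buf, tx);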
 */
#define	DMU_READ_PREFETCH	0 /* prefetch */
#define	DMU_READ_NO_PREFETCH	1 /* don't prefetch */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    void *buf, uint32_t flags);
int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
    uint32_t flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    const void *buf, dmu_tx_t *tx);
void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
    const void *buf, dmu_tx_t *tx);
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio,
    uint64_t size);
int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio,
    uint64_t size, dmu_tx_t *tx);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
    dmu_tx_t *tx);
#ifdef _KERNEL
#ifdef illumos
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, struct page *pp, dmu_tx_t *tx);
#else
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
    uint64_t size, struct vm_page **ppa, dmu_tx_t *tx);
int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
    int *rbehind, int *rahead, int last_size);
#endif
#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset,
    struct arc_buf *buf, dmu_tx_t *tx);
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
    size_t n);
int dmu_xuio_cnt(struct xuio *uio);
struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
void dmu_xuio_clear(struct xuio *uio, int i);
void xuio_stat_wbuf_copied(void);
void xuio_stat_wbuf_nocopy(void);

extern boolean_t zfs_prefetch_disable;
extern int zfs_max_recordsize;

/*
 * Asynchronously try to read in the data.
 */
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level,
    uint64_t offset, uint64_t len, enum zio_priority pri);

typedef struct dmu_object_info {
	/* All sizes are in bytes unless otherwise indicated. */
	uint32_t doi_data_block_size;
	uint32_t doi_metadata_block_size;
	dmu_object_type_t doi_type;
	dmu_object_type_t doi_bonus_type;
	uint64_t doi_bonus_size;
	uint8_t doi_indirection;		/* 2 = dnode->indirect->data */
	uint8_t doi_checksum;
	uint8_t doi_compress;
	uint8_t doi_nblkptr;
	uint8_t doi_pad[4];
	uint64_t doi_physical_blocks_512;	/* data + metadata, 512b blks */
	uint64_t doi_max_offset;
	uint64_t doi_fill_count;		/* number of non-empty blocks */
} dmu_object_info_t;

typedef void arc_byteswap_func_t(void *buf, size_t size);

typedef struct dmu_object_type_info {
	dmu_object_byteswap_t	ot_byteswap;
	boolean_t		ot_metadata;
	boolean_t		ot_dbuf_metadata_cache;
	char			*ot_name;
} dmu_object_type_info_t;

typedef struct dmu_object_byteswap_info {
	arc_byteswap_func_t	*ob_func;
	char			*ob_name;
} dmu_object_byteswap_info_t;

extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];

/*
 * Get information on a DMU object.
 *
 * Return 0 on success or ENOENT if object is not allocated.
 *
 * If doi is NULL, the call just indicates whether the object exists.
 */
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);

/* Like dmu_object_info, but faster if you have a held dnode in hand.
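 *
 * (Editorial sketch of the general dmu_object_info() pattern, not part
 * of the original comment; "object" is assumed to be a valid object
 * number, e.g. one returned by dmu_object_next():)
 *
 *	dmu_object_info_t doi;
 *	if (dmu_object_info(os, object, &doi) == 0)
 *		blksize = doi.doi_data_block_size;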
 */
void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);

/* Like dmu_object_info, but faster if you have a held dbuf in hand. */
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);

/*
 * Like dmu_object_info_from_db, but faster still when you only care about
 * the size. This is specifically optimized for zfs_getattr().
 */
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
    u_longlong_t *nblk512);

typedef struct dmu_objset_stats {
	uint64_t dds_num_clones; /* number of clones of this */
	uint64_t dds_creation_txg;
	uint64_t dds_guid;
	dmu_objset_type_t dds_type;
	uint8_t dds_is_snapshot;
	uint8_t dds_inconsistent;
	char dds_origin[ZFS_MAX_DATASET_NAME_LEN];
} dmu_objset_stats_t;

/*
 * Get stats on a dataset.
 */
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);

/*
 * Add entries to the nvlist for all the objset's properties. See
 * zfs_prop_table[] and zfs(1m) for details on the properties.
 */
void dmu_objset_stats(objset_t *os, struct nvlist *nv);

/*
 * Get the space usage statistics for statvfs().
 *
 * refdbytes is the amount of space "referenced" by this objset.
 * availbytes is the amount of space available to this objset, taking
 * into account quotas & reservations, assuming that no other objsets
 * use the space first. These values correspond to the 'referenced' and
 * 'available' properties, described in the zfs(1m) manpage.
 *
 * usedobjs and availobjs are the numbers of objects currently allocated
 * and available.
 */
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp);

/*
 * The fsid_guid is a 56-bit ID that can change to avoid collisions.
 * (Contrast with the ds_guid which is a 64-bit ID that will never
 * change, so there is a small probability that it will collide.)
 */
uint64_t dmu_objset_fsid_guid(objset_t *os);

/*
 * Get the [cm]time for an objset's snapshot dir.
 */
timestruc_t dmu_objset_snap_cmtime(objset_t *os);

int dmu_objset_is_snapshot(objset_t *os);

extern struct spa *dmu_objset_spa(objset_t *os);
extern struct zilog *dmu_objset_zil(objset_t *os);
extern struct dsl_pool *dmu_objset_pool(objset_t *os);
extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
    int maxlen, boolean_t *conflict);
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp);

typedef int objset_used_cb_t(dmu_object_type_t bonustype,
    void *bonus, uint64_t *userp, uint64_t *groupp);
extern void dmu_objset_register_type(dmu_objset_type_t ost,
    objset_used_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
extern void *dmu_objset_get_user(objset_t *os);

/*
 * Return the txg number for the given assigned transaction.
 */
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);

/*
 * Synchronous write.
 * If a parent zio is provided this function initiates a write on the
 * provided buffer as a child of the parent zio.
 * In the absence of a parent zio, the write is completed synchronously.
 * At write completion, blk is filled with the bp of the written block.
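 *
 * A rough sketch of how a ZIL get_data path might drive this (editorial
 * illustration only; "lwb", "txg", and the done callback stand in for
 * caller state and are not defined here):
 *
 *	zgd_t *zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 *	zgd->zgd_lwb = lwb;
 *	error = dmu_sync(zio, txg, my_get_done, zgd);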
 * Note that while the data covered by this function will be on stable
 * storage when the write completes, this new data does not become a
 * permanent part of the file until the associated transaction commits.
 */

/*
 * {zfs,zvol,ztest}_get_done() args
 */
typedef struct zgd {
	struct lwb	*zgd_lwb;
	struct blkptr	*zgd_bp;
	dmu_buf_t	*zgd_db;
	struct rl	*zgd_rl;
	void		*zgd_private;
} zgd_t;

typedef void dmu_sync_cb_t(zgd_t *arg, int error);
int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd);

/*
 * Find the next hole or data block in the file starting at *off.
 * Return the found offset in *off. Return ESRCH for end of file.
 */
int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
    uint64_t *off);

/*
 * Check if a DMU object has any dirty blocks. If so, sync out
 * all pending transaction groups. Otherwise, this function
 * does not alter DMU state. This could be improved to only sync
 * out the necessary transaction groups for this particular
 * object.
 */
int dmu_object_wait_synced(objset_t *os, uint64_t object);

/*
 * Initial setup and final teardown.
 */
extern void dmu_init(void);
extern void dmu_fini(void);

typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
    uint64_t object, uint64_t offset, int len);
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
    dmu_traverse_cb_t cb, void *arg);

int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
    struct file *fp, offset_t *offp);

/* CRC64 table */
#define	ZFS_CRC64_POLY	0xC96C5795D7870F42ULL	/* ECMA-182, reflected form */
extern uint64_t zfs_crc64_table[256];

extern int zfs_mdcomp_disable;

#ifdef __cplusplus
}
#endif

#endif	/* _SYS_DMU_H */
Index: head/sys/cddl/contrib/opensolaris
===================================================================
--- head/sys/cddl/contrib/opensolaris	(revision 337168)
+++ head/sys/cddl/contrib/opensolaris	(revision 337169)

Property changes on: head/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /vendor-sys/illumos/dist:r337167