Index: vendor/illumos/dist/man/man1m/zfs.1m =================================================================== --- vendor/illumos/dist/man/man1m/zfs.1m (revision 247315) +++ vendor/illumos/dist/man/man1m/zfs.1m (revision 247316) @@ -1,3980 +1,4014 @@ '\" t .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Joshua M. Clulow .\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012, Joyent, Inc. All rights reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" Copyright 2013 Nexenta Systems, Inc. All Rights Reserved. .\" .TH ZFS 1M "Jan 26, 2013" .SH NAME zfs \- configures ZFS file systems .SH SYNOPSIS .LP .nf \fBzfs\fR [\fB-?\fR] .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fB-V\fR \fIsize\fR \fIvolume\fR .fi .LP .nf \fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,\fIsnap\fR[%\fIsnap\fR]]... .fi .LP .nf \fBzfs\fR \fBsnapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR... .fi .LP .nf \fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBclone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR .fi .LP .nf \fBzfs\fR \fBrename\fR [\fB-f\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBrename\fR [\fB-fp\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-H\fR][\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIproperty\fR]... [\fB-S\fR \fIproperty\fR]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]... .fi .LP .nf \fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fBall\fR | \fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume|snapshot\fR... 
.fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-v\fR] .fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBuserspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIfield\fR]... [\fB-S\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBgroupspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIfield\fR]... [\fB-S\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBmount\fR .fi .LP .nf \fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunmount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf \fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunshare\fR \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf \fBzfs\fR \fBsend\fR [\fB-DnPpRrv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ldug\fR] \fIuser\fR|\fIgroup\fR[,\fIuser\fR|\fIgroup\fR]... \fIperm\fR|\fI@setname\fR[,\fIperm\fR|\fI@setname\fR]... \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR|\fBeveryone\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]... \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]... \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]... \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rldug\fR] \fIuser\fR|\fIgroup\fR[,\fIuser\fR|\fIgroup\fR]... [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR|\fBeveryone\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|\fI@setname\fR]...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBdiff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR .fi .SH DESCRIPTION .sp .LP The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(1M). A dataset is identified by a unique path within the \fBZFS\fR namespace. For example: .sp .in +2 .nf pool/{filesystem,volume,snapshot} .fi .in -2 .sp .sp .LP where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes). .sp .LP A dataset can be one of the following: .sp .ne 2 .na \fB\fIfile system\fR\fR .ad .sp .6 .RS 4n A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems.
While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. .RE .sp .ne 2 .na \fB\fIvolume\fR\fR .ad .sp .6 .RS 4n A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. .RE .sp .ne 2 .na \fB\fIsnapshot\fR\fR .ad .sp .6 .RS 4n A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR. .RE .SS "ZFS File System Hierarchy" .sp .LP A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy. .sp .LP The root of the pool can be accessed as a file system: it can be mounted and unmounted, snapshots of it can be taken, and its properties can be set. The physical storage characteristics, however, are managed by the \fBzpool\fR(1M) command. .sp .LP See \fBzpool\fR(1M) for more information on creating and administering pools. .SS "Snapshots" .sp .LP A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more space, because it continues to reference the old data and so prevents that space from being freed. .sp .LP Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently. .sp .LP File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. .SS "Clones" .sp .LP A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. .sp .LP Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist. .sp .LP The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. .SS "Mount Points" .sp .LP Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/vfstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time. .sp .LP By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed. .sp .LP A file system can also have a mount point set in the \fBmountpoint\fR property.
This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/vfstab\fR). The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR. .sp .LP A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted. .sp .LP If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/vfstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. .SS "Zones" .sp .LP A \fBZFS\fR file system can be added to a non-global zone by using the \fBzonecfg\fR \fBadd fs\fR subcommand. A \fBZFS\fR file system that is added to a non-global zone must have its \fBmountpoint\fR property set to \fBlegacy\fR. .sp .LP The physical properties of an added file system are controlled by the global administrator. However, the zone administrator can create, modify, or destroy files within the added file system, depending on how the file system is mounted. .sp .LP A dataset can also be delegated to a non-global zone by using the \fBzonecfg\fR \fBadd dataset\fR subcommand. You cannot delegate a dataset to one zone and the children of the same dataset to another zone. The zone administrator can change properties of the dataset or any of its children. However, the \fBquota\fR property is controlled by the global administrator. .sp .LP A \fBZFS\fR volume can be added as a device to a non-global zone by using the \fBzonecfg\fR \fBadd device\fR subcommand. However, its physical properties can be modified only by the global administrator. .sp .LP For more information about \fBzonecfg\fR syntax, see \fBzonecfg\fR(1M). .sp .LP After a dataset is delegated to a non-global zone, the \fBzoned\fR property is automatically set. A zoned file system cannot be mounted in the global zone, since the zone administrator might have to set the mount point to an unacceptable value. .sp .LP The global administrator can forcibly clear the \fBzoned\fR property, though this should be done with extreme care. The global administrator should verify that all the mount points are acceptable before clearing the property. .SS "Native Properties" .sp .LP Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below. .sp .LP Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). .sp .LP The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). 
The following are all valid (and equal) specifications: .sp .in +2 .nf 1536M, 1.5g, 1.50GB .fi .in -2 .sp .sp .LP The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR. .sp .LP The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. .sp .ne 2 .na \fB\fBavailable\fR\fR .ad .sp .6 .RS 4n The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. .sp This property can also be referred to by its shortened column name, \fBavail\fR. .RE .sp .ne 2 .na \fB\fBcompressratio\fR\fR .ad .sp .6 .RS 4n For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. .RE .sp .ne 2 .na \fB\fBcreation\fR\fR .ad .sp .6 .RS 4n The time this dataset was created. .RE .sp .ne 2 .na \fB\fBclones\fR\fR .ad .sp .6 .RS 4n For snapshots, this property is a comma-separated list of filesystems or volumes which are clones of this snapshot. The clones' \fBorigin\fR property is this snapshot. If the \fBclones\fR property is not empty, then this snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). .RE .sp .ne 2 .na \fB\fBdefer_destroy\fR\fR .ad .sp .6 .RS 4n This property is \fBon\fR if the snapshot has been marked for deferred destroy by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR. .RE .sp .ne 2 .na +\fB\fBlogicalreferenced\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" accessible by this dataset. See +the \fBreferenced\fR property. The logical space ignores the effect of +the \fBcompression\fR and \fBcopies\fR properties, giving a quantity +closer to the amount of data that applications see. However, it does +include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlrefer\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBlogicalused\fR\fR +.ad +.sp .6 +.RS 4n +The amount of space that is "logically" consumed by this dataset and all +its descendents. See the \fBused\fR property. The logical space +ignores the effect of the \fBcompression\fR and \fBcopies\fR properties, +giving a quantity closer to the amount of data that applications see. +However, it does include space consumed by metadata. +.sp +This property can also be referred to by its shortened column name, +\fBlused\fR. +.RE + +.sp +.ne 2 +.na \fB\fBmounted\fR\fR .ad .sp .6 .RS 4n For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR. .RE .sp .ne 2 .na \fB\fBorigin\fR\fR .ad .sp .6 .RS 4n For cloned file systems or volumes, the snapshot from which the clone was created. See also the \fBclones\fR property. 
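.sp For example, the origin of a hypothetical clone created from the snapshot \fBpool/home/bob@monday\fR could be displayed with \fBzfs get\fR (the dataset names here are illustrative): .sp .in +2 .nf # zfs get origin pool/home/bob-clone .fi .in -2 .sp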
.RE .sp .ne 2 .na \fB\fBreferenced\fR\fR .ad .sp .6 .RS 4n The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. .sp This property can also be referred to by its shortened column name, \fBrefer\fR. .RE .sp .ne 2 .na \fB\fBrefcompressratio\fR\fR .ad .sp .6 .RS 4n The compression ratio achieved for the \fBreferenced\fR space of this dataset, expressed as a multiplier. See also the \fBcompressratio\fR property. .RE .sp .ne 2 .na \fB\fBtype\fR\fR .ad .sp .6 .RS 4n The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR. .RE .sp .ne 2 .na \fB\fBused\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that is freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. .sp When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. .sp The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(3c) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately. .RE .sp .ne 2 .na \fB\fBusedby*\fR\fR .ad .sp .6 .RS 4n The \fBusedby*\fR properties decompose the \fBused\fR property into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR + \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools. .RE .sp .ne 2 .na \fB\fBusedbychildren\fR\fR .ad .sp .6 .RS 4n The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. .RE .sp .ne 2 .na \fB\fBusedbydataset\fR\fR .ad .sp .6 .RS 4n The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any \fBrefreservation\fR and destroying any necessary snapshots or descendents). .RE .sp .ne 2 .na \fB\fBusedbyrefreservation\fR\fR .ad .sp .6 .RS 4n The amount of space used by a \fBrefreservation\fR set on this dataset, which would be freed if the \fBrefreservation\fR was removed. .RE .sp .ne 2 .na \fB\fBusedbysnapshots\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots.
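.sp For example, the full \fBusedby*\fR breakdown for a hypothetical dataset could be displayed with the \fBspace\fR shortcut of the \fBzfs list\fR subcommand (described below): .sp .in +2 .nf # zfs list -o space pool/home .fi .in -2 .sp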
.RE .sp .ne 2 .na \fB\fBuserused@\fR\fIuser\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage. .sp The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .na \fB\fBuserrefs\fR\fR .ad .sp .6 .RS 4n This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command. .RE .sp .ne 2 .na \fB\fBgroupused@\fR\fIgroup\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information. .sp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage. .RE .sp .ne 2 .na \fB\fBvolblocksize\fR=\fIblocksize\fR\fR .ad .sp .6 .RS 4n For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. .sp This property can also be referred to by its shortened column name, \fBvolblock\fR. .RE .sp .ne 2 .na \fB\fBwritten\fR\fR .ad .sp .6 .RS 4n The amount of \fBreferenced\fR space written to this dataset since the previous snapshot. .RE .sp .ne 2 .na \fB\fBwritten@\fR\fIsnapshot\fR\fR .ad .sp .6 .RS 4n The amount of \fBreferenced\fR space written to this dataset since the specified snapshot. This is the space that is referenced by this dataset but was not referenced by the specified snapshot. .sp The \fIsnapshot\fR may be specified as a short snapshot name (just the part after the \fB@\fR), in which case it will be interpreted as a snapshot in the same filesystem as this dataset. The \fIsnapshot\fR may also be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), which for clones may be a snapshot in the origin's filesystem (or the origin of the origin's filesystem, etc). .RE .sp .LP The following native properties can be used to change the behavior of a \fBZFS\fR dataset. .sp .ne 2 .na \fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR .ad .sp .6 .RS 4n Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions.
The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit. .sp When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance with the requested mode from the application. .RE .sp .ne 2 .na \fB\fBaclmode\fR=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR | \fBrestricted\fR\fR .ad .sp .6 .RS 4n Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an \fBaclmode\fR property of \fBdiscard\fR (the default) deletes all \fBACL\fR entries that do not represent the mode of the file. An \fBaclmode\fR property of \fBgroupmask\fR reduces permissions granted in all \fBALLOW\fR entries found in the \fBACL\fR such that they are no greater than the group permissions specified by \fBchmod\fR(2). A file system with an \fBaclmode\fR property of \fBpassthrough\fR indicates that no changes are made to the \fBACL\fR other than creating or updating the necessary \fBACL\fR entries to represent the new mode of the file or directory. An \fBaclmode\fR property of \fBrestricted\fR will cause the \fBchmod\fR(2) operation to return an error when used on any file or directory which has a non-trivial \fBACL\fR whose entries cannot be represented by a mode. \fBchmod\fR(2) is required to change the set user ID, set group ID, or sticky bits on a file or directory, as they do not have equivalent \fBACL\fR entries. In order to use \fBchmod\fR(2) on a file or directory with a non-trivial \fBACL\fR when \fBaclmode\fR is set to \fBrestricted\fR, you must first remove all \fBACL\fR entries which do not represent the current mode. .RE .sp .ne 2 .na \fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. .RE .sp .ne 2 .na \fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR .ad .sp .6 .RS 4n If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics. .sp When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly.
The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command. .sp This property is not inherited. .RE .sp .ne 2 .na \fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR .ad .sp .6 .RS 4n Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. .sp Changing this property affects only newly-written data. .RE .sp .ne 2 .na \fB\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR | \fBlz4\fR\fR .ad .sp .6 .RS 4n Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)). The \fBzle\fR compression algorithm compresses runs of zeros. .sp The \fBlz4\fR compression algorithm is a high-performance replacement for the \fBlzjb\fR algorithm. It features significantly faster compression and decompression, as well as a moderately higher compression ratio than \fBlzjb\fR, but can only be used on pools with the \fBlz4_compress\fR feature set to \fIenabled\fR. See \fBzpool-features\fR(5) for details on ZFS feature flags and the \fBlz4_compress\fR feature. .sp This property can also be referred to by its shortened column name, \fBcompress\fR. Changing this property affects only newly-written data. .RE .sp .ne 2 .na \fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR .ad .sp .6 .RS 4n Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations. .sp Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option. .RE .sp .ne 2 .na \fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether device nodes can be opened on this file system. The default value is \fBon\fR. .RE .sp .ne 2 .na \fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether processes can be executed from within this file system. The default value is \fBon\fR. .RE .sp .ne 2 .na \fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR .ad .sp .6 .RS 4n Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. .sp When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted.
Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. .RE .sp .ne 2 .na \fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is unmounted and remounted. See \fBmount\fR(1M) for more information on \fBnbmand\fR mounts. .RE .sp .ne 2 .na \fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .na \fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. .sp Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota. .RE .sp .ne 2 .na \fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified user. User space consumption is identified by the \fBuserused@\fR\fIuser\fR property. .sp Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message. See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can only access their own quota. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota. .sp This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .na \fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified group. Group space consumption is identified by the \fBgroupused@\fR\fIgroup\fR property. .sp Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas. .RE .sp .ne 2 .na \fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether this dataset can be modified. The default value is \fBoff\fR.
.sp This property can also be referred to by its shortened column name, \fBrdonly\fR. .RE .sp .ne 2 .na \fB\fBrecordsize\fR=\fIsize\fR\fR .ad .sp .6 .RS 4n Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns. .sp For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. .sp The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. .sp Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. .sp This property can also be referred to by its shortened column name, \fBrecsize\fR. .RE .sp .ne 2 .na \fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. .RE .sp .ne 2 .na \fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. .sp If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. .sp This property can also be referred to by its shortened column name, \fBrefreserv\fR. .RE .sp .ne 2 .na \fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. .sp This property can also be referred to by its shortened column name, \fBreserv\fR. .RE .sp .ne 2 .na \fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .na \fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR. .RE .sp .ne 2 .na \fB\fBshareiscsi\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Like the \fBsharenfs\fR property, \fBshareiscsi\fR indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. 
The acceptable values for this property are \fBon\fR, \fBoff\fR, and \fBtype=disk\fR. The default value is \fBoff\fR. In the future, other target types, such as \fBtape\fR, might be supported. .sp You might want to set \fBshareiscsi=on\fR for a file system so that all \fBZFS\fR volumes within the file system are shared by default. However, setting this property on a file system has no direct effect. .RE .sp .ne 2 .na \fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR .ad .sp .6 .RS 4n Controls whether the file system is shared by using the Solaris \fBCIFS\fR service, and what options are to be used. A file system with the \fBsharesmb\fR property set to \fBoff\fR is managed through traditional tools such as \fBsharemgr\fR(1M). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBsharemgr\fR(1M) command is invoked with no options. Otherwise, the \fBsharemgr\fR(1M) command is invoked with options equivalent to the contents of this property. .sp Because \fBSMB\fR shares require a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name that would be illegal in the resource name are replaced with underscore (\fB_\fR) characters. A pseudo property "name" is also supported that allows you to replace the dataset name with a specified name. The specified name is then used to replace the prefix dataset in the case of inheritance. For example, if the dataset \fBdata/home/john\fR is set to \fBname=john\fR, then \fBdata/home/john\fR has a resource name of \fBjohn\fR. If a child dataset \fBdata/home/john/backups\fR exists, it has a resource name of \fBjohn_backups\fR. .sp When SMB shares are created, the SMB share name appears as an entry in the \fB\&.zfs/shares\fR directory. You can use the \fBls\fR or \fBchmod\fR command to display the share-level ACLs on the entries in this directory. .sp When the \fBsharesmb\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously set to \fBoff\fR, or if they were shared before the property was changed. If the new property is set to \fBoff\fR, the file systems are unshared. .RE .sp .ne 2 .na \fB\fBsharenfs\fR=\fBon\fR | \fBoff\fR | \fIopts\fR\fR .ad .sp .6 .RS 4n Controls whether the file system is shared via \fBNFS\fR, and what options are used. A file system with a \fBsharenfs\fR property of \fBoff\fR is managed through traditional tools such as \fBshare\fR(1M), \fBunshare\fR(1M), and \fBdfstab\fR(4). Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBshare\fR(1M) command is invoked with no options. Otherwise, the \fBshare\fR(1M) command is invoked with options equivalent to the contents of this property. .sp When the \fBsharenfs\fR property is changed for a dataset, the dataset and any children inheriting the property are re-shared with the new options, only if the property was previously \fBoff\fR, or if they were shared before the property was changed. If the new property is \fBoff\fR, the file systems are unshared. .RE .sp .ne 2 .na \fB\fBlogbias\fR=\fBlatency\fR | \fBthroughput\fR\fR .ad .sp .6 .RS 4n Provides a hint to ZFS about handling of synchronous requests in this dataset.
If \fBlogbias\fR is set to \fBlatency\fR (the default), ZFS will use pool log devices (if configured) to handle the requests at low latency. If \fBlogbias\fR is set to \fBthroughput\fR, ZFS will not use configured pool log devices. ZFS will instead optimize synchronous operations for global pool throughput and efficient use of resources. .RE .sp .ne 2 .na \fB\fBsnapdir\fR=\fBhidden\fR | \fBvisible\fR\fR .ad .sp .6 .RS 4n Controls whether the \fB\&.zfs\fR directory is hidden or visible in the root of the file system as discussed in the "Snapshots" section. The default value is \fBhidden\fR. .RE .sp .ne 2 .na \fB\fBsync\fR=\fBdefault\fR | \fBalways\fR | \fBdisabled\fR\fR .ad .sp .6 .RS 4n Controls the behavior of synchronous requests (e.g. fsync, O_DSYNC). \fBdefault\fR (the default) is the POSIX-specified behavior of ensuring all synchronous requests are written to stable storage and all devices are flushed to ensure data is not cached by device controllers. \fBalways\fR causes every file system transaction to be written and flushed before its system call returns. This has a large performance penalty. \fBdisabled\fR disables synchronous requests. File system transactions are only committed to stable storage periodically. This option will give the highest performance. However, it is very dangerous as ZFS would be ignoring the synchronous transaction demands of applications such as databases or NFS. Administrators should only use this option when the risks are understood. .RE .sp .ne 2 .na \fB\fBversion\fR=\fB1\fR | \fB2\fR | \fBcurrent\fR\fR .ad .sp .6 .RS 4n The on-disk version of this file system, which is independent of the pool version. This property can only be set to later supported versions. See the \fBzfs upgrade\fR command. .RE .sp .ne 2 .na \fB\fBvolsize\fR=\fIsize\fR\fR .ad .sp .6 .RS 4n For volumes, specifies the logical size of the volume. By default, creating a volume establishes a reservation of equal size. For storage pools with a version number of 9 or higher, a \fBrefreservation\fR is set instead. Any changes to \fBvolsize\fR are reflected in an equivalent change to the reservation (or \fBrefreservation\fR). The \fBvolsize\fR can only be set to a multiple of \fBvolblocksize\fR, and cannot be zero. .sp The reservation is kept equal to the volume's logical size to prevent unexpected behavior for consumers. Without the reservation, the volume could run out of space, resulting in undefined behavior or data corruption, depending on how the volume is used. These effects can also occur when the volume size is changed while it is in use (particularly when shrinking the size). Extreme care should be used when adjusting the volume size. .sp Though not recommended, a "sparse volume" (also known as "thin provisioning") can be created by specifying the \fB-s\fR option to the \fBzfs create -V\fR command, or by changing the reservation after the volume has been created. A "sparse volume" is a volume where the reservation is less than the volume size. Consequently, writes to a sparse volume can fail with \fBENOSPC\fR when the pool is low on space. For a sparse volume, changes to \fBvolsize\fR are not reflected in the reservation. .RE .sp .ne 2 .na \fB\fBvscan\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether regular files should be scanned for viruses when a file is opened and closed. In addition to enabling this property, the virus scan service must also be enabled for virus scanning to occur. The default value is \fBoff\fR.
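.sp For example, virus scanning could be enabled on a hypothetical file system with: .sp .in +2 .nf # zfs set vscan=on pool/export/home .fi .in -2 .sp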
.RE .sp .ne 2 .na \fB\fBxattr\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether extended attributes are enabled for this file system. The default value is \fBon\fR. .RE .sp .ne 2 .na \fB\fBzoned\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the dataset is managed from a non-global zone. See the "Zones" section for more information. The default value is \fBoff\fR. .RE .sp .LP The following three properties cannot be changed after the file system is created, and therefore, should be set when the file system is created. If the properties are not set with the \fBzfs create\fR or \fBzpool create\fR commands, these properties are inherited from the parent dataset. If the parent dataset lacks these properties due to having been created prior to these features being supported, the new file system will have the default values for these properties. .sp .ne 2 .na \fB\fBcasesensitivity\fR=\fBsensitive\fR | \fBinsensitive\fR | \fBmixed\fR\fR .ad .sp .6 .RS 4n Indicates whether the file name matching algorithm used by the file system should be case-sensitive, case-insensitive, or allow a combination of both styles of matching. The default value for the \fBcasesensitivity\fR property is \fBsensitive\fR. Traditionally, UNIX and POSIX file systems have case-sensitive file names. .sp The \fBmixed\fR value for the \fBcasesensitivity\fR property indicates that the file system can support requests for both case-sensitive and case-insensitive matching behavior. Currently, case-insensitive matching behavior on a file system that supports mixed behavior is limited to the Solaris CIFS server product. For more information about the \fBmixed\fR value behavior, see the \fISolaris ZFS Administration Guide\fR. .RE .sp .ne 2 .na \fB\fBnormalization\fR = \fBnone\fR | \fBformC\fR | \fBformD\fR | \fBformKC\fR | \fBformKD\fR\fR .ad .sp .6 .RS 4n Indicates whether the file system should perform a \fBunicode\fR normalization of file names whenever two file names are compared, and which normalization algorithm should be used. File names are always stored unmodified, names are normalized as part of any comparison process. If this property is set to a legal value other than \fBnone\fR, and the \fButf8only\fR property was left unspecified, the \fButf8only\fR property is automatically set to \fBon\fR. The default value of the \fBnormalization\fR property is \fBnone\fR. This property cannot be changed after the file system is created. .RE .sp .ne 2 .na \fB\fButf8only\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Indicates whether the file system should reject file names that include characters that are not present in the \fBUTF-8\fR character code set. If this property is explicitly set to \fBoff\fR, the normalization property must either not be explicitly set or be set to \fBnone\fR. The default value for the \fButf8only\fR property is \fBoff\fR. This property cannot be changed after the file system is created. .RE .sp .LP The \fBcasesensitivity\fR, \fBnormalization\fR, and \fButf8only\fR properties are also new permissions that can be assigned to non-privileged users by using the \fBZFS\fR delegated administration feature. .SS "Temporary Mount Point Properties" .sp .LP When a file system is mounted, either through \fBmount\fR(1M) for legacy mounts or the \fBzfs mount\fR command for normal file systems, its mount options are set according to its properties. 
The correlation between properties and mount options is as follows: .sp .in +2 .nf PROPERTY MOUNT OPTION devices devices/nodevices exec exec/noexec readonly ro/rw setuid setuid/nosetuid xattr xattr/noxattr .fi .in -2 .sp .sp .LP In addition, these options can be set on a per-mount basis using the \fB-o\fR option, without affecting the property that is stored on disk. The values specified on the command line override the values stored in the dataset. The \fB-nosuid\fR option is an alias for \fBnodevices,nosetuid\fR. These properties are reported as "temporary" by the \fBzfs get\fR command. If the properties are changed while the dataset is mounted, the new setting overrides any temporary settings. .SS "User Properties" .sp .LP In addition to the standard native properties, \fBZFS\fR supports arbitrary user properties. User properties have no effect on \fBZFS\fR behavior, but applications or administrators can use them to annotate datasets (file systems, volumes, and snapshots). .sp .LP User property names must contain a colon (\fB:\fR) character to distinguish them from native properties. They may contain lowercase letters, numbers, and the following punctuation characters: colon (\fB:\fR), dash (\fB-\fR), period (\fB\&.\fR), and underscore (\fB_\fR). The expected convention is that the property name is divided into two portions such as \fImodule\fR\fB:\fR\fIproperty\fR, but this namespace is not enforced by \fBZFS\fR. User property names can be at most 256 characters, and cannot begin with a dash (\fB-\fR). .sp .LP When making programmatic use of user properties, it is strongly suggested to use a reversed \fBDNS\fR domain name for the \fImodule\fR component of property names to reduce the chance that two independently-developed packages use the same property name for different purposes. Property names beginning with \fBcom.sun.\fR are reserved for use by Sun Microsystems. .sp .LP The values of user properties are arbitrary strings, are always inherited, and are never validated. All of the commands that operate on properties (\fBzfs list\fR, \fBzfs get\fR, \fBzfs set\fR, and so forth) can be used to manipulate both native properties and user properties. Use the \fBzfs inherit\fR command to clear a user property. If the property is not defined in any parent dataset, it is removed entirely. Property values are limited to 1024 characters. .SS "ZFS Volumes as Swap or Dump Devices" .sp .LP During an initial installation a swap device and dump device are created on \fBZFS\fR volumes in the \fBZFS\fR root pool. By default, the swap area size is based on 1/2 the size of physical memory up to 2 Gbytes. The size of the dump device depends on the kernel's requirements at installation time. Separate \fBZFS\fR volumes must be used for the swap area and dump devices. Do not swap to a file on a \fBZFS\fR file system. A \fBZFS\fR swap file configuration is not supported. .sp .LP If you need to change your swap area or dump device after the system is installed or upgraded, use the \fBswap\fR(1M) and \fBdumpadm\fR(1M) commands. If you need to change the size of your swap area or dump device, see the \fISolaris ZFS Administration Guide\fR. .SH SUBCOMMANDS .sp .LP All subcommands that modify state are logged persistently to the pool in their original form. .sp .ne 2 .na \fB\fBzfs ?\fR\fR .ad .sp .6 .RS 4n Displays a help message. .RE .sp .ne 2 .na \fB\fBzfs create\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Creates a new \fBZFS\fR file system.
The file system is automatically mounted according to the \fBmountpoint\fR property inherited from the parent. .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully. .RE .sp .ne 2 .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property as if the command \fBzfs set\fR \fIproperty\fR=\fIvalue\fR was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options. .RE .RE .sp .ne 2 .na \fB\fBzfs create\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fB-V\fR \fIsize\fR \fIvolume\fR\fR .ad .sp .6 .RS 4n Creates a volume of the given size. The volume is exported as a block device in \fB/dev/zvol/{dsk,rdsk}/\fR\fIpath\fR, where \fIpath\fR is the name of the volume in the \fBZFS\fR namespace. The size represents the logical size as exported by the device. By default, a reservation of equal size is created. .sp \fIsize\fR is automatically rounded up to the nearest 128 Kbytes to ensure that the volume has an integral number of blocks regardless of \fIblocksize\fR. .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. Any property specified on the command line using the \fB-o\fR option is ignored. If the target filesystem already exists, the operation completes successfully. .RE .sp .ne 2 .na \fB\fB-s\fR\fR .ad .sp .6 .RS 4n Creates a sparse volume with no reservation. See \fBvolsize\fR in the Native Properties section for more information about sparse volumes. .RE .sp .ne 2 .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property as if the \fBzfs set\fR \fIproperty\fR=\fIvalue\fR command was invoked at the same time the dataset was created. Any editable \fBZFS\fR property can also be set at creation time. Multiple \fB-o\fR options can be specified. An error results if the same property is specified in multiple \fB-o\fR options. .RE .sp .ne 2 .na \fB\fB-b\fR \fIblocksize\fR\fR .ad .sp .6 .RS 4n Equivalent to \fB-o\fR \fBvolblocksize\fR=\fIblocksize\fR. If this option is specified in conjunction with \fB-o\fR \fBvolblocksize\fR, the resulting behavior is undefined. .RE .RE .sp .ne 2 .na \fBzfs destroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .ad .sp .6 .RS 4n Destroys the given dataset. By default, the command unshares any file systems that are currently shared, unmounts any file systems that are currently mounted, and refuses to destroy a dataset that has active dependents (children or clones). .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively destroy all children. .RE .sp .ne 2 .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy all dependents, including cloned file systems outside the target hierarchy. .RE .sp .ne 2 .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Force an unmount of any file systems using the \fBunmount -f\fR command. This option has no effect on non-file systems or unmounted file systems. 
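.sp For example, a hypothetical file system hierarchy that is still mounted and in use could be destroyed with: .sp .in +2 .nf # zfs destroy -rf pool/home/bob .fi .in -2 .sp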
.RE .sp .ne 2 .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what data would be deleted. .RE .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Print machine-parsable verbose information about the deleted data. .RE .sp .ne 2 .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the deleted data. .RE .sp Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .RE .sp .ne 2 .na \fBzfs destroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,\fIsnap\fR[%\fIsnap\fR]]... .ad .sp .6 .RS 4n The given snapshots are destroyed immediately if and only if the \fBzfs destroy\fR command without the \fB-d\fR option would have destroyed them. Such immediate destruction would occur, for example, if a snapshot had no clones and the user-initiated reference count were zero. .sp If a snapshot does not qualify for immediate destruction, it is marked for deferred deletion. In this state, it exists as a usable, visible snapshot until both of the preconditions listed above are met, at which point it is destroyed. .sp An inclusive range of snapshots may be specified by separating the first and last snapshots with a percent sign. The first and/or last snapshots may be left blank, in which case the filesystem's oldest or newest snapshot will be implied. .sp Multiple snapshots (or ranges of snapshots) of the same filesystem or volume may be specified in a comma-separated list of snapshots. Only the snapshot's short name (the part after the \fB@\fR) should be specified when using a range or comma-separated list to identify multiple snapshots. .sp .ne 2 .na \fB\fB-d\fR\fR .ad .sp .6 .RS 4n Defer snapshot deletion. .RE .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Destroy (or mark for deferred deletion) all snapshots with this name in descendent file systems. .RE .sp .ne 2 .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy all dependents. .RE .sp .ne 2 .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in conjunction with the \fB-v\fR or \fB-p\fR flags to determine what data would be deleted. .RE .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Print machine-parsable verbose information about the deleted data. .RE .sp .ne 2 .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the deleted data. .RE .sp Extreme care should be taken when applying either the \fB-r\fR or the \fB-R\fR options, as they can destroy large portions of a pool and cause unexpected behavior for mounted file systems in use. .RE .sp .ne 2 .na \fB\fBzfs snapshot\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIfilesystem@snapname\fR|\fIvolume@snapname\fR\fR... .ad .sp .6 .RS 4n Creates snapshots with the given names. All previous modifications by successful system calls to the file system are part of the snapshots. Snapshots are taken atomically, so that all snapshots correspond to the same moment in time. See the "Snapshots" section for details. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively create snapshots of all descendent datasets. .RE .sp .ne 2 .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property; see \fBzfs create\fR for details.
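.sp For example, a user property (see the "User Properties" section) could be attached to a set of atomically created recursive snapshots (the dataset and property names here are hypothetical): .sp .in +2 .nf # zfs snapshot -r -o com.example:note=nightly pool/home@today .fi .in -2 .sp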
.RE .RE .sp .ne 2 .na \fB\fBzfs rollback\fR [\fB-rRf\fR] \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Roll back the given dataset to a previous snapshot. When a dataset is rolled back, all data that has changed since the snapshot is discarded, and the dataset reverts to the state at the time of the snapshot. By default, the command refuses to roll back to a snapshot other than the most recent one. In order to do so, all intermediate snapshots must be destroyed by specifying the \fB-r\fR option. .sp The \fB-rR\fR options do not recursively destroy the child snapshots of a recursive snapshot. Only the top-level recursive snapshot is destroyed by either of these options. To completely roll back a recursive snapshot, you must roll back the individual child snapshots. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively destroy any snapshots more recent than the one specified. .RE .sp .ne 2 .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Recursively destroy any more recent snapshots, as well as any clones of those snapshots. .RE .sp .ne 2 .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Used with the \fB-R\fR option to force an unmount of any clone file systems that are to be destroyed. .RE .RE .sp .ne 2 .na \fB\fBzfs clone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... \fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Creates a clone of the given snapshot. See the "Clones" section for details. The target dataset can be located anywhere in the \fBZFS\fR hierarchy, and is created as the same type as the original. .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the non-existing parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. If the target filesystem or volume already exists, the operation completes successfully. .RE .sp .ne 2 .na \fB\fB-o\fR \fIproperty\fR=\fIvalue\fR\fR .ad .sp .6 .RS 4n Sets the specified property; see \fBzfs create\fR for details. .RE .RE .sp .ne 2 .na \fB\fBzfs promote\fR \fIclone-filesystem\fR\fR .ad .sp .6 .RS 4n Promotes a clone file system to no longer be dependent on its "origin" snapshot. This makes it possible to destroy the file system that the clone was created from. The clone parent-child dependency relationship is reversed, so that the origin file system becomes a clone of the specified file system. .sp The snapshot that was cloned, and any snapshots previous to this snapshot, are now owned by the promoted clone. The space they use moves from the origin file system to the promoted clone, so enough space must be available to accommodate these snapshots. No new space is consumed by this operation, but the space accounting is adjusted. The promoted clone must not have any conflicting snapshot names of its own. The \fBrename\fR subcommand can be used to rename any conflicting snapshots. .RE .sp .ne 2 .na \fB\fBzfs rename\fR [\fB-f\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fBzfs rename\fR [\fB-fp\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Renames the given dataset. The new target can be located anywhere in the \fBZFS\fR hierarchy, with the exception of snapshots. Snapshots can only be renamed within the parent file system or volume. When renaming a snapshot, the parent file system of the snapshot does not need to be specified as part of the second argument.
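.sp For example (the snapshot names are illustrative), the following two commands are therefore equivalent: .sp .in +2 .nf # \fBzfs rename pool/home/bob@yesterday pool/home/bob@2daysago\fR # \fBzfs rename pool/home/bob@yesterday @2daysago\fR .fi .in -2 .sp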
Renamed file systems can inherit new mount points, in which case they are unmounted and remounted at the new mount point. .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Creates all the nonexistent parent datasets. Datasets created in this manner are automatically mounted according to the \fBmountpoint\fR property inherited from their parent. .RE .sp .ne 2 .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Force unmount any filesystems that need to be unmounted in the process. .RE .RE .sp .ne 2 .na \fB\fBzfs rename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Recursively rename the snapshots of all descendent datasets. Snapshots are the only type of dataset that can be renamed recursively. .RE .sp .ne 2 .na \fB\fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-H\fR] [\fB-o\fR \fIproperty\fR[,\fIproperty\fR]...] [ \fB-t\fR \fItype\fR[,\fItype\fR]...] [ \fB-s\fR \fIproperty\fR ]... [ \fB-S\fR \fIproperty\fR ]... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR]...\fR .ad .sp .6 .RS 4n Lists the property information for the given datasets in tabular form. Datasets can be specified by absolute or relative pathname. By default, all file systems and volumes are displayed. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR (the default is \fBoff\fR). The following fields are displayed: \fBname,used,available,referenced,mountpoint\fR. .sp .ne 2 .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Used for scripting mode. Do not print headers, and separate fields by a single tab instead of arbitrary white space. .RE .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset on the command line. .RE .sp .ne 2 .na \fB\fB-d\fR \fIdepth\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children. .RE .sp .ne 2 .na \fB\fB-o\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n A comma-separated list of properties to display. The property must be: .RS +4 .TP .ie t \(bu .el o One of the properties described in the "Native Properties" section .RE .RS +4 .TP .ie t \(bu .el o A user property .RE .RS +4 .TP .ie t \(bu .el o The value \fBname\fR to display the dataset name .RE .RS +4 .TP .ie t \(bu .el o The value \fBspace\fR to display space usage properties on file systems and volumes. This is a shortcut for specifying \fB-o name,avail,used,usedsnap,usedds,usedrefreserv,usedchild\fR \fB-t filesystem,volume\fR. .RE .RE .sp .ne 2 .na \fB\fB-s\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n A property for sorting the output by column in ascending order based on the value of the property. The property must be one of the properties described in the "Properties" section, or the special value \fBname\fR to sort by the dataset name. Multiple properties can be specified at one time using multiple \fB-s\fR property options. Multiple \fB-s\fR options are evaluated from left to right in decreasing order of importance. .sp The following is a list of sorting criteria: .RS +4 .TP .ie t \(bu .el o Numeric types sort in numeric order. .RE .RS +4 .TP .ie t \(bu .el o String types sort in alphabetical order. .RE .RS +4 .TP .ie t \(bu .el o If a property's type is inappropriate for sorting a given row, that row sorts to the bottom, regardless of the specified ordering. .RE .RS +4 .TP .ie t \(bu .el o If no sorting options are specified, the existing behavior of \fBzfs list\fR is preserved.
.RE .RE .sp .ne 2 .na \fB\fB-S\fR \fIproperty\fR\fR .ad .sp .6 .RS 4n Same as the \fB-s\fR option, but sorts by property in descending order. .RE .sp .ne 2 .na \fB\fB-t\fR \fItype\fR\fR .ad .sp .6 .RS 4n A comma-separated list of types to display, where \fItype\fR is one of \fBfilesystem\fR, \fBsnapshot\fR, \fBvolume\fR, or \fBall\fR. For example, specifying \fB-t snapshot\fR displays only snapshots. .RE .RE .sp .ne 2 .na \fB\fBzfs set\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Sets the property to the given value for each dataset. Only some properties can be edited. See the "Properties" section for more information on what properties can be set and acceptable values. Numeric values can be specified as exact values, or in a human-readable form with a suffix of \fBB\fR, \fBK\fR, \fBM\fR, \fBG\fR, \fBT\fR, \fBP\fR, \fBE\fR, \fBZ\fR (for bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes, exabytes, or zettabytes, respectively). User properties can be set on snapshots. For more information, see the "User Properties" section. .RE .sp .ne 2 .na \fB\fBzfs get\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR] [\fB-Hp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-t\fR \fItype\fR[,\fItype\fR]...] [\fB-s\fR \fIsource\fR[,\fIsource\fR]...] \fBall\fR | \fIproperty\fR[,\fIproperty\fR]... \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Displays properties for the given datasets. If no datasets are specified, then the command displays properties for all datasets on the system. For each property, the following columns are displayed: .sp .in +2 .nf name Dataset name property Property name value Property value source Property source. Can be local, default, temporary, inherited, or none (-). .fi .in -2 .sp All columns are displayed by default, though this can be controlled by using the \fB-o\fR option. This command takes a comma-separated list of properties as described in the "Native Properties" and "User Properties" sections. .sp The special value \fBall\fR can be used to display all properties that apply to the given dataset's type (filesystem, volume, or snapshot). .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively display properties for any children. .RE .sp .ne 2 .na \fB\fB-d\fR \fIdepth\fR\fR .ad .sp .6 .RS 4n Recursively display any children of the dataset, limiting the recursion to \fIdepth\fR. A depth of \fB1\fR will display only the dataset and its direct children. .RE .sp .ne 2 .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Display output in a form more easily parsed by scripts. Any headers are omitted, and fields are explicitly separated by a single tab instead of an arbitrary amount of space. .RE .sp .ne 2 .na \fB\fB-o\fR \fIfield\fR\fR .ad .sp .6 .RS 4n A comma-separated list of columns to display. \fBname,property,value,source\fR is the default value. .RE .sp .ne 2 .na \fB\fB-s\fR \fIsource\fR\fR .ad .sp .6 .RS 4n A comma-separated list of sources to display. Those properties coming from a source other than those in this list are ignored. Each source must be one of the following: \fBlocal,default,inherited,temporary,none\fR. The default value is all sources. .RE .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Display numbers in parseable (exact) values. .RE .RE .sp .ne 2 .na \fB\fBzfs inherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Clears the specified property, causing it to be inherited from an ancestor.
If no ancestor has the property set, then the default value is used. See the "Properties" section for a listing of default values, and details on which properties can be inherited. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively inherit the given property for all children. .RE .RE .sp .ne 2 .na \fB\fBzfs upgrade\fR [\fB-v\fR]\fR .ad .sp .6 .RS 4n Displays a list of file systems that are not the most recent version. .RE .sp .ne 2 .na \fB\fBzfs upgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] [\fB-a\fR | \fIfilesystem\fR]\fR .ad .sp .6 .RS 4n Upgrades file systems to a new on-disk version. Once this is done, the file systems will no longer be accessible on systems running older versions of the software. \fBzfs send\fR streams generated from new snapshots of these file systems cannot be accessed on systems running older versions of the software. .sp In general, the file system version is independent of the pool version. See \fBzpool\fR(1M) for information on the \fBzpool upgrade\fR command. .sp In some cases, the file system version and the pool version are interrelated and the pool version must be upgraded before the file system version can be upgraded. .sp .ne 2 .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Upgrade all file systems on all imported pools. .RE .sp .ne 2 .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Upgrade the specified file system. .RE .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Upgrade the specified file system and all descendent file systems. .RE .sp .ne 2 .na \fB\fB-V\fR \fIversion\fR\fR .ad .sp .6 .RS 4n Upgrade to the specified \fIversion\fR. If the \fB-V\fR flag is not specified, this command upgrades to the most recent version. This option can only be used to increase the version number, and only up to the most recent version supported by this software. .RE .RE .sp .ne 2 .na \fBzfs\fR \fBuserspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIfield\fR]... [\fB-S\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] \fIfilesystem\fR|\fIsnapshot\fR .ad .sp .6 .RS 4n Displays space consumed by, and quotas on, each user in the specified filesystem or snapshot. This corresponds to the \fBuserused@\fR\fIuser\fR and \fBuserquota@\fR\fIuser\fR properties. .sp .ne 2 .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Print numeric ID instead of user/group name. .RE .sp .ne 2 .na \fB\fB-H\fR\fR .ad .sp .6 .RS 4n Do not print headers, use tab-delimited output. .RE .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Use exact (parsable) numeric output. .RE .sp .ne 2 .na \fB\fB-o\fR \fIfield\fR[,\fIfield\fR]...\fR .ad .sp .6 .RS 4n Display only the specified fields from the following set: \fBtype, name, used, quota\fR. The default is to display all fields. .RE .sp .ne 2 .na \fB\fB-s\fR \fIfield\fR\fR .ad .sp .6 .RS 4n Sort output by this field. The \fB-s\fR and \fB-S\fR flags may be specified multiple times to sort first by one field, then by another. The default is \fB-s type\fR \fB-s name\fR. .RE .sp .ne 2 .na \fB\fB-S\fR \fIfield\fR\fR .ad .sp .6 .RS 4n Sort by this field in reverse order. See \fB-s\fR. .RE .sp .ne 2 .na \fB\fB-t\fR \fItype\fR[,\fItype\fR]...\fR .ad .sp .6 .RS 4n Print only the specified types from the following set: \fBall, posixuser, smbuser, posixgroup, smbgroup\fR. The default is \fB-t posixuser,smbuser\fR. The default can be changed to include group types. .RE .sp .ne 2 .na \fB\fB-i\fR\fR .ad .sp .6 .RS 4n Translate SID to POSIX ID. The POSIX ID may be ephemeral if no mapping exists.
Normal POSIX interfaces (for example, \fBstat\fR(2), \fBls\fR \fB-l\fR) perform this translation, so the \fB-i\fR option allows the output from \fBzfs userspace\fR to be compared directly with those utilities. However, \fB-i\fR may lead to confusion if some files were created by an SMB user before an SMB-to-POSIX name mapping was established. In such a case, some files will be owned by the SMB entity and some by the POSIX entity. However, the \fB-i\fR option will report that the POSIX entity has the total usage and quota for both. .RE .RE .sp .ne 2 .na \fBzfs\fR \fBgroupspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,\fIfield\fR]...] [\fB-s\fR \fIfield\fR]... [\fB-S\fR \fIfield\fR]... [\fB-t\fR \fItype\fR[,\fItype\fR]...] \fIfilesystem\fR|\fIsnapshot\fR .ad .sp .6 .RS 4n Displays space consumed by, and quotas on, each group in the specified filesystem or snapshot. This subcommand is identical to \fBzfs userspace\fR, except that the default types to display are \fB-t posixgroup,smbgroup\fR. .RE .sp .ne 2 .na \fB\fBzfs mount\fR\fR .ad .sp .6 .RS 4n Displays all \fBZFS\fR file systems currently mounted. .RE .sp .ne 2 .na \fB\fBzfs mount\fR [\fB-vO\fR] [\fB-o\fR \fIoptions\fR] \fB-a\fR | \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Mounts \fBZFS\fR file systems. Invoked automatically as part of the boot process. .sp .ne 2 .na \fB\fB-o\fR \fIoptions\fR\fR .ad .sp .6 .RS 4n An optional, comma-separated list of mount options to use temporarily for the duration of the mount. See the "Temporary Mount Point Properties" section for details. .RE .sp .ne 2 .na \fB\fB-O\fR\fR .ad .sp .6 .RS 4n Perform an overlay mount. See \fBmount\fR(1M) for more information. .RE .sp .ne 2 .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Report mount progress. .RE .sp .ne 2 .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Mount all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Mount the specified filesystem. .RE .RE .sp .ne 2 .na \fB\fBzfs unmount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unmounts currently mounted \fBZFS\fR file systems. Invoked automatically as part of the shutdown process. .sp .ne 2 .na \fB\fB-f\fR\fR .ad .sp .6 .RS 4n Forcefully unmount the file system, even if it is currently in use. .RE .sp .ne 2 .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Unmount all available \fBZFS\fR file systems. Invoked automatically as part of the shutdown process. .RE .sp .ne 2 .na \fB\fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unmount the specified filesystem. The command can also be given a path to a \fBZFS\fR file system mount point on the system. .RE .RE .sp .ne 2 .na \fB\fBzfs share\fR \fB-a\fR | \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Shares available \fBZFS\fR file systems. .sp .ne 2 .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Share all available \fBZFS\fR file systems. Invoked automatically as part of the boot process. .RE .sp .ne 2 .na \fB\fIfilesystem\fR\fR .ad .sp .6 .RS 4n Share the specified filesystem according to the \fBsharenfs\fR and \fBsharesmb\fR properties. File systems are shared when the \fBsharenfs\fR or \fBsharesmb\fR property is set. .RE .RE .sp .ne 2 .na \fB\fBzfs unshare\fR \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unshares currently shared \fBZFS\fR file systems. This is invoked automatically as part of the shutdown process. .sp .ne 2 .na \fB\fB-a\fR\fR .ad .sp .6 .RS 4n Unshare all available \fBZFS\fR file systems. Invoked automatically as part of the shutdown process.
.RE .sp .ne 2 .na \fB\fIfilesystem\fR|\fImountpoint\fR\fR .ad .sp .6 .RS 4n Unshare the specified filesystem. The command can also be given a path to a \fBZFS\fR file system shared on the system. .RE .RE .sp .ne 2 .na \fBzfs send\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .ad .sp .6 .RS 4n Creates a stream representation of the second \fIsnapshot\fR, which is written to standard output. The output can be redirected to a file or to a different system (for example, using \fBssh\fR(1)). By default, a full stream is generated. .sp .ne 2 .na \fB\fB-i\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Generate an incremental stream from the first \fIsnapshot\fR to the second \fIsnapshot\fR. The incremental source (the first \fIsnapshot\fR) can be specified as the last component of the snapshot name (for example, the part after the \fB@\fR), and it is assumed to be from the same file system as the second \fIsnapshot\fR. .sp If the destination is a clone, the source may be the origin snapshot, which must be fully specified (for example, \fBpool/fs@origin\fR, not just \fB@origin\fR). .RE .sp .ne 2 .na \fB\fB-I\fR \fIsnapshot\fR\fR .ad .sp .6 .RS 4n Generate a stream package that sends all intermediary snapshots from the first snapshot to the second snapshot. For example, \fB-I @a fs@d\fR is similar to \fB-i @a fs@b; -i @b fs@c; -i @c fs@d\fR. The incremental source snapshot may be specified as with the \fB-i\fR option. .RE .sp .ne 2 .na \fB\fB-R\fR\fR .ad .sp .6 .RS 4n Generate a replication stream package, which will replicate the specified filesystem, and all descendent file systems, up to the named snapshot. When received, all properties, snapshots, descendent file systems, and clones are preserved. .sp If the \fB-i\fR or \fB-I\fR flags are used in conjunction with the \fB-R\fR flag, an incremental replication stream is generated. The current values of properties, and current snapshot and file system names are set when the stream is received. If the \fB-F\fR flag is specified when this stream is received, snapshots and file systems that do not exist on the sending side are destroyed. .RE .sp .ne 2 .na \fB\fB-D\fR\fR .ad .sp .6 .RS 4n Generate a deduplicated stream. Blocks which would have been sent multiple times in the send stream will only be sent once. The receiving system must also support this feature to receive a deduplicated stream. This flag can be used regardless of the dataset's \fBdedup\fR property, but performance will be much better if the filesystem uses a dedup-capable checksum (e.g., \fBsha256\fR). .RE .sp .ne 2 .na \fB\fB-p\fR\fR .ad .sp .6 .RS 4n Include the dataset's properties in the stream. This flag is implicit when \fB-R\fR is specified. The receiving system must also support this feature. .RE .sp .ne 2 .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Do a dry-run ("No-op") send. Do not generate any actual send data. This is useful in conjunction with the \fB-v\fR or \fB-P\fR flags to determine what data will be sent. .RE .sp .ne 2 .na \fB\fB-P\fR\fR .ad .sp .6 .RS 4n Print machine-parsable verbose information about the stream package generated. .RE .sp .ne 2 .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the stream package generated. This information includes a per-second report of how much data has been sent. .RE The format of the stream is committed. You will be able to receive your streams on future versions of \fBZFS\fR.
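.sp For example (the snapshot names are illustrative), a dry run combined with \fB-v\fR reports how much data an incremental stream would contain, without sending anything: .sp .in +2 .nf # \fBzfs send -nv -i @yesterday pool/home@today\fR .fi .in -2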
.RE .sp .ne 2 .na \fB\fBzfs receive\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR\fR .ad .br .na \fB\fBzfs receive\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] \fIfilesystem\fR\fR .ad .sp .6 .RS 4n Creates a snapshot whose contents are as specified in the stream provided on standard input. If a full stream is received, then a new file system is created as well. Streams are created using the \fBzfs send\fR subcommand, which by default creates a full stream. \fBzfs recv\fR can be used as an alias for \fBzfs receive\fR. .sp If an incremental stream is received, then the destination file system must already exist, and its most recent snapshot must match the incremental stream's source. For \fBzvols\fR, the destination device link is destroyed and recreated, which means the \fBzvol\fR cannot be accessed during the \fBreceive\fR operation. .sp When a snapshot replication package stream that is generated by using the \fBzfs send\fR \fB-R\fR command is received, any snapshots that do not exist on the sending location are destroyed by using the \fBzfs destroy\fR \fB-d\fR command. .sp The name of the snapshot (and file system, if a full stream is received) that this subcommand creates depends on the argument type and the use of the \fB-d\fR or \fB-e\fR options. .sp If the argument is a snapshot name, the specified \fIsnapshot\fR is created. If the argument is a file system or volume name, a snapshot with the same name as the sent snapshot is created within the specified \fIfilesystem\fR or \fIvolume\fR. If neither the \fB-d\fR nor the \fB-e\fR option is specified, the provided target snapshot name is used exactly as provided. .sp The \fB-d\fR and \fB-e\fR options cause the file system name of the target snapshot to be determined by appending a portion of the sent snapshot's name to the specified target \fIfilesystem\fR. If the \fB-d\fR option is specified, all but the first element of the sent snapshot's file system path (usually the pool name) is used and any required intermediate file systems within the specified one are created. If the \fB-e\fR option is specified, then only the last element of the sent snapshot's file system name (that is, the name of the source file system itself) is used as the target file system name. .sp .ne 2 .na \fB\fB-d\fR\fR .ad .sp .6 .RS 4n Discard the first element of the sent snapshot's file system name, using the remaining elements to determine the name of the target file system for the new snapshot as described in the paragraph above. .RE .sp .ne 2 .na \fB\fB-e\fR\fR .ad .sp .6 .RS 4n Discard all but the last element of the sent snapshot's file system name, using that element to determine the name of the target file system for the new snapshot as described in the paragraph above. .RE .sp .ne 2 .na \fB\fB-u\fR\fR .ad .sp .6 .RS 4n The file system that is associated with the received stream is not mounted. .RE .sp .ne 2 .na \fB\fB-v\fR\fR .ad .sp .6 .RS 4n Print verbose information about the stream and the time required to perform the receive operation. .RE .sp .ne 2 .na \fB\fB-n\fR\fR .ad .sp .6 .RS 4n Do not actually receive the stream. This can be useful in conjunction with the \fB-v\fR option to verify the name the receive operation would use. .RE .sp .ne 2 .na \fB\fB-F\fR\fR .ad .sp .6 .RS 4n Force a rollback of the file system to the most recent snapshot before performing the receive operation.
If receiving an incremental replication stream (for example, one generated by \fBzfs send -R -[iI]\fR), destroy snapshots and file systems that do not exist on the sending side. .RE .RE .sp .ne 2 .na \fB\fBzfs allow\fR \fIfilesystem\fR | \fIvolume\fR\fR .ad .sp .6 .RS 4n Displays permissions that have been delegated on the specified filesystem or volume. See the other forms of \fBzfs allow\fR for more information. .RE .sp .ne 2 .na \fB\fBzfs allow\fR [\fB-ldug\fR] \fIuser\fR|\fIgroup\fR[,\fIuser\fR|\fIgroup\fR]... \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]... \fIfilesystem\fR|\fIvolume\fR\fR .ad .br .na \fB\fBzfs allow\fR [\fB-ld\fR] \fB-e\fR|\fBeveryone\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]... \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Delegates \fBZFS\fR administration permission for the file systems to non-privileged users. .sp .ne 2 .na [\fB-ug\fR] \fIuser\fR|\fIgroup\fR[,\fIuser\fR|\fIgroup\fR]... .ad .sp .6 .RS 4n Specifies to whom the permissions are delegated. Multiple entities can be specified as a comma-separated list. If neither of the \fB-ug\fR options is specified, then the argument is interpreted preferentially as the keyword \fBeveryone,\fR then as a user name, and lastly as a group name. To specify a user or group named "everyone", use the \fB-u\fR or \fB-g\fR options. To specify a group with the same name as a user, use the \fB-g\fR option. .RE .sp .ne 2 .na \fB-e\fR|\fBeveryone\fR .ad .sp .6 .RS 4n Specifies that the permissions be delegated to everyone. .RE .sp .ne 2 .na \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]... .ad .sp .6 .RS 4n The permissions to delegate. Multiple permissions may be specified as a comma-separated list. Permission names are the same as \fBZFS\fR subcommand and property names. See the property list below. Property set names, which begin with an at sign (\fB@\fR), may be specified. See the \fB-s\fR form below for details. .RE .sp .ne 2 .na [\fB-ld\fR] \fIfilesystem\fR|\fIvolume\fR .ad .sp .6 .RS 4n Specifies where the permissions are delegated. If neither of the \fB-ld\fR options is specified, or both are, then the permissions are allowed for the file system or volume, and all of its descendents. If only the \fB-l\fR option is used, then the permissions are allowed "locally" only for the specified file system. If only the \fB-d\fR option is used, then the permissions are allowed only for the descendent file systems. .RE .RE .sp .LP Permissions are generally the ability to use a \fBZFS\fR subcommand or change a \fBZFS\fR property. The following permissions are available: .sp .in +2 .nf NAME TYPE NOTES allow subcommand Must also have the permission that is being allowed clone subcommand Must also have the 'create' ability and 'mount' ability in the origin file system create subcommand Must also have the 'mount' ability destroy subcommand Must also have the 'mount' ability diff subcommand Allows lookup of paths within a dataset given an object number, and the ability to create snapshots necessary to 'zfs diff'.
mount subcommand Allows mount/umount of ZFS datasets promote subcommand Must also have the 'mount' and 'promote' ability in the origin file system receive subcommand Must also have the 'mount' and 'create' ability rename subcommand Must also have the 'mount' and 'create' ability in the new parent rollback subcommand Must also have the 'mount' ability send subcommand share subcommand Allows sharing file systems over NFS or SMB protocols snapshot subcommand Must also have the 'mount' ability groupquota other Allows accessing any groupquota@... property groupused other Allows reading any groupused@... property userprop other Allows changing any user property userquota other Allows accessing any userquota@... property userused other Allows reading any userused@... property aclinherit property aclmode property atime property canmount property casesensitivity property checksum property compression property copies property devices property exec property mountpoint property nbmand property normalization property primarycache property quota property readonly property recordsize property refquota property refreservation property reservation property secondarycache property setuid property shareiscsi property sharenfs property sharesmb property snapdir property utf8only property version property volblocksize property volsize property vscan property xattr property zoned property .fi .in -2 .sp .sp .ne 2 .na \fB\fBzfs allow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]... \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Sets "create time" permissions. These permissions are granted (locally) to the creator of any newly-created descendent file system. .RE .sp .ne 2 .na \fB\fBzfs allow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]... \fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Defines or adds permissions to a permission set. The set can be used by other \fBzfs allow\fR commands for the specified file system and its descendents. Sets are evaluated dynamically, so changes to a set are immediately reflected. Permission sets follow the same naming restrictions as ZFS file systems, but the name must begin with an "at sign" (\fB@\fR), and can be no more than 64 characters long. .RE .sp .ne 2 .na \fB\fBzfs unallow\fR [\fB-rldug\fR] \fIuser\fR|\fIgroup\fR[,\fIuser\fR|\fIgroup\fR]... [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]...] \fIfilesystem\fR|\fIvolume\fR\fR .ad .br .na \fB\fBzfs unallow\fR [\fB-rld\fR] \fB-e\fR|\fBeveryone\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]...] \fIfilesystem\fR|\fIvolume\fR\fR .ad .br .na \fB\fBzfs unallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]...]\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Removes permissions that were granted with the \fBzfs allow\fR command. No permissions are explicitly denied, so other permissions granted are still in effect (for example, a permission granted by an ancestor). If no permissions are specified, then all permissions for the specified \fIuser\fR, \fIgroup\fR, or everyone are removed. Specifying \fBeveryone\fR (or using the \fB-e\fR option) only removes the permissions that were granted to everyone, not all permissions for every user and group. See the \fBzfs allow\fR command for a description of the \fB-ldugec\fR options. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively remove the permissions from this file system and all descendents.
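.sp For example (the user and dataset names are illustrative), the following removes every permission previously granted to user \fBcindys\fR on \fBtank/home\fR and all of its descendents: .sp .in +2 .nf # \fBzfs unallow -r cindys tank/home\fR .fi .in -2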
.RE .RE .sp .ne 2 .na \fB\fBzfs unallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,\fIperm\fR|@\fIsetname\fR]...]\fR .ad .br .na \fB\fIfilesystem\fR|\fIvolume\fR\fR .ad .sp .6 .RS 4n Removes permissions from a permission set. If no permissions are specified, then all permissions are removed, thus removing the set entirely. .RE .sp .ne 2 .na \fB\fBzfs hold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Adds a single reference, named with the \fItag\fR argument, to the specified snapshot or snapshots. Each snapshot has its own tag namespace, and tags must be unique within that space. .sp If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Specifies that a hold with the given tag is applied recursively to the snapshots of all descendent file systems. .RE .RE .sp .ne 2 .na \fB\fBzfs holds\fR [\fB-r\fR] \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Lists all existing user references for the given snapshot or snapshots. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Lists the holds that are set on the named descendent snapshots, in addition to listing the holds on the named snapshot. .RE .RE .sp .ne 2 .na \fB\fBzfs release\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR...\fR .ad .sp .6 .RS 4n Removes a single reference, named with the \fItag\fR argument, from the specified snapshot or snapshots. The tag must already exist for each snapshot. .sp If a hold exists on a snapshot, attempts to destroy that snapshot by using the \fBzfs destroy\fR command return \fBEBUSY\fR. .sp .ne 2 .na \fB\fB-r\fR\fR .ad .sp .6 .RS 4n Recursively releases a hold with the given tag on the snapshots of all descendent file systems. .RE .RE .sp .ne 2 .na \fBzfs diff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR .ad .sp .6 .RS 4n Display the difference between a snapshot of a given filesystem and another snapshot of that filesystem from a later time or the current contents of the filesystem. The first column is a character indicating the type of change; the other columns indicate pathname, new pathname (in case of rename), change in link count, and optionally file type and/or change time. The types of change are: .in +2 .nf - The path has been removed + The path has been created M The path has been modified R The path has been renamed .fi .in -2 .sp .ne 2 .na \fB-F\fR .ad .sp .6 .RS 4n Display an indication of the type of file, in a manner similar to the \fB-F\fR option of \fBls\fR(1). .in +2 .nf B Block device C Character device / Directory > Door | Named pipe @ Symbolic link P Event port = Socket F Regular file .fi .in -2 .RE .sp .ne 2 .na \fB-H\fR .ad .sp .6 .RS 4n Give more parseable tab-separated output, without header lines and without arrows. .RE .sp .ne 2 .na \fB-t\fR .ad .sp .6 .RS 4n Display the path's inode change time as the first column of output. .RE .RE .SH EXAMPLES .LP \fBExample 1 \fRCreating a ZFS File System Hierarchy .sp .LP The following commands create a file system named \fBpool/home\fR and a file system named \fBpool/home/bob\fR. The mount point \fB/export/home\fR is set for the parent file system, and is automatically inherited by the child file system. .sp .in +2 .nf # \fBzfs create pool/home\fR # \fBzfs set mountpoint=/export/home pool/home\fR # \fBzfs create pool/home/bob\fR .fi .in -2 .sp .LP \fBExample 2 \fRCreating a ZFS Snapshot .sp .LP The following command creates a snapshot named \fByesterday\fR.
This snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of the \fBpool/home/bob\fR file system. .sp .in +2 .nf # \fBzfs snapshot pool/home/bob@yesterday\fR .fi .in -2 .sp .LP \fBExample 3 \fRCreating and Destroying Multiple Snapshots .sp .LP The following command creates snapshots named \fByesterday\fR of \fBpool/home\fR and all of its descendent file systems. Each snapshot is mounted on demand in the \fB\&.zfs/snapshot\fR directory at the root of its file system. The second command destroys the newly created snapshots. .sp .in +2 .nf # \fBzfs snapshot -r pool/home@yesterday\fR # \fBzfs destroy -r pool/home@yesterday\fR .fi .in -2 .sp .LP \fBExample 4 \fRDisabling and Enabling File System Compression .sp .LP The following command disables the \fBcompression\fR property for all file systems under \fBpool/home\fR. The next command explicitly enables \fBcompression\fR for \fBpool/home/anne\fR. .sp .in +2 .nf # \fBzfs set compression=off pool/home\fR # \fBzfs set compression=on pool/home/anne\fR .fi .in -2 .sp .LP \fBExample 5 \fRListing ZFS Datasets .sp .LP The following command lists all active file systems and volumes in the system. Snapshots are displayed if the \fBlistsnaps\fR property is \fBon\fR. The default is \fBoff\fR. See \fBzpool\fR(1M) for more information on pool properties. .sp .in +2 .nf # \fBzfs list\fR NAME USED AVAIL REFER MOUNTPOINT pool 450K 457G 18K /pool pool/home 315K 457G 21K /export/home pool/home/anne 18K 457G 18K /export/home/anne pool/home/bob 276K 457G 276K /export/home/bob .fi .in -2 .sp .LP \fBExample 6 \fRSetting a Quota on a ZFS File System .sp .LP The following command sets a quota of 50 Gbytes for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs set quota=50G pool/home/bob\fR .fi .in -2 .sp .LP \fBExample 7 \fRListing ZFS Properties .sp .LP The following command lists all properties for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs get all pool/home/bob\fR NAME PROPERTY VALUE SOURCE pool/home/bob type filesystem - pool/home/bob creation Tue Jul 21 15:53 2009 - pool/home/bob used 21K - pool/home/bob available 20.0G - pool/home/bob referenced 21K - pool/home/bob compressratio 1.00x - pool/home/bob mounted yes - pool/home/bob quota 20G local pool/home/bob reservation none default pool/home/bob recordsize 128K default pool/home/bob mountpoint /pool/home/bob default pool/home/bob sharenfs off default pool/home/bob checksum on default pool/home/bob compression on local pool/home/bob atime on default pool/home/bob devices on default pool/home/bob exec on default pool/home/bob setuid on default pool/home/bob readonly off default pool/home/bob zoned off default pool/home/bob snapdir hidden default pool/home/bob aclmode discard default pool/home/bob aclinherit restricted default pool/home/bob canmount on default pool/home/bob shareiscsi off default pool/home/bob xattr on default pool/home/bob copies 1 default pool/home/bob version 4 - pool/home/bob utf8only off - pool/home/bob normalization none - pool/home/bob casesensitivity sensitive - pool/home/bob vscan off default pool/home/bob nbmand off default pool/home/bob sharesmb off default pool/home/bob refquota none default pool/home/bob refreservation none default pool/home/bob primarycache all default pool/home/bob secondarycache all default pool/home/bob usedbysnapshots 0 - pool/home/bob usedbydataset 21K - pool/home/bob usedbychildren 0 - pool/home/bob usedbyrefreservation 0 - .fi .in -2 .sp .sp .LP The following command gets a single property value. 
.sp .in +2 .nf # \fBzfs get -H -o value compression pool/home/bob\fR on .fi .in -2 .sp .sp .LP The following command lists all properties with local settings for \fBpool/home/bob\fR. .sp .in +2 .nf # \fBzfs get -r -s local -o name,property,value all pool/home/bob\fR NAME PROPERTY VALUE pool/home/bob quota 20G pool/home/bob compression on .fi .in -2 .sp .LP \fBExample 8 \fRRolling Back a ZFS File System .sp .LP The following command reverts the contents of \fBpool/home/anne\fR to the snapshot named \fByesterday\fR, deleting all intermediate snapshots. .sp .in +2 .nf # \fBzfs rollback -r pool/home/anne@yesterday\fR .fi .in -2 .sp .LP \fBExample 9 \fRCreating a ZFS Clone .sp .LP The following command creates a writable file system whose initial contents are the same as \fBpool/home/bob@yesterday\fR. .sp .in +2 .nf # \fBzfs clone pool/home/bob@yesterday pool/clone\fR .fi .in -2 .sp .LP \fBExample 10 \fRPromoting a ZFS Clone .sp .LP The following commands illustrate how to test out changes to a file system, and then replace the original file system with the changed one, using clones, clone promotion, and renaming: .sp .in +2 .nf # \fBzfs create pool/project/production\fR populate /pool/project/production with data # \fBzfs snapshot pool/project/production@today\fR # \fBzfs clone pool/project/production@today pool/project/beta\fR make changes to /pool/project/beta and test them # \fBzfs promote pool/project/beta\fR # \fBzfs rename pool/project/production pool/project/legacy\fR # \fBzfs rename pool/project/beta pool/project/production\fR once the legacy version is no longer needed, it can be destroyed # \fBzfs destroy pool/project/legacy\fR .fi .in -2 .sp .LP \fBExample 11 \fRInheriting ZFS Properties .sp .LP The following command causes \fBpool/home/bob\fR and \fBpool/home/anne\fR to inherit the \fBchecksum\fR property from their parent. .sp .in +2 .nf # \fBzfs inherit checksum pool/home/bob pool/home/anne\fR .fi .in -2 .sp .LP \fBExample 12 \fRRemotely Replicating ZFS Data .sp .LP The following commands send a full stream and then an incremental stream to a remote machine, restoring them into \fBpoolB/received/fs@a\fR and \fBpoolB/received/fs@b\fR, respectively. \fBpoolB\fR must contain the file system \fBpoolB/received\fR, and must not initially contain \fBpoolB/received/fs\fR. .sp .in +2 .nf # \fBzfs send pool/fs@a | \e\fR \fBssh host zfs receive poolB/received/fs@a\fR # \fBzfs send -i a pool/fs@b | ssh host \e\fR \fBzfs receive poolB/received/fs\fR .fi .in -2 .sp .LP \fBExample 13 \fRUsing the \fBzfs receive\fR \fB-d\fR Option .sp .LP The following command sends a full stream of \fBpoolA/fsA/fsB@snap\fR to a remote machine, receiving it into \fBpoolB/received/fsA/fsB@snap\fR. The \fBfsA/fsB@snap\fR portion of the received snapshot's name is determined from the name of the sent snapshot. \fBpoolB\fR must contain the file system \fBpoolB/received\fR. If \fBpoolB/received/fsA\fR does not exist, it is created as an empty file system. .sp .in +2 .nf # \fBzfs send poolA/fsA/fsB@snap | \e ssh host zfs receive -d poolB/received\fR .fi .in -2 .sp .LP \fBExample 14 \fRSetting User Properties .sp .LP The following example sets the user-defined \fBcom.example:department\fR property for a dataset. .sp .in +2 .nf # \fBzfs set com.example:department=12345 tank/accounting\fR .fi .in -2 .sp .LP \fBExample 15 \fRCreating a ZFS Volume as an iSCSI Target Device .sp .LP The following example shows how to create a \fBZFS\fR volume as an \fBiSCSI\fR target.
.sp .in +2 .nf # \fBzfs create -V 2g pool/volumes/vol1\fR # \fBzfs set shareiscsi=on pool/volumes/vol1\fR # \fBiscsitadm list target\fR Target: pool/volumes/vol1 iSCSI Name: iqn.1986-03.com.sun:02:7b4b02a6-3277-eb1b-e686-a24762c52a8c Connections: 0 .fi .in -2 .sp .sp .LP After the \fBiSCSI\fR target is created, set up the \fBiSCSI\fR initiator. For more information about the Solaris \fBiSCSI\fR initiator, see \fBiscsitadm\fR(1M). .LP \fBExample 16 \fRPerforming a Rolling Snapshot .sp .LP The following example shows how to maintain a history of snapshots with a consistent naming scheme. To keep a week's worth of snapshots, the user destroys the oldest snapshot, renames the remaining snapshots, and then creates a new snapshot, as follows: .sp .in +2 .nf # \fBzfs destroy -r pool/users@7daysago\fR # \fBzfs rename -r pool/users@6daysago @7daysago\fR # \fBzfs rename -r pool/users@5daysago @6daysago\fR # \fBzfs rename -r pool/users@4daysago @5daysago\fR # \fBzfs rename -r pool/users@3daysago @4daysago\fR # \fBzfs rename -r pool/users@2daysago @3daysago\fR # \fBzfs rename -r pool/users@yesterday @2daysago\fR # \fBzfs rename -r pool/users@today @yesterday\fR # \fBzfs snapshot -r pool/users@today\fR .fi .in -2 .sp .LP \fBExample 17 \fRSetting \fBsharenfs\fR Property Options on a ZFS File System .sp .LP The following commands show how to set \fBsharenfs\fR property options to enable \fBrw\fR access for a set of \fBIP\fR addresses and to enable root access for system \fBneo\fR on the \fBtank/home\fR file system. .sp .in +2 .nf # \fBzfs set sharenfs='rw=@123.123.0.0/16,root=neo' tank/home\fR .fi .in -2 .sp .sp .LP If you are using \fBDNS\fR for host name resolution, specify the fully qualified hostname. .LP \fBExample 18 \fRDelegating ZFS Administration Permissions on a ZFS Dataset .sp .LP The following example shows how to set permissions so that user \fBcindys\fR can create, destroy, mount, and take snapshots on \fBtank/cindys\fR. The permissions on \fBtank/cindys\fR are also displayed. .sp .in +2 .nf # \fBzfs allow cindys create,destroy,mount,snapshot tank/cindys\fR # \fBzfs allow tank/cindys\fR ------------------------------------------------------------- Local+Descendent permissions on (tank/cindys) user cindys create,destroy,mount,snapshot ------------------------------------------------------------- .fi .in -2 .sp .sp .LP Because the \fBtank/cindys\fR mount point permission is set to 755 by default, user \fBcindys\fR will be unable to mount file systems under \fBtank/cindys\fR. Set an \fBACL\fR similar to the following syntax to provide mount point access: .sp .in +2 .nf # \fBchmod A+user:cindys:add_subdirectory:allow /tank/cindys\fR .fi .in -2 .sp .LP \fBExample 19 \fRDelegating Create Time Permissions on a ZFS Dataset .sp .LP The following example shows how to allow anyone in the group \fBstaff\fR to create file systems in \fBtank/users\fR. This syntax also allows staff members to destroy their own file systems, but not to destroy anyone else's file systems. The permissions on \fBtank/users\fR are also displayed.
.sp .in +2 .nf # \fBzfs allow staff create,mount tank/users\fR # \fBzfs allow -c destroy tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff create,mount ------------------------------------------------------------- .fi .in -2 .sp .LP \fBExample 20 \fRDefining and Granting a Permission Set on a ZFS Dataset .sp .LP The following example shows how to define and grant a permission set on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed. .sp .in +2 .nf # \fBzfs allow -s @pset create,destroy,snapshot,mount tank/users\fR # \fBzfs allow staff @pset tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Permission sets on (tank/users) @pset create,destroy,mount,snapshot Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff @pset,create,mount ------------------------------------------------------------- .fi .in -2 .sp .LP \fBExample 21 \fRDelegating Property Permissions on a ZFS Dataset .sp .LP The following example shows how to grant the ability to set quotas and reservations on the \fBusers/home\fR file system. The permissions on \fBusers/home\fR are also displayed. .sp .in +2 .nf # \fBzfs allow cindys quota,reservation users/home\fR # \fBzfs allow users/home\fR ------------------------------------------------------------- Local+Descendent permissions on (users/home) user cindys quota,reservation ------------------------------------------------------------- cindys% \fBzfs set quota=10G users/home/marks\fR cindys% \fBzfs get quota users/home/marks\fR NAME PROPERTY VALUE SOURCE users/home/marks quota 10G local .fi .in -2 .sp .LP \fBExample 22 \fRRemoving ZFS Delegated Permissions on a ZFS Dataset .sp .LP The following example shows how to remove the snapshot permission from the \fBstaff\fR group on the \fBtank/users\fR file system. The permissions on \fBtank/users\fR are also displayed. .sp .in +2 .nf # \fBzfs unallow staff snapshot tank/users\fR # \fBzfs allow tank/users\fR ------------------------------------------------------------- Permission sets on (tank/users) @pset create,destroy,mount,snapshot Create time permissions on (tank/users) create,destroy Local+Descendent permissions on (tank/users) group staff @pset,create,mount ------------------------------------------------------------- .fi .in -2 .sp .LP \fBExample 23\fR Showing the differences between a snapshot and a ZFS Dataset .sp .LP The following example shows how to see what has changed between a prior snapshot of a ZFS Dataset and its current state. The \fB-F\fR option is used to indicate type information for the files affected. .sp .in +2 .nf # zfs diff -F tank/test@before tank/test M / /tank/test/ M F /tank/test/linked (+1) R F /tank/test/oldname -> /tank/test/newname - F /tank/test/deleted + F /tank/test/created M F /tank/test/modified .fi .in -2 .sp .SH EXIT STATUS .sp .LP The following exit values are returned: .sp .ne 2 .na \fB\fB0\fR\fR .ad .sp .6 .RS 4n Successful completion. .RE .sp .ne 2 .na \fB\fB1\fR\fR .ad .sp .6 .RS 4n An error occurred. .RE .sp .ne 2 .na \fB\fB2\fR\fR .ad .sp .6 .RS 4n Invalid command line options were specified. .RE .SH ATTRIBUTES .sp .LP See \fBattributes\fR(5) for descriptions of the following attributes: .sp .sp .TS box; c | c l | l .
ATTRIBUTE TYPE ATTRIBUTE VALUE _ Interface Stability Committed .TE .SH SEE ALSO .sp .LP \fBssh\fR(1), \fBiscsitadm\fR(1M), \fBmount\fR(1M), \fBshare\fR(1M), \fBsharemgr\fR(1M), \fBunshare\fR(1M), \fBzonecfg\fR(1M), \fBzpool\fR(1M), \fBchmod\fR(2), \fBstat\fR(2), \fBwrite\fR(2), \fBfsync\fR(3C), \fBdfstab\fR(4), \fBacl\fR(5), \fBattributes\fR(5) .sp .LP See the \fBgzip\fR(1) man page, which is not part of the SunOS man page collection. .sp .LP For information about using the \fBZFS\fR web-based management tool and other \fBZFS\fR features, see the \fISolaris ZFS Administration Guide\fR. Index: vendor-sys/illumos/dist/common/zfs/zfs_prop.c =================================================================== --- vendor-sys/illumos/dist/common/zfs/zfs_prop.c (revision 247315) +++ vendor-sys/illumos/dist/common/zfs/zfs_prop.c (revision 247316) @@ -1,622 +1,626 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ #include <sys/zio.h> #include <sys/spa.h> #include <sys/u8_textprep.h> #include <sys/zfs_acl.h> #include <sys/zfs_ioctl.h> #include <sys/zfs_znode.h> #include "zfs_prop.h" #include "zfs_deleg.h" #if defined(_KERNEL) #include <sys/systm.h> #else #include <stdlib.h> #include <string.h> #include <ctype.h> #endif static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS]; /* Note this is indexed by zfs_userquota_prop_t, keep the order the same */ const char *zfs_userquota_prop_prefixes[] = { "userused@", "userquota@", "groupused@", "groupquota@" }; zprop_desc_t * zfs_prop_get_table(void) { return (zfs_prop_table); } void zfs_prop_init(void) { static zprop_index_t checksum_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, { "sha256", ZIO_CHECKSUM_SHA256 }, { NULL } }; static zprop_index_t dedup_table[] = { { "on", ZIO_CHECKSUM_ON }, { "off", ZIO_CHECKSUM_OFF }, { "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY }, { "sha256", ZIO_CHECKSUM_SHA256 }, { "sha256,verify", ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY }, { NULL } }; static zprop_index_t compress_table[] = { { "on", ZIO_COMPRESS_ON }, { "off", ZIO_COMPRESS_OFF }, { "lzjb", ZIO_COMPRESS_LZJB }, { "gzip", ZIO_COMPRESS_GZIP_6 }, /* gzip default */ { "gzip-1", ZIO_COMPRESS_GZIP_1 }, { "gzip-2", ZIO_COMPRESS_GZIP_2 }, { "gzip-3", ZIO_COMPRESS_GZIP_3 }, { "gzip-4", ZIO_COMPRESS_GZIP_4 }, { "gzip-5", ZIO_COMPRESS_GZIP_5 }, { "gzip-6", ZIO_COMPRESS_GZIP_6 }, { "gzip-7", ZIO_COMPRESS_GZIP_7 }, { "gzip-8", ZIO_COMPRESS_GZIP_8 }, { "gzip-9", ZIO_COMPRESS_GZIP_9 }, { "zle", ZIO_COMPRESS_ZLE }, { "lz4", ZIO_COMPRESS_LZ4 }, { NULL } }; static zprop_index_t snapdir_table[] = { { "hidden", ZFS_SNAPDIR_HIDDEN }, { "visible", ZFS_SNAPDIR_VISIBLE }, { NULL } }; static zprop_index_t acl_mode_table[] = { { "discard", ZFS_ACL_DISCARD }, { "groupmask", ZFS_ACL_GROUPMASK }, { "passthrough", ZFS_ACL_PASSTHROUGH }, { "restricted", ZFS_ACL_RESTRICTED }, { NULL } }; static zprop_index_t acl_inherit_table[] = { { "discard", ZFS_ACL_DISCARD }, { "noallow", ZFS_ACL_NOALLOW }, { "restricted", ZFS_ACL_RESTRICTED }, { "passthrough", ZFS_ACL_PASSTHROUGH }, { "secure", ZFS_ACL_RESTRICTED }, /* backward compatibility */ { "passthrough-x", ZFS_ACL_PASSTHROUGH_X }, { NULL } }; static zprop_index_t case_table[] = { { "sensitive", ZFS_CASE_SENSITIVE }, { "insensitive", ZFS_CASE_INSENSITIVE }, { "mixed", ZFS_CASE_MIXED }, { NULL } }; static zprop_index_t copies_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { NULL } }; /* * Use the unique flags we have to send to u8_strcmp() and/or * u8_textprep() to represent the various normalization property * values.
*/ static zprop_index_t normalize_table[] = { { "none", 0 }, { "formD", U8_TEXTPREP_NFD }, { "formKC", U8_TEXTPREP_NFKC }, { "formC", U8_TEXTPREP_NFC }, { "formKD", U8_TEXTPREP_NFKD }, { NULL } }; static zprop_index_t version_table[] = { { "1", 1 }, { "2", 2 }, { "3", 3 }, { "4", 4 }, { "5", 5 }, { "current", ZPL_VERSION }, { NULL } }; static zprop_index_t boolean_table[] = { { "off", 0 }, { "on", 1 }, { NULL } }; static zprop_index_t logbias_table[] = { { "latency", ZFS_LOGBIAS_LATENCY }, { "throughput", ZFS_LOGBIAS_THROUGHPUT }, { NULL } }; static zprop_index_t canmount_table[] = { { "off", ZFS_CANMOUNT_OFF }, { "on", ZFS_CANMOUNT_ON }, { "noauto", ZFS_CANMOUNT_NOAUTO }, { NULL } }; static zprop_index_t cache_table[] = { { "none", ZFS_CACHE_NONE }, { "metadata", ZFS_CACHE_METADATA }, { "all", ZFS_CACHE_ALL }, { NULL } }; static zprop_index_t sync_table[] = { { "standard", ZFS_SYNC_STANDARD }, { "always", ZFS_SYNC_ALWAYS }, { "disabled", ZFS_SYNC_DISABLED }, { NULL } }; /* inherit index properties */ zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "standard | always | disabled", "SYNC", sync_table); zprop_register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM", checksum_table); zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | verify | sha256[,verify]", "DEDUP", dedup_table); zprop_register_index(ZFS_PROP_COMPRESSION, "compression", ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4", "COMPRESS", compress_table); zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "hidden | visible", "SNAPDIR", snapdir_table); zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | groupmask | passthrough | restricted", "ACLMODE", acl_mode_table); zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "discard | noallow | restricted | passthrough | passthrough-x", "ACLINHERIT", acl_inherit_table); zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "1 | 2 | 3", "COPIES", copies_table); zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "PRIMARYCACHE", cache_table); zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache", ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table); zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "latency | throughput", "LOGBIAS", logbias_table); /* inherit index (boolean) properties */ zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES", boolean_table); zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC", boolean_table); zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, 
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID", boolean_table); zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY", boolean_table); zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table); zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR", boolean_table); zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN", boolean_table); zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND", boolean_table); /* default index properties */ zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table); zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto", "CANMOUNT", canmount_table); /* readonly index (boolean) properties */ zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY", boolean_table); /* set once index properties */ zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "none | formC | formD | formKC | formKD", "NORMALIZATION", normalize_table); zprop_register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); /* set once index (boolean) properties */ zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "UTF8ONLY", boolean_table); /* string properties */ zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "ORIGIN"); zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "[,...]", "CLONES"); zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, " | legacy | none", "MOUNTPOINT"); zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", "SHARENFS"); zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY, ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | sharemgr(1M) options", "SHARESMB"); zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel", ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET, "", "MLSLABEL"); /* readonly number properties */ zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "USED"); zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "AVAIL"); zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "REFER"); zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0, PROP_READONLY, ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "RATIO"); zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0, PROP_READONLY, 
ZFS_TYPE_DATASET, "<1.00x or higher if compressed>", "REFRATIO"); zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK"); zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDSNAP"); zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDDS"); zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDCHILD"); zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDREFRESERV"); zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "", "USERREFS"); zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "WRITTEN"); + zprop_register_number(ZFS_PROP_LOGICALUSED, "logicalused", 0, + PROP_READONLY, ZFS_TYPE_DATASET, "", "LUSED"); + zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced", + 0, PROP_READONLY, ZFS_TYPE_DATASET, "", "LREFER"); /* default number properties */ zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "QUOTA"); zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "RESERV"); zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT, ZFS_TYPE_VOLUME, "", "VOLSIZE"); zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, " | none", "REFQUOTA"); zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, " | none", "REFRESERV"); /* inherit number properties */ zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_MAXBLOCKSIZE, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE"); /* hidden properties */ zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG"); zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES"); zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_DATASET, "NAME"); zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu", PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "STMF_SBD_LU"); zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "USERACCOUNTING"); zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE"); zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID"); /* oddball properties */ zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL, PROP_READONLY, ZFS_TYPE_DATASET, "", "CREATION", B_FALSE, B_TRUE, NULL); } boolean_t zfs_prop_delegatable(zfs_prop_t prop) { zprop_desc_t *pd = &zfs_prop_table[prop]; /* The mlslabel property is never delegatable. 
*/ if (prop == ZFS_PROP_MLSLABEL) return (B_FALSE); return (pd->pd_attr != PROP_READONLY); } /* * Given a zfs dataset property name, returns the corresponding property ID. */ zfs_prop_t zfs_name_to_prop(const char *propname) { return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET)); } /* * For user property names, we allow all lowercase alphanumeric characters, plus * a few useful punctuation characters. */ static int valid_char(char c) { return ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == '.' || c == ':'); } /* * Returns true if this is a valid user-defined property (one with a ':'). */ boolean_t zfs_prop_user(const char *name) { int i; char c; boolean_t foundsep = B_FALSE; for (i = 0; i < strlen(name); i++) { c = name[i]; if (!valid_char(c)) return (B_FALSE); if (c == ':') foundsep = B_TRUE; } if (!foundsep) return (B_FALSE); return (B_TRUE); } /* * Returns true if this is a valid userspace-type property (one with a '@'). * Note that after the @, any character is valid (eg, another @, for SID * user@domain). */ boolean_t zfs_prop_userquota(const char *name) { zfs_userquota_prop_t prop; for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) { if (strncmp(name, zfs_userquota_prop_prefixes[prop], strlen(zfs_userquota_prop_prefixes[prop])) == 0) { return (B_TRUE); } } return (B_FALSE); } /* * Returns true if this is a valid written@ property. * Note that after the @, any character is valid (eg, another @, for * written@pool/fs@origin). */ boolean_t zfs_prop_written(const char *name) { static const char *prefix = "written@"; return (strncmp(name, prefix, strlen(prefix)) == 0); } /* * Tables of index types, plus functions to convert between the user view * (strings) and internal representation (uint64_t). */ int zfs_prop_string_to_index(zfs_prop_t prop, const char *string, uint64_t *index) { return (zprop_string_to_index(prop, string, index, ZFS_TYPE_DATASET)); } int zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string) { return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET)); } uint64_t zfs_prop_random_value(zfs_prop_t prop, uint64_t seed) { return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET)); } /* * Returns TRUE if the property applies to any of the given dataset types. */ boolean_t zfs_prop_valid_for_type(int prop, zfs_type_t types) { return (zprop_valid_for_type(prop, types)); } zprop_type_t zfs_prop_get_type(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype); } /* * Returns TRUE if the property is readonly. */ boolean_t zfs_prop_readonly(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_READONLY || zfs_prop_table[prop].pd_attr == PROP_ONETIME); } /* * Returns TRUE if the property is only allowed to be set once. */ boolean_t zfs_prop_setonce(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_ONETIME); } const char * zfs_prop_default_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_strdefault); } uint64_t zfs_prop_default_numeric(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_numdefault); } /* * Given a dataset property ID, returns the corresponding name. * Assuming the zfs dataset property ID is valid. */ const char * zfs_prop_to_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_name); } /* * Returns TRUE if the property is inheritable. 
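 * Note that PROP_ONETIME counts as inheritable as well as readonly:
 * set-once properties such as normalization can be inherited when a
 * dataset is created, but never changed afterward. A minimal
 * illustration of that overlap under the registrations above
 * (illustrative sketch only, not part of the original file):
 */

static void
zfs_prop_example_onetime_overlap(void)
{
	/* normalization was registered with PROP_ONETIME above */
	ASSERT(zfs_prop_readonly(ZFS_PROP_NORMALIZE));
	ASSERT(zfs_prop_inheritable(ZFS_PROP_NORMALIZE));

	/* atime (PROP_INHERIT) is inheritable but not readonly */
	ASSERT(!zfs_prop_readonly(ZFS_PROP_ATIME));
	ASSERT(zfs_prop_inheritable(ZFS_PROP_ATIME));
}

/*
 * The predicate itself: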
*/ boolean_t zfs_prop_inheritable(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_attr == PROP_INHERIT || zfs_prop_table[prop].pd_attr == PROP_ONETIME); } #ifndef _KERNEL /* * Returns a string describing the set of acceptable values for the given * zfs property, or NULL if it cannot be set. */ const char * zfs_prop_values(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_values); } /* * Returns TRUE if this property is a string type. Note that index types * (compression, checksum) are treated as strings in userland, even though they * are stored numerically on disk. */ int zfs_prop_is_string(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_proptype == PROP_TYPE_STRING || zfs_prop_table[prop].pd_proptype == PROP_TYPE_INDEX); } /* * Returns the column header for the given property. Used only in * 'zfs list -o', but centralized here with the other property information. */ const char * zfs_prop_column_name(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_colname); } /* * Returns whether the given property should be displayed right-justified for * 'zfs list'. */ boolean_t zfs_prop_align_right(zfs_prop_t prop) { return (zfs_prop_table[prop].pd_rightalign); } #endif Index: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c =================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c (revision 247315) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dataset.c (revision 247316) @@ -1,4290 +1,4292 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char *dsl_reaper = "the grim reaper"; static dsl_checkfunc_t dsl_dataset_destroy_begin_check; static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; static dsl_syncfunc_t dsl_dataset_set_reservation_sync; #define SWITCH64(x, y) \ { \ uint64_t __tmp = (x); \ (x) = (y); \ (y) = __tmp; \ } #define DS_REF_MAX (1ULL << 62) #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) /* * Figure out how much of this delta should be propagated to the dsl_dir * layer. If there's a refreservation, that space has already been * partially accounted for in our ancestors.
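 *
 * For example, with ds_reserved = 10G and ds_unique_bytes = 8G, the
 * refreservation already charges the parent for 10G, so a +1G delta
 * changes nothing at the dsl_dir layer: MAX(9G, 10G) - MAX(8G, 10G)
 * = 0. Once unique space exceeds the reservation (say 10G -> 11G),
 * the full overage is propagated. A standalone restatement of that
 * arithmetic (illustrative only, not part of the original file):
 */

static int64_t
parent_delta_example(uint64_t unique, uint64_t reserved, int64_t delta)
{
	uint64_t old_bytes = MAX(unique, reserved);
	uint64_t new_bytes = MAX(unique + delta, reserved);

	return ((int64_t)(new_bytes - old_bytes));
}

/*
 * The implementation, which additionally asserts that the amount
 * propagated never exceeds the raw delta: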
*/ static int64_t parent_delta(dsl_dataset_t *ds, int64_t delta) { uint64_t old_bytes, new_bytes; if (ds->ds_reserved == 0) return (delta); old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); return (new_bytes - old_bytes); } void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) { int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); int64_t delta; dprintf_bp(bp, "ds=%p", ds); ASSERT(dmu_tx_is_syncing(tx)); /* It could have been compressed away to nothing */ if (BP_IS_HOLE(bp)) return; ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); if (ds == NULL) { dsl_pool_mos_diduse_space(tx->tx_pool, used, compressed, uncompressed); return; } dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); delta = parent_delta(ds, used); ds->ds_phys->ds_referenced_bytes += used; ds->ds_phys->ds_compressed_bytes += compressed; ds->ds_phys->ds_uncompressed_bytes += uncompressed; ds->ds_phys->ds_unique_bytes += used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, compressed, uncompressed, tx); dsl_dir_transfer_space(ds->ds_dir, used - delta, DD_USED_REFRSRV, DD_USED_HEAD, tx); mutex_exit(&ds->ds_dir->dd_lock); } int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async) { if (BP_IS_HOLE(bp)) return (0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(bp->blk_birth <= tx->tx_txg); int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); int compressed = BP_GET_PSIZE(bp); int uncompressed = BP_GET_UCSIZE(bp); ASSERT(used > 0); if (ds == NULL) { dsl_free(tx->tx_pool, tx->tx_txg, bp); dsl_pool_mos_diduse_space(tx->tx_pool, -used, -compressed, -uncompressed); return (used); } ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ASSERT(!dsl_dataset_is_snapshot(ds)); dmu_buf_will_dirty(ds->ds_dbuf, tx); if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { int64_t delta; dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); dsl_free(tx->tx_pool, tx->tx_txg, bp); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(ds->ds_phys->ds_unique_bytes >= used || !DS_UNIQUE_IS_ACCURATE(ds)); delta = parent_delta(ds, -used); ds->ds_phys->ds_unique_bytes -= used; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, -compressed, -uncompressed, tx); dsl_dir_transfer_space(ds->ds_dir, -used - delta, DD_USED_REFRSRV, DD_USED_HEAD, tx); mutex_exit(&ds->ds_dir->dd_lock); } else { dprintf_bp(bp, "putting on dead list: %s", ""); if (async) { /* * We are here as part of zio's write done callback, * which means we're a zio interrupt thread. We can't * call dsl_deadlist_insert() now because it may block * waiting for I/O. Instead, put bp on the deferred * queue and let dsl_pool_sync() finish the job. 
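 *
 * Conceptually, the sync-side drain then amounts to
 *	bplist_iterate(&ds->ds_pending_deadlist, insert_cb, arg, tx);
 * run from dsl_pool_sync(), where insert_cb (name illustrative)
 * performs the deferred dsl_deadlist_insert() from a context that
 * is allowed to block.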
*/ bplist_append(&ds->ds_pending_deadlist, bp); } else { dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); } ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object && bp->blk_birth > ds->ds_prev->ds_phys->ds_prev_snap_txg) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); mutex_enter(&ds->ds_prev->ds_lock); ds->ds_prev->ds_phys->ds_unique_bytes += used; mutex_exit(&ds->ds_prev->ds_lock); } if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { dsl_dir_transfer_space(ds->ds_dir, used, DD_USED_HEAD, DD_USED_SNAP, tx); } } mutex_enter(&ds->ds_lock); ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); ds->ds_phys->ds_referenced_bytes -= used; ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); ds->ds_phys->ds_compressed_bytes -= compressed; ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); ds->ds_phys->ds_uncompressed_bytes -= uncompressed; mutex_exit(&ds->ds_lock); return (used); } uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) { uint64_t trysnap = 0; if (ds == NULL) return (0); /* * The snapshot creation could fail, but that would cause an * incorrect FALSE return, which would only result in an * overestimation of the amount of space that an operation would * consume, which is OK. * * There's also a small window where we could miss a pending * snapshot, because we could set the sync task in the quiescing * phase. So this should only be used as a guess. */ if (ds->ds_trysnap_txg > spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) trysnap = ds->ds_trysnap_txg; return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); } boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, uint64_t blk_birth) { if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) return (B_FALSE); ddt_prefetch(dsl_dataset_get_spa(ds), bp); return (B_TRUE); } /* ARGSUSED */ static void dsl_dataset_evict(dmu_buf_t *db, void *dsv) { dsl_dataset_t *ds = dsv; ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); unique_remove(ds->ds_fsid_guid); if (ds->ds_objset != NULL) dmu_objset_evict(ds->ds_objset); if (ds->ds_prev) { dsl_dataset_drop_ref(ds->ds_prev, ds); ds->ds_prev = NULL; } bplist_destroy(&ds->ds_pending_deadlist); if (db != NULL) { dsl_deadlist_close(&ds->ds_deadlist); } else { ASSERT(ds->ds_deadlist.dl_dbuf == NULL); ASSERT(!ds->ds_deadlist.dl_oldfmt); } if (ds->ds_dir) dsl_dir_close(ds->ds_dir, ds); ASSERT(!list_link_active(&ds->ds_synced_link)); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); kmem_free(ds, sizeof (dsl_dataset_t)); } static int dsl_dataset_get_snapname(dsl_dataset_t *ds) { dsl_dataset_phys_t *headphys; int err; dmu_buf_t *headdbuf; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; if (ds->ds_snapname[0]) return (0); if (ds->ds_phys->ds_next_snap_obj == 0) return (0); err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &headdbuf); if (err) return (err); headphys = headdbuf->db_data; err = zap_value_search(dp->dp_meta_objset, headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); dmu_buf_rele(headdbuf, FTAG); return (err); } static int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = 
ds->ds_phys->ds_snapnames_zapobj; matchtype_t mt; int err; if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; err = zap_lookup_norm(mos, snapobj, name, 8, 1, value, mt, NULL, 0, NULL); if (err == ENOTSUP && mt == MT_FIRST) err = zap_lookup(mos, snapobj, name, 8, 1, value); return (err); } static int dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; matchtype_t mt; int err; dsl_dir_snap_cmtime_update(ds->ds_dir); if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) mt = MT_FIRST; else mt = MT_EXACT; err = zap_remove_norm(mos, snapobj, name, mt, tx); if (err == ENOTSUP && mt == MT_FIRST) err = zap_remove(mos, snapobj, name, tx); return (err); } static int dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { objset_t *mos = dp->dp_meta_objset; dmu_buf_t *dbuf; dsl_dataset_t *ds; int err; dmu_object_info_t doi; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); if (err) return (err); /* Make sure dsobj has the correct object type. */ dmu_object_info_from_db(dbuf, &doi); if (doi.doi_type != DMU_OT_DSL_DATASET) return (EINVAL); ds = dmu_buf_get_user(dbuf); if (ds == NULL) { dsl_dataset_t *winner = NULL; ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; ds->ds_phys = dbuf->db_data; mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&ds->ds_rwlock, 0, 0, 0); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); bplist_create(&ds->ds_pending_deadlist); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), offsetof(dmu_sendarg_t, dsa_link)); if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); } if (err) { mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); kmem_free(ds, sizeof (dsl_dataset_t)); dmu_buf_rele(dbuf, tag); return (err); } if (!dsl_dataset_is_snapshot(ds)) { ds->ds_snapname[0] = '\0'; if (ds->ds_phys->ds_prev_snap_obj) { err = dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev); } } else { if (zfs_flags & ZFS_DEBUG_SNAPNAMES) err = dsl_dataset_get_snapname(ds); if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { err = zap_count( ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_userrefs_obj, &ds->ds_userrefs); } } if (err == 0 && !dsl_dataset_is_snapshot(ds)) { /* * In sync context, we're called with either no lock * or with the write lock. If we're not syncing, * we're always called with the read lock held. 
*/ boolean_t need_lock = !RW_WRITE_HELD(&dp->dp_config_rwlock) && dsl_pool_sync_context(dp); if (need_lock) rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_prop_get_ds(ds, "refreservation", sizeof (uint64_t), 1, &ds->ds_reserved, NULL); if (err == 0) { err = dsl_prop_get_ds(ds, "refquota", sizeof (uint64_t), 1, &ds->ds_quota, NULL); } if (need_lock) rw_exit(&dp->dp_config_rwlock); } else { ds->ds_reserved = ds->ds_quota = 0; } if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, dsl_dataset_evict)) != NULL) { bplist_destroy(&ds->ds_pending_deadlist); dsl_deadlist_close(&ds->ds_deadlist); if (ds->ds_prev) dsl_dataset_drop_ref(ds->ds_prev, ds); dsl_dir_close(ds->ds_dir, ds); mutex_destroy(&ds->ds_lock); mutex_destroy(&ds->ds_recvlock); mutex_destroy(&ds->ds_opening_lock); rw_destroy(&ds->ds_rwlock); cv_destroy(&ds->ds_exclusive_cv); kmem_free(ds, sizeof (dsl_dataset_t)); if (err) { dmu_buf_rele(dbuf, tag); return (err); } ds = winner; } else { ds->ds_fsid_guid = unique_insert(ds->ds_phys->ds_fsid_guid); } } ASSERT3P(ds->ds_dbuf, ==, dbuf); ASSERT3P(ds->ds_phys, ==, dbuf->db_data); ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); mutex_enter(&ds->ds_lock); if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { mutex_exit(&ds->ds_lock); dmu_buf_rele(ds->ds_dbuf, tag); return (ENOENT); } mutex_exit(&ds->ds_lock); *dsp = ds; return (0); } static int dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) { dsl_pool_t *dp = ds->ds_dir->dd_pool; /* * In syncing context we don't want the rwlock lock: there * may be an existing writer waiting for sync phase to * finish. We don't need to worry about such writers, since * sync phase is single-threaded, so the writer can't be * doing anything while we are active. */ if (dsl_pool_sync_context(dp)) { ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); return (0); } /* * Normal users will hold the ds_rwlock as a READER until they * are finished (i.e., call dsl_dataset_rele()). "Owners" will * drop their READER lock after they set the ds_owner field. * * If the dataset is being destroyed, the destroy thread will * obtain a WRITER lock for exclusive access after it's done its * open-context work and then change the ds_owner to * dsl_reaper once destruction is assured. So threads * may block here temporarily, until the "destructability" of * the dataset is determined. */ ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); mutex_enter(&ds->ds_lock); while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { rw_exit(&dp->dp_config_rwlock); cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); if (DSL_DATASET_IS_DESTROYED(ds)) { mutex_exit(&ds->ds_lock); dsl_dataset_drop_ref(ds, tag); rw_enter(&dp->dp_config_rwlock, RW_READER); return (ENOENT); } /* * The dp_config_rwlock lives above the ds_lock. And * we need to check DSL_DATASET_IS_DESTROYED() while * holding the ds_lock, so we have to drop and reacquire * the ds_lock here. 
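 * Taking dp_config_rwlock while still holding ds_lock would invert
 * that order and could deadlock against a thread acquiring them in
 * the documented order, hence the exit/enter/enter sequence below.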
*/ mutex_exit(&ds->ds_lock); rw_enter(&dp->dp_config_rwlock, RW_READER); mutex_enter(&ds->ds_lock); } mutex_exit(&ds->ds_lock); return (0); } int dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); if (err) return (err); return (dsl_dataset_hold_ref(*dsp, tag)); } int dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); if (err) return (err); if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { dsl_dataset_rele(*dsp, tag); *dsp = NULL; return (EBUSY); } return (0); } int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) { dsl_dir_t *dd; dsl_pool_t *dp; const char *snapname; uint64_t obj; int err = 0; err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); if (err) return (err); dp = dd->dd_pool; obj = dd->dd_phys->dd_head_dataset_obj; rw_enter(&dp->dp_config_rwlock, RW_READER); if (obj) err = dsl_dataset_get_ref(dp, obj, tag, dsp); else err = ENOENT; if (err) goto out; err = dsl_dataset_hold_ref(*dsp, tag); /* we may be looking for a snapshot */ if (err == 0 && snapname != NULL) { dsl_dataset_t *ds = NULL; if (*snapname++ != '@') { dsl_dataset_rele(*dsp, tag); err = ENOENT; goto out; } dprintf("looking for snapshot '%s'\n", snapname); err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); if (err == 0) err = dsl_dataset_get_ref(dp, obj, tag, &ds); dsl_dataset_rele(*dsp, tag); ASSERT3U((err == 0), ==, (ds != NULL)); if (ds) { mutex_enter(&ds->ds_lock); if (ds->ds_snapname[0] == 0) (void) strlcpy(ds->ds_snapname, snapname, sizeof (ds->ds_snapname)); mutex_exit(&ds->ds_lock); err = dsl_dataset_hold_ref(ds, tag); *dsp = err ? NULL : ds; } } out: rw_exit(&dp->dp_config_rwlock); dsl_dir_close(dd, FTAG); return (err); } int dsl_dataset_own(const char *name, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp) { int err = dsl_dataset_hold(name, tag, dsp); if (err) return (err); if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { dsl_dataset_rele(*dsp, tag); return (EBUSY); } return (0); } void dsl_dataset_name(dsl_dataset_t *ds, char *name) { if (ds == NULL) { (void) strcpy(name, "mos"); } else { dsl_dir_name(ds->ds_dir, name); VERIFY(0 == dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { (void) strcat(name, "@"); /* * We use a "recursive" mutex so that we * can call dprintf_ds() with ds_lock held. 
*/ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); (void) strcat(name, ds->ds_snapname); mutex_exit(&ds->ds_lock); } else { (void) strcat(name, ds->ds_snapname); } } } } static int dsl_dataset_namelen(dsl_dataset_t *ds) { int result; if (ds == NULL) { result = 3; /* "mos" */ } else { result = dsl_dir_namelen(ds->ds_dir); VERIFY(0 == dsl_dataset_get_snapname(ds)); if (ds->ds_snapname[0]) { ++result; /* adding one for the @-sign */ if (!MUTEX_HELD(&ds->ds_lock)) { mutex_enter(&ds->ds_lock); result += strlen(ds->ds_snapname); mutex_exit(&ds->ds_lock); } else { result += strlen(ds->ds_snapname); } } } return (result); } void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) { dmu_buf_rele(ds->ds_dbuf, tag); } void dsl_dataset_rele(dsl_dataset_t *ds, void *tag) { if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { rw_exit(&ds->ds_rwlock); } dsl_dataset_drop_ref(ds, tag); } void dsl_dataset_disown(dsl_dataset_t *ds, void *tag) { ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); mutex_enter(&ds->ds_lock); ds->ds_owner = NULL; if (RW_WRITE_HELD(&ds->ds_rwlock)) { rw_exit(&ds->ds_rwlock); cv_broadcast(&ds->ds_exclusive_cv); } mutex_exit(&ds->ds_lock); if (ds->ds_dbuf) dsl_dataset_drop_ref(ds, tag); else dsl_dataset_evict(NULL, ds); } boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) { boolean_t gotit = FALSE; mutex_enter(&ds->ds_lock); if (ds->ds_owner == NULL && (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { ds->ds_owner = tag; if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) rw_exit(&ds->ds_rwlock); gotit = TRUE; } mutex_exit(&ds->ds_lock); return (gotit); } void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) { ASSERT3P(owner, ==, ds->ds_owner); if (!RW_WRITE_HELD(&ds->ds_rwlock)) rw_enter(&ds->ds_rwlock, RW_WRITER); } uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj; objset_t *mos = dp->dp_meta_objset; if (origin == NULL) origin = dp->dp_origin_snap; ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = dd->dd_object; dsphys->ds_flags = flags; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_snapnames_zapobj = zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 
1 : tx->tx_txg; if (origin == NULL) { dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); } else { dsl_dataset_t *ohds; dsphys->ds_prev_snap_obj = origin->ds_object; dsphys->ds_prev_snap_txg = origin->ds_phys->ds_creation_txg; dsphys->ds_referenced_bytes = origin->ds_phys->ds_referenced_bytes; dsphys->ds_compressed_bytes = origin->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = origin->ds_phys->ds_uncompressed_bytes; dsphys->ds_bp = origin->ds_phys->ds_bp; dsphys->ds_flags |= origin->ds_phys->ds_flags; dmu_buf_will_dirty(origin->ds_dbuf, tx); origin->ds_phys->ds_num_children++; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); dsl_dataset_rele(ohds, FTAG); if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { if (origin->ds_phys->ds_next_clones_obj == 0) { origin->ds_phys->ds_next_clones_obj = zap_create(mos, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY(0 == zap_add_int(mos, origin->ds_phys->ds_next_clones_obj, dsobj, tx)); } dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_origin_obj = origin->ds_object; if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { if (origin->ds_dir->dd_phys->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); origin->ds_dir->dd_phys->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY3U(0, ==, zap_add_int(mos, origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); } } if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; dmu_buf_rele(dbuf, FTAG); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_head_dataset_obj = dsobj; return (dsobj); } uint64_t dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) { dsl_pool_t *dp = pdd->dd_pool; uint64_t dsobj, ddobj; dsl_dir_t *dd; ASSERT(lastname[0] != '@'); ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); dsl_deleg_set_create_perms(dd, tx, cr); dsl_dir_close(dd, FTAG); /* * If we are creating a clone, make sure we zero out any stale * data from the origin snapshots zil header. */ if (origin != NULL) { dsl_dataset_t *ds; objset_t *os; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); bzero(&os->os_zil_header, sizeof (os->os_zil_header)); dsl_dataset_dirty(ds, tx); dsl_dataset_rele(ds, FTAG); } return (dsobj); } /* * The snapshots must all be in the same pool. 
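 * Pair values in the nvlist are ignored; only the pair names (full
 * snapshot names) matter. A sketch of a caller, using hypothetical
 * dataset names (illustrative only, not part of the original file):
 */

static int
example_destroy_snaps(void)
{
	nvlist_t *snaps = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();
	int err;

	fnvlist_add_boolean(snaps, "tank/fs@monday");
	fnvlist_add_boolean(snaps, "tank/fs@tuesday");
	err = dmu_snapshots_destroy_nvl(snaps, B_FALSE, errlist);

	fnvlist_free(errlist);
	fnvlist_free(snaps);
	return (err);
}

/*
 * The entry point itself: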
*/ int dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, nvlist_t *errlist) { int err; dsl_sync_task_t *dst; spa_t *spa; nvpair_t *pair; dsl_sync_task_group_t *dstg; pair = nvlist_next_nvpair(snaps, NULL); if (pair == NULL) return (0); err = spa_open(nvpair_name(pair), &spa, FTAG); if (err) return (err); dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { dsl_dataset_t *ds; err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds); if (err == 0) { struct dsl_ds_destroyarg *dsda; dsl_dataset_make_exclusive(ds, dstg); dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); dsda->ds = ds; dsda->defer = defer; dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, dsda, dstg, 0); } else if (err == ENOENT) { err = 0; } else { fnvlist_add_int32(errlist, nvpair_name(pair), err); break; } } if (err == 0) err = dsl_sync_task_group_wait(dstg); for (dst = list_head(&dstg->dstg_tasks); dst; dst = list_next(&dstg->dstg_tasks, dst)) { struct dsl_ds_destroyarg *dsda = dst->dst_arg1; dsl_dataset_t *ds = dsda->ds; /* * Return the snapshots that triggered the error. */ if (dst->dst_err != 0) { char name[ZFS_MAXNAMELEN]; dsl_dataset_name(ds, name); fnvlist_add_int32(errlist, name, dst->dst_err); } ASSERT3P(dsda->rm_origin, ==, NULL); dsl_dataset_disown(ds, dstg); kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); } dsl_sync_task_group_destroy(dstg); spa_close(spa, FTAG); return (err); } static boolean_t dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) { boolean_t might_destroy = B_FALSE; mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && DS_IS_DEFER_DESTROY(ds)) might_destroy = B_TRUE; mutex_exit(&ds->ds_lock); return (might_destroy); } /* * If we're removing a clone, and these three conditions are true: * 1) the clone's origin has no other children * 2) the clone's origin has no user references * 3) the clone's origin has been marked for deferred destruction * Then, prepare to remove the origin as part of this sync task group. */ static int dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) { dsl_dataset_t *ds = dsda->ds; dsl_dataset_t *origin = ds->ds_prev; if (dsl_dataset_might_destroy_origin(origin)) { char *name; int namelen; int error; namelen = dsl_dataset_namelen(origin) + 1; name = kmem_alloc(namelen, KM_SLEEP); dsl_dataset_name(origin, name); #ifdef _KERNEL error = zfs_unmount_snap(name, NULL); if (error) { kmem_free(name, namelen); return (error); } #endif error = dsl_dataset_own(name, B_TRUE, tag, &origin); kmem_free(name, namelen); if (error) return (error); dsda->rm_origin = origin; dsl_dataset_make_exclusive(origin, tag); } return (0); } /* * ds must be opened as OWNER. On return (whether successful or not), * ds will be closed and caller can no longer dereference it. 
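 * In other words, the owner hold is consumed: a caller pairs
 * dsl_dataset_own() with dsl_dataset_destroy() and must not release
 * ds itself afterward (illustrative sketch, not part of the
 * original file):
 */

static int
example_destroy_owned(const char *name, boolean_t defer)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_own(name, B_FALSE, FTAG, &ds);
	if (err != 0)
		return (err);

	/* No dsl_dataset_rele()/disown() here; destroy consumes ds. */
	return (dsl_dataset_destroy(ds, FTAG, defer));
}

/*
 * The function itself: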
*/ int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) { int err; dsl_sync_task_group_t *dstg; objset_t *os; dsl_dir_t *dd; uint64_t obj; struct dsl_ds_destroyarg dsda = { 0 }; dsda.ds = ds; if (dsl_dataset_is_snapshot(ds)) { /* Destroying a snapshot is simpler */ dsl_dataset_make_exclusive(ds, tag); dsda.defer = defer; err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); ASSERT3P(dsda.rm_origin, ==, NULL); goto out; } else if (defer) { err = EINVAL; goto out; } dd = ds->ds_dir; if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds), &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { /* * Check for errors and mark this ds as inconsistent, in * case we crash while freeing the objects. */ err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, dsl_dataset_destroy_begin_sync, ds, NULL, 0); if (err) goto out; err = dmu_objset_from_ds(ds, &os); if (err) goto out; /* * Remove all objects while in the open context so that * there is less work to do in the syncing context. */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, ds->ds_phys->ds_prev_snap_txg)) { /* * Ignore errors, if there is not enough disk space * we will deal with it in dsl_dataset_destroy_sync(). */ (void) dmu_free_object(os, obj); } if (err != ESRCH) goto out; /* * Sync out all in-flight IO. */ txg_wait_synced(dd->dd_pool, 0); /* * If we managed to free all the objects in open * context, the user space accounting should be zero. */ if (ds->ds_phys->ds_bp.blk_fill == 0 && dmu_objset_userused_enabled(os)) { uint64_t count; ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || count == 0); ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || count == 0); } } rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); rw_exit(&dd->dd_pool->dp_config_rwlock); if (err) goto out; /* * Blow away the dsl_dir + head dataset. */ dsl_dataset_make_exclusive(ds, tag); /* * If we're removing a clone, we might also need to remove its * origin. */ do { dsda.need_prep = B_FALSE; if (dsl_dir_is_clone(dd)) { err = dsl_dataset_origin_rm_prep(&dsda, tag); if (err) { dsl_dir_close(dd, FTAG); goto out; } } dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); /* * We could be racing against 'zfs release' or 'zfs destroy -d' * on the origin snap, in which case we can get EBUSY if we * needed to destroy the origin snap but were not ready to * do so. 
*/ if (dsda.need_prep) { ASSERT(err == EBUSY); ASSERT(dsl_dir_is_clone(dd)); ASSERT(dsda.rm_origin == NULL); } } while (dsda.need_prep); if (dsda.rm_origin != NULL) dsl_dataset_disown(dsda.rm_origin, tag); /* if it is successful, dsl_dir_destroy_sync will close the dd */ if (err) dsl_dir_close(dd, FTAG); out: dsl_dataset_disown(ds, tag); return (err); } blkptr_t * dsl_dataset_get_blkptr(dsl_dataset_t *ds) { return (&ds->ds_phys->ds_bp); } void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); /* If it's the meta-objset, set dp_meta_rootbp */ if (ds == NULL) { tx->tx_pool->dp_meta_rootbp = *bp; } else { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_bp = *bp; } } spa_t * dsl_dataset_get_spa(dsl_dataset_t *ds) { return (ds->ds_dir->dd_pool->dp_spa); } void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) { dsl_pool_t *dp; if (ds == NULL) /* this is the meta-objset */ return; ASSERT(ds->ds_objset != NULL); if (ds->ds_phys->ds_next_snap_obj != 0) panic("dirtying snapshot!"); dp = ds->ds_dir->dd_pool; if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { /* up the hold count until we can be written out */ dmu_buf_add_ref(ds->ds_dbuf, ds); } } boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds) { for (int t = 0; t < TXG_SIZE; t++) { if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, ds, t)) return (B_TRUE); } return (B_FALSE); } /* * The unique space in the head dataset can be calculated by subtracting * the space used in the most recent snapshot, that is still being used * in this file system, from the space currently in use. To figure out * the space in the most recent snapshot still in use, we need to take * the total space used in the snapshot and subtract out the space that * has been freed up since the snapshot was taken. */ static void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) { uint64_t mrs_used; uint64_t dlused, dlcomp, dluncomp; ASSERT(!dsl_dataset_is_snapshot(ds)); if (ds->ds_phys->ds_prev_snap_obj != 0) mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; else mrs_used = 0; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); ASSERT3U(dlused, <=, mrs_used); ds->ds_phys->ds_unique_bytes = ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; } struct killarg { dsl_dataset_t *ds; dmu_tx_t *tx; }; /* ARGSUSED */ static int kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { struct killarg *ka = arg; dmu_tx_t *tx = ka->tx; if (bp == NULL) return (0); if (zb->zb_level == ZB_ZIL_LEVEL) { ASSERT(zilog != NULL); /* * It's a block in the intent log. It has no * accounting, so just free it. */ dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); } else { ASSERT(zilog == NULL); ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); } return (0); } /* ARGSUSED */ static int dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t count; int err; /* * Can't delete a head dataset if there are snapshots of it. * (Except if the only snapshots are from the branch we cloned * from.) 
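 * The check below encodes that exception: ds_prev's ds_next_snap_obj
 * points back at this dataset only if the most recent snapshot was
 * taken off this head; for a clone whose ds_prev is still the origin
 * snapshot, it points elsewhere and the destroy may proceed.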
*/ if (ds->ds_prev != NULL && ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EBUSY); /* * This is really a dsl_dir thing, but check it here so that * we'll be less likely to leave this dataset inconsistent & * nearly destroyed. */ err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); if (err) return (err); if (count != 0) return (EEXIST); return (0); } /* ARGSUSED */ static void dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; /* Mark it as inconsistent on-disk, in case we crash */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; spa_history_log_internal_ds(ds, "destroy begin", tx, ""); } static int dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, dmu_tx_t *tx) { dsl_dataset_t *ds = dsda->ds; dsl_dataset_t *ds_prev = ds->ds_prev; if (dsl_dataset_might_destroy_origin(ds_prev)) { struct dsl_ds_destroyarg ndsda = {0}; /* * If we're not prepared to remove the origin, don't remove * the clone either. */ if (dsda->rm_origin == NULL) { dsda->need_prep = B_TRUE; return (EBUSY); } ndsda.ds = ds_prev; ndsda.is_origin_rm = B_TRUE; return (dsl_dataset_destroy_check(&ndsda, tag, tx)); } /* * If we're not going to remove the origin after all, * undo the open context setup. */ if (dsda->rm_origin != NULL) { dsl_dataset_disown(dsda->rm_origin, tag); dsda->rm_origin = NULL; } return (0); } /* * If you add new checks here, you may need to add * additional checks to the "temporary" case in * snapshot_check() in dmu_objset.c. */ /* ARGSUSED */ int dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { struct dsl_ds_destroyarg *dsda = arg1; dsl_dataset_t *ds = dsda->ds; /* we have an owner hold, so no one else can destroy us */ ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); /* * Only allow deferred destroy on pools that support it. * NOTE: deferred destroy is only supported on snapshots. */ if (dsda->defer) { if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); ASSERT(dsl_dataset_is_snapshot(ds)); return (0); } /* * Can't delete a head dataset if there are snapshots of it. * (Except if the only snapshots are from the branch we cloned * from.) */ if (ds->ds_prev != NULL && ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) return (EBUSY); /* * If we made changes this txg, traverse_dsl_dataset won't find * them. Try again. */ if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) return (EAGAIN); if (dsl_dataset_is_snapshot(ds)) { /* * If this snapshot has an elevated user reference count, * we can't destroy it yet. */ if (ds->ds_userrefs > 0 && !dsda->releasing) return (EBUSY); mutex_enter(&ds->ds_lock); /* * Can't delete a branch point. However, if we're destroying * a clone and removing its origin due to it having a user * hold count of 0 and having been marked for deferred destroy, * it's OK for the origin to have a single clone. */ if (ds->ds_phys->ds_num_children > (dsda->is_origin_rm ? 2 : 1)) { mutex_exit(&ds->ds_lock); return (EEXIST); } mutex_exit(&ds->ds_lock); } else if (dsl_dir_is_clone(ds->ds_dir)) { return (dsl_dataset_origin_check(dsda, arg2, tx)); } /* XXX we should do some i/o error checking...
*/ return (0); } struct refsarg { kmutex_t lock; boolean_t gone; kcondvar_t cv; }; /* ARGSUSED */ static void dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) { struct refsarg *arg = argv; mutex_enter(&arg->lock); arg->gone = TRUE; cv_signal(&arg->cv); mutex_exit(&arg->lock); } static void dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) { struct refsarg arg; mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); arg.gone = FALSE; (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, dsl_dataset_refs_gone); dmu_buf_rele(ds->ds_dbuf, tag); mutex_enter(&arg.lock); while (!arg.gone) cv_wait(&arg.cv, &arg.lock); ASSERT(arg.gone); mutex_exit(&arg.lock); ds->ds_dbuf = NULL; ds->ds_phys = NULL; mutex_destroy(&arg.lock); cv_destroy(&arg.cv); } static void remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t count; int err; ASSERT(ds->ds_phys->ds_num_children >= 2); err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); /* * The err should not be ENOENT, but a bug in a previous version * of the code could cause upgrade_clones_cb() to not set * ds_next_snap_obj when it should, leading to a missing entry. * If we knew that the pool was created after * SPA_VERSION_NEXT_CLONES, we could assert that it isn't * ENOENT. However, at least we can check that we don't have * too many entries in the next_clones_obj even after failing to * remove this one. */ if (err != ENOENT) { VERIFY0(err); } ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count)); ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); } static void dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; /* * If it is the old version, dd_clones doesn't exist so we can't * find the clones, but deadlist_remove_key() is a no-op so it * doesn't matter. 
*/ if (ds->ds_dir->dd_phys->dd_clones == 0) return; for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, &clone)); if (clone->ds_dir->dd_origin_txg > mintxg) { dsl_deadlist_remove_key(&clone->ds_deadlist, mintxg, tx); dsl_dataset_remove_clones_key(clone, mintxg, tx); } dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); } struct process_old_arg { dsl_dataset_t *ds; dsl_dataset_t *ds_prev; boolean_t after_branch_point; zio_t *pio; uint64_t used, comp, uncomp; }; static int process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { struct process_old_arg *poa = arg; dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); if (poa->ds_prev && !poa->after_branch_point && bp->blk_birth > poa->ds_prev->ds_phys->ds_prev_snap_txg) { poa->ds_prev->ds_phys->ds_unique_bytes += bp_get_dsize_sync(dp->dp_spa, bp); } } else { poa->used += bp_get_dsize_sync(dp->dp_spa, bp); poa->comp += BP_GET_PSIZE(bp); poa->uncomp += BP_GET_UCSIZE(bp); dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); } return (0); } static void process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) { struct process_old_arg poa = { 0 }; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; ASSERT(ds->ds_deadlist.dl_oldfmt); ASSERT(ds_next->ds_deadlist.dl_oldfmt); poa.ds = ds; poa.ds_prev = ds_prev; poa.after_branch_point = after_branch_point; poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, process_old_cb, &poa, tx)); VERIFY0(zio_wait(poa.pio)); ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); /* change snapused */ dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -poa.used, -poa.comp, -poa.uncomp, tx); /* swap next's deadlist to our deadlist */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds_next->ds_deadlist); SWITCH64(ds_next->ds_phys->ds_deadlist_obj, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&ds_next->ds_deadlist, mos, ds_next->ds_phys->ds_deadlist_obj); } static int old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) { int err; struct killarg ka; /* * Free everything that we point to (that's born after * the previous snapshot, if we are a clone) * * NB: this should be very quick, because we already * freed all the objects in open context. 
*/ ka.ds = ds; ka.tx = tx; err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, kill_blkptr, &ka); ASSERT0(err); ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); return (err); } void dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_destroyarg *dsda = arg1; dsl_dataset_t *ds = dsda->ds; int err; int after_branch_point = FALSE; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *ds_prev = NULL; boolean_t wont_destroy; uint64_t obj; wont_destroy = (dsda->defer && (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)); ASSERT(ds->ds_owner || wont_destroy); ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); ASSERT(ds->ds_prev == NULL || ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); if (wont_destroy) { ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); return; } /* We need to log before removing it from the namespace. */ spa_history_log_internal_ds(ds, "destroy", tx, ""); /* signal any waiters that this dataset is going away */ mutex_enter(&ds->ds_lock); ds->ds_owner = dsl_reaper; cv_broadcast(&ds->ds_exclusive_cv); mutex_exit(&ds->ds_lock); /* Remove our reservation */ if (ds->ds_reserved != 0) { dsl_prop_setarg_t psa; uint64_t value = 0; dsl_prop_setarg_init_uint64(&psa, "refreservation", (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), &value); psa.psa_effective_value = 0; /* predict default value */ dsl_dataset_set_reservation_sync(ds, &psa, tx); ASSERT0(ds->ds_reserved); } ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); dsl_scan_ds_destroyed(ds, tx); obj = ds->ds_object; if (ds->ds_phys->ds_prev_snap_obj != 0) { if (ds->ds_prev) { ds_prev = ds->ds_prev; } else { VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); } after_branch_point = (ds_prev->ds_phys->ds_next_snap_obj != obj); dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); if (after_branch_point && ds_prev->ds_phys->ds_next_clones_obj != 0) { remove_from_next_clones(ds_prev, obj, tx); if (ds->ds_phys->ds_next_snap_obj != 0) { VERIFY(0 == zap_add_int(mos, ds_prev->ds_phys->ds_next_clones_obj, ds->ds_phys->ds_next_snap_obj, tx)); } } if (after_branch_point && ds->ds_phys->ds_next_snap_obj == 0) { /* This clone is toast. */ ASSERT(ds_prev->ds_phys->ds_num_children > 1); ds_prev->ds_phys->ds_num_children--; /* * If the clone's origin has no other clones, no * user holds, and has been marked for deferred * deletion, then we should have done the necessary * destroy setup for it. 
*/ if (ds_prev->ds_phys->ds_num_children == 1 && ds_prev->ds_userrefs == 0 && DS_IS_DEFER_DESTROY(ds_prev)) { ASSERT3P(dsda->rm_origin, !=, NULL); } else { ASSERT3P(dsda->rm_origin, ==, NULL); } } else if (!after_branch_point) { ds_prev->ds_phys->ds_next_snap_obj = ds->ds_phys->ds_next_snap_obj; } } if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_t *ds_next; uint64_t old_unique; uint64_t used = 0, comp = 0, uncomp = 0; VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); old_unique = ds_next->ds_phys->ds_unique_bytes; dmu_buf_will_dirty(ds_next->ds_dbuf, tx); ds_next->ds_phys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; ds_next->ds_phys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); if (ds_next->ds_deadlist.dl_oldfmt) { process_old_deadlist(ds, ds_prev, ds_next, after_branch_point, tx); } else { /* Adjust prev's unique space. */ if (ds_prev && !after_branch_point) { dsl_deadlist_space_range(&ds_next->ds_deadlist, ds_prev->ds_phys->ds_prev_snap_txg, ds->ds_phys->ds_prev_snap_txg, &used, &comp, &uncomp); ds_prev->ds_phys->ds_unique_bytes += used; } /* Adjust snapused. */ dsl_deadlist_space_range(&ds_next->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, -used, -comp, -uncomp, tx); /* Move blocks to be freed to pool's free list. */ dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, tx); dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); /* Merge our deadlist into next's and free it. */ dsl_deadlist_merge(&ds_next->ds_deadlist, ds->ds_phys->ds_deadlist_obj, tx); } dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); /* Collapse range in clone heads */ dsl_dataset_remove_clones_key(ds, ds->ds_phys->ds_creation_txg, tx); if (dsl_dataset_is_snapshot(ds_next)) { dsl_dataset_t *ds_nextnext; /* * Update next's unique to include blocks which * were previously shared by only this snapshot * and it. Those blocks will be born after the * prev snap and before this snap, and will have * died after the next snap and before the one * after that (ie. be on the snap after next's * deadlist). */ VERIFY(0 == dsl_dataset_hold_obj(dp, ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, ds->ds_phys->ds_creation_txg, &used, &comp, &uncomp); ds_next->ds_phys->ds_unique_bytes += used; dsl_dataset_rele(ds_nextnext, FTAG); ASSERT3P(ds_next->ds_prev, ==, NULL); /* Collapse range in this head. */ dsl_dataset_t *hds; VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); dsl_deadlist_remove_key(&hds->ds_deadlist, ds->ds_phys->ds_creation_txg, tx); dsl_dataset_rele(hds, FTAG); } else { ASSERT3P(ds_next->ds_prev, ==, ds); dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); ds_next->ds_prev = NULL; if (ds_prev) { VERIFY(0 == dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds_next, &ds_next->ds_prev)); } dsl_dataset_recalc_head_uniq(ds_next); /* * Reduce the amount of our unconsumed refreservation * being charged to our parent by the amount of * new unique data we have gained.
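 *
 * For example, with ds_next->ds_reserved = 5G, old_unique = 2G and a
 * new unique of 4G: the parent was charged 5G - 2G = 3G of unconsumed
 * refreservation before, and only 5G - 4G = 1G after, so mrsdelta =
 * MIN(4G - 2G, 5G - 2G) = 2G is credited back to the parent below.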
*/ if (old_unique < ds_next->ds_reserved) { int64_t mrsdelta; uint64_t new_unique = ds_next->ds_phys->ds_unique_bytes; ASSERT(old_unique <= new_unique); mrsdelta = MIN(new_unique - old_unique, ds_next->ds_reserved - old_unique); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); } } dsl_dataset_rele(ds_next, FTAG); } else { zfeature_info_t *async_destroy = &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; objset_t *os; /* * There's no next snapshot, so this is a head dataset. * Destroy the deadlist. Unless it's a clone, the * deadlist should be empty. (If it's a clone, it's * safe to ignore the deadlist contents.) */ dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); ds->ds_phys->ds_deadlist_obj = 0; VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os)); if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { err = old_synchronous_dataset_destroy(ds, tx); } else { /* * Move the bptree into the pool's list of trees to * clean up and update space accounting information. */ uint64_t used, comp, uncomp; zil_destroy_sync(dmu_objset_zil(os), tx); if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { spa_feature_incr(dp->dp_spa, async_destroy, tx); dp->dp_bptree_obj = bptree_alloc(mos, tx); VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj, tx) == 0); } used = ds->ds_dir->dd_phys->dd_used_bytes; comp = ds->ds_dir->dd_phys->dd_compressed_bytes; uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == used); bptree_add(mos, dp->dp_bptree_obj, &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, used, comp, uncomp, tx); dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, -used, -comp, -uncomp, tx); dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, used, comp, uncomp, tx); } if (ds->ds_prev != NULL) { if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY3U(0, ==, zap_remove_int(mos, ds->ds_prev->ds_dir->dd_phys->dd_clones, ds->ds_object, tx)); } dsl_dataset_rele(ds->ds_prev, ds); ds->ds_prev = ds_prev = NULL; } } /* * This must be done after the dsl_traverse(), because it will * re-open the objset. 
*/ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { /* Erase the link in the dir */ dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); ASSERT(err == 0); } else { /* remove from snapshot namespace */ dsl_dataset_t *ds_head; ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); VERIFY(0 == dsl_dataset_get_snapname(ds)); #ifdef ZFS_DEBUG { uint64_t val; err = dsl_dataset_snap_lookup(ds_head, ds->ds_snapname, &val); ASSERT0(err); ASSERT3U(val, ==, obj); } #endif err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); ASSERT(err == 0); dsl_dataset_rele(ds_head, FTAG); } if (ds_prev && ds->ds_prev != ds_prev) dsl_dataset_rele(ds_prev, FTAG); spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); if (ds->ds_phys->ds_next_clones_obj != 0) { uint64_t count; ASSERT(0 == zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count) && count == 0); VERIFY(0 == dmu_object_free(mos, ds->ds_phys->ds_next_clones_obj, tx)); } if (ds->ds_phys->ds_props_obj != 0) VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); if (ds->ds_phys->ds_userrefs_obj != 0) VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); dsl_dir_close(ds->ds_dir, ds); ds->ds_dir = NULL; dsl_dataset_drain_refs(ds, tag); VERIFY(0 == dmu_object_free(mos, obj, tx)); if (dsda->rm_origin) { /* * Remove the origin of the clone we just destroyed. */ struct dsl_ds_destroyarg ndsda = {0}; ndsda.ds = dsda->rm_origin; dsl_dataset_destroy_sync(&ndsda, tag, tx); } } static int dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) { uint64_t asize; if (!dmu_tx_is_syncing(tx)) return (0); /* * If there's an fs-only reservation, any blocks that might become * owned by the snapshot dataset must be accommodated by space * outside of the reservation. */ ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); /* * Propagate any reserved space for this snapshot to other * snapshot checks in this sync group. */ if (asize > 0) dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, dmu_tx_t *tx) { int err; uint64_t value; /* * We don't allow multiple snapshots of the same txg. If there * is already one, try again. */ if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) return (EAGAIN); /* * Check for conflicting snapshot name. */ err = dsl_dataset_snap_lookup(ds, snapname, &value); if (err == 0) return (EEXIST); if (err != ENOENT) return (err); /* * Check that the dataset's name is not too long. 
Name consists * of the dataset's length + 1 for the @-sign + snapshot name's length */ if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) return (ENAMETOOLONG); err = dsl_dataset_snapshot_reserve_space(ds, tx); if (err) return (err); ds->ds_trysnap_txg = tx->tx_txg; return (0); } void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname, dmu_tx_t *tx) { dsl_pool_t *dp = ds->ds_dir->dd_pool; dmu_buf_t *dbuf; dsl_dataset_phys_t *dsphys; uint64_t dsobj, crtxg; objset_t *mos = dp->dp_meta_objset; int err; ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); /* * The origin's ds_creation_txg has to be < TXG_INITIAL */ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) crtxg = 1; else crtxg = tx->tx_txg; dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); dsphys = dbuf->db_data; bzero(dsphys, sizeof (dsl_dataset_phys_t)); dsphys->ds_dir_obj = ds->ds_dir->dd_object; dsphys->ds_fsid_guid = unique_create(); (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, sizeof (dsphys->ds_guid)); dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; dsphys->ds_next_snap_obj = ds->ds_object; dsphys->ds_num_children = 1; dsphys->ds_creation_time = gethrestime_sec(); dsphys->ds_creation_txg = crtxg; dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; dsphys->ds_flags = ds->ds_phys->ds_flags; dsphys->ds_bp = ds->ds_phys->ds_bp; dmu_buf_rele(dbuf, FTAG); ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); if (ds->ds_prev) { uint64_t next_clones_obj = ds->ds_prev->ds_phys->ds_next_clones_obj; ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object || ds->ds_prev->ds_phys->ds_num_children > 1); if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, ds->ds_prev->ds_phys->ds_creation_txg); ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; } else if (next_clones_obj != 0) { remove_from_next_clones(ds->ds_prev, dsphys->ds_next_snap_obj, tx); VERIFY3U(0, ==, zap_add_int(mos, next_clones_obj, dsobj, tx)); } } /* * If we have a reference-reservation on this dataset, we will * need to increase the amount of refreservation being charged * since our unique space is going to zero. 
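 * A worked example with illustrative sizes (not from the source): with
 * refreservation = 10G and ds_unique_bytes = 3G, the blocks that were
 * unique to the head become owned by the snapshot, so we charge
 * delta = MIN(3G, 10G) = 3G to DD_USED_REFRSRV below; the head must
 * effectively re-reserve that space.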
*/ if (ds->ds_reserved) { int64_t delta; ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); } dmu_buf_will_dirty(ds->ds_dbuf, tx); zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu", ds->ds_dir->dd_myname, snapname, dsobj, ds->ds_phys->ds_prev_snap_txg); ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); dsl_deadlist_add_key(&ds->ds_deadlist, ds->ds_phys->ds_prev_snap_txg, tx); ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); ds->ds_phys->ds_prev_snap_obj = dsobj; ds->ds_phys->ds_prev_snap_txg = crtxg; ds->ds_phys->ds_unique_bytes = 0; if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &dsobj, tx); ASSERT(err == 0); if (ds->ds_prev) dsl_dataset_drop_ref(ds->ds_prev, ds); VERIFY(0 == dsl_dataset_get_ref(dp, ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); dsl_scan_ds_snapshotted(ds, tx); dsl_dir_snap_cmtime_update(ds->ds_dir); spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); } void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(ds->ds_objset != NULL); ASSERT(ds->ds_phys->ds_next_snap_obj == 0); /* * in case we had to change ds_fsid_guid when we opened it, * sync it out now. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; dmu_objset_sync(ds->ds_objset, zio, tx); } static void get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t count = 0; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; zap_cursor_t zc; zap_attribute_t za; nvlist_t *propval; nvlist_t *val; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0); /* * There may be missing entries in ds_next_clones_obj * due to a bug in a previous version of the code. * Only trust it if it has the right number of entries. */ if (ds->ds_phys->ds_next_clones_obj != 0) { ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count)); } if (count != ds->ds_phys->ds_num_children - 1) { goto fail; } for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *clone; char buf[ZFS_MAXNAMELEN]; /* * Even though we hold the dp_config_rwlock, the dataset * may fail to open, returning ENOENT. If there is a * thread concurrently attempting to destroy this * dataset, it will have the ds_rwlock held for * RW_WRITER. Our call to dsl_dataset_hold_obj() -> * dsl_dataset_hold_ref() will fail its * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the * dp_config_rwlock, and wait for the destroy progress * and signal ds_exclusive_cv. If the destroy was * successful, we will see that * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
*/ if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool, za.za_first_integer, FTAG, &clone) != 0) continue; dsl_dir_name(clone->ds_dir, buf); VERIFY(nvlist_add_boolean(val, buf) == 0); dsl_dataset_rele(clone, FTAG); } zap_cursor_fini(&zc); VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0); VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval) == 0); fail: nvlist_free(val); nvlist_free(propval); rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) { uint64_t refd, avail, uobjs, aobjs, ratio; ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : (ds->ds_phys->ds_uncompressed_bytes * 100 / ds->ds_phys->ds_compressed_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, + ds->ds_phys->ds_uncompressed_bytes); if (dsl_dataset_is_snapshot(ds)) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, ds->ds_phys->ds_unique_bytes); get_clones_stat(ds, nv); } else { dsl_dir_stats(ds->ds_dir, nv); } dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, ds->ds_phys->ds_creation_time); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, ds->ds_phys->ds_creation_txg); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, ds->ds_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, ds->ds_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, ds->ds_phys->ds_guid); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, ds->ds_phys->ds_unique_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, ds->ds_object); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, DS_IS_DEFER_DESTROY(ds) ? 
1 : 0); if (ds->ds_phys->ds_prev_snap_obj != 0) { uint64_t written, comp, uncomp; dsl_pool_t *dp = ds->ds_dir->dd_pool; dsl_dataset_t *prev; rw_enter(&dp->dp_config_rwlock, RW_READER); int err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); rw_exit(&dp->dp_config_rwlock); if (err == 0) { err = dsl_dataset_space_written(prev, ds, &written, &comp, &uncomp); dsl_dataset_rele(prev, FTAG); if (err == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, written); } } } } void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) { stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; stat->dds_guid = ds->ds_phys->ds_guid; stat->dds_origin[0] = '\0'; if (dsl_dataset_is_snapshot(ds)) { stat->dds_is_snapshot = B_TRUE; stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; } else { stat->dds_is_snapshot = B_FALSE; stat->dds_num_clones = 0; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(ds->ds_dir)) { dsl_dataset_t *ods; VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); dsl_dataset_name(ods, stat->dds_origin); dsl_dataset_drop_ref(ods, FTAG); } rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); } } uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds) { return (ds->ds_fsid_guid); } void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp) { *refdbytesp = ds->ds_phys->ds_referenced_bytes; *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; if (ds->ds_quota != 0) { /* * Adjust available bytes according to refquota */ if (*refdbytesp < ds->ds_quota) *availbytesp = MIN(*availbytesp, ds->ds_quota - *refdbytesp); else *availbytesp = 0; } *usedobjsp = ds->ds_phys->ds_bp.blk_fill; *availobjsp = DN_MAX_OBJECT - *usedobjsp; } boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) { dsl_pool_t *dp = ds->ds_dir->dd_pool; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); if (ds->ds_prev == NULL) return (B_FALSE); if (ds->ds_phys->ds_bp.blk_birth > ds->ds_prev->ds_phys->ds_creation_txg) { objset_t *os, *os_prev; /* * It may be that only the ZIL differs, because it was * reset in the head. Don't count that as being * modified. 
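 * (The bcmp below implements this: if the two objsets' meta-dnode
 * block pointers are identical, only the ZIL header can differ, and
 * the dataset is reported as unmodified.)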
*/ if (dmu_objset_from_ds(ds, &os) != 0) return (B_TRUE); if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0) return (B_TRUE); return (bcmp(&os->os_phys->os_meta_dnode, &os_prev->os_phys->os_meta_dnode, sizeof (os->os_phys->os_meta_dnode)) != 0); } return (B_FALSE); } /* ARGSUSED */ static int dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; char *newsnapname = arg2; dsl_dir_t *dd = ds->ds_dir; dsl_dataset_t *hds; uint64_t val; int err; err = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); if (err) return (err); /* new name better not be in use */ err = dsl_dataset_snap_lookup(hds, newsnapname, &val); dsl_dataset_rele(hds, FTAG); if (err == 0) err = EEXIST; else if (err == ENOENT) err = 0; /* dataset name + 1 for the "@" + the new snapshot name must fit */ if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) err = ENAMETOOLONG; return (err); } static void dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; const char *newsnapname = arg2; dsl_dir_t *dd = ds->ds_dir; objset_t *mos = dd->dd_pool->dp_meta_objset; dsl_dataset_t *hds; int err; ASSERT(ds->ds_phys->ds_next_snap_obj != 0); VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); VERIFY(0 == dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); ASSERT0(err); mutex_enter(&ds->ds_lock); (void) strcpy(ds->ds_snapname, newsnapname); mutex_exit(&ds->ds_lock); err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx); ASSERT0(err); spa_history_log_internal_ds(ds, "rename", tx, "-> @%s", newsnapname); dsl_dataset_rele(hds, FTAG); } struct renamesnaparg { dsl_sync_task_group_t *dstg; char failed[MAXPATHLEN]; char *oldsnap; char *newsnap; }; static int dsl_snapshot_rename_one(const char *name, void *arg) { struct renamesnaparg *ra = arg; dsl_dataset_t *ds = NULL; char *snapname; int err; snapname = kmem_asprintf("%s@%s", name, ra->oldsnap); (void) strlcpy(ra->failed, snapname, sizeof (ra->failed)); /* * For recursive snapshot renames the parent won't be changing * so we just pass name for both the to/from argument. */ err = zfs_secpolicy_rename_perms(snapname, snapname, CRED()); if (err != 0) { strfree(snapname); return (err == ENOENT ? 0 : err); } #ifdef _KERNEL /* * For each filesystem undergoing rename, we'll need to unmount it. */ (void) zfs_unmount_snap(snapname, NULL); #endif err = dsl_dataset_hold(snapname, ra->dstg, &ds); strfree(snapname); if (err != 0) return (err == ENOENT ?
0 : err); dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); return (0); } static int dsl_recursive_rename(char *oldname, const char *newname) { int err; struct renamesnaparg *ra; dsl_sync_task_t *dst; spa_t *spa; char *cp, *fsname = spa_strdup(oldname); int len = strlen(oldname) + 1; /* truncate the snapshot name to get the fsname */ cp = strchr(fsname, '@'); *cp = '\0'; err = spa_open(fsname, &spa, FTAG); if (err) { kmem_free(fsname, len); return (err); } ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ra->oldsnap = strchr(oldname, '@') + 1; ra->newsnap = strchr(newname, '@') + 1; *ra->failed = '\0'; err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, DS_FIND_CHILDREN); kmem_free(fsname, len); if (err == 0) { err = dsl_sync_task_group_wait(ra->dstg); } for (dst = list_head(&ra->dstg->dstg_tasks); dst; dst = list_next(&ra->dstg->dstg_tasks, dst)) { dsl_dataset_t *ds = dst->dst_arg1; if (dst->dst_err) { dsl_dir_name(ds->ds_dir, ra->failed); (void) strlcat(ra->failed, "@", sizeof (ra->failed)); (void) strlcat(ra->failed, ra->newsnap, sizeof (ra->failed)); } dsl_dataset_rele(ds, ra->dstg); } if (err) (void) strlcpy(oldname, ra->failed, sizeof (ra->failed)); dsl_sync_task_group_destroy(ra->dstg); kmem_free(ra, sizeof (struct renamesnaparg)); spa_close(spa, FTAG); return (err); } static int dsl_valid_rename(const char *oldname, void *arg) { int delta = *(int *)arg; if (strlen(oldname) + delta >= MAXNAMELEN) return (ENAMETOOLONG); return (0); } #pragma weak dmu_objset_rename = dsl_dataset_rename int dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) { dsl_dir_t *dd; dsl_dataset_t *ds; const char *tail; int err; err = dsl_dir_open(oldname, FTAG, &dd, &tail); if (err) return (err); if (tail == NULL) { int delta = strlen(newname) - strlen(oldname); /* if we're growing, validate child name lengths */ if (delta > 0) err = dmu_objset_find(oldname, dsl_valid_rename, &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); if (err == 0) err = dsl_dir_rename(dd, newname); dsl_dir_close(dd, FTAG); return (err); } if (tail[0] != '@') { /* the name ended in a nonexistent component */ dsl_dir_close(dd, FTAG); return (ENOENT); } dsl_dir_close(dd, FTAG); /* new name must be snapshot in same filesystem */ tail = strchr(newname, '@'); if (tail == NULL) return (EINVAL); tail++; if (strncmp(oldname, newname, tail - newname) != 0) return (EXDEV); if (recursive) { err = dsl_recursive_rename(oldname, newname); } else { err = dsl_dataset_hold(oldname, FTAG, &ds); if (err) return (err); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_snapshot_rename_check, dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); dsl_dataset_rele(ds, FTAG); } return (err); } struct promotenode { list_node_t link; dsl_dataset_t *ds; }; struct promotearg { list_t shared_snaps, origin_snaps, clone_snaps; dsl_dataset_t *origin_origin; uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; char *err_ds; }; static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); static boolean_t snaplist_unstable(list_t *l); static int dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *hds = arg1; struct promotearg *pa = arg2; struct promotenode *snap = list_head(&pa->shared_snaps); dsl_dataset_t *origin_ds = snap->ds; int err; uint64_t unused; /* Check that it is a real clone */ if (!dsl_dir_is_clone(hds->ds_dir)) return (EINVAL); /* 
Since this is so expensive, don't do the preliminary check */ if (!dmu_tx_is_syncing(tx)) return (0); if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) return (EXDEV); /* compute origin's new unique space */ snap = list_tail(&pa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); dsl_deadlist_space_range(&snap->ds->ds_deadlist, origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique, &unused, &unused); /* * Walk the snapshots that we are moving * * Compute space to transfer. Consider the incremental changes * to used for each snapshot: * (my used) = (prev's used) + (blocks born) - (blocks killed) * So each snapshot gave birth to: * (blocks born) = (my used) - (prev's used) + (blocks killed) * So a sequence would look like: * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) * Which simplifies to: * uN + kN + kN-1 + ... + k1 + k0 * Note however, if we stop before we reach the ORIGIN we get: * uN + kN + kN-1 + ... + kM - uM-1 */ pa->used = origin_ds->ds_phys->ds_referenced_bytes; pa->comp = origin_ds->ds_phys->ds_compressed_bytes; pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; for (snap = list_head(&pa->shared_snaps); snap; snap = list_next(&pa->shared_snaps, snap)) { uint64_t val, dlused, dlcomp, dluncomp; dsl_dataset_t *ds = snap->ds; /* Check that the snapshot name does not conflict */ VERIFY(0 == dsl_dataset_get_snapname(ds)); err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); if (err == 0) { err = EEXIST; goto out; } if (err != ENOENT) goto out; /* The very first snapshot does not have a deadlist */ if (ds->ds_phys->ds_prev_snap_obj == 0) continue; dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); pa->used += dlused; pa->comp += dlcomp; pa->uncomp += dluncomp; } /* * If we are a clone of a clone then we never reached ORIGIN, * so we need to subtract out the clone origin's used space. */ if (pa->origin_origin) { pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; } /* Check that there is enough space here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, pa->used); if (err) return (err); /* * Compute the amounts of space that will be used by snapshots * after the promotion (for both origin and clone). For each, * it is the amount of space that will be on all of their * deadlists (that was not born before their new origin). */ if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { uint64_t space; /* * Note, typically this will not be a clone of a clone, * so dd_origin_txg will be < TXG_INITIAL, so * these snaplist_space() -> dsl_deadlist_space_range() * calls will be fast because they do not have to * iterate over all bps. 
*/ snap = list_head(&pa->origin_snaps); err = snaplist_space(&pa->shared_snaps, snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap); if (err) return (err); err = snaplist_space(&pa->clone_snaps, snap->ds->ds_dir->dd_origin_txg, &space); if (err) return (err); pa->cloneusedsnap += space; } if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { err = snaplist_space(&pa->origin_snaps, origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); if (err) return (err); } return (0); out: pa->err_ds = snap->ds->ds_snapname; return (err); } static void dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *hds = arg1; struct promotearg *pa = arg2; struct promotenode *snap = list_head(&pa->shared_snaps); dsl_dataset_t *origin_ds = snap->ds; dsl_dataset_t *origin_head; dsl_dir_t *dd = hds->ds_dir; dsl_pool_t *dp = hds->ds_dir->dd_pool; dsl_dir_t *odd = NULL; uint64_t oldnext_obj; int64_t delta; ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); snap = list_head(&pa->origin_snaps); origin_head = snap->ds; /* * We need to explicitly open odd, since origin_ds's dd will be * changing. */ VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, NULL, FTAG, &odd)); /* change origin's next snap */ dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; snap = list_tail(&pa->clone_snaps); ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; /* change the origin's next clone */ if (origin_ds->ds_phys->ds_next_clones_obj) { remove_from_next_clones(origin_ds, snap->ds->ds_object, tx); VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, origin_ds->ds_phys->ds_next_clones_obj, oldnext_obj, tx)); } /* change origin */ dmu_buf_will_dirty(dd->dd_dbuf, tx); ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; dmu_buf_will_dirty(odd->dd_dbuf, tx); odd->dd_phys->dd_origin_obj = origin_ds->ds_object; origin_head->ds_dir->dd_origin_txg = origin_ds->ds_phys->ds_creation_txg; /* change dd_clone entries */ if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, hds->ds_object, tx)); VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, pa->origin_origin->ds_dir->dd_phys->dd_clones, hds->ds_object, tx)); VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, pa->origin_origin->ds_dir->dd_phys->dd_clones, origin_head->ds_object, tx)); if (dd->dd_phys->dd_clones == 0) { dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, origin_head->ds_object, tx)); } /* move snapshots to this dir */ for (snap = list_head(&pa->shared_snaps); snap; snap = list_next(&pa->shared_snaps, snap)) { dsl_dataset_t *ds = snap->ds; /* unregister props as dsl_dir is changing */ if (ds->ds_objset) { dmu_objset_evict(ds->ds_objset); ds->ds_objset = NULL; } /* move snap name entry */ VERIFY(0 == dsl_dataset_get_snapname(ds)); VERIFY(0 == dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx)); VERIFY(0 == zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object; 
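/*
 * (Note: ds_dir_obj now names the promoted clone's dsl_dir; the
 * dsl_dir_close()/dsl_dir_open_obj() pair just below swings the
 * in-core ds->ds_dir reference to match the on-disk change.)
 */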
ASSERT3P(ds->ds_dir, ==, odd); dsl_dir_close(ds->ds_dir, ds); VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, NULL, ds, &ds->ds_dir)); /* move any clone references */ if (ds->ds_phys->ds_next_clones_obj && spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { zap_cursor_t zc; zap_attribute_t za; for (zap_cursor_init(&zc, dp->dp_meta_objset, ds->ds_phys->ds_next_clones_obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { dsl_dataset_t *cnds; uint64_t o; if (za.za_first_integer == oldnext_obj) { /* * We've already moved the * origin's reference. */ continue; } VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, za.za_first_integer, FTAG, &cnds)); o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; VERIFY3U(zap_remove_int(dp->dp_meta_objset, odd->dd_phys->dd_clones, o, tx), ==, 0); VERIFY3U(zap_add_int(dp->dp_meta_objset, dd->dd_phys->dd_clones, o, tx), ==, 0); dsl_dataset_rele(cnds, FTAG); } zap_cursor_fini(&zc); } ASSERT0(dsl_prop_numcb(ds)); } /* * Change space accounting. * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either * both be valid, or both be 0 (resulting in delta == 0). This * is true for each of {clone,origin} independently. */ delta = pa->cloneusedsnap - dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, >=, 0); ASSERT3U(pa->used, >=, delta); dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(dd, DD_USED_HEAD, pa->used - delta, pa->comp, pa->uncomp, tx); delta = pa->originusedsnap - odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; ASSERT3S(delta, <=, 0); ASSERT3U(pa->used, >=, -delta); dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); dsl_dir_diduse_space(odd, DD_USED_HEAD, -pa->used - delta, -pa->comp, -pa->uncomp, tx); origin_ds->ds_phys->ds_unique_bytes = pa->unique; /* log history record */ spa_history_log_internal_ds(hds, "promote", tx, ""); dsl_dir_close(odd, FTAG); } static char *snaplist_tag = "snaplist"; /* * Make a list of dsl_dataset_t's for the snapshots between first_obj * (exclusive) and last_obj (inclusive). The list will be in reverse * order (last_obj will be the list_head()). If first_obj == 0, do all * snapshots back to this dataset's origin. 
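 * For example, given a snapshot chain A -> B -> C (oldest to newest),
 * first_obj = A and last_obj = C produce the list (C, B): C is at
 * list_head() and A itself is excluded.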
*/ static int snaplist_make(dsl_pool_t *dp, boolean_t own, uint64_t first_obj, uint64_t last_obj, list_t *l) { uint64_t obj = last_obj; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); list_create(l, sizeof (struct promotenode), offsetof(struct promotenode, link)); while (obj != first_obj) { dsl_dataset_t *ds; struct promotenode *snap; int err; if (own) { err = dsl_dataset_own_obj(dp, obj, 0, snaplist_tag, &ds); if (err == 0) dsl_dataset_make_exclusive(ds, snaplist_tag); } else { err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); } if (err == ENOENT) { /* lost race with snapshot destroy */ struct promotenode *last = list_tail(l); ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); obj = last->ds->ds_phys->ds_prev_snap_obj; continue; } else if (err) { return (err); } if (first_obj == 0) first_obj = ds->ds_dir->dd_phys->dd_origin_obj; snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); snap->ds = ds; list_insert_tail(l, snap); obj = ds->ds_phys->ds_prev_snap_obj; } return (0); } static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) { struct promotenode *snap; *spacep = 0; for (snap = list_head(l); snap; snap = list_next(l, snap)) { uint64_t used, comp, uncomp; dsl_deadlist_space_range(&snap->ds->ds_deadlist, mintxg, UINT64_MAX, &used, &comp, &uncomp); *spacep += used; } return (0); } static void snaplist_destroy(list_t *l, boolean_t own) { struct promotenode *snap; if (!l || !list_link_active(&l->list_head)) return; while ((snap = list_tail(l)) != NULL) { list_remove(l, snap); if (own) dsl_dataset_disown(snap->ds, snaplist_tag); else dsl_dataset_rele(snap->ds, snaplist_tag); kmem_free(snap, sizeof (struct promotenode)); } list_destroy(l); } /* * Promote a clone. Nomenclature note: * "clone" or "cds": the original clone which is being promoted * "origin" or "ods": the snapshot which was originally the clone's origin * "origin head" or "ohds": the dataset which is the head * (filesystem/volume) for the origin * "origin origin": the origin of the origin's filesystem (typically * NULL, indicating that the clone is not a clone of a clone). */ int dsl_dataset_promote(const char *name, char *conflsnap) { dsl_dataset_t *ds; dsl_dir_t *dd; dsl_pool_t *dp; dmu_object_info_t doi; struct promotearg pa = { 0 }; struct promotenode *snap; int err; err = dsl_dataset_hold(name, FTAG, &ds); if (err) return (err); dd = ds->ds_dir; dp = dd->dd_pool; err = dmu_object_info(dp->dp_meta_objset, ds->ds_phys->ds_snapnames_zapobj, &doi); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* * We are going to inherit all the snapshots taken before our * origin (i.e., our new origin will be our parent's origin). * Take ownership of them so that we can rename them into our * namespace.
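 * (Ownership, rather than a plain hold, also prevents a concurrent
 * destroy from removing one of these snapshots while the promote is
 * in progress.)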
*/ rw_enter(&dp->dp_config_rwlock, RW_READER); err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, &pa.shared_snaps); if (err != 0) goto out; err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); if (err != 0) goto out; snap = list_head(&pa.shared_snaps); ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); if (err != 0) goto out; if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { err = dsl_dataset_hold_obj(dp, snap->ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &pa.origin_origin); if (err != 0) goto out; } out: rw_exit(&dp->dp_config_rwlock); /* * Add in 128x the snapnames zapobj size, since we will be moving * a bunch of snapnames to the promoted ds, and dirtying their * bonus buffers. */ if (err == 0) { err = dsl_sync_task_do(dp, dsl_dataset_promote_check, dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blocks_512); if (err && pa.err_ds && conflsnap) (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN); } snaplist_destroy(&pa.shared_snaps, B_TRUE); snaplist_destroy(&pa.clone_snaps, B_FALSE); snaplist_destroy(&pa.origin_snaps, B_FALSE); if (pa.origin_origin) dsl_dataset_rele(pa.origin_origin, FTAG); dsl_dataset_rele(ds, FTAG); return (err); } struct cloneswaparg { dsl_dataset_t *cds; /* clone dataset */ dsl_dataset_t *ohds; /* origin's head dataset */ boolean_t force; int64_t unused_refres_delta; /* change in unconsumed refreservation */ }; /* ARGSUSED */ static int dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) { struct cloneswaparg *csa = arg1; /* they should both be heads */ if (dsl_dataset_is_snapshot(csa->cds) || dsl_dataset_is_snapshot(csa->ohds)) return (EINVAL); /* the branch point should be just before them */ if (csa->cds->ds_prev != csa->ohds->ds_prev) return (EINVAL); /* cds should be the clone (unless they are unrelated) */ if (csa->cds->ds_prev != NULL && csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && csa->ohds->ds_object != csa->cds->ds_prev->ds_phys->ds_next_snap_obj) return (EINVAL); /* the clone should be a child of the origin */ if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) return (EINVAL); /* ohds shouldn't be modified unless 'force' */ if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) return (ETXTBSY); /* adjust amount of any unconsumed refreservation */ csa->unused_refres_delta = (int64_t)MIN(csa->ohds->ds_reserved, csa->ohds->ds_phys->ds_unique_bytes) - (int64_t)MIN(csa->ohds->ds_reserved, csa->cds->ds_phys->ds_unique_bytes); if (csa->unused_refres_delta > 0 && csa->unused_refres_delta > dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); if (csa->ohds->ds_quota != 0 && csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota) return (EDQUOT); return (0); } /* ARGSUSED */ static void dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx) { struct cloneswaparg *csa = arg1; dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; ASSERT(csa->cds->ds_reserved == 0); ASSERT(csa->ohds->ds_quota == 0 || csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota); dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); if (csa->cds->ds_objset != NULL) { dmu_objset_evict(csa->cds->ds_objset); csa->cds->ds_objset = NULL; } if (csa->ohds->ds_objset != NULL) { dmu_objset_evict(csa->ohds->ds_objset); csa->ohds->ds_objset = NULL; } /* * Reset origin's unique bytes, if it exists. 
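 * (After the swap, the origin's unique space is exactly what sits on
 * the clone's deadlist past the origin's previous snapshot, which is
 * the range recomputed below.)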
*/ if (csa->cds->ds_prev) { dsl_dataset_t *origin = csa->cds->ds_prev; uint64_t comp, uncomp; dmu_buf_will_dirty(origin->ds_dbuf, tx); dsl_deadlist_space_range(&csa->cds->ds_deadlist, origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); } /* swap blkptrs */ { blkptr_t tmp; tmp = csa->ohds->ds_phys->ds_bp; csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; csa->cds->ds_phys->ds_bp = tmp; } /* set dd_*_bytes */ { int64_t dused, dcomp, duncomp; uint64_t cdl_used, cdl_comp, cdl_uncomp; uint64_t odl_used, odl_comp, odl_uncomp; ASSERT3U(csa->cds->ds_dir->dd_phys-> dd_used_breakdown[DD_USED_SNAP], ==, 0); dsl_deadlist_space(&csa->cds->ds_deadlist, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space(&csa->ohds->ds_deadlist, &odl_used, &odl_comp, &odl_uncomp); dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used - (csa->ohds->ds_phys->ds_referenced_bytes + odl_used); dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + cdl_uncomp - (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, dused, dcomp, duncomp, tx); dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, -dused, -dcomp, -duncomp, tx); /* * The difference in the space used by snapshots is the * difference in snapshot space due to the head's * deadlist (since that's the only thing that's * changing that affects the snapused). */ dsl_deadlist_space_range(&csa->cds->ds_deadlist, csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &cdl_used, &cdl_comp, &cdl_uncomp); dsl_deadlist_space_range(&csa->ohds->ds_deadlist, csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX, &odl_used, &odl_comp, &odl_uncomp); dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, DD_USED_HEAD, DD_USED_SNAP, tx); } /* swap ds_*_bytes */ SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes, csa->cds->ds_phys->ds_referenced_bytes); SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, csa->cds->ds_phys->ds_compressed_bytes); SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, csa->cds->ds_phys->ds_uncompressed_bytes); SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, csa->cds->ds_phys->ds_unique_bytes); /* apply any parent delta for change in unconsumed refreservation */ dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, csa->unused_refres_delta, 0, 0, tx); /* * Swap deadlists. */ dsl_deadlist_close(&csa->cds->ds_deadlist); dsl_deadlist_close(&csa->ohds->ds_deadlist); SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, csa->cds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, csa->cds->ds_phys->ds_deadlist_obj); dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, csa->ohds->ds_phys->ds_deadlist_obj); dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx); spa_history_log_internal_ds(csa->cds, "clone swap", tx, "parent=%s", csa->ohds->ds_dir->dd_myname); } /* * Swap 'clone' with its origin head datasets. Used at the end of "zfs * recv" into an existing fs to swizzle the file system to the new * version, and by "zfs rollback". Can also be used to swap two * independent head datasets if neither has any snapshots. */ int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force) { struct cloneswaparg csa; int error; ASSERT(clone->ds_owner); ASSERT(origin_head->ds_owner); retry: /* * Need exclusive access for the swap. 
If we're swapping these * datasets back after an error, we already hold the locks. */ if (!RW_WRITE_HELD(&clone->ds_rwlock)) rw_enter(&clone->ds_rwlock, RW_WRITER); if (!RW_WRITE_HELD(&origin_head->ds_rwlock) && !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { rw_exit(&clone->ds_rwlock); rw_enter(&origin_head->ds_rwlock, RW_WRITER); if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { rw_exit(&origin_head->ds_rwlock); goto retry; } } csa.cds = clone; csa.ohds = origin_head; csa.force = force; error = dsl_sync_task_do(clone->ds_dir->dd_pool, dsl_dataset_clone_swap_check, dsl_dataset_clone_swap_sync, &csa, NULL, 9); return (error); } /* * Given a pool name and a dataset object number in that pool, * return the name of that dataset. */ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) { spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; int error; if ((error = spa_open(pname, &spa, FTAG)) != 0) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); } rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); return (error); } int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) { int error = 0; ASSERT3S(asize, >, 0); /* * *ref_rsrv is the portion of asize that will come from any * unconsumed refreservation space. */ *ref_rsrv = 0; mutex_enter(&ds->ds_lock); /* * Make a space adjustment for reserved bytes. */ if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { ASSERT3U(*used, >=, ds->ds_reserved - ds->ds_phys->ds_unique_bytes); *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); *ref_rsrv = asize - MIN(asize, parent_delta(ds, asize + inflight)); } if (!check_quota || ds->ds_quota == 0) { mutex_exit(&ds->ds_lock); return (0); } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes (which * may free up space for us). 
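 * A worked example with illustrative sizes: refquota = 10G, on-disk
 * referenced = 9G, inflight = 2G. The estimate (11G) is over quota
 * but the on-disk value is not, so we return ERESTART and the caller
 * retries after pending changes sync. EDQUOT is returned only when
 * referenced itself is at or over quota and nothing is in flight.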
*/ if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { if (inflight > 0 || ds->ds_phys->ds_referenced_bytes < ds->ds_quota) error = ERESTART; else error = EDQUOT; } mutex_exit(&ds->ds_lock); return (error); } /* ARGSUSED */ static int dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; int err; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) return (ENOTSUP); if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); if (psa->psa_effective_value == 0) return (0); if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes || psa->psa_effective_value < ds->ds_reserved) return (ENOSPC); return (0); } extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *); void dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value = psa->psa_effective_value; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); if (ds->ds_quota != effective_value) { dmu_buf_will_dirty(ds->ds_dbuf, tx); ds->ds_quota = effective_value; } } int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota) { dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota); err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); /* * If someone removes a file, then tries to set the quota, we * want to make sure the file freeing takes effect. */ txg_wait_open(ds->ds_dir->dd_pool, 0); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, ds, &psa, 0); dsl_dataset_rele(ds, FTAG); return (err); } static int dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value; uint64_t unique; int err; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFRESERVATION) return (ENOTSUP); if (dsl_dataset_is_snapshot(ds)) return (EINVAL); if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); effective_value = psa->psa_effective_value; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate.
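 * (Other txgs may commit frees or allocations before the sync task
 * runs, so the authoritative check happens only in syncing context,
 * where ds_unique_bytes can also be recalculated safely.)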
*/ if (!dmu_tx_is_syncing(tx)) return (0); mutex_enter(&ds->ds_lock); if (!DS_UNIQUE_IS_ACCURATE(ds)) dsl_dataset_recalc_head_uniq(ds); unique = ds->ds_phys->ds_unique_bytes; mutex_exit(&ds->ds_lock); if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) { uint64_t delta = MAX(unique, effective_value) - MAX(unique, ds->ds_reserved); if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) return (ENOSPC); if (ds->ds_quota > 0 && effective_value > ds->ds_quota) return (ENOSPC); } return (0); } static void dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value = psa->psa_effective_value; uint64_t unique; int64_t delta; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa); dmu_buf_will_dirty(ds->ds_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); mutex_enter(&ds->ds_lock); ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); unique = ds->ds_phys->ds_unique_bytes; delta = MAX(0, (int64_t)(effective_value - unique)) - MAX(0, (int64_t)(ds->ds_reserved - unique)); ds->ds_reserved = effective_value; mutex_exit(&ds->ds_lock); dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); mutex_exit(&ds->ds_dir->dd_lock); } int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, uint64_t reservation) { dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "refreservation", source, &reservation); err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); err = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_set_reservation_check, dsl_dataset_set_reservation_sync, ds, &psa, 0); dsl_dataset_rele(ds, FTAG); return (err); } typedef struct zfs_hold_cleanup_arg { dsl_pool_t *dp; uint64_t dsobj; char htag[MAXNAMELEN]; } zfs_hold_cleanup_arg_t; static void dsl_dataset_user_release_onexit(void *arg) { zfs_hold_cleanup_arg_t *ca = arg; (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag, B_TRUE); kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t)); } void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor) { zfs_hold_cleanup_arg_t *ca; ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP); ca->dp = ds->ds_dir->dd_pool; ca->dsobj = ds->ds_object; (void) strlcpy(ca->htag, htag, sizeof (ca->htag)); VERIFY3U(0, ==, zfs_onexit_add_cb(minor, dsl_dataset_user_release_onexit, ca, NULL)); } /* * If you add new checks here, you may need to add * additional checks to the "temporary" case in * snapshot_check() in dmu_objset.c. 
*/ static int dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; const char *htag = ha->htag; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int error = 0; uint64_t tmp; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); if (!dsl_dataset_is_snapshot(ds)) return (EINVAL); /* tags must be unique */ mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_userrefs_obj) { /* the looked-up value is ignored; we only care whether the tag exists */ error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 8, 1, &tmp); if (error == 0) error = EEXIST; else if (error == ENOENT) error = 0; } mutex_exit(&ds->ds_lock); if (error == 0 && ha->temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN) error = E2BIG; return (error); } void dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; struct dsl_ds_holdarg *ha = arg2; const char *htag = ha->htag; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t now = gethrestime_sec(); uint64_t zapobj; mutex_enter(&ds->ds_lock); if (ds->ds_phys->ds_userrefs_obj == 0) { /* * This is the first user hold for this dataset. Create * the userrefs zap object. */ dmu_buf_will_dirty(ds->ds_dbuf, tx); zapobj = ds->ds_phys->ds_userrefs_obj = zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); } else { zapobj = ds->ds_phys->ds_userrefs_obj; } ds->ds_userrefs++; mutex_exit(&ds->ds_lock); VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); if (ha->temphold) { VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object, htag, &now, tx)); } spa_history_log_internal_ds(ds, "hold", tx, "tag = %s temp = %d holds now = %llu", htag, (int)ha->temphold, ds->ds_userrefs); } static int dsl_dataset_user_hold_one(const char *dsname, void *arg) { struct dsl_ds_holdarg *ha = arg; dsl_dataset_t *ds; int error; char *name; /* alloc a buffer to hold dsname@snapname plus terminating NULL */ name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, ha->dstg, &ds); strfree(name); if (error == 0) { ha->gotone = B_TRUE; dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, ds, ha, 0); } else if (error == ENOENT && ha->recursive) { error = 0; } else { (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); } return (error); } int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, boolean_t temphold) { struct dsl_ds_holdarg *ha; int error; ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); ha->htag = htag; ha->temphold = temphold; error = dsl_sync_task_do(ds->ds_dir->dd_pool, dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync, ds, ha, 0); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); return (error); } int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd) { struct dsl_ds_holdarg *ha; dsl_sync_task_t *dst; spa_t *spa; int error; minor_t minor = 0; if (cleanup_fd != -1) { /* Currently we only support cleanup-on-exit of tempholds.
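 * (A permanent hold is meant to outlive the process that created it,
 * so tying one to process exit would be contradictory.)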
*/ if (!temphold) return (EINVAL); error = zfs_onexit_fd_hold(cleanup_fd, &minor); if (error) return (error); } ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); error = spa_open(dsname, &spa, FTAG); if (error) { kmem_free(ha, sizeof (struct dsl_ds_holdarg)); if (cleanup_fd != -1) zfs_onexit_fd_rele(cleanup_fd); return (error); } ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ha->htag = htag; ha->snapname = snapname; ha->recursive = recursive; ha->temphold = temphold; if (recursive) { error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, ha, DS_FIND_CHILDREN); } else { error = dsl_dataset_user_hold_one(dsname, ha); } if (error == 0) error = dsl_sync_task_group_wait(ha->dstg); for (dst = list_head(&ha->dstg->dstg_tasks); dst; dst = list_next(&ha->dstg->dstg_tasks, dst)) { dsl_dataset_t *ds = dst->dst_arg1; if (dst->dst_err) { dsl_dataset_name(ds, ha->failed); *strchr(ha->failed, '@') = '\0'; } else if (error == 0 && minor != 0 && temphold) { /* * If this hold is to be released upon process exit, * register that action now. */ dsl_register_onexit_hold_cleanup(ds, htag, minor); } dsl_dataset_rele(ds, ha->dstg); } if (error == 0 && recursive && !ha->gotone) error = ENOENT; if (error) (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); dsl_sync_task_group_destroy(ha->dstg); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); if (cleanup_fd != -1) zfs_onexit_fd_rele(cleanup_fd); return (error); } struct dsl_ds_releasearg { dsl_dataset_t *ds; const char *htag; boolean_t own; /* do we own or just hold ds? */ }; static int dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, boolean_t *might_destroy) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t zapobj; uint64_t tmp; int error; *might_destroy = B_FALSE; mutex_enter(&ds->ds_lock); zapobj = ds->ds_phys->ds_userrefs_obj; if (zapobj == 0) { /* The tag can't possibly exist */ mutex_exit(&ds->ds_lock); return (ESRCH); } /* Make sure the tag exists */ error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); if (error) { mutex_exit(&ds->ds_lock); if (error == ENOENT) error = ESRCH; return (error); } if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) *might_destroy = B_TRUE; mutex_exit(&ds->ds_lock); return (0); } static int dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_releasearg *ra = arg1; dsl_dataset_t *ds = ra->ds; boolean_t might_destroy; int error; if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) return (ENOTSUP); error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); if (error) return (error); if (might_destroy) { struct dsl_ds_destroyarg dsda = {0}; if (dmu_tx_is_syncing(tx)) { /* * If we're not prepared to remove the snapshot, * we can't allow the release to happen right now. 
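 * (The destroy requires ownership of the snapshot; without it we
 * return EBUSY, and the retry loop in dsl_dataset_user_release()
 * handles that transient failure.)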
*/ if (!ra->own) return (EBUSY); } dsda.ds = ds; dsda.releasing = B_TRUE; return (dsl_dataset_destroy_check(&dsda, tag, tx)); } return (0); } static void dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx) { struct dsl_ds_releasearg *ra = arg1; dsl_dataset_t *ds = ra->ds; dsl_pool_t *dp = ds->ds_dir->dd_pool; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj; uint64_t refs; int error; mutex_enter(&ds->ds_lock); ds->ds_userrefs--; refs = ds->ds_userrefs; mutex_exit(&ds->ds_lock); error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx); VERIFY(error == 0 || error == ENOENT); zapobj = ds->ds_phys->ds_userrefs_obj; VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); spa_history_log_internal_ds(ds, "release", tx, "tag = %s refs now = %lld", ra->htag, (longlong_t)refs); if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && DS_IS_DEFER_DESTROY(ds)) { struct dsl_ds_destroyarg dsda = {0}; ASSERT(ra->own); dsda.ds = ds; dsda.releasing = B_TRUE; /* We already did the destroy_check */ dsl_dataset_destroy_sync(&dsda, tag, tx); } } static int dsl_dataset_user_release_one(const char *dsname, void *arg) { struct dsl_ds_holdarg *ha = arg; struct dsl_ds_releasearg *ra; dsl_dataset_t *ds; int error; void *dtag = ha->dstg; char *name; boolean_t own = B_FALSE; boolean_t might_destroy; /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = dsl_dataset_hold(name, dtag, &ds); strfree(name); if (error == ENOENT && ha->recursive) return (0); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); if (error) return (error); ha->gotone = B_TRUE; ASSERT(dsl_dataset_is_snapshot(ds)); error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); if (error) { dsl_dataset_rele(ds, dtag); return (error); } if (might_destroy) { #ifdef _KERNEL name = kmem_asprintf("%s@%s", dsname, ha->snapname); error = zfs_unmount_snap(name, NULL); strfree(name); if (error) { dsl_dataset_rele(ds, dtag); return (error); } #endif if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { dsl_dataset_rele(ds, dtag); return (EBUSY); } else { own = B_TRUE; dsl_dataset_make_exclusive(ds, dtag); } } ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); ra->ds = ds; ra->htag = ha->htag; ra->own = own; dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, dsl_dataset_user_release_sync, ra, dtag, 0); return (0); } int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive) { struct dsl_ds_holdarg *ha; dsl_sync_task_t *dst; spa_t *spa; int error; top: ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); error = spa_open(dsname, &spa, FTAG); if (error) { kmem_free(ha, sizeof (struct dsl_ds_holdarg)); return (error); } ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); ha->htag = htag; ha->snapname = snapname; ha->recursive = recursive; if (recursive) { error = dmu_objset_find(dsname, dsl_dataset_user_release_one, ha, DS_FIND_CHILDREN); } else { error = dsl_dataset_user_release_one(dsname, ha); } if (error == 0) error = dsl_sync_task_group_wait(ha->dstg); for (dst = list_head(&ha->dstg->dstg_tasks); dst; dst = list_next(&ha->dstg->dstg_tasks, dst)) { struct dsl_ds_releasearg *ra = dst->dst_arg1; dsl_dataset_t *ds = ra->ds; if (dst->dst_err) dsl_dataset_name(ds, ha->failed); if (ra->own) dsl_dataset_disown(ds, ha->dstg); else dsl_dataset_rele(ds, ha->dstg); kmem_free(ra, sizeof (struct dsl_ds_releasearg)); } if (error == 0 && 
recursive && !ha->gotone) error = ENOENT; if (error && error != EBUSY) (void) strlcpy(dsname, ha->failed, sizeof (ha->failed)); dsl_sync_task_group_destroy(ha->dstg); kmem_free(ha, sizeof (struct dsl_ds_holdarg)); spa_close(spa, FTAG); /* * We can get EBUSY if we were racing with deferred destroy and * dsl_dataset_user_release_check() hadn't done the necessary * open context setup. We can also get EBUSY if we're racing * with destroy and that thread is the ds_owner. Either way * the busy condition should be transient, and we should retry * the release operation. */ if (error == EBUSY) goto top; return (error); } /* * Called at spa_load time (with retry == B_FALSE) to release a stale * temporary user hold. Also called by the onexit code (with retry == B_TRUE). */ int dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag, boolean_t retry) { dsl_dataset_t *ds; char *snap; char *name; int namelen; int error; do { rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) return (error); namelen = dsl_dataset_namelen(ds)+1; name = kmem_alloc(namelen, KM_SLEEP); dsl_dataset_name(ds, name); dsl_dataset_rele(ds, FTAG); snap = strchr(name, '@'); *snap = '\0'; ++snap; error = dsl_dataset_user_release(name, snap, htag, B_FALSE); kmem_free(name, namelen); /* * The object can't have been destroyed because we have a hold, * but it might have been renamed, resulting in ENOENT. Retry * if we've been requested to do so. * * It would be nice if we could use the dsobj all the way * through and avoid ENOENT entirely. But we might need to * unmount the snapshot, and there's currently no way to lookup * a vfsp using a ZFS object id. */ } while ((error == ENOENT) && retry); return (error); } int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) { dsl_dataset_t *ds; int err; err = dsl_dataset_hold(dsname, FTAG, &ds); if (err) return (err); VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); if (ds->ds_phys->ds_userrefs_obj != 0) { zap_attribute_t *za; zap_cursor_t zc; za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_userrefs_obj); zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, za->za_first_integer)); } zap_cursor_fini(&zc); kmem_free(za, sizeof (zap_attribute_t)); } dsl_dataset_rele(ds, FTAG); return (0); } /* * Note, this function is used as the callback for dmu_objset_find(). We * always return 0 so that we will continue to find and process * inconsistent datasets, even if we encounter an error trying to * process one of them. */ /* ARGSUSED */ int dsl_destroy_inconsistent(const char *dsname, void *arg) { dsl_dataset_t *ds; if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { if (DS_IS_INCONSISTENT(ds)) (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); else dsl_dataset_disown(ds, FTAG); } return (0); } /* * Return (in *usedp) the amount of space written in new that is not * present in oldsnap. New may be a snapshot or the head. Old must be * a snapshot before new, in new's filesystem (or its origin). If not then * fail and return EINVAL. * * The written space is calculated by considering two components: First, we * ignore any freed space, and calculate the written as new's used space * minus old's used space. Next, we add in the amount of space that was freed * between the two snapshots, thus reducing new's used space relative to old's. 
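 * (Worked example with illustrative sizes: new references 12G and
 * oldsnap references 10G, giving 2G; if another 3G that was born
 * before oldsnap has been freed between the two, written = 5G.)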
* Specifically, this is the space that was born before old->ds_creation_txg, * and freed before new (ie. on new's deadlist or a previous deadlist). * * space freed [---------------------] * snapshots ---O-------O--------O-------O------ * oldsnap new */ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { int err = 0; uint64_t snapobj; dsl_pool_t *dp = new->ds_dir->dd_pool; *usedp = 0; *usedp += new->ds_phys->ds_referenced_bytes; *usedp -= oldsnap->ds_phys->ds_referenced_bytes; *compp = 0; *compp += new->ds_phys->ds_compressed_bytes; *compp -= oldsnap->ds_phys->ds_compressed_bytes; *uncompp = 0; *uncompp += new->ds_phys->ds_uncompressed_bytes; *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = new->ds_object; while (snapobj != oldsnap->ds_object) { dsl_dataset_t *snap; uint64_t used, comp, uncomp; if (snapobj == new->ds_object) { snap = new; } else { err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); if (err != 0) break; } if (snap->ds_phys->ds_prev_snap_txg == oldsnap->ds_phys->ds_creation_txg) { /* * The blocks in the deadlist can not be born after * ds_prev_snap_txg, so get the whole deadlist space, * which is more efficient (especially for old-format * deadlists). Unfortunately the deadlist code * doesn't have enough information to make this * optimization itself. */ dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp); } else { dsl_deadlist_space_range(&snap->ds_deadlist, 0, oldsnap->ds_phys->ds_creation_txg, &used, &comp, &uncomp); } *usedp += used; *compp += comp; *uncompp += uncomp; /* * If we get to the beginning of the chain of snapshots * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap * was not a snapshot of/before new. */ snapobj = snap->ds_phys->ds_prev_snap_obj; if (snap != new) dsl_dataset_rele(snap, FTAG); if (snapobj == 0) { err = EINVAL; break; } } rw_exit(&dp->dp_config_rwlock); return (err); } /* * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, * lastsnap, and all snapshots in between are deleted. * * blocks that would be freed [---------------------------] * snapshots ---O-------O--------O-------O--------O * firstsnap lastsnap * * This is the set of blocks that were born after the snap before firstsnap, * (birth > firstsnap->prev_snap_txg) and died before the snap after the * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). * We calculate this by iterating over the relevant deadlists (from the snap * after lastsnap, backward to the snap after firstsnap), summing up the * space on the deadlist that was born after the snap before firstsnap. */ int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *lastsnap, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) { int err = 0; uint64_t snapobj; dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; ASSERT(dsl_dataset_is_snapshot(firstsnap)); ASSERT(dsl_dataset_is_snapshot(lastsnap)); /* * Check that the snapshots are in the same dsl_dir, and firstsnap * is before lastsnap. 
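 * (Comparing creation txgs suffices for the ordering check because
 * snapshots within a single dsl_dir form a chain totally ordered by
 * txg.)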
*/ if (firstsnap->ds_dir != lastsnap->ds_dir || firstsnap->ds_phys->ds_creation_txg > lastsnap->ds_phys->ds_creation_txg) return (EINVAL); *usedp = *compp = *uncompp = 0; rw_enter(&dp->dp_config_rwlock, RW_READER); snapobj = lastsnap->ds_phys->ds_next_snap_obj; while (snapobj != firstsnap->ds_object) { dsl_dataset_t *ds; uint64_t used, comp, uncomp; err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); if (err != 0) break; dsl_deadlist_space_range(&ds->ds_deadlist, firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, &used, &comp, &uncomp); *usedp += used; *compp += comp; *uncompp += uncomp; snapobj = ds->ds_phys->ds_prev_snap_obj; ASSERT3U(snapobj, !=, 0); dsl_dataset_rele(ds, FTAG); } rw_exit(&dp->dp_config_rwlock); return (err); } Index: vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c =================================================================== --- vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c (revision 247315) +++ vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_dir.c (revision 247316) @@ -1,1412 +1,1414 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx); /* ARGSUSED */ static void dsl_dir_evict(dmu_buf_t *db, void *arg) { dsl_dir_t *dd = arg; dsl_pool_t *dp = dd->dd_pool; int t; for (t = 0; t < TXG_SIZE; t++) { ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); ASSERT(dd->dd_tempreserved[t] == 0); ASSERT(dd->dd_space_towrite[t] == 0); } if (dd->dd_parent) dsl_dir_close(dd->dd_parent, dd); spa_close(dd->dd_pool->dp_spa, dd); /* * The props callback list should have been cleaned up by * objset_evict(). 
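* Eviction mirrors instantiation in dsl_dir_open_obj(): the per-txg
* accounting was asserted zero and the parent and spa holds were
* dropped above, so only the callback list, the lock, and the
* allocation itself remain to be torn down.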
*/ list_destroy(&dd->dd_prop_cbs); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); } int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, void *tag, dsl_dir_t **ddp) { dmu_buf_t *dbuf; dsl_dir_t *dd; int err; ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || dsl_pool_sync_context(dp)); err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); if (err) return (err); dd = dmu_buf_get_user(dbuf); #ifdef ZFS_DEBUG { dmu_object_info_t doi; dmu_object_info_from_db(dbuf, &doi); ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); } #endif if (dd == NULL) { dsl_dir_t *winner; dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; dd->dd_phys = dbuf->db_data; mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), offsetof(dsl_prop_cb_record_t, cbr_node)); dsl_dir_snap_cmtime_update(dd); if (dd->dd_phys->dd_parent_obj) { err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, NULL, dd, &dd->dd_parent); if (err) goto errout; if (tail) { #ifdef ZFS_DEBUG uint64_t foundobj; err = zap_lookup(dp->dp_meta_objset, dd->dd_parent->dd_phys->dd_child_dir_zapobj, tail, sizeof (foundobj), 1, &foundobj); ASSERT(err || foundobj == ddobj); #endif (void) strcpy(dd->dd_myname, tail); } else { err = zap_value_search(dp->dp_meta_objset, dd->dd_parent->dd_phys->dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } if (err) goto errout; } else { (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); } if (dsl_dir_is_clone(dd)) { dmu_buf_t *origin_bonus; dsl_dataset_phys_t *origin_phys; /* * We can't open the origin dataset, because * that would require opening this dsl_dir. * Just look at its phys directly instead. */ err = dmu_bonus_hold(dp->dp_meta_objset, dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus); if (err) goto errout; origin_phys = origin_bonus->db_data; dd->dd_origin_txg = origin_phys->ds_creation_txg; dmu_buf_rele(origin_bonus, FTAG); } winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, dsl_dir_evict); if (winner) { if (dd->dd_parent) dsl_dir_close(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; } else { spa_open_ref(dp->dp_spa, dd); } } /* * The dsl_dir_t has both open-to-close and instantiate-to-evict * holds on the spa. We need the open-to-close holds because * otherwise the spa_refcnt wouldn't change when we open a * dir which the spa also has open, so we could incorrectly * think it was OK to unload/export/destroy the pool. We need * the instantiate-to-evict hold because the dsl_dir_t has a * pointer to the dd_pool, which has a pointer to the spa_t. 
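* A typical pairing, for illustration (hypothetical caller; the
* dp_config_rwlock must be held, per the ASSERT above):
*     dsl_dir_t *dd;
*     if (dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd) == 0) {
*             ... use dd ...
*             dsl_dir_close(dd, FTAG);
*     }
* dsl_dir_close() drops the open-to-close holds; the
* instantiate-to-evict hold lasts until the dbuf user is evicted.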
*/ spa_open_ref(dp->dp_spa, tag); ASSERT3P(dd->dd_pool, ==, dp); ASSERT3U(dd->dd_object, ==, ddobj); ASSERT3P(dd->dd_dbuf, ==, dbuf); *ddp = dd; return (0); errout: if (dd->dd_parent) dsl_dir_close(dd->dd_parent, dd); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); return (err); } void dsl_dir_close(dsl_dir_t *dd, void *tag) { dprintf_dd(dd, "%s\n", ""); spa_close(dd->dd_pool->dp_spa, tag); dmu_buf_rele(dd->dd_dbuf, tag); } /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ void dsl_dir_name(dsl_dir_t *dd, char *buf) { if (dd->dd_parent) { dsl_dir_name(dd->dd_parent, buf); (void) strcat(buf, "/"); } else { buf[0] = '\0'; } if (!MUTEX_HELD(&dd->dd_lock)) { /* * recursive mutex so that we can use * dprintf_dd() with dd_lock held */ mutex_enter(&dd->dd_lock); (void) strcat(buf, dd->dd_myname); mutex_exit(&dd->dd_lock); } else { (void) strcat(buf, dd->dd_myname); } } /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ int dsl_dir_namelen(dsl_dir_t *dd) { int result = 0; if (dd->dd_parent) { /* parent's name + 1 for the "/" */ result = dsl_dir_namelen(dd->dd_parent) + 1; } if (!MUTEX_HELD(&dd->dd_lock)) { /* see dsl_dir_name */ mutex_enter(&dd->dd_lock); result += strlen(dd->dd_myname); mutex_exit(&dd->dd_lock); } else { result += strlen(dd->dd_myname); } return (result); } static int getcomponent(const char *path, char *component, const char **nextp) { char *p; if ((path == NULL) || (path[0] == '\0')) return (ENOENT); /* This would be a good place to reserve some namespace... */ p = strpbrk(path, "/@"); if (p && (p[1] == '/' || p[1] == '@')) { /* two separators in a row */ return (EINVAL); } if (p == NULL || p == path) { /* * if the first thing is an @ or /, it had better be an * @ and it had better not have any more ats or slashes, * and it had better have something after the @. */ if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) return (EINVAL); if (strlen(path) >= MAXNAMELEN) return (ENAMETOOLONG); (void) strcpy(component, path); p = NULL; } else if (p[0] == '/') { if (p-path >= MAXNAMELEN) return (ENAMETOOLONG); (void) strncpy(component, path, p - path); component[p-path] = '\0'; p++; } else if (p[0] == '@') { /* * if the next separator is an @, there better not be * any more slashes. 
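* For example (hypothetical inputs): "pool/fs@snap" parses into the
* components "pool", "fs" and "@snap"; "pool//fs" is rejected above as
* two separators in a row, and "pool/fs@a/b" is rejected by the check
* below because a slash follows the '@'.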
*/ if (strchr(path, '/')) return (EINVAL); if (p-path >= MAXNAMELEN) return (ENAMETOOLONG); (void) strncpy(component, path, p - path); component[p-path] = '\0'; } else { ASSERT(!"invalid p"); } *nextp = p; return (0); } /* * same as dsl_open_dir, ignore the first component of name and use the * spa instead */ int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) { char buf[MAXNAMELEN]; const char *next, *nextnext = NULL; int err; dsl_dir_t *dd; dsl_pool_t *dp; uint64_t ddobj; int openedspa = FALSE; dprintf("%s\n", name); err = getcomponent(name, buf, &next); if (err) return (err); if (spa == NULL) { err = spa_open(buf, &spa, FTAG); if (err) { dprintf("spa_open(%s) failed\n", buf); return (err); } openedspa = TRUE; /* XXX this assertion belongs in spa_open */ ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); } dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); if (err) { rw_exit(&dp->dp_config_rwlock); if (openedspa) spa_close(spa, FTAG); return (err); } while (next != NULL) { dsl_dir_t *child_ds; err = getcomponent(next, buf, &nextnext); if (err) break; ASSERT(next[0] != '\0'); if (next[0] == '@') break; dprintf("looking up %s in obj%lld\n", buf, dd->dd_phys->dd_child_dir_zapobj); err = zap_lookup(dp->dp_meta_objset, dd->dd_phys->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); if (err) { if (err == ENOENT) err = 0; break; } err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); if (err) break; dsl_dir_close(dd, tag); dd = child_ds; next = nextnext; } rw_exit(&dp->dp_config_rwlock); if (err) { dsl_dir_close(dd, tag); if (openedspa) spa_close(spa, FTAG); return (err); } /* * It's an error if there's more than one component left, or * tailp==NULL and there's any component left. */ if (next != NULL && (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { /* bad path name */ dsl_dir_close(dd, tag); dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); err = ENOENT; } if (tailp) *tailp = next; if (openedspa) spa_close(spa, FTAG); *ddp = dd; return (err); } /* * Return the dsl_dir_t, and possibly the last component which couldn't * be found in *tail. Return NULL if the path is bogus, or if * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' * means that the last component is a snapshot. 
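* For illustration (hypothetical caller):
* dsl_dir_open("tank/home@mon", FTAG, &dd, &tail) opens the dsl_dir
* for "tank/home" and leaves tail pointing at "@mon"; the caller must
* balance the open with dsl_dir_close(dd, FTAG).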
*/ int dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) { return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); } uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *ddphys; dmu_buf_t *dbuf; ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); if (pds) { VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, name, sizeof (uint64_t), 1, &ddobj, tx)); } else { /* it's the root dir */ VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); } VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); ddphys = dbuf->db_data; ddphys->dd_creation_time = gethrestime_sec(); if (pds) ddphys->dd_parent_obj = pds->dd_object; ddphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ddphys->dd_child_dir_zapobj = zap_create(mos, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dmu_buf_rele(dbuf, FTAG); return (ddobj); } /* ARGSUSED */ int dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; uint64_t count; /* * There should be exactly two holds, both from * dsl_dataset_destroy: one on the dd directory, and one on its * head ds. If there are more holds, then a concurrent thread is * performing a lookup inside this dir while we're trying to destroy * it. To minimize this possibility, we perform this check only * in syncing context and fail the operation if we encounter * additional holds. The dp_config_rwlock ensures that nobody else * opens it after we check. */ if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) return (EBUSY); err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); if (err) return (err); if (count != 0) return (EEXIST); return (0); } void dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t obj; dd_used_t t; ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); /* * Remove our reservation. The impl() routine avoids setting the * actual property, which would require the (already destroyed) ds. 
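* Zeroing the reservation here also returns any unused
* DD_USED_CHILD_RSRV space to our ancestors' accounting via
* dsl_dir_set_reservation_sync_impl(), defined below.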
*/ dsl_dir_set_reservation_sync_impl(dd, 0, tx); ASSERT0(dd->dd_phys->dd_used_bytes); ASSERT0(dd->dd_phys->dd_reserved); for (t = 0; t < DD_USED_NUM; t++) ASSERT0(dd->dd_phys->dd_used_breakdown[t]); VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); VERIFY(0 == zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); obj = dd->dd_object; dsl_dir_close(dd, tag); VERIFY(0 == dmu_object_free(mos, obj, tx)); } boolean_t dsl_dir_is_clone(dsl_dir_t *dd) { return (dd->dd_phys->dd_origin_obj && (dd->dd_pool->dp_origin_snap == NULL || dd->dd_phys->dd_origin_obj != dd->dd_pool->dp_origin_snap->ds_object)); } void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) { mutex_enter(&dd->dd_lock); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dd->dd_phys->dd_used_bytes); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, dd->dd_phys->dd_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, dd->dd_phys->dd_compressed_bytes == 0 ? 100 : (dd->dd_phys->dd_uncompressed_bytes * 100 / dd->dd_phys->dd_compressed_bytes)); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, + dd->dd_phys->dd_uncompressed_bytes); if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] + dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]); } mutex_exit(&dd->dd_lock); rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); if (dsl_dir_is_clone(dd)) { dsl_dataset_t *ds; char buf[MAXNAMELEN]; VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } rw_exit(&dd->dd_pool->dp_config_rwlock); } void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; ASSERT(dd->dd_phys); if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { /* up the hold count until we can be written out */ dmu_buf_add_ref(dd->dd_dbuf, dd); } } static int64_t parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) { uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); return (new_accounted - old_accounted); } void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); mutex_enter(&dd->dd_lock); ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]); dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; mutex_exit(&dd->dd_lock); /* release the hold from dsl_dir_dirty */ dmu_buf_rele(dd->dd_dbuf, dd); } static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd) { uint64_t space = 0; int i; ASSERT(MUTEX_HELD(&dd->dd_lock)); for (i = 0; i < TXG_SIZE; i++) { space += dd->dd_space_towrite[i&TXG_MASK]; ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); } return (space); } /* * How much space would dd have available if ancestor had delta applied * to it? 
If ondiskonly is set, we're only interested in what's * on-disk, not estimated pending changes. */ uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly) { uint64_t parentspace, myspace, quota, used; /* * If there are no restrictions otherwise, assume we have * unlimited space available. */ quota = UINT64_MAX; parentspace = UINT64_MAX; if (dd->dd_parent != NULL) { parentspace = dsl_dir_space_available(dd->dd_parent, ancestor, delta, ondiskonly); } mutex_enter(&dd->dd_lock); if (dd->dd_phys->dd_quota != 0) quota = dd->dd_phys->dd_quota; used = dd->dd_phys->dd_used_bytes; if (!ondiskonly) used += dsl_dir_space_towrite(dd); if (dd->dd_parent == NULL) { uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); quota = MIN(quota, poolsize); } if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { /* * We have some space reserved, in addition to what our * parent gave us. */ parentspace += dd->dd_phys->dd_reserved - used; } if (dd == ancestor) { ASSERT(delta <= 0); ASSERT(used >= -delta); used += delta; if (parentspace != UINT64_MAX) parentspace -= delta; } if (used > quota) { /* over quota */ myspace = 0; } else { /* * the lesser of the space provided by our parent and * the space left in our quota */ myspace = MIN(parentspace, quota - used); } mutex_exit(&dd->dd_lock); return (myspace); } struct tempreserve { list_node_t tr_node; dsl_pool_t *tr_dp; dsl_dir_t *tr_ds; uint64_t tr_size; }; static int dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list, dmu_tx_t *tx, boolean_t first) { uint64_t txg = tx->tx_txg; uint64_t est_inflight, used_on_disk, quota, parent_rsrv; uint64_t deferred = 0; struct tempreserve *tr; int retval = EDQUOT; int txgidx = txg & TXG_MASK; int i; uint64_t ref_rsrv = 0; ASSERT3U(txg, !=, 0); ASSERT3S(asize, >, 0); mutex_enter(&dd->dd_lock); /* * Check against the dsl_dir's quota. We don't add in the delta * when checking for over-quota because they get one free hit. */ est_inflight = dsl_dir_space_towrite(dd); for (i = 0; i < TXG_SIZE; i++) est_inflight += dd->dd_tempreserved[i]; used_on_disk = dd->dd_phys->dd_used_bytes; /* * On the first iteration, fetch the dataset's used-on-disk and * refreservation values. Also, if checkrefquota is set, test if * allocating this space would exceed the dataset's refquota. */ if (first && tx->tx_objset) { int error; dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; error = dsl_dataset_check_quota(ds, checkrefquota, asize, est_inflight, &used_on_disk, &ref_rsrv); if (error) { mutex_exit(&dd->dd_lock); return (error); } } /* * If this transaction will result in a net free of space, * we want to let it through. */ if (ignorequota || netfree || dd->dd_phys->dd_quota == 0) quota = UINT64_MAX; else quota = dd->dd_phys->dd_quota; /* * Adjust the quota against the actual pool size at the root * minus any outstanding deferred frees. * To ensure that it's possible to remove files from a full * pool without inducing transient overcommits, we throttle * netfree transactions against a quota that is slightly larger, * but still within the pool's allocation slop. In cases where * we're very close to full, this will allow a steady trickle of * removes to get through. 
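* For illustration (hypothetical numbers): on a pool whose adjusted
* size is 100G with 1G of deferred frees, an ordinary write is checked
* against quota = 99G and fails with ENOSPC once 99G is in use, while
* a net-free transaction computes a slightly larger adjusted size and
* can still proceed.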
*/ if (dd->dd_parent == NULL) { spa_t *spa = dd->dd_pool->dp_spa; uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); deferred = metaslab_class_get_deferred(spa_normal_class(spa)); if (poolsize - deferred < quota) { quota = poolsize - deferred; retval = ENOSPC; } } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes (which * may free up space for us). */ if (used_on_disk + est_inflight >= quota) { if (est_inflight > 0 || used_on_disk < quota || (retval == ENOSPC && used_on_disk < quota + deferred)) retval = ERESTART; dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " "quota=%lluK tr=%lluK err=%d\n", used_on_disk>>10, est_inflight>>10, quota>>10, asize>>10, retval); mutex_exit(&dd->dd_lock); return (retval); } /* We need to up our estimated delta before dropping dd_lock */ dd->dd_tempreserved[txgidx] += asize; parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, asize - ref_rsrv); mutex_exit(&dd->dd_lock); tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_ds = dd; tr->tr_size = asize; list_insert_tail(tr_list, tr); /* see if it's OK with our parent */ if (dd->dd_parent && parent_rsrv) { boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0); return (dsl_dir_tempreserve_impl(dd->dd_parent, parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE)); } else { return (0); } } /* * Reserve space in this dsl_dir, to be used in this tx's txg. * After the space has been dirtied (and dsl_dir_willuse_space() * has been called), the reservation should be canceled, using * dsl_dir_tempreserve_clear(). */ int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx) { int err; list_t *tr_list; if (asize == 0) { *tr_cookiep = NULL; return (0); } tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(tr_list, sizeof (struct tempreserve), offsetof(struct tempreserve, tr_node)); ASSERT3S(asize, >, 0); ASSERT3S(fsize, >=, 0); err = arc_tempreserve_space(lsize, tx->tx_txg); if (err == 0) { struct tempreserve *tr; tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_size = lsize; list_insert_tail(tr_list, tr); err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx); } else { if (err == EAGAIN) { txg_delay(dd->dd_pool, tx->tx_txg, 1); err = ERESTART; } dsl_pool_memory_pressure(dd->dd_pool); } if (err == 0) { struct tempreserve *tr; tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_dp = dd->dd_pool; tr->tr_size = asize; list_insert_tail(tr_list, tr); err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, FALSE, asize > usize, tr_list, tx, TRUE); } if (err) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; return (err); } /* * Clear a temporary reservation that we previously made with * dsl_dir_tempreserve_space(). 
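* Typical lifecycle, for illustration (the caller context is
* hypothetical):
*     void *cookie;
*     error = dsl_dir_tempreserve_space(dd, lsize, asize, fsize,
*         usize, &cookie, tx);
*     if (error == 0) {
*             ... dirty the data, dsl_dir_willuse_space() ...
*             dsl_dir_tempreserve_clear(cookie, tx);
*     }
* On failure the partial reservation is unwound internally and no
* cookie is returned.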
*/ void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) { int txgidx = tx->tx_txg & TXG_MASK; list_t *tr_list = tr_cookie; struct tempreserve *tr; ASSERT3U(tx->tx_txg, !=, 0); if (tr_cookie == NULL) return; while (tr = list_head(tr_list)) { if (tr->tr_dp) { dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx); } else if (tr->tr_ds) { mutex_enter(&tr->tr_ds->dd_lock); ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, tr->tr_size); tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; mutex_exit(&tr->tr_ds->dd_lock); } else { arc_tempreserve_clear(tr->tr_size); } list_remove(tr_list, tr); kmem_free(tr, sizeof (struct tempreserve)); } kmem_free(tr_list, sizeof (list_t)); } static void dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) { int64_t parent_space; uint64_t est_used; mutex_enter(&dd->dd_lock); if (space > 0) dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes; parent_space = parent_delta(dd, est_used, space); mutex_exit(&dd->dd_lock); /* Make sure that we clean up dd_space_to* */ dsl_dir_dirty(dd, tx); /* XXX this is potentially expensive and unnecessary... */ if (parent_space && dd->dd_parent) dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx); } /* * Call in open context when we think we're going to write/free space, * eg. when dirtying data. Be conservative (ie. OK to write less than * this or free more than this, but don't write more or free less). */ void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) { dsl_pool_willuse_space(dd->dd_pool, space, tx); dsl_dir_willuse_space_impl(dd, space, tx); } /* call from syncing context when we actually write/free space for this dd */ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) { int64_t accounted_delta; boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(type < DD_USED_NUM); if (needlock) mutex_enter(&dd->dd_lock); accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used); ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used); ASSERT(compressed >= 0 || dd->dd_phys->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_used_bytes += used; dd->dd_phys->dd_uncompressed_bytes += uncompressed; dd->dd_phys->dd_compressed_bytes += compressed; if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(used > 0 || dd->dd_phys->dd_used_breakdown[type] >= -used); dd->dd_phys->dd_used_breakdown[type] += used; #ifdef DEBUG dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) u += dd->dd_phys->dd_used_breakdown[t]; ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes); #endif } if (needlock) mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, accounted_delta, compressed, uncompressed, tx); dsl_dir_transfer_space(dd->dd_parent, used - accounted_delta, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) { boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; if (needlock) mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? 
dd->dd_phys->dd_used_breakdown[oldtype] >= delta : dd->dd_phys->dd_used_breakdown[newtype] >= -delta); ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta)); dmu_buf_will_dirty(dd->dd_dbuf, tx); dd->dd_phys->dd_used_breakdown[oldtype] -= delta; dd->dd_phys->dd_used_breakdown[newtype] += delta; if (needlock) mutex_exit(&dd->dd_lock); } static int dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_dir_t *dd = ds->ds_dir; dsl_prop_setarg_t *psa = arg2; int err; uint64_t towrite; if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); if (psa->psa_effective_value == 0) return (0); mutex_enter(&dd->dd_lock); /* * If we are doing the preliminary check in open context, and * there are pending changes, then don't fail it, since the * pending changes could under-estimate the amount of space to be * freed up. */ towrite = dsl_dir_space_towrite(dd); if ((dmu_tx_is_syncing(tx) || towrite == 0) && (psa->psa_effective_value < dd->dd_phys->dd_reserved || psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) { err = ENOSPC; } mutex_exit(&dd->dd_lock); return (err); } extern dsl_syncfunc_t dsl_prop_set_sync; static void dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_dir_t *dd = ds->ds_dir; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value = psa->psa_effective_value; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(dd, psa); dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); dd->dd_phys->dd_quota = effective_value; mutex_exit(&dd->dd_lock); } int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) { dsl_dir_t *dd; dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota); err = dsl_dataset_hold(ddname, FTAG, &ds); if (err) return (err); err = dsl_dir_open(ddname, FTAG, &dd, NULL); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } ASSERT(ds->ds_dir == dd); /* * If someone removes a file, then tries to set the quota, we want to * make sure the file freeing takes effect. */ txg_wait_open(dd->dd_pool, 0); err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, dsl_dir_set_quota_sync, ds, &psa, 0); dsl_dir_close(dd, FTAG); dsl_dataset_rele(ds, FTAG); return (err); } int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_dir_t *dd = ds->ds_dir; dsl_prop_setarg_t *psa = arg2; uint64_t effective_value; uint64_t used, avail; int err; if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0) return (err); effective_value = psa->psa_effective_value; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate.
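* For example (hypothetical values): with used = 5G, an existing
* reservation of 2G, and a requested reservation of 8G, only the 3G
* by which MAX(used, new) exceeds MAX(used, old) must fit within the
* parent's available space.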
*/ if (!dmu_tx_is_syncing(tx)) return (0); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; mutex_exit(&dd->dd_lock); if (dd->dd_parent) { avail = dsl_dir_space_available(dd->dd_parent, NULL, 0, FALSE); } else { avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; } if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) { uint64_t delta = MAX(used, effective_value) - MAX(used, dd->dd_phys->dd_reserved); if (delta > avail) return (ENOSPC); if (dd->dd_phys->dd_quota > 0 && effective_value > dd->dd_phys->dd_quota) return (ENOSPC); } return (0); } static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { uint64_t used; int64_t delta; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dd->dd_phys->dd_used_bytes; delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved); dd->dd_phys->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, delta, 0, 0, tx); } mutex_exit(&dd->dd_lock); } static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dataset_t *ds = arg1; dsl_dir_t *dd = ds->ds_dir; dsl_prop_setarg_t *psa = arg2; uint64_t value = psa->psa_effective_value; dsl_prop_set_sync(ds, psa, tx); DSL_PROP_CHECK_PREDICTION(dd, psa); dsl_dir_set_reservation_sync_impl(dd, value, tx); } int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) { dsl_dir_t *dd; dsl_dataset_t *ds; dsl_prop_setarg_t psa; int err; dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation); err = dsl_dataset_hold(ddname, FTAG, &ds); if (err) return (err); err = dsl_dir_open(ddname, FTAG, &dd, NULL); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } ASSERT(ds->ds_dir == dd); err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, dsl_dir_set_reservation_sync, ds, &psa, 0); dsl_dir_close(dd, FTAG); dsl_dataset_rele(ds, FTAG); return (err); } static dsl_dir_t * closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) { for (; ds1; ds1 = ds1->dd_parent) { dsl_dir_t *dd; for (dd = ds2; dd; dd = dd->dd_parent) { if (ds1 == dd) return (dd); } } return (NULL); } /* * If delta is applied to dd, how much of that delta would be applied to * ancestor? Syncing context only. */ static int64_t would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) { if (dd == ancestor) return (delta); mutex_enter(&dd->dd_lock); delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta); mutex_exit(&dd->dd_lock); return (would_change(dd->dd_parent, delta, ancestor)); } struct renamearg { dsl_dir_t *newparent; const char *mynewname; }; static int dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct renamearg *ra = arg2; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; uint64_t val; /* * There should only be one reference, from dmu_objset_rename(). * Fleeting holds are also possible (eg, from "zfs list" getting * stats), but any that are present in open context will likely * be gone by syncing context, so only fail from syncing * context. */ if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1) return (EBUSY); /* check for existing name */ err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, ra->mynewname, 8, 1, &val); if (err == 0) return (EEXIST); if (err != ENOENT) return (err); if (ra->newparent != dd->dd_parent) { /* is there enough space? 
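* The amount that must fit under the new parent is the larger of the
* space actually used and the space reserved, as checked by
* dsl_dir_transfer_possible() below.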
*/ uint64_t myspace = MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved); /* no rename into our descendant */ if (closest_common_ancestor(dd, ra->newparent) == dd) return (EINVAL); if (err = dsl_dir_transfer_possible(dd->dd_parent, ra->newparent, myspace)) return (err); } return (0); } static void dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) { dsl_dir_t *dd = arg1; struct renamearg *ra = arg2; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err; char namebuf[MAXNAMELEN]; ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); /* Log this before we change the name. */ dsl_dir_name(ra->newparent, namebuf); spa_history_log_internal_dd(dd, "rename", tx, "-> %s/%s", namebuf, ra->mynewname); if (ra->newparent != dd->dd_parent) { dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dd->dd_phys->dd_used_bytes, -dd->dd_phys->dd_compressed_bytes, -dd->dd_phys->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD, dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_compressed_bytes, dd->dd_phys->dd_uncompressed_bytes, tx); if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) { uint64_t unused_rsrv = dd->dd_phys->dd_reserved - dd->dd_phys->dd_used_bytes; dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV, unused_rsrv, 0, 0, tx); } } dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx); ASSERT0(err); (void) strcpy(dd->dd_myname, ra->mynewname); dsl_dir_close(dd->dd_parent, dd); dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx); ASSERT0(err); } int dsl_dir_rename(dsl_dir_t *dd, const char *newname) { struct renamearg ra; int err; /* new parent should exist */ err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); if (err) return (err); /* can't rename to different pool */ if (dd->dd_pool != ra.newparent->dd_pool) { err = ENXIO; goto out; } /* new name should not already exist */ if (ra.mynewname == NULL) { err = EEXIST; goto out; } err = dsl_sync_task_do(dd->dd_pool, dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); out: dsl_dir_close(ra.newparent, FTAG); return (err); } int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) { dsl_dir_t *ancestor; int64_t adelta; uint64_t avail; ancestor = closest_common_ancestor(sdd, tdd); adelta = would_change(sdd, -space, ancestor); avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); if (avail < space) return (ENOSPC); return (0); } timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd) { timestruc_t t; mutex_enter(&dd->dd_lock); t = dd->dd_snap_cmtime; mutex_exit(&dd->dd_lock); return (t); } void dsl_dir_snap_cmtime_update(dsl_dir_t *dd) { timestruc_t t; gethrestime(&t); mutex_enter(&dd->dd_lock); dd->dd_snap_cmtime = t; mutex_exit(&dd->dd_lock); } Index: vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h =================================================================== --- vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h (revision 247315) +++ vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h (revision 247316) @@ -1,920 +1,922 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License 
(the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_FS_ZFS_H #define _SYS_FS_ZFS_H #include #ifdef __cplusplus extern "C" { #endif /* * Types and constants shared between userland and the kernel. */ /* * Each dataset can be one of the following types. These constants can be * combined into masks that can be passed to various functions. */ typedef enum { ZFS_TYPE_FILESYSTEM = 0x1, ZFS_TYPE_SNAPSHOT = 0x2, ZFS_TYPE_VOLUME = 0x4, ZFS_TYPE_POOL = 0x8 } zfs_type_t; typedef enum dmu_objset_type { DMU_OST_NONE, DMU_OST_META, DMU_OST_ZFS, DMU_OST_ZVOL, DMU_OST_OTHER, /* For testing only! */ DMU_OST_ANY, /* Be careful! */ DMU_OST_NUMTYPES } dmu_objset_type_t; #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) #define ZAP_MAXNAMELEN 256 #define ZAP_MAXVALUELEN (1024 * 8) #define ZAP_OLDMAXVALUELEN 1024 /* * Dataset properties are identified by these constants and must be added to * the end of this list to ensure that external consumers are not affected * by the change. If you make any changes to this list, be sure to update * the property table in usr/src/common/zfs/zfs_prop.c. 
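* (That append-only rule is why the two properties introduced by this
* change, ZFS_PROP_LOGICALUSED and ZFS_PROP_LOGICALREFERENCED, are
* added just before ZFS_NUM_PROPS below rather than inserted in
* alphabetical order.)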
*/ typedef enum { ZFS_PROP_TYPE, ZFS_PROP_CREATION, ZFS_PROP_USED, ZFS_PROP_AVAILABLE, ZFS_PROP_REFERENCED, ZFS_PROP_COMPRESSRATIO, ZFS_PROP_MOUNTED, ZFS_PROP_ORIGIN, ZFS_PROP_QUOTA, ZFS_PROP_RESERVATION, ZFS_PROP_VOLSIZE, ZFS_PROP_VOLBLOCKSIZE, ZFS_PROP_RECORDSIZE, ZFS_PROP_MOUNTPOINT, ZFS_PROP_SHARENFS, ZFS_PROP_CHECKSUM, ZFS_PROP_COMPRESSION, ZFS_PROP_ATIME, ZFS_PROP_DEVICES, ZFS_PROP_EXEC, ZFS_PROP_SETUID, ZFS_PROP_READONLY, ZFS_PROP_ZONED, ZFS_PROP_SNAPDIR, ZFS_PROP_ACLMODE, ZFS_PROP_ACLINHERIT, ZFS_PROP_CREATETXG, /* not exposed to the user */ ZFS_PROP_NAME, /* not exposed to the user */ ZFS_PROP_CANMOUNT, ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ ZFS_PROP_XATTR, ZFS_PROP_NUMCLONES, /* not exposed to the user */ ZFS_PROP_COPIES, ZFS_PROP_VERSION, ZFS_PROP_UTF8ONLY, ZFS_PROP_NORMALIZE, ZFS_PROP_CASE, ZFS_PROP_VSCAN, ZFS_PROP_NBMAND, ZFS_PROP_SHARESMB, ZFS_PROP_REFQUOTA, ZFS_PROP_REFRESERVATION, ZFS_PROP_GUID, ZFS_PROP_PRIMARYCACHE, ZFS_PROP_SECONDARYCACHE, ZFS_PROP_USEDSNAP, ZFS_PROP_USEDDS, ZFS_PROP_USEDCHILD, ZFS_PROP_USEDREFRESERV, ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, ZFS_PROP_LOGBIAS, ZFS_PROP_UNIQUE, /* not exposed to the user */ ZFS_PROP_OBJSETID, /* not exposed to the user */ ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, ZFS_PROP_REFRATIO, ZFS_PROP_WRITTEN, ZFS_PROP_CLONES, + ZFS_PROP_LOGICALUSED, + ZFS_PROP_LOGICALREFERENCED, ZFS_NUM_PROPS } zfs_prop_t; typedef enum { ZFS_PROP_USERUSED, ZFS_PROP_USERQUOTA, ZFS_PROP_GROUPUSED, ZFS_PROP_GROUPQUOTA, ZFS_NUM_USERQUOTA_PROPS } zfs_userquota_prop_t; extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; /* * Pool properties are identified by these constants and must be added to the * end of this list to ensure that external consumers are not affected * by the change. If you make any changes to this list, be sure to update * the property table in usr/src/common/zfs/zpool_prop.c. */ typedef enum { ZPOOL_PROP_NAME, ZPOOL_PROP_SIZE, ZPOOL_PROP_CAPACITY, ZPOOL_PROP_ALTROOT, ZPOOL_PROP_HEALTH, ZPOOL_PROP_GUID, ZPOOL_PROP_VERSION, ZPOOL_PROP_BOOTFS, ZPOOL_PROP_DELEGATION, ZPOOL_PROP_AUTOREPLACE, ZPOOL_PROP_CACHEFILE, ZPOOL_PROP_FAILUREMODE, ZPOOL_PROP_LISTSNAPS, ZPOOL_PROP_AUTOEXPAND, ZPOOL_PROP_DEDUPDITTO, ZPOOL_PROP_DEDUPRATIO, ZPOOL_PROP_FREE, ZPOOL_PROP_ALLOCATED, ZPOOL_PROP_READONLY, ZPOOL_PROP_COMMENT, ZPOOL_PROP_EXPANDSZ, ZPOOL_PROP_FREEING, ZPOOL_NUM_PROPS } zpool_prop_t; /* Small enough to not hog a whole line of printout in zpool(1M). */ #define ZPROP_MAX_COMMENT 32 #define ZPROP_CONT -2 #define ZPROP_INVAL -1 #define ZPROP_VALUE "value" #define ZPROP_SOURCE "source" typedef enum { ZPROP_SRC_NONE = 0x1, ZPROP_SRC_DEFAULT = 0x2, ZPROP_SRC_TEMPORARY = 0x4, ZPROP_SRC_LOCAL = 0x8, ZPROP_SRC_INHERITED = 0x10, ZPROP_SRC_RECEIVED = 0x20 } zprop_source_t; #define ZPROP_SRC_ALL 0x3f #define ZPROP_SOURCE_VAL_RECVD "$recvd" #define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" /* * Dataset flag implemented as a special entry in the props zap object * indicating that the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties * just as it did in earlier versions, and thereafter, local properties are * preserved. 
*/ #define ZPROP_HAS_RECVD "$hasrecvd" typedef enum { ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */ ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */ } zprop_errflags_t; typedef int (*zprop_func)(int, void *); /* * Properties to be set on the root file system of a new pool * are stuffed into their own nvlist, which is then included in * the properties nvlist with the pool properties. */ #define ZPOOL_ROOTFS_PROPS "root-props-nvl" /* * Dataset property functions shared between libzfs and kernel. */ const char *zfs_prop_default_string(zfs_prop_t); uint64_t zfs_prop_default_numeric(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); boolean_t zfs_prop_userquota(const char *); boolean_t zfs_prop_written(const char *); int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); boolean_t zfs_prop_valid_for_type(int, zfs_type_t); /* * Pool property functions shared between libzfs and kernel. */ zpool_prop_t zpool_name_to_prop(const char *); const char *zpool_prop_to_name(zpool_prop_t); const char *zpool_prop_default_string(zpool_prop_t); uint64_t zpool_prop_default_numeric(zpool_prop_t); boolean_t zpool_prop_readonly(zpool_prop_t); boolean_t zpool_prop_feature(const char *); boolean_t zpool_prop_unsupported(const char *name); int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); /* * Definitions for the Delegation. */ typedef enum { ZFS_DELEG_WHO_UNKNOWN = 0, ZFS_DELEG_USER = 'u', ZFS_DELEG_USER_SETS = 'U', ZFS_DELEG_GROUP = 'g', ZFS_DELEG_GROUP_SETS = 'G', ZFS_DELEG_EVERYONE = 'e', ZFS_DELEG_EVERYONE_SETS = 'E', ZFS_DELEG_CREATE = 'c', ZFS_DELEG_CREATE_SETS = 'C', ZFS_DELEG_NAMED_SET = 's', ZFS_DELEG_NAMED_SET_SETS = 'S' } zfs_deleg_who_type_t; typedef enum { ZFS_DELEG_NONE = 0, ZFS_DELEG_PERM_LOCAL = 1, ZFS_DELEG_PERM_DESCENDENT = 2, ZFS_DELEG_PERM_LOCALDESCENDENT = 3, ZFS_DELEG_PERM_CREATE = 4 } zfs_deleg_inherit_t; #define ZFS_DELEG_PERM_UID "uid" #define ZFS_DELEG_PERM_GID "gid" #define ZFS_DELEG_PERM_GROUPS "groups" #define ZFS_MLSLABEL_DEFAULT "none" #define ZFS_SMB_ACL_SRC "src" #define ZFS_SMB_ACL_TARGET "target" typedef enum { ZFS_CANMOUNT_OFF = 0, ZFS_CANMOUNT_ON = 1, ZFS_CANMOUNT_NOAUTO = 2 } zfs_canmount_type_t; typedef enum { ZFS_LOGBIAS_LATENCY = 0, ZFS_LOGBIAS_THROUGHPUT = 1 } zfs_logbias_op_t; typedef enum zfs_share_op { ZFS_SHARE_NFS = 0, ZFS_UNSHARE_NFS = 1, ZFS_SHARE_SMB = 2, ZFS_UNSHARE_SMB = 3 } zfs_share_op_t; typedef enum zfs_smb_acl_op { ZFS_SMB_ACL_ADD, ZFS_SMB_ACL_REMOVE, ZFS_SMB_ACL_RENAME, ZFS_SMB_ACL_PURGE } zfs_smb_acl_op_t; typedef enum zfs_cache_type { ZFS_CACHE_NONE = 0, ZFS_CACHE_METADATA = 1, ZFS_CACHE_ALL = 2 } zfs_cache_type_t; typedef enum { ZFS_SYNC_STANDARD = 0, ZFS_SYNC_ALWAYS = 1, ZFS_SYNC_DISABLED = 2 } zfs_sync_type_t; /* * On-disk version number. 
*/ #define SPA_VERSION_1 1ULL #define SPA_VERSION_2 2ULL #define SPA_VERSION_3 3ULL #define SPA_VERSION_4 4ULL #define SPA_VERSION_5 5ULL #define SPA_VERSION_6 6ULL #define SPA_VERSION_7 7ULL #define SPA_VERSION_8 8ULL #define SPA_VERSION_9 9ULL #define SPA_VERSION_10 10ULL #define SPA_VERSION_11 11ULL #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL #define SPA_VERSION_15 15ULL #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL #define SPA_VERSION_19 19ULL #define SPA_VERSION_20 20ULL #define SPA_VERSION_21 21ULL #define SPA_VERSION_22 22ULL #define SPA_VERSION_23 23ULL #define SPA_VERSION_24 24ULL #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL #define SPA_VERSION_5000 5000ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ #define SPA_VERSION SPA_VERSION_5000 #define SPA_VERSION_STRING "5000" /* * Symbolic names for the changes that caused a SPA_VERSION switch. * Used in the code when checking for presence or absence of a feature. * Feel free to define multiple symbolic names for each version if there * were multiple changes to on-disk structures during that version. * * NOTE: When checking the current SPA_VERSION in your code, be sure * to use spa_version() since it reports the version of the * last synced uberblock. Checking the in-flight version can * be dangerous in some cases. */ #define SPA_VERSION_INITIAL SPA_VERSION_1 #define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 #define SPA_VERSION_SPARES SPA_VERSION_3 #define SPA_VERSION_RAIDZ2 SPA_VERSION_3 #define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3 #define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 #define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 #define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 #define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 #define SPA_VERSION_BOOTFS SPA_VERSION_6 #define SPA_VERSION_SLOGS SPA_VERSION_7 #define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 #define SPA_VERSION_FUID SPA_VERSION_9 #define SPA_VERSION_REFRESERVATION SPA_VERSION_9 #define SPA_VERSION_REFQUOTA SPA_VERSION_9 #define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 #define SPA_VERSION_L2CACHE SPA_VERSION_10 #define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 #define SPA_VERSION_ORIGIN SPA_VERSION_11 #define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 #define SPA_VERSION_USERSPACE SPA_VERSION_15 #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 #define SPA_VERSION_HOLES SPA_VERSION_19 #define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 #define SPA_VERSION_DEDUP SPA_VERSION_21 #define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 #define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 #define SPA_VERSION_SA SPA_VERSION_24 #define SPA_VERSION_SCAN SPA_VERSION_25 #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 #define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28 #define SPA_VERSION_FEATURES SPA_VERSION_5000 #define SPA_VERSION_IS_SUPPORTED(v) \ (((v) >= SPA_VERSION_INITIAL && (v) <= 
SPA_VERSION_BEFORE_FEATURES) || \ ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. This is independent of SPA/DMU/ZAP versioning. You must * also update the version_table[] and help message in zfs_prop.c. * * When changing, be sure to teach GRUB how to read the new format! * See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*} */ #define ZPL_VERSION_1 1ULL #define ZPL_VERSION_2 2ULL #define ZPL_VERSION_3 3ULL #define ZPL_VERSION_4 4ULL #define ZPL_VERSION_5 5ULL #define ZPL_VERSION ZPL_VERSION_5 #define ZPL_VERSION_STRING "5" #define ZPL_VERSION_INITIAL ZPL_VERSION_1 #define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2 #define ZPL_VERSION_FUID ZPL_VERSION_3 #define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3 #define ZPL_VERSION_SYSATTR ZPL_VERSION_3 #define ZPL_VERSION_USERSPACE ZPL_VERSION_4 #define ZPL_VERSION_SA ZPL_VERSION_5 /* Rewind request information */ #define ZPOOL_NO_REWIND 1 /* No policy - default behavior */ #define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */ #define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */ #define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */ #define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */ #define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */ #define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */ typedef struct zpool_rewind_policy { uint32_t zrp_request; /* rewind behavior requested */ uint64_t zrp_maxmeta; /* max acceptable meta-data errors */ uint64_t zrp_maxdata; /* max acceptable data errors */ uint64_t zrp_txg; /* specific txg to load */ } zpool_rewind_policy_t; /* * The following are configuration names used in the nvlist describing a pool's * configuration. 
*/ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" #define ZPOOL_CONFIG_POOL_STATE "state" #define ZPOOL_CONFIG_POOL_TXG "txg" #define ZPOOL_CONFIG_POOL_GUID "pool_guid" #define ZPOOL_CONFIG_CREATE_TXG "create_txg" #define ZPOOL_CONFIG_TOP_GUID "top_guid" #define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" #define ZPOOL_CONFIG_TYPE "type" #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" #define ZPOOL_CONFIG_UNSPARE "unspare" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_L2CACHE "l2cache" #define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" #define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" #define ZPOOL_CONFIG_IS_HOLE "is_hole" #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" #define ZPOOL_CONFIG_SPLIT "splitcfg" #define ZPOOL_CONFIG_ORIG_GUID "orig_guid" #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVERING "resilvering" #define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ #define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ #define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */ #define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */ #define ZPOOL_CONFIG_ENABLED_FEAT "enabled_feat" /* not stored on disk */ #define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" #define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such * as offline and degraded. 
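* For example (hypothetical label): a device that was explicitly
* offlined while also degraded would carry both a ZPOOL_CONFIG_OFFLINE
* and a ZPOOL_CONFIG_DEGRADED entry, rather than one combined state
* value.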
*/ #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_FAULTED "faulted" #define ZPOOL_CONFIG_DEGRADED "degraded" #define ZPOOL_CONFIG_REMOVED "removed" #define ZPOOL_CONFIG_FRU "fru" #define ZPOOL_CONFIG_AUX_STATE "aux_state" /* Rewind policy parameters */ #define ZPOOL_REWIND_POLICY "rewind-policy" #define ZPOOL_REWIND_REQUEST "rewind-request" #define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg" #define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh" #define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh" /* Rewind data discovered */ #define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts" #define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors" #define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" #define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" /* * This is needed in userland to report the minimum necessary device size. */ #define SPA_MINDEVSIZE (64ULL << 20) /* * The location of the pool configuration repository, shared between kernel and * userland. */ #define ZPOOL_CACHE "/etc/zfs/zpool.cache" /* * vdev states are ordered from least to most healthy. * A vdev that's CANT_OPEN or below is considered unusable. */ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t; #define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY /* * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field * of the vdev stats structure uses these constants to distinguish why. */ typedef enum vdev_aux { VDEV_AUX_NONE, /* no error */ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ VDEV_AUX_UNSUP_FEAT, /* unsupported features */ VDEV_AUX_SPARED, /* hot spare used in another pool */ VDEV_AUX_ERR_EXCEEDED, /* too many errors */ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ VDEV_AUX_EXTERNAL, /* external diagnosis */ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining * states are software abstractions used at various levels to communicate * pool state. 
*/ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_L2CACHE, /* Level 2 ARC device */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ } pool_state_t; /* * Scan Functions. */ typedef enum pool_scan_func { POOL_SCAN_NONE, POOL_SCAN_SCRUB, POOL_SCAN_RESILVER, POOL_SCAN_FUNCS } pool_scan_func_t; /* * ZIO types. Needed to interpret vdev statistics below. */ typedef enum zio_type { ZIO_TYPE_NULL = 0, ZIO_TYPE_READ, ZIO_TYPE_WRITE, ZIO_TYPE_FREE, ZIO_TYPE_CLAIM, ZIO_TYPE_IOCTL, ZIO_TYPES } zio_type_t; /* * Pool statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct pool_scan_stat { /* values stored on disk */ uint64_t pss_func; /* pool_scan_func_t */ uint64_t pss_state; /* dsl_scan_state_t */ uint64_t pss_start_time; /* scan start time */ uint64_t pss_end_time; /* scan end time */ uint64_t pss_to_examine; /* total bytes to scan */ uint64_t pss_examined; /* total examined bytes */ uint64_t pss_to_process; /* total bytes to process */ uint64_t pss_processed; /* total processed bytes */ uint64_t pss_errors; /* scan errors */ /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ } pool_scan_stat_t; typedef enum dsl_scan_state { DSS_NONE, DSS_SCANNING, DSS_FINISHED, DSS_CANCELED, DSS_NUM_STATES } dsl_scan_state_t; /* * Vdev statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct vdev_stat { hrtime_t vs_timestamp; /* time since vdev load */ uint64_t vs_state; /* vdev state */ uint64_t vs_aux; /* see vdev_aux_t */ uint64_t vs_alloc; /* space allocated */ uint64_t vs_space; /* total capacity */ uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ uint64_t vs_esize; /* expandable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ uint64_t vs_write_errors; /* write errors */ uint64_t vs_checksum_errors; /* checksum errors */ uint64_t vs_self_healed; /* self-healed bytes */ uint64_t vs_scan_removing; /* removing? */ uint64_t vs_scan_processed; /* scan processed bytes */ } vdev_stat_t; /* * DDT statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. 
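* In practice each of the structs below crosses the kernel/userland
* boundary as an array of uint64s, so field order and count are
* effectively part of the interface and new fields can only be
* appended (an implication of the note above).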
/*
 * DDT statistics.  Note: all fields should be 64-bit because this
 * is passed between kernel and userland as an nvlist uint64 array.
 */
typedef struct ddt_object {
	uint64_t	ddo_count;	/* number of elements in ddt */
	uint64_t	ddo_dspace;	/* size of ddt on disk */
	uint64_t	ddo_mspace;	/* size of ddt in-core */
} ddt_object_t;

typedef struct ddt_stat {
	uint64_t	dds_blocks;	/* blocks */
	uint64_t	dds_lsize;	/* logical size */
	uint64_t	dds_psize;	/* physical size */
	uint64_t	dds_dsize;	/* deflated allocated size */
	uint64_t	dds_ref_blocks;	/* referenced blocks */
	uint64_t	dds_ref_lsize;	/* referenced lsize * refcnt */
	uint64_t	dds_ref_psize;	/* referenced psize * refcnt */
	uint64_t	dds_ref_dsize;	/* referenced dsize * refcnt */
} ddt_stat_t;

typedef struct ddt_histogram {
	ddt_stat_t	ddh_stat[64];	/* power-of-two histogram buckets */
} ddt_histogram_t;

#define	ZVOL_DRIVER		"zvol"
#define	ZFS_DRIVER		"zfs"
#define	ZFS_DEV			"/dev/zfs"

/* general zvol path */
#define	ZVOL_DIR		"/dev/zvol"
/* expansion */
#define	ZVOL_PSEUDO_DEV		"/devices/pseudo/zfs@0:"
/* for dump and swap */
#define	ZVOL_FULL_DEV_DIR	ZVOL_DIR "/dsk/"
#define	ZVOL_FULL_RDEV_DIR	ZVOL_DIR "/rdsk/"

#define	ZVOL_PROP_NAME		"name"
#define	ZVOL_DEFAULT_BLOCKSIZE	8192

/*
 * /dev/zfs ioctl numbers.
 */
typedef enum zfs_ioc {
	ZFS_IOC_FIRST =	('Z' << 8),
	ZFS_IOC = ZFS_IOC_FIRST,
	ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST,
	ZFS_IOC_POOL_DESTROY,
	ZFS_IOC_POOL_IMPORT,
	ZFS_IOC_POOL_EXPORT,
	ZFS_IOC_POOL_CONFIGS,
	ZFS_IOC_POOL_STATS,
	ZFS_IOC_POOL_TRYIMPORT,
	ZFS_IOC_POOL_SCAN,
	ZFS_IOC_POOL_FREEZE,
	ZFS_IOC_POOL_UPGRADE,
	ZFS_IOC_POOL_GET_HISTORY,
	ZFS_IOC_VDEV_ADD,
	ZFS_IOC_VDEV_REMOVE,
	ZFS_IOC_VDEV_SET_STATE,
	ZFS_IOC_VDEV_ATTACH,
	ZFS_IOC_VDEV_DETACH,
	ZFS_IOC_VDEV_SETPATH,
	ZFS_IOC_VDEV_SETFRU,
	ZFS_IOC_OBJSET_STATS,
	ZFS_IOC_OBJSET_ZPLPROPS,
	ZFS_IOC_DATASET_LIST_NEXT,
	ZFS_IOC_SNAPSHOT_LIST_NEXT,
	ZFS_IOC_SET_PROP,
	ZFS_IOC_CREATE,
	ZFS_IOC_DESTROY,
	ZFS_IOC_ROLLBACK,
	ZFS_IOC_RENAME,
	ZFS_IOC_RECV,
	ZFS_IOC_SEND,
	ZFS_IOC_INJECT_FAULT,
	ZFS_IOC_CLEAR_FAULT,
	ZFS_IOC_INJECT_LIST_NEXT,
	ZFS_IOC_ERROR_LOG,
	ZFS_IOC_CLEAR,
	ZFS_IOC_PROMOTE,
	ZFS_IOC_SNAPSHOT,
	ZFS_IOC_DSOBJ_TO_DSNAME,
	ZFS_IOC_OBJ_TO_PATH,
	ZFS_IOC_POOL_SET_PROPS,
	ZFS_IOC_POOL_GET_PROPS,
	ZFS_IOC_SET_FSACL,
	ZFS_IOC_GET_FSACL,
	ZFS_IOC_SHARE,
	ZFS_IOC_INHERIT_PROP,
	ZFS_IOC_SMB_ACL,
	ZFS_IOC_USERSPACE_ONE,
	ZFS_IOC_USERSPACE_MANY,
	ZFS_IOC_USERSPACE_UPGRADE,
	ZFS_IOC_HOLD,
	ZFS_IOC_RELEASE,
	ZFS_IOC_GET_HOLDS,
	ZFS_IOC_OBJSET_RECVD_PROPS,
	ZFS_IOC_VDEV_SPLIT,
	ZFS_IOC_NEXT_OBJ,
	ZFS_IOC_DIFF,
	ZFS_IOC_TMP_SNAPSHOT,
	ZFS_IOC_OBJ_TO_STATS,
	ZFS_IOC_SPACE_WRITTEN,
	ZFS_IOC_SPACE_SNAPS,
	ZFS_IOC_DESTROY_SNAPS,
	ZFS_IOC_POOL_REGUID,
	ZFS_IOC_POOL_REOPEN,
	ZFS_IOC_SEND_PROGRESS,
	ZFS_IOC_LOG_HISTORY,
	ZFS_IOC_SEND_NEW,
	ZFS_IOC_SEND_SPACE,
	ZFS_IOC_CLONE,
	ZFS_IOC_LAST
} zfs_ioc_t;
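/*
 * Editorial note (not part of the original header): the enum above is
 * numbered sequentially from ZFS_IOC_FIRST ('Z' << 8 == 0x5a00), so
 * ZFS_IOC_POOL_CREATE is 0x5a00, ZFS_IOC_POOL_DESTROY is 0x5a01, and so
 * on; inserting a value anywhere except just before ZFS_IOC_LAST
 * renumbers every later ioctl and breaks kernel/userland compatibility.
 * A compile-time check of that arithmetic, under a hypothetical guard
 * (the negative-array-size trick is a portable C89 static assertion):
 */
#ifdef ZFS_EXAMPLE_SNIPPETS
typedef char zfs_ioc_numbering_check[
	(ZFS_IOC_POOL_DESTROY == 0x5a01) ? 1 : -1];
#endif /* ZFS_EXAMPLE_SNIPPETS */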
/*
 * Internal SPA load state.  Used by FMA diagnosis engine.
 */
typedef enum {
	SPA_LOAD_NONE,		/* no load in progress */
	SPA_LOAD_OPEN,		/* normal open */
	SPA_LOAD_IMPORT,	/* import in progress */
	SPA_LOAD_TRYIMPORT,	/* tryimport in progress */
	SPA_LOAD_RECOVER,	/* recovery requested */
	SPA_LOAD_ERROR		/* load failed */
} spa_load_state_t;

/*
 * Bookmark name values.
 */
#define	ZPOOL_ERR_LIST		"error list"
#define	ZPOOL_ERR_DATASET	"dataset"
#define	ZPOOL_ERR_OBJECT	"object"

#define	HIS_MAX_RECORD_LEN	(MAXPATHLEN + MAXPATHLEN + 1)

/*
 * The following are names used in the nvlist describing
 * the pool's history log.
 */
#define	ZPOOL_HIST_RECORD	"history record"
#define	ZPOOL_HIST_TIME		"history time"
#define	ZPOOL_HIST_CMD		"history command"
#define	ZPOOL_HIST_WHO		"history who"
#define	ZPOOL_HIST_ZONE		"history zone"
#define	ZPOOL_HIST_HOST		"history hostname"
#define	ZPOOL_HIST_TXG		"history txg"
#define	ZPOOL_HIST_INT_EVENT	"history internal event"
#define	ZPOOL_HIST_INT_STR	"history internal str"
#define	ZPOOL_HIST_INT_NAME	"internal_name"
#define	ZPOOL_HIST_IOCTL	"ioctl"
#define	ZPOOL_HIST_INPUT_NVL	"in_nvl"
#define	ZPOOL_HIST_OUTPUT_NVL	"out_nvl"
#define	ZPOOL_HIST_DSNAME	"dsname"
#define	ZPOOL_HIST_DSID		"dsid"

/*
 * Flags for ZFS_IOC_VDEV_SET_STATE
 */
#define	ZFS_ONLINE_CHECKREMOVE	0x1
#define	ZFS_ONLINE_UNSPARE	0x2
#define	ZFS_ONLINE_FORCEFAULT	0x4
#define	ZFS_ONLINE_EXPAND	0x8
#define	ZFS_OFFLINE_TEMPORARY	0x1

/*
 * Flags for ZFS_IOC_POOL_IMPORT
 */
#define	ZFS_IMPORT_NORMAL	0x0
#define	ZFS_IMPORT_VERBATIM	0x1
#define	ZFS_IMPORT_ANY_HOST	0x2
#define	ZFS_IMPORT_MISSING_LOG	0x4
#define	ZFS_IMPORT_ONLY		0x8

/*
 * Sysevent payload members.  ZFS will generate the following sysevents with the
 * given payloads:
 *
 *	ESC_ZFS_RESILVER_START
 *	ESC_ZFS_RESILVER_END
 *	ESC_ZFS_POOL_DESTROY
 *	ESC_ZFS_POOL_REGUID
 *
 *		ZFS_EV_POOL_NAME	DATA_TYPE_STRING
 *		ZFS_EV_POOL_GUID	DATA_TYPE_UINT64
 *
 *	ESC_ZFS_VDEV_REMOVE
 *	ESC_ZFS_VDEV_CLEAR
 *	ESC_ZFS_VDEV_CHECK
 *
 *		ZFS_EV_POOL_NAME	DATA_TYPE_STRING
 *		ZFS_EV_POOL_GUID	DATA_TYPE_UINT64
 *		ZFS_EV_VDEV_PATH	DATA_TYPE_STRING	(optional)
 *		ZFS_EV_VDEV_GUID	DATA_TYPE_UINT64
 */
#define	ZFS_EV_POOL_NAME	"pool_name"
#define	ZFS_EV_POOL_GUID	"pool_guid"
#define	ZFS_EV_VDEV_PATH	"vdev_path"
#define	ZFS_EV_VDEV_GUID	"vdev_guid"

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_FS_ZFS_H */