Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
===================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	(revision 324004)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h	(revision 324005)
@@ -1,368 +1,368 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
 
 #ifndef	_SYS_FS_ZFS_ZNODE_H
 #define	_SYS_FS_ZFS_ZNODE_H
 
 #ifdef _KERNEL
 #include <sys/list.h>
 #include <sys/dmu.h>
 #include <sys/sa.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/rrwlock.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_stat.h>
 #endif
 #include <sys/zfs_acl.h>
 #include <sys/zil.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 /*
  * Additional file level attributes, that are stored
  * in the upper half of zp_flags
  *
  * Each value below has exactly one bit set, running from bit 32
  * (ZFS_READONLY) up to bit 45 (ZFS_SPARSE), so none of them can collide
  * with the "special zfs pflags" defined further down, which occupy
  * bits 0 through 8.
  */
 #define	ZFS_READONLY		0x0000000100000000
 #define	ZFS_HIDDEN		0x0000000200000000
 #define	ZFS_SYSTEM		0x0000000400000000
 #define	ZFS_ARCHIVE		0x0000000800000000
 #define	ZFS_IMMUTABLE		0x0000001000000000
 #define	ZFS_NOUNLINK		0x0000002000000000
 #define	ZFS_APPENDONLY		0x0000004000000000
 #define	ZFS_NODUMP		0x0000008000000000
 #define	ZFS_OPAQUE		0x0000010000000000
 #define	ZFS_AV_QUARANTINED 	0x0000020000000000
 #define	ZFS_AV_MODIFIED 	0x0000040000000000
 #define	ZFS_REPARSE		0x0000080000000000
 #define	ZFS_OFFLINE		0x0000100000000000
 #define	ZFS_SPARSE		0x0000200000000000
 
 /*
  * Set (value != 0) or clear (value == 0) the flag bit(s) `attr' in the
  * lvalue `pflags', then store the resulting flags word in the znode's
  * SA_ZPL_FLAGS attribute under transaction `tx'.  A failing sa_update()
  * is fatal (VERIFY).
  *
  * Every argument is parenthesized in the expansion so that compound
  * expressions such as `A | B' for `attr' behave as expected.  `pflags'
  * is expanded several times and must be a side-effect-free lvalue.  The
  * expansion is a bare brace block, so take care when using it as the body
  * of an unbraced if/else.
  */
 #define	ZFS_ATTR_SET(zp, attr, value, pflags, tx) \
 { \
 	if (value) \
 		(pflags) |= (attr); \
 	else \
 		(pflags) &= ~(attr); \
 	VERIFY(0 == sa_update((zp)->z_sa_hdl, SA_ZPL_FLAGS((zp)->z_zfsvfs), \
 	    &(pflags), sizeof (pflags), (tx))); \
 }
 
 /*
  * Define special zfs pflags
  *
  * These occupy the low bits (0x1 through 0x100) of the same flags word
  * whose upper half carries the file level attributes defined above.
  */
 #define	ZFS_XATTR		0x1		/* is an extended attribute */
 #define	ZFS_INHERIT_ACE		0x2		/* ACL has inheritable ACEs */
 #define	ZFS_ACL_TRIVIAL 	0x4		/* file's ACL is trivial */
 #define	ZFS_ACL_OBJ_ACE 	0x8		/* ACL has CMPLX Object ACE */
 #define	ZFS_ACL_PROTECTED	0x10		/* ACL protected */
 #define	ZFS_ACL_DEFAULTED	0x20		/* ACL should be defaulted */
 #define	ZFS_ACL_AUTO_INHERIT	0x40		/* ACL should be inherited */
 #define	ZFS_BONUS_SCANSTAMP	0x80		/* Scanstamp in bonus area */
 #define	ZFS_NO_EXECS_DENIED	0x100		/* exec was given to everyone */
 
 /*
  * Look up the system-attribute id for a given ZPL attribute in the
  * z_attr_table of `z' (callers in this code base pass a zfsvfs pointer).
  * The argument is parenthesized so that any pointer-valued expression
  * (e.g. `*pp' or `cond ? a : b') may be passed safely.
  */
 #define	SA_ZPL_ATIME(z)		(z)->z_attr_table[ZPL_ATIME]
 #define	SA_ZPL_MTIME(z)		(z)->z_attr_table[ZPL_MTIME]
 #define	SA_ZPL_CTIME(z)		(z)->z_attr_table[ZPL_CTIME]
 #define	SA_ZPL_CRTIME(z)	(z)->z_attr_table[ZPL_CRTIME]
 #define	SA_ZPL_GEN(z)		(z)->z_attr_table[ZPL_GEN]
 #define	SA_ZPL_DACL_ACES(z)	(z)->z_attr_table[ZPL_DACL_ACES]
 #define	SA_ZPL_XATTR(z)		(z)->z_attr_table[ZPL_XATTR]
 #define	SA_ZPL_SYMLINK(z)	(z)->z_attr_table[ZPL_SYMLINK]
 #define	SA_ZPL_RDEV(z)		(z)->z_attr_table[ZPL_RDEV]
 #define	SA_ZPL_SCANSTAMP(z)	(z)->z_attr_table[ZPL_SCANSTAMP]
 #define	SA_ZPL_UID(z)		(z)->z_attr_table[ZPL_UID]
 #define	SA_ZPL_GID(z)		(z)->z_attr_table[ZPL_GID]
 #define	SA_ZPL_PARENT(z)	(z)->z_attr_table[ZPL_PARENT]
 #define	SA_ZPL_LINKS(z)		(z)->z_attr_table[ZPL_LINKS]
 #define	SA_ZPL_MODE(z)		(z)->z_attr_table[ZPL_MODE]
 #define	SA_ZPL_DACL_COUNT(z)	(z)->z_attr_table[ZPL_DACL_COUNT]
 #define	SA_ZPL_FLAGS(z)		(z)->z_attr_table[ZPL_FLAGS]
 #define	SA_ZPL_SIZE(z)		(z)->z_attr_table[ZPL_SIZE]
 #define	SA_ZPL_ZNODE_ACL(z)	(z)->z_attr_table[ZPL_ZNODE_ACL]
 #define	SA_ZPL_PAD(z)		(z)->z_attr_table[ZPL_PAD]
 
 /*
  * Is ID ephemeral?  True when the id is numerically greater than MAXUID.
  * The argument is parenthesized so compound expressions compare as a
  * whole.
  */
 #define	IS_EPHEMERAL(x)		((x) > MAXUID)
 
 /*
  * Should we use FUIDs / system attributes?
  *
  * Both tests require the ZPL version passed in *and* the version of the
  * pool that holds objset `os' to have reached the respective feature
  * version.  `version' is parenthesized so that a compound expression is
  * compared as a whole.
  */
 #define	USE_FUIDS(version, os)	((version) >= ZPL_VERSION_FUID && \
     spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
 #define	USE_SA(version, os) ((version) >= ZPL_VERSION_SA && \
     spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA)
 
 /* Object number of the master node -- presumably fixed; confirm with users. */
 #define	MASTER_NODE_OBJ	1
 
 /*
  * Special attributes for master node.
  * "userquota@" and "groupquota@" are also valid (from
  * zfs_userquota_prop_prefixes[]).
  *
  * The strings below are the attribute names themselves; changing any
  * character changes the name that is looked up.
  */
 #define	ZFS_FSID		"FSID"
 #define	ZFS_UNLINKED_SET	"DELETE_QUEUE"
 #define	ZFS_ROOT_OBJ		"ROOT"
 #define	ZPL_VERSION_STR		"VERSION"
 #define	ZFS_FUID_TABLES		"FUID"
 #define	ZFS_SHARES_DIR		"SHARES"
 #define	ZFS_SA_ATTRS		"SA_ATTRS"
 
 /*
  * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
  * the directory entries.
  *
  * The file-type field selected by S_IFMT is shifted down by 12 bits.
  * The definition is skipped when a system header has already supplied
  * IFTODT.
  */
 #ifndef IFTODT
 #define	IFTODT(mode) (((mode) & S_IFMT) >> 12)
 #endif
 
 /*
  * The directory entry has the type (currently unused on Solaris) in the
  * top 4 bits, and the object number in the low 48 bits.  The "middle"
  * 12 bits are unused.
  *
  * ZFS_DIRENT_TYPE reads 4 bits starting at bit 60; ZFS_DIRENT_OBJ reads
  * 48 bits starting at bit 0 (assuming BF64_GET(x, low, len) extracts
  * `len' bits beginning at bit `low' -- confirm against its definition).
  */
 #define	ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4)
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
 
 /*
  * Directory entry locks control access to directory entries.
  * They are used to protect creates, deletes, and renames.
  * Each directory znode has a mutex and a list of locked names.
  */
 #ifdef _KERNEL
 /*
  * One locked directory entry name.  Records for the same directory are
  * chained through dl_next; dl_dzp points back at the directory znode.
  */
 typedef struct zfs_dirlock {
 	char		*dl_name;	/* directory entry being locked */
 	uint32_t	dl_sharecnt;	/* 0 if exclusive, > 0 if shared */
 	uint8_t		dl_namelock;	/* 1 if z_name_lock is NOT held */
 	uint16_t	dl_namesize;	/* set if dl_name was allocated */
 	kcondvar_t	dl_cv;		/* wait for entry to be unlocked */
 	struct znode	*dl_dzp;	/* directory znode */
 	struct zfs_dirlock *dl_next;	/* next in z_dirlocks list */
 } zfs_dirlock_t;
 
 /*
  * In-core znode: ties a ZFS object (z_id) to its vnode (z_vnode), its
  * owning file system (z_zfsvfs) and its system-attribute handle
  * (z_sa_hdl).  Members whose comment ends in "(cached)" hold in-memory
  * copies of per-file attributes.
  *
  * The four locking members inside "#ifdef illumos" exist only in illumos
  * builds; other platforms get a smaller structure.
  */
 typedef struct znode {
 	struct zfsvfs	*z_zfsvfs;
 	vnode_t		*z_vnode;
 	uint64_t	z_id;		/* object ID for this znode */
 #ifdef illumos
 	kmutex_t	z_lock;		/* znode modification lock */
 	krwlock_t	z_parent_lock;	/* parent lock for directories */
 	krwlock_t	z_name_lock;	/* "master" lock for dirent locks */
 	zfs_dirlock_t	*z_dirlocks;	/* directory entry lock list */
 #endif
 	kmutex_t	z_range_lock;	/* protects changes to z_range_avl */
 	avl_tree_t	z_range_avl;	/* avl tree of file range locks */
 	uint8_t		z_unlinked;	/* file has been unlinked */
 	uint8_t		z_atime_dirty;	/* atime needs to be synced */
 	uint8_t		z_zn_prefetch;	/* Prefetch znodes? */
 	uint8_t		z_moved;	/* Has this znode been moved? */
 	uint_t		z_blksz;	/* block size in bytes */
 	uint_t		z_seq;		/* modification sequence number */
 	uint64_t	z_mapcnt;	/* number of pages mapped to file */
 	uint64_t	z_gen;		/* generation (cached) */
 	uint64_t	z_size;		/* file size (cached) */
 	uint64_t	z_atime[2];	/* atime (cached) */
 	uint64_t	z_links;	/* file links (cached) */
 	uint64_t	z_pflags;	/* pflags (cached) */
 	uint64_t	z_uid;		/* uid fuid (cached) */
 	uint64_t	z_gid;		/* gid fuid (cached) */
 	mode_t		z_mode;		/* mode (cached) */
 	uint32_t	z_sync_cnt;	/* synchronous open count */
 	kmutex_t	z_acl_lock;	/* acl data lock */
 	zfs_acl_t	*z_acl_cached;	/* cached acl */
 	list_node_t	z_link_node;	/* all znodes in fs link */
 	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
 	boolean_t	z_is_sa;	/* are we native sa? */
 } znode_t;
 
 
 /*
  * Range locking rules
  * --------------------
  * 1. When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
  *    file range needs to be locked as RL_WRITER. Only then can the pages be
  *    freed etc and zp_size reset. zp_size must be set within range lock.
  * 2. For writes and punching holes (zfs_write & zfs_space) just the range
  *    being written or freed needs to be locked as RL_WRITER.
  *    Multiple writes at the end of the file must coordinate zp_size updates
  *    to ensure data isn't lost. A compare and swap loop is currently used
  *    to ensure the file size is at least the offset last written.
  * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
  *    read needs to be locked as RL_READER. A check against zp_size can then
  *    be made for reading beyond end of file.
  */
 
 /*
  * Convert between znode pointers and vnode pointers
  */
 #ifdef DEBUG
 static __inline vnode_t *
 ZTOV(znode_t *zp)
 {
 	vnode_t *vp = zp->z_vnode;
 
-	ASSERT(vp == NULL || vp->v_data == NULL || vp->v_data == zp);
+	ASSERT(vp != NULL && vp->v_data == zp);
 	return (vp);
 }
 static __inline znode_t *
 VTOZ(vnode_t *vp)
 {
 	znode_t *zp = (znode_t *)vp->v_data;
 
-	ASSERT(zp == NULL || zp->z_vnode == NULL || zp->z_vnode == vp);
+	ASSERT(zp != NULL && zp->z_vnode == vp);
 	return (zp);
 }
 #else
 #define	ZTOV(ZP)	((ZP)->z_vnode)
 #define	VTOZ(VP)	((znode_t *)(VP)->v_data)
 #endif
 
 /*
  * Called on entry to each ZFS vnode and vfs operation.
  *
  * Takes z_teardown_lock as a reader.  If the file system is already
  * flagged z_unmounted the lock is dropped again and the *enclosing
  * function* returns EIO -- the expansion contains a return statement, so
  * the macro is only usable inside functions whose return type accepts
  * an int errno value.
  */
 #define	ZFS_ENTER(zfsvfs) \
 	{ \
 		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
 		if ((zfsvfs)->z_unmounted) { \
 			ZFS_EXIT(zfsvfs); \
 			return (EIO); \
 		} \
 	}
 
 /*
  * Must be called before exiting the vop; releases the hold on
  * z_teardown_lock taken by ZFS_ENTER.
  */
 #define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)
 
 /*
  * Verifies the znode is valid.
  *
  * A znode without an SA handle is treated as invalid: ZFS_EXIT() is
  * invoked on its file system and the *enclosing function* returns EIO.
  * The expansion is a bare `if' statement, so do not follow an invocation
  * with `else'.
  *
  * The final line deliberately carries no continuation backslash;
  * a trailing one would silently splice the following source line into
  * the macro body.
  */
 #define	ZFS_VERIFY_ZP(zp) \
 	if ((zp)->z_sa_hdl == NULL) { \
 		ZFS_EXIT((zp)->z_zfsvfs); \
 		return (EIO); \
 	}
 
 /*
  * Macros for dealing with dmu_buf_hold
  *
  * An object number is mapped onto one of the mutexes in
  * zfsvfs->z_hold_mtx[] by masking it with (ZFS_OBJ_MTX_SZ - 1); this is
  * an even spread only if ZFS_OBJ_MTX_SZ is a power of two -- presumably
  * it is, confirm at its definition.  Distinct objects can therefore
  * share a mutex.
  */
 #define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
 #define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
 	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
 #define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
 	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
 #define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
 	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
 #define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
 	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
 
 /*
  * Encode ZFS stored time values from a struct timespec.
  * stmp[0] receives tv_sec and stmp[1] receives tv_nsec, both widened to
  * uint64_t.  `tp' and `stmp' are each expanded twice.
  */
 #define	ZFS_TIME_ENCODE(tp, stmp)		\
 {						\
 	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
 	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
 }
 
 /*
  * Decode ZFS stored time values to a struct timespec.
  * Inverse of ZFS_TIME_ENCODE: stmp[0] is narrowed to time_t and stmp[1]
  * to long.
  */
 #define	ZFS_TIME_DECODE(tp, stmp)		\
 {						\
 	(tp)->tv_sec = (time_t)(stmp)[0];		\
 	(tp)->tv_nsec = (long)(stmp)[1];		\
 }
 
 /*
  * Timestamp defines
  *
  * Masks passed to zfs_tstamp_update_setup() to select which timestamps
  * to refresh; note that CONTENT_MODIFIED includes the ctime bit as well
  * as the mtime bit.
  */
 #define	ACCESSED		(AT_ATIME)
 #define	STATE_CHANGED		(AT_CTIME)
 #define	CONTENT_MODIFIED	(AT_MTIME | AT_CTIME)
 
 /*
  * Request an atime update for `zp' unless atime is disabled on the file
  * system (z_atime == 0) or the VFS is flagged VFS_RDONLY.
  *
  * The expansion is a bare `if' that already ends in a semicolon, so an
  * invocation directly followed by `else' will not parse as intended.
  */
 #define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
 	if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
 		zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE);
 
 extern int	zfs_init_fs(zfsvfs_t *, znode_t **);
 extern void	zfs_set_dataprop(objset_t *);
 extern void	zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
     dmu_tx_t *tx);
 extern void	zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2],
     uint64_t [2], boolean_t);
 extern void	zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
 extern int	zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
 extern void	zfs_znode_init(void);
 extern void	zfs_znode_fini(void);
 extern int	zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
 extern int	zfs_rezget(znode_t *);
 extern void	zfs_zinactive(znode_t *);
 extern void	zfs_znode_delete(znode_t *, dmu_tx_t *);
 extern void	zfs_znode_free(znode_t *);
 /* Neither routine takes arguments; declare real prototypes, not (). */
 extern void	zfs_remove_op_tables(void);
 extern int	zfs_create_op_tables(void);
 extern dev_t	zfs_cmpldev(uint64_t);
 extern int	zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
 extern int	zfs_get_stats(objset_t *os, nvlist_t *nv);
 extern void	zfs_znode_dmu_fini(znode_t *);
 
 extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
     vattr_t *vap);
 extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
     vattr_t *vap);
 extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, char *name, uint64_t foid);
 #define	ZFS_NO_OBJECT	0	/* no object id */
 extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name);
 extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *dzp, znode_t *zp, char *name, char *link);
 extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
     znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
 extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, offset_t off, ssize_t len, int ioflag);
 extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, uint64_t off, uint64_t len);
 extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
     znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
 #ifndef ZFS_NO_ACL
 extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
     vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
 #endif
 extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
 extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
 
 extern zil_get_data_t zfs_get_data;
 extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
 extern int zfsfstype;
 
 extern int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf);
 
 #endif /* _KERNEL */
 
 extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_FS_ZFS_ZNODE_H */
Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
===================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	(revision 324004)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c	(revision 324005)
@@ -1,2710 +1,2713 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sysmacros.h>
 #include <sys/resource.h>
 #include <sys/vfs.h>
 #include <sys/vnode.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/unistd.h>
 #include <sys/sdt.h>
 #include <sys/fs/zfs.h>
 #include <sys/policy.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/dmu.h>
 #include <sys/dnode.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <acl/acl_common.h>
 
 /* Shorthand for the two access-control ACE types. */
 #define	ALLOW	ACE_ACCESS_ALLOWED_ACE_TYPE
 #define	DENY	ACE_ACCESS_DENIED_ACE_TYPE
 /* Inclusive bounds used when range-checking an ACE type. */
 #define	MAX_ACE_TYPE	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE
 #define	MIN_ACE_TYPE	ALLOW
 
 /* Type-flag combination that identifies the owning-group entry. */
 #define	OWNING_GROUP		(ACE_GROUP|ACE_IDENTIFIER_GROUP)
 #define	EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \
     ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE)
 #define	EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \
     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
 /* NOTE(review): expands to the same bits as EVERYONE_DENY_MASK. */
 #define	OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
 
 #define	ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
     ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
     ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
     ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
 
 /* WRITE_MASK is the union of the data-write and attribute-write bits. */
 #define	WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
 #define	WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
     ACE_DELETE|ACE_DELETE_CHILD)
 #define	WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
 
 /* NOTE(review): OGE_CLEAR and OKAY_MASK_BITS expand to identical bits. */
 #define	OGE_CLEAR	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
 
 #define	OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
 
 /* Every inheritance-related ACE flag. */
 #define	ALL_INHERIT	(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \
     ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE)
 
 #define	RESTRICTED_CLEAR	(ACE_WRITE_ACL|ACE_WRITE_OWNER)
 
 /* ACL-wide hint flags (see the ZFS_ACL_* pflags in zfs_znode.h). */
 #define	V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\
     ZFS_ACL_PROTECTED)
 
 #define	ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\
     ZFS_ACL_OBJ_ACE)
 
 /* Execute permission bits for user, group and other. */
 #define	ALL_MODE_EXECS (S_IXUSR | S_IXGRP | S_IXOTH)
 
 static uint16_t
 zfs_ace_v0_get_type(void *acep)
 {
 	return (((zfs_oldace_t *)acep)->z_type);
 }
 
 static uint16_t
 zfs_ace_v0_get_flags(void *acep)
 {
 	return (((zfs_oldace_t *)acep)->z_flags);
 }
 
 static uint32_t
 zfs_ace_v0_get_mask(void *acep)
 {
 	return (((zfs_oldace_t *)acep)->z_access_mask);
 }
 
 static uint64_t
 zfs_ace_v0_get_who(void *acep)
 {
 	return (((zfs_oldace_t *)acep)->z_fuid);
 }
 
 static void
 zfs_ace_v0_set_type(void *acep, uint16_t type)
 {
 	((zfs_oldace_t *)acep)->z_type = type;
 }
 
 static void
 zfs_ace_v0_set_flags(void *acep, uint16_t flags)
 {
 	((zfs_oldace_t *)acep)->z_flags = flags;
 }
 
 static void
 zfs_ace_v0_set_mask(void *acep, uint32_t mask)
 {
 	((zfs_oldace_t *)acep)->z_access_mask = mask;
 }
 
 static void
 zfs_ace_v0_set_who(void *acep, uint64_t who)
 {
 	((zfs_oldace_t *)acep)->z_fuid = who;
 }
 
 /*
  * Size in bytes of an old-format ACE.  Every zfs_oldace_t has the same
  * size, so the entry passed in is never examined.
  */
 /*ARGSUSED*/
 static size_t
 zfs_ace_v0_size(void *acep)
 {
 	size_t nbytes = sizeof (zfs_oldace_t);
 
 	return (nbytes);
 }
 
 /*
  * Size of the fixed part of an old-format ACE, which is the whole
  * zfs_oldace_t.
  */
 static size_t
 zfs_ace_v0_abstract_size(void)
 {
 	size_t nbytes = sizeof (zfs_oldace_t);
 
 	return (nbytes);
 }
 
 /*
  * Byte offset of z_access_mask within zfs_oldace_t.
  */
 static int
 zfs_ace_v0_mask_off(void)
 {
 	int mask_offset = offsetof(zfs_oldace_t, z_access_mask);
 
 	return (mask_offset);
 }
 
 /*
  * Old-format ACEs carry no extra payload: *datap is always set to NULL
  * and the reported payload length is always 0.
  */
 /*ARGSUSED*/
 static int
 zfs_ace_v0_data(void *acep, void **datap)
 {
 	int payload_len = 0;
 
 	*datap = NULL;
 	return (payload_len);
 }
 
 /*
  * Operations vector for old-format (zfs_oldace_t) ACLs; zfs_acl_alloc()
  * installs it for any version other than ZFS_ACL_VERSION_FUID.
  *
  * The initializer is positional, so the entries must stay in the member
  * order of acl_ops_t (declared elsewhere -- verify before reordering).
  */
 static acl_ops_t zfs_acl_v0_ops = {
 	zfs_ace_v0_get_mask,		/* read access mask */
 	zfs_ace_v0_set_mask,		/* write access mask */
 	zfs_ace_v0_get_flags,		/* read flags */
 	zfs_ace_v0_set_flags,		/* write flags */
 	zfs_ace_v0_get_type,		/* read type */
 	zfs_ace_v0_set_type,		/* write type */
 	zfs_ace_v0_get_who,		/* read fuid */
 	zfs_ace_v0_set_who,		/* write fuid */
 	zfs_ace_v0_size,		/* size of a given ACE */
 	zfs_ace_v0_abstract_size,	/* size of the fixed part */
 	zfs_ace_v0_mask_off,		/* offset of the access mask */
 	zfs_ace_v0_data			/* extra payload, always none */
 };
 
 static uint16_t
 zfs_ace_fuid_get_type(void *acep)
 {
 	return (((zfs_ace_hdr_t *)acep)->z_type);
 }
 
 static uint16_t
 zfs_ace_fuid_get_flags(void *acep)
 {
 	return (((zfs_ace_hdr_t *)acep)->z_flags);
 }
 
 static uint32_t
 zfs_ace_fuid_get_mask(void *acep)
 {
 	return (((zfs_ace_hdr_t *)acep)->z_access_mask);
 }
 
 static uint64_t
 zfs_ace_fuid_get_who(void *args)
 {
 	uint16_t entry_type;
 	zfs_ace_t *acep = args;
 
 	entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS;
 
 	if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP ||
 	    entry_type == ACE_EVERYONE)
 		return (-1);
 	return (((zfs_ace_t *)acep)->z_fuid);
 }
 
 static void
 zfs_ace_fuid_set_type(void *acep, uint16_t type)
 {
 	((zfs_ace_hdr_t *)acep)->z_type = type;
 }
 
 static void
 zfs_ace_fuid_set_flags(void *acep, uint16_t flags)
 {
 	((zfs_ace_hdr_t *)acep)->z_flags = flags;
 }
 
 static void
 zfs_ace_fuid_set_mask(void *acep, uint32_t mask)
 {
 	((zfs_ace_hdr_t *)acep)->z_access_mask = mask;
 }
 
 /*
  * FUID-format accessor: store `who' in the ACE's z_fuid.
  *
  * Nothing is written for entries whose type flags are ACE_OWNER,
  * OWNING_GROUP or ACE_EVERYONE.
  */
 static void
 zfs_ace_fuid_set_who(void *arg, uint64_t who)
 {
 	zfs_ace_t *ace = arg;
 	uint16_t who_type = ace->z_hdr.z_flags & ACE_TYPE_FLAGS;
 
 	if (who_type != ACE_OWNER && who_type != OWNING_GROUP &&
 	    who_type != ACE_EVERYONE)
 		ace->z_fuid = who;
 }
 
 static size_t
 zfs_ace_fuid_size(void *acep)
 {
 	zfs_ace_hdr_t *zacep = acep;
 	uint16_t entry_type;
 
 	switch (zacep->z_type) {
 	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 		return (sizeof (zfs_object_ace_t));
 	case ALLOW:
 	case DENY:
 		entry_type =
 		    (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS);
 		if (entry_type == ACE_OWNER ||
 		    entry_type == OWNING_GROUP ||
 		    entry_type == ACE_EVERYONE)
 			return (sizeof (zfs_ace_hdr_t));
 		/*FALLTHROUGH*/
 	default:
 		return (sizeof (zfs_ace_t));
 	}
 }
 
 /*
  * Size of the part common to every FUID-format ACE: the header.
  */
 static size_t
 zfs_ace_fuid_abstract_size(void)
 {
 	size_t nbytes = sizeof (zfs_ace_hdr_t);
 
 	return (nbytes);
 }
 
 /*
  * Byte offset of z_access_mask within zfs_ace_hdr_t.
  */
 static int
 zfs_ace_fuid_mask_off(void)
 {
 	int mask_offset = offsetof(zfs_ace_hdr_t, z_access_mask);
 
 	return (mask_offset);
 }
 
 static int
 zfs_ace_fuid_data(void *acep, void **datap)
 {
 	zfs_ace_t *zacep = acep;
 	zfs_object_ace_t *zobjp;
 
 	switch (zacep->z_hdr.z_type) {
 	case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 	case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 	case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 	case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 		zobjp = acep;
 		*datap = (caddr_t)zobjp + sizeof (zfs_ace_t);
 		return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t));
 	default:
 		*datap = NULL;
 		return (0);
 	}
 }
 
 /*
  * Operations vector for FUID-format ACLs; zfs_acl_alloc() installs it
  * when the version is ZFS_ACL_VERSION_FUID.
  *
  * The initializer is positional, so the entries must stay in the member
  * order of acl_ops_t (declared elsewhere -- verify before reordering).
  */
 static acl_ops_t zfs_acl_fuid_ops = {
 	zfs_ace_fuid_get_mask,		/* read access mask */
 	zfs_ace_fuid_set_mask,		/* write access mask */
 	zfs_ace_fuid_get_flags,		/* read flags */
 	zfs_ace_fuid_set_flags,		/* write flags */
 	zfs_ace_fuid_get_type,		/* read type */
 	zfs_ace_fuid_set_type,		/* write type */
 	zfs_ace_fuid_get_who,		/* read fuid (or -1) */
 	zfs_ace_fuid_set_who,		/* write fuid where applicable */
 	zfs_ace_fuid_size,		/* size of a given ACE */
 	zfs_ace_fuid_abstract_size,	/* size of the common header */
 	zfs_ace_fuid_mask_off,		/* offset of the access mask */
 	zfs_ace_fuid_data		/* object-ACE payload, if any */
 };
 
 /*
  * The following three functions are provided for compatibility with
  * older ZPL versions in order to determine if the file used to have
  * an external ACL and what version of ACL previously existed on the
  * file.  Would really be nice to not need this, sigh.
  */
 
 /*
  * Return the object number recorded in the znode's legacy ACL record
  * (z_acl_extern_obj), or 0 when the znode is a native-SA znode.
  *
  * zfs_sa_upgrade can race with us and flip z_is_sa after the check at
  * the top.  In that case the SA_ZPL_ZNODE_ACL attribute has already been
  * removed, so the lookup must fail with ENOENT and z_is_sa must now be
  * set; any other failure is fatal (VERIFY).
  */
 uint64_t
 zfs_external_acl(znode_t *zp)
 {
 	zfs_acl_phys_t acl_phys;
 	int error;
 
 	if (zp->z_is_sa)
 		return (0);
 
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 	    &acl_phys, sizeof (acl_phys));
 	if (error != 0) {
 		/* Only legal failure: the znode was upgraded meanwhile. */
 		VERIFY(zp->z_is_sa && error == ENOENT);
 		return (0);
 	}
 
 	return (acl_phys.z_acl_extern_obj);
 }
 
 /*
  * Determine size of ACL in bytes
  *
  * This is more complicated than it should be since we have to deal
  * with old external ACLs.
  *
  * On success returns 0 with *aclsize set to the ACL size in bytes and
  * *aclcount to the number of entries.  For non-SA znodes the legacy ACL
  * record is read into *aclphys as well; for SA znodes *aclphys is left
  * untouched.  A failing SA call is returned unchanged.  The caller must
  * hold zp->z_acl_lock.
  */
 static int
 zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
     zfs_acl_phys_t *aclphys)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t acl_count;
 	int size;
 	int error;
 
 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 
 	if (!zp->z_is_sa) {
 		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
 		    aclphys, sizeof (*aclphys));
 		if (error != 0)
 			return (error);
 
 		if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) {
 			/* Initial version: z_acl_size is the entry count. */
 			*aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size);
 			*aclcount = aclphys->z_acl_size;
 		} else {
 			*aclsize = aclphys->z_acl_size;
 			*aclcount = aclphys->z_acl_count;
 		}
 		return (0);
 	}
 
 	error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), &size);
 	if (error != 0)
 		return (error);
 	*aclsize = size;
 
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
 	    &acl_count, sizeof (acl_count));
 	if (error != 0)
 		return (error);
 	*aclcount = acl_count;
 
 	return (0);
 }
 
 /*
  * Return the ACL version in use on a znode: ZFS_ACL_VERSION_FUID for
  * native-SA znodes, otherwise the z_acl_version stored in the legacy
  * ACL record.
  *
  * zfs_sa_upgrade can race with us and flip z_is_sa after the first
  * check.  The upgrade removes SA_ZPL_ZNODE_ACL, so a failed lookup must
  * be ENOENT with z_is_sa now set (VERIFY); the FUID version is then
  * reported.
  */
 int
 zfs_znode_acl_version(znode_t *zp)
 {
 	zfs_acl_phys_t acl_phys;
 	int error;
 
 	if (zp->z_is_sa)
 		return (ZFS_ACL_VERSION_FUID);
 
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs),
 	    &acl_phys, sizeof (acl_phys));
 	if (error != 0) {
 		/* Only legal failure: the znode was upgraded meanwhile. */
 		VERIFY(zp->z_is_sa && error == ENOENT);
 		return (ZFS_ACL_VERSION_FUID);
 	}
 
 	return (acl_phys.z_acl_version);
 }
 
 /*
  * Map a ZPL version onto the ACL version it uses: anything older than
  * ZPL_VERSION_FUID gets ZFS_ACL_VERSION_INITIAL, everything else
  * ZFS_ACL_VERSION_FUID.
  */
 static int
 zfs_acl_version(int version)
 {
 	return ((version < ZPL_VERSION_FUID) ?
 	    ZFS_ACL_VERSION_INITIAL : ZFS_ACL_VERSION_FUID);
 }
 
 /*
  * ACL version implied by the ZPL version of the znode's file system.
  */
 static int
 zfs_acl_version_zp(znode_t *zp)
 {
 	int zpl_version = zp->z_zfsvfs->z_version;
 
 	return (zfs_acl_version(zpl_version));
 }
 
 /*
  * Allocate a zeroed, empty ACL of the given version.
  *
  * The node list is initialized and the accessor vector matching the
  * version is installed: the FUID ops for ZFS_ACL_VERSION_FUID, the
  * old-format ops for anything else.  The allocation sleeps (KM_SLEEP).
  * Release the result with zfs_acl_free().
  */
 zfs_acl_t *
 zfs_acl_alloc(int vers)
 {
 	zfs_acl_t *aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP);
 
 	aclp->z_version = vers;
 	aclp->z_ops = (vers == ZFS_ACL_VERSION_FUID) ?
 	    zfs_acl_fuid_ops : zfs_acl_v0_ops;
 	list_create(&aclp->z_acl, sizeof (zfs_acl_node_t),
 	    offsetof(zfs_acl_node_t, z_next));
 
 	return (aclp);
 }
 
 /*
  * Allocate a zeroed ACL node and, when `bytes' is non-zero, a data
  * buffer of that many bytes for it.  The buffer itself is NOT zeroed
  * (kmem_alloc).  With bytes == 0 the node has no buffer and all of its
  * size fields stay 0.  Both allocations sleep (KM_SLEEP).
  */
 zfs_acl_node_t *
 zfs_acl_node_alloc(size_t bytes)
 {
 	zfs_acl_node_t *node = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP);
 
 	if (bytes == 0)
 		return (node);
 
 	node->z_acldata = kmem_alloc(bytes, KM_SLEEP);
 	node->z_allocdata = node->z_acldata;
 	node->z_allocsize = bytes;
 	node->z_size = bytes;
 
 	return (node);
 }
 
 /*
  * Free an ACL node.  The data buffer is released only when the node
  * records a non-zero z_allocsize.
  */
 static void
 zfs_acl_node_free(zfs_acl_node_t *aclnode)
 {
 	if (aclnode->z_allocsize != 0) {
 		kmem_free(aclnode->z_allocdata, aclnode->z_allocsize);
 	}
 
 	kmem_free(aclnode, sizeof (zfs_acl_node_t));
 }
 
 /*
  * Detach and free every node on the ACL's list, then reset the cached
  * entry count and byte count to zero.  The zfs_acl_t itself is kept.
  */
 static void
 zfs_acl_release_nodes(zfs_acl_t *aclp)
 {
 	zfs_acl_node_t *node;
 
 	for (;;) {
 		node = list_head(&aclp->z_acl);
 		if (node == NULL)
 			break;
 		list_remove(&aclp->z_acl, node);
 		zfs_acl_node_free(node);
 	}
 
 	aclp->z_acl_count = 0;
 	aclp->z_acl_bytes = 0;
 }
 
 /*
  * Destroy an ACL obtained from zfs_acl_alloc(): release all of its
  * nodes, tear down the list head and free the structure itself.
  */
 void
 zfs_acl_free(zfs_acl_t *aclp)
 {
 	/* The list must be empty before it may be destroyed. */
 	zfs_acl_release_nodes(aclp);
 	list_destroy(&aclp->z_acl);
 
 	kmem_free(aclp, sizeof (zfs_acl_t));
 }
 
 /*
  * Check that an ACE type, together with its type flags, is acceptable.
  *
  * For ALLOW, DENY, SYSTEM_AUDIT and SYSTEM_ALARM entries the "who" part
  * of the flags must be one of: ACE_OWNER, OWNING_GROUP, ACE_EVERYONE,
  * ACE_IDENTIFIER_GROUP, or 0.  Any other type is accepted as long as it
  * lies within [MIN_ACE_TYPE, MAX_ACE_TYPE].
  */
 static boolean_t
 zfs_acl_valid_ace_type(uint_t type, uint_t flags)
 {
 	uint16_t who_type;
 
 	switch (type) {
 	case ALLOW:
 	case DENY:
 	case ACE_SYSTEM_AUDIT_ACE_TYPE:
 	case ACE_SYSTEM_ALARM_ACE_TYPE:
 		break;
 	default:
 		if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE)
 			return (B_TRUE);
 		return (B_FALSE);
 	}
 
 	who_type = flags & ACE_TYPE_FLAGS;
 	return (who_type == ACE_OWNER ||
 	    who_type == OWNING_GROUP ||
 	    who_type == ACE_EVERYONE || who_type == 0 ||
 	    who_type == ACE_IDENTIFIER_GROUP);
 }
 
 /*
  * Validate one ACE and record ACL-wide hints as a side effect.
  *
  * Checks, in order:
  *   1. type/flags pass zfs_acl_valid_ace_type();
  *   2. object ACE types are rejected on ACLs older than
  *      ZFS_ACL_VERSION_FUID, otherwise ZFS_ACL_OBJ_ACE is added to
  *      aclp->z_hints;
  *   3. on directories (VDIR) a file/directory inherit flag adds
  *      ZFS_INHERIT_ACE to aclp->z_hints;
  *   4. INHERIT_ONLY / NO_PROPAGATE are only legal together with a
  *      file or directory inherit flag.
  *
  * Hints set by steps 2 and 3 remain set even when step 4 rejects the
  * entry.
  */
 static boolean_t
 zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags)
 {
 	boolean_t is_object_ace;
 
 	if (!zfs_acl_valid_ace_type(type, iflags))
 		return (B_FALSE);
 
 	is_object_ace = (type == ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE ||
 	    type == ACE_ACCESS_DENIED_OBJECT_ACE_TYPE ||
 	    type == ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE ||
 	    type == ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE);
 	if (is_object_ace) {
 		if (aclp->z_version < ZFS_ACL_VERSION_FUID)
 			return (B_FALSE);
 		aclp->z_hints |= ZFS_ACL_OBJ_ACE;
 	}
 
 	if (obj_type == VDIR &&
 	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 		aclp->z_hints |= ZFS_INHERIT_ACE;
 
 	if ((iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) &&
 	    (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)) == 0)
 		return (B_FALSE);
 
 	return (B_TRUE);
 }
 
 /*
  * Iterate over the ACEs of an ACL.
  *
  * Pass start == NULL to begin at the first ACE of the first node; pass
  * any non-NULL value to continue from the position saved in aclp
  * (z_curr_node / z_next_ace).  Only the NULL-ness of `start' is looked
  * at.
  *
  * On success the ACE's who, access mask, flags and type are stored
  * through the output pointers, the saved position is advanced, and a
  * pointer to the ACE is returned.  NULL is returned, with the outputs
  * untouched, when the list is exhausted, when the node reached has no
  * entries left, or when the ACE would extend past the end of the
  * node's data (z_acldata + z_size).
  */
 static void *
 zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who,
     uint32_t *access_mask, uint16_t *iflags, uint16_t *type)
 {
 	zfs_acl_node_t *node;
 	void *ace;
 	size_t ace_size;
 
 	ASSERT(aclp);
 
 	/* A fresh walk rewinds to the head of the node list. */
 	if (start == NULL) {
 		node = list_head(&aclp->z_acl);
 		if (node == NULL)
 			return (NULL);
 
 		node->z_ace_idx = 0;
 		aclp->z_curr_node = node;
 		aclp->z_next_ace = node->z_acldata;
 	}
 
 	node = aclp->z_curr_node;
 	if (node == NULL)
 		return (NULL);
 
 	/* Current node used up: step to the next one. */
 	if (node->z_ace_idx >= node->z_ace_count) {
 		node = list_next(&aclp->z_acl, node);
 		if (node == NULL)
 			return (NULL);
 
 		node->z_ace_idx = 0;
 		aclp->z_curr_node = node;
 		aclp->z_next_ace = node->z_acldata;
 	}
 
 	/* The node just reached may itself hold no entries. */
 	if (node->z_ace_idx >= node->z_ace_count)
 		return (NULL);
 
 	ace = aclp->z_next_ace;
 
 	/*
 	 * Make sure we don't overstep our bounds
 	 */
 	ace_size = aclp->z_ops.ace_size(ace);
 	if (((caddr_t)ace + ace_size) >
 	    ((caddr_t)node->z_acldata + node->z_size))
 		return (NULL);
 
 	*iflags = aclp->z_ops.ace_flags_get(ace);
 	*type = aclp->z_ops.ace_type_get(ace);
 	*access_mask = aclp->z_ops.ace_mask_get(ace);
 	*who = aclp->z_ops.ace_who_get(ace);
 	aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size;
 	node->z_ace_idx++;
 
 	return (ace);
 }
 
 /*
  * Cookie-based wrapper around zfs_acl_next_ace().
  *
  * `cookie' is the value returned by the previous call (0 to start); the
  * return value is the address of the ACE just visited, or 0 when the
  * walk is over.  flags, type and mask are filled in for that ACE; the
  * "who" value is fetched but discarded.  aclcnt is unused.
  */
 /*ARGSUSED*/
 static uint64_t
 zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
     uint16_t *flags, uint16_t *type, uint32_t *mask)
 {
 	zfs_acl_t *aclp = datap;
 	void *prev_ace = (void *)(uintptr_t)cookie;
 	void *next_ace;
 	uint64_t who;
 
 	next_ace = zfs_acl_next_ace(aclp, prev_ace, &who, mask, flags, type);
 
 	return ((uint64_t)(uintptr_t)next_ace);
 }
 
 /*
  * Return the node the ACE iterator is currently positioned on.  A walk
  * must be in progress (z_curr_node non-NULL), which is asserted.
  */
 static zfs_acl_node_t *
 zfs_acl_curr_node(zfs_acl_t *aclp)
 {
 	zfs_acl_node_t *node;
 
 	ASSERT(aclp->z_curr_node);
 	node = aclp->z_curr_node;
 
 	return (node);
 }
 
 /*
  * Copy ACE to internal ZFS format.
  * While processing the ACL each ACE will be validated for correctness.
  *
  * datap holds aclcnt caller-format entries (ace_t, or ace_object_t for
  * the *_OBJECT_ACE_TYPE types); they are converted one by one into the
  * variable-sized internal layout starting at z_acl.  For entries that
  * are not ACE_OWNER, OWNING_GROUP or ACE_EVERYONE the "who" id is
  * converted with zfs_fuid_create(), as a user when no type flag is set
  * and as a group otherwise.
  *
  * Returns 0 and stores the number of bytes written in *size, or EINVAL
  * as soon as zfs_ace_valid() rejects an entry (*size is then left
  * untouched).  NOTE(review): zfs_fuid_create() has already been called
  * for the rejected entry at that point.
  *
  * The loop counter has the same unsigned 64-bit type as aclcnt, so the
  * comparison never mixes signedness and the counter cannot overflow.
  */
 int
 zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
     void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
     zfs_fuid_info_t **fuidp, cred_t *cr)
 {
 	uint64_t i;
 	uint16_t entry_type;
 	zfs_ace_t *aceptr = z_acl;
 	ace_t *acep = datap;
 	zfs_object_ace_t *zobjacep;
 	ace_object_t *aceobjp;
 
 	for (i = 0; i < aclcnt; i++) {
 		aceptr->z_hdr.z_access_mask = acep->a_access_mask;
 		aceptr->z_hdr.z_flags = acep->a_flags;
 		aceptr->z_hdr.z_type = acep->a_type;
 		entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
 		if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
 		    entry_type != ACE_EVERYONE) {
 			aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
 			    cr, (entry_type == 0) ?
 			    ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
 		}
 
 		/*
 		 * Make sure ACE is valid
 		 */
 		if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type,
 		    aceptr->z_hdr.z_flags) != B_TRUE)
 			return (SET_ERROR(EINVAL));
 
 		/* Step the source pointer by the size of what was read. */
 		switch (acep->a_type) {
 		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 			zobjacep = (zfs_object_ace_t *)aceptr;
 			aceobjp = (ace_object_t *)acep;
 
 			bcopy(aceobjp->a_obj_type, zobjacep->z_object_type,
 			    sizeof (aceobjp->a_obj_type));
 			bcopy(aceobjp->a_inherit_obj_type,
 			    zobjacep->z_inherit_type,
 			    sizeof (aceobjp->a_inherit_obj_type));
 			acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t));
 			break;
 		default:
 			acep = (ace_t *)((caddr_t)acep + sizeof (ace_t));
 		}
 
 		/* Destination entries are variable-sized. */
 		aceptr = (zfs_ace_t *)((caddr_t)aceptr +
 		    aclp->z_ops.ace_size(aceptr));
 	}
 
 	*size = (caddr_t)aceptr - (caddr_t)z_acl;
 
 	return (0);
 }
 
 /*
  * Copy ZFS ACEs to fixed size ace_t layout
  *
  * Walks every ACE in aclp and writes its caller-format equivalent to
  * consecutive slots starting at datap: an ace_object_t for the four
  * *_OBJECT_ACE_TYPE types, an ace_t for everything else.  When `filter'
  * is non-zero, object ACEs are skipped entirely and take up no space in
  * the output.
  *
  * For ACE_OWNER, OWNING_GROUP and ACE_EVERYONE entries a_who is the
  * iterator's "who" value cast to uid_t; for all others it is the result
  * of zfs_fuid_map_id(), mapped as a group when ACE_IDENTIFIER_GROUP is
  * set and as a user otherwise.  The caller must supply a buffer large
  * enough for every entry written.
  */
 static void
 zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
     void *datap, int filter)
 {
 	uint64_t who;
 	uint32_t access_mask;
 	uint16_t iflags, type;
 	zfs_ace_hdr_t *zacep = NULL;
 	ace_t *acep = datap;
 	ace_object_t *objacep;
 	zfs_object_ace_t *zobjacep;
 	size_t ace_size;
 	uint16_t entry_type;
 
 	while ((zacep = zfs_acl_next_ace(aclp, zacep,
 	    &who, &access_mask, &iflags, &type)) != NULL) {
 
 		switch (type) {
 		case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 		case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 		case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 		case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 			/* Filtered object ACEs are dropped from the output. */
 			if (filter)
 				continue;
 
 			zobjacep = (zfs_object_ace_t *)zacep;
 			objacep = (ace_object_t *)acep;
 			bcopy(zobjacep->z_object_type,
 			    objacep->a_obj_type,
 			    sizeof (zobjacep->z_object_type));
 			bcopy(zobjacep->z_inherit_type,
 			    objacep->a_inherit_obj_type,
 			    sizeof (zobjacep->z_inherit_type));
 			ace_size = sizeof (ace_object_t);
 			break;
 		default:
 			ace_size = sizeof (ace_t);
 			break;
 		}
 
 		entry_type = (iflags & ACE_TYPE_FLAGS);
 		if (entry_type == ACE_OWNER ||
 		    entry_type == OWNING_GROUP ||
 		    entry_type == ACE_EVERYONE) {
 			acep->a_who = (uid_t)(int64_t)who;
 		} else {
 			acep->a_who = zfs_fuid_map_id(zfsvfs, who,
 			    cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
 			    ZFS_ACE_GROUP : ZFS_ACE_USER);
 		}
 		acep->a_access_mask = access_mask;
 		acep->a_flags = iflags;
 		acep->a_type = type;
 
 		/* Advance by the size of the slot just filled in. */
 		acep = (ace_t *)((caddr_t)acep + ace_size);
 	}
 }
 
 /*
  * Copy aclcnt ace_t entries from acep into the fixed-size zfs_oldace_t
  * array at z_acl, validating each entry with zfs_ace_valid() after it has
  * been copied.
  *
  * Returns 0 and stores the number of bytes written in *size, or EINVAL at
  * the first invalid entry (in which case *size is not written).
  */
 static int
 zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
     zfs_oldace_t *z_acl, int aclcnt, size_t *size)
 {
 	zfs_oldace_t *dst = z_acl;
 	int idx = 0;
 
 	while (idx != aclcnt) {
 		ace_t *src = &acep[idx];
 
 		dst->z_access_mask = src->a_access_mask;
 		dst->z_type = src->a_type;
 		dst->z_flags = src->a_flags;
 		dst->z_fuid = src->a_who;
 
 		/* Reject the whole ACL if any single entry is malformed. */
 		if (zfs_ace_valid(obj_type, aclp, dst->z_type,
 		    dst->z_flags) != B_TRUE)
 			return (SET_ERROR(EINVAL));
 
 		dst++;
 		idx++;
 	}
 
 	*size = (caddr_t)dst - (caddr_t)z_acl;
 	return (0);
 }
 
 /*
  * convert old ACL format to new
  *
  * The ACL must currently be ZFS_ACL_VERSION_INITIAL.  Its entries are
  * first flattened into a temporary zfs_oldace_t array, then re-encoded
  * into a single freshly allocated node by zfs_copy_ace_2_fuid().  On
  * return aclp uses zfs_acl_fuid_ops, is marked ZFS_ACL_VERSION, and
  * holds only the new node.
  */
 void
 zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
 {
 	zfs_oldace_t *oldaclp;
 	int i;
 	uint16_t type, iflags;
 	uint32_t access_mask;
 	uint64_t who;
 	void *cookie = NULL;
 	zfs_acl_node_t *newaclnode;
 
 	ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL);
 	/*
 	 * First create the ACE in a contiguous piece of memory
 	 * for zfs_copy_ace_2_fuid().
 	 *
 	 * We only convert an ACL once, so this won't happen
 	 * everytime.
 	 */
 	oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count,
 	    KM_SLEEP);
 	i = 0;
 	/* This walk runs before z_ops is switched below. */
 	while (cookie = zfs_acl_next_ace(aclp, cookie, &who,
 	    &access_mask, &iflags, &type)) {
 		oldaclp[i].z_flags = iflags;
 		oldaclp[i].z_type = type;
 		oldaclp[i].z_fuid = who;
 		oldaclp[i++].z_access_mask = access_mask;
 	}
 
 	/*
 	 * Size the new node for the largest entry layout so that any mix
 	 * of entry types fits.
 	 */
 	newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
 	    sizeof (zfs_object_ace_t));
 	/* Switch ops before re-encoding: the copy uses z_ops.ace_size(). */
 	aclp->z_ops = zfs_acl_fuid_ops;
 	/*
 	 * NOTE(review): oldaclp (zfs_oldace_t entries) is passed as the
 	 * datap argument, which zfs_copy_ace_2_fuid() reads as ace_t
 	 * entries -- this presumably relies on the two layouts matching;
 	 * confirm against the structure definitions.
 	 */
 	VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
 	    oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
 	    &newaclnode->z_size, NULL, cr) == 0);
 	newaclnode->z_ace_count = aclp->z_acl_count;
 	aclp->z_version = ZFS_ACL_VERSION;
 	/* Free the scratch array while z_acl_count still holds its length. */
 	kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
 
 	/*
 	 * Release all previous ACL nodes
 	 */
 
 	zfs_acl_release_nodes(aclp);
 
 	list_insert_head(&aclp->z_acl, newaclnode);
 
 	/* Re-derive the ACL totals from the single new node. */
 	aclp->z_acl_bytes = newaclnode->z_size;
 	aclp->z_acl_count = newaclnode->z_ace_count;
 
 }
 
 /*
  * Translate the "other" permission bits of a unix mode (S_IROTH, S_IWOTH,
  * S_IXOTH) into the corresponding v4 access mask bits (ACE_READ_DATA,
  * ACE_WRITE_DATA, ACE_EXECUTE).  All other bits of access_mask are ignored.
  */
 static uint32_t
 zfs_unix_to_v4(uint32_t access_mask)
 {
 	uint32_t v4_mask;
 
 	v4_mask = (access_mask & S_IROTH) ? ACE_READ_DATA : 0;
 	v4_mask |= (access_mask & S_IWOTH) ? ACE_WRITE_DATA : 0;
 	v4_mask |= (access_mask & S_IXOTH) ? ACE_EXECUTE : 0;
 
 	return (v4_mask);
 }
 
 /*
  * Fill in a single ACE through the ACL's ops vector.
  *
  * The mask, type and flags are always written.  The "who" field is only
  * written for entries that are not owner@, group@ or everyone@; for those
  * three classes fuid is ignored.
  */
 static void
 zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
     uint16_t access_type, uint64_t fuid, uint16_t entry_type)
 {
 	const uint16_t who_class = entry_type & ACE_TYPE_FLAGS;
 
 	aclp->z_ops.ace_mask_set(acep, access_mask);
 	aclp->z_ops.ace_type_set(acep, access_type);
 	aclp->z_ops.ace_flags_set(acep, entry_type);
 
 	if (who_class == ACE_OWNER || who_class == OWNING_GROUP ||
 	    who_class == ACE_EVERYONE)
 		return;
 
 	aclp->z_ops.ace_who_set(acep, fuid);
 }
 
 /*
  * Determine mode of file based on ACL.
  *
  * fmode supplies the file type and the set-uid/set-gid/sticky bits, which
  * are carried over unchanged; the nine permission bits are rebuilt from
  * the ACL.  For each permission bit, the first ACE that mentions it
  * decides it: the bit is set only if that ACE is an ALLOW entry.  "seen"
  * records which bits have already been decided.
  *
  * owner@ ACEs, and user ACEs whose id equals fuid, feed the owner bits.
  * group@ ACEs, and group ACEs whose id equals fgid, feed the group bits.
  * everyone@ ACEs feed any still-undecided bit in all three classes.
  *
  * As a side effect ZFS_NO_EXECS_DENIED is set in, or cleared from,
  * *pflags.
  *
  * Returns the computed mode.
  */
 uint64_t
 zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp,
     uint64_t *pflags, uint64_t fuid, uint64_t fgid)
 {
 	int		entry_type;
 	mode_t		mode;
 	mode_t		seen = 0;
 	zfs_ace_hdr_t 	*acep = NULL;
 	uint64_t	who;
 	uint16_t	iflags, type;
 	uint32_t	access_mask;
 	boolean_t	an_exec_denied = B_FALSE;
 
 	/* Start from the non-permission bits of the existing mode. */
 	mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX));
 
 	while (acep = zfs_acl_next_ace(aclp, acep, &who,
 	    &access_mask, &iflags, &type)) {
 
 		if (!zfs_acl_valid_ace_type(type, iflags))
 			continue;
 
 		entry_type = (iflags & ACE_TYPE_FLAGS);
 
 		/*
 		 * Skip over owner@, group@ or everyone@ inherit only ACEs
 		 */
 		if ((iflags & ACE_INHERIT_ONLY_ACE) &&
 		    (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
 		    entry_type == OWNING_GROUP))
 			continue;
 
 		/* Owner class: owner@, or a user entry naming the owner. */
 		if (entry_type == ACE_OWNER || (entry_type == 0 &&
 		    who == fuid)) {
 			if ((access_mask & ACE_READ_DATA) &&
 			    (!(seen & S_IRUSR))) {
 				seen |= S_IRUSR;
 				if (type == ALLOW) {
 					mode |= S_IRUSR;
 				}
 			}
 			if ((access_mask & ACE_WRITE_DATA) &&
 			    (!(seen & S_IWUSR))) {
 				seen |= S_IWUSR;
 				if (type == ALLOW) {
 					mode |= S_IWUSR;
 				}
 			}
 			if ((access_mask & ACE_EXECUTE) &&
 			    (!(seen & S_IXUSR))) {
 				seen |= S_IXUSR;
 				if (type == ALLOW) {
 					mode |= S_IXUSR;
 				}
 			}
 		/* Group class: group@, or a group entry naming fgid. */
 		} else if (entry_type == OWNING_GROUP ||
 		    (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) {
 			if ((access_mask & ACE_READ_DATA) &&
 			    (!(seen & S_IRGRP))) {
 				seen |= S_IRGRP;
 				if (type == ALLOW) {
 					mode |= S_IRGRP;
 				}
 			}
 			if ((access_mask & ACE_WRITE_DATA) &&
 			    (!(seen & S_IWGRP))) {
 				seen |= S_IWGRP;
 				if (type == ALLOW) {
 					mode |= S_IWGRP;
 				}
 			}
 			if ((access_mask & ACE_EXECUTE) &&
 			    (!(seen & S_IXGRP))) {
 				seen |= S_IXGRP;
 				if (type == ALLOW) {
 					mode |= S_IXGRP;
 				}
 			}
 		/*
 		 * everyone@ applies to user, group and other alike, but
 		 * only to bits no earlier ACE has decided.
 		 */
 		} else if (entry_type == ACE_EVERYONE) {
 			if ((access_mask & ACE_READ_DATA)) {
 				if (!(seen & S_IRUSR)) {
 					seen |= S_IRUSR;
 					if (type == ALLOW) {
 						mode |= S_IRUSR;
 					}
 				}
 				if (!(seen & S_IRGRP)) {
 					seen |= S_IRGRP;
 					if (type == ALLOW) {
 						mode |= S_IRGRP;
 					}
 				}
 				if (!(seen & S_IROTH)) {
 					seen |= S_IROTH;
 					if (type == ALLOW) {
 						mode |= S_IROTH;
 					}
 				}
 			}
 			if ((access_mask & ACE_WRITE_DATA)) {
 				if (!(seen & S_IWUSR)) {
 					seen |= S_IWUSR;
 					if (type == ALLOW) {
 						mode |= S_IWUSR;
 					}
 				}
 				if (!(seen & S_IWGRP)) {
 					seen |= S_IWGRP;
 					if (type == ALLOW) {
 						mode |= S_IWGRP;
 					}
 				}
 				if (!(seen & S_IWOTH)) {
 					seen |= S_IWOTH;
 					if (type == ALLOW) {
 						mode |= S_IWOTH;
 					}
 				}
 			}
 			if ((access_mask & ACE_EXECUTE)) {
 				if (!(seen & S_IXUSR)) {
 					seen |= S_IXUSR;
 					if (type == ALLOW) {
 						mode |= S_IXUSR;
 					}
 				}
 				if (!(seen & S_IXGRP)) {
 					seen |= S_IXGRP;
 					if (type == ALLOW) {
 						mode |= S_IXGRP;
 					}
 				}
 				if (!(seen & S_IXOTH)) {
 					seen |= S_IXOTH;
 					if (type == ALLOW) {
 						mode |= S_IXOTH;
 					}
 				}
 			}
 		} else {
 			/*
 			 * Only care if this IDENTIFIER_GROUP or
 			 * USER ACE denies execute access to someone,
 			 * mode is not affected
 			 */
 			if ((access_mask & ACE_EXECUTE) && type == DENY)
 				an_exec_denied = B_TRUE;
 		}
 	}
 
 	/*
 	 * Failure to allow is effectively a deny, so execute permission
 	 * is denied if it was never mentioned or if we explicitly
 	 * weren't allowed it.
 	 */
 	if (!an_exec_denied &&
 	    ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS ||
 	    (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS))
 		an_exec_denied = B_TRUE;
 
 	/* Publish the result in the caller's flags word. */
 	if (an_exec_denied)
 		*pflags &= ~ZFS_NO_EXECS_DENIED;
 	else
 		*pflags |= ZFS_NO_EXECS_DENIED;
 
 	return (mode);
 }
 
 /*
  * Read an external acl object.  If the intent is to modify, always
  * create a new acl and leave any cached acl in place.
  *
  * The caller must hold zp->z_acl_lock and the vnode lock (both are
  * asserted).
  *
  * When will_modify is false and zp->z_acl_cached is set, the cached ACL
  * is returned directly.  Otherwise a new zfs_acl_t with a single node is
  * built; it is stored in zp->z_acl_cached only when will_modify is false.
  *
  * Returns 0 with *aclpp set on success.  On failure an error is returned,
  * *aclpp is left untouched and anything allocated here is freed; ECKSUM
  * is reported as EIO.
  */
 static int
 zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
 {
 	zfs_acl_t	*aclp;
 	int		aclsize;
 	int		acl_count;
 	zfs_acl_node_t	*aclnode;
 	zfs_acl_phys_t	znode_acl;
 	int		version;
 	int		error;
 
 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 
 	/* Read-only callers can share the cached copy. */
 	if (zp->z_acl_cached && !will_modify) {
 		*aclpp = zp->z_acl_cached;
 		return (0);
 	}
 
 	version = zfs_znode_acl_version(zp);
 
 	/* On success this also leaves error == 0 for the paths below. */
 	if ((error = zfs_acl_znode_info(zp, &aclsize,
 	    &acl_count, &znode_acl)) != 0) {
 		goto done;
 	}
 
 	aclp = zfs_acl_alloc(version);
 
 	aclp->z_acl_count = acl_count;
 	aclp->z_acl_bytes = aclsize;
 
 	/* One node holds the whole ACL. */
 	aclnode = zfs_acl_node_alloc(aclsize);
 	aclnode->z_ace_count = aclp->z_acl_count;
 	aclnode->z_size = aclsize;
 
 	/*
 	 * Three possible sources for the ACE data: an external DMU object
 	 * or the bytes embedded in znode_acl (both for non-SA znodes), or
 	 * the DACL_ACES system attribute.
 	 */
 	if (!zp->z_is_sa) {
 		if (znode_acl.z_acl_extern_obj) {
 			error = dmu_read(zp->z_zfsvfs->z_os,
 			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
 			    aclnode->z_acldata, DMU_READ_PREFETCH);
 		} else {
 			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
 			    aclnode->z_size);
 		}
 	} else {
 		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
 		    aclnode->z_acldata, aclnode->z_size);
 	}
 
 	if (error != 0) {
 		/* The node was never linked into aclp, so free both. */
 		zfs_acl_free(aclp);
 		zfs_acl_node_free(aclnode);
 		/* convert checksum errors into IO errors */
 		if (error == ECKSUM)
 			error = SET_ERROR(EIO);
 		goto done;
 	}
 
 	list_insert_head(&aclp->z_acl, aclnode);
 
 	*aclpp = aclp;
 	if (!will_modify)
 		zp->z_acl_cached = aclp;
 done:
 	return (error);
 }
 
 /*
  * Data-locator callback that hands out the ACL's nodes one per call.
  *
  * userdata is a zfs_acl_locator_cb_t.  When "start" is set the walk is
  * positioned on the first node of cb_aclp's node list, otherwise on the
  * node after the one returned last time.  The chosen node's data pointer
  * and size are returned through dataptr and length; buflen is unused.
  */
 /*ARGSUSED*/
 void
 zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
     boolean_t start, void *userdata)
 {
 	zfs_acl_locator_cb_t *state = (zfs_acl_locator_cb_t *)userdata;
 	zfs_acl_node_t *node;
 
 	if (!start)
 		node = list_next(&state->cb_aclp->z_acl, state->cb_acl_node);
 	else
 		node = list_head(&state->cb_aclp->z_acl);
 
 	/* Remember the position for the next invocation. */
 	state->cb_acl_node = node;
 
 	*dataptr = state->cb_acl_node->z_acldata;
 	*length = state->cb_acl_node->z_size;
 }
 
 /*
  * Recompute zp->z_mode (and the exec-denied flag in zp->z_pflags) from the
  * znode's ACL using the znode's current z_uid and z_gid.
  *
  * The caller must hold the vnode exclusively locked and own z_acl_lock
  * (both are asserted).  Returns 0, or the error from reading the ACL, in
  * which case the mode is left unchanged.
  */
 int
 zfs_acl_chown_setattr(znode_t *zp)
 {
 	zfs_acl_t *acl;
 	int err;
 
 	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
 
 	err = zfs_acl_node_read(zp, &acl, B_FALSE);
 	if (err != 0)
 		return (err);
 
 	zp->z_mode = zfs_mode_compute(zp->z_mode, acl,
 	    &zp->z_pflags, zp->z_uid, zp->z_gid);
 	return (0);
 }
 
 /*
  * common code for setting ACLs.
  *
  * Recomputes the znode's mode from aclp, drops any cached ACL, upgrades
  * an initial-version ACL to the FUID format when the file system allows
  * it, and writes mode, flags, ctime and the ACL itself in one
  * sa_bulk_update() under tx.
  *
  * Returns 0 on success or an error from the SA/DMU layer.  Note that
  * zp->z_mode, zp->z_pflags and zp->z_acl_cached are modified before the
  * first operation that can fail.
  */
 int
 zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
 {
 	int			error;
 	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
 	dmu_object_type_t	otype;
 	zfs_acl_locator_cb_t	locate = { 0 };
 	uint64_t		mode;
 	sa_bulk_attr_t		bulk[5];	/* at most 5 attrs are added */
 	uint64_t		ctime[2];
 	int			count = 0;
 
 	mode = zp->z_mode;
 
 	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
 	    zp->z_uid, zp->z_gid);
 
 	zp->z_mode = mode;
 	/*
 	 * The bulk entries hold pointers, so z_pflags and ctime may still
 	 * be updated further down before sa_bulk_update() reads them.
 	 */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 	    &mode, sizeof (mode));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 	    &ctime, sizeof (ctime));
 
 	/* The cached copy is about to become stale. */
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 
 	/*
 	 * Upgrade needed?
 	 */
 	if (!zfsvfs->z_use_fuids) {
 		otype = DMU_OT_OLDACL;
 	} else {
 		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
 		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
 			zfs_acl_xform(zp, aclp, cr);
 		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
 		otype = DMU_OT_ACL;
 	}
 
 	/*
 	 * Arrgh, we have to handle old on disk format
 	 * as well as newer (preferred) SA format.
 	 */
 
 	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
 		/*
 		 * The ACE data may span several nodes, so it is supplied
 		 * through the zfs_acl_data_locator() callback.
 		 */
 		locate.cb_aclp = aclp;
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
 		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
 		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
 	} else { /* Painful legacy way */
 		zfs_acl_node_t *aclnode;
 		uint64_t off = 0;
 		zfs_acl_phys_t acl_phys;
 		uint64_t aoid;
 
 		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
 		    &acl_phys, sizeof (acl_phys))) != 0)
 			return (error);
 
 		aoid = acl_phys.z_acl_extern_obj;
 
 		/* Too large to embed: the ACEs go in an external object. */
 		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			/*
 			 * If ACL was previously external and we are now
 			 * converting to new ACL format then release old
 			 * ACL object and create a new one.
 			 */
 			if (aoid &&
 			    aclp->z_version != acl_phys.z_acl_version) {
 				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
 				if (error)
 					return (error);
 				aoid = 0;
 			}
 			if (aoid == 0) {
 				aoid = dmu_object_alloc(zfsvfs->z_os,
 				    otype, aclp->z_acl_bytes,
 				    otype == DMU_OT_ACL ?
 				    DMU_OT_SYSACL : DMU_OT_NONE,
 				    otype == DMU_OT_ACL ?
 				    DN_MAX_BONUSLEN : 0, tx);
 			} else {
 				(void) dmu_object_set_blocksize(zfsvfs->z_os,
 				    aoid, aclp->z_acl_bytes, 0, tx);
 			}
 			acl_phys.z_acl_extern_obj = aoid;
 			/* Write the non-empty nodes back to back. */
 			for (aclnode = list_head(&aclp->z_acl); aclnode;
 			    aclnode = list_next(&aclp->z_acl, aclnode)) {
 				if (aclnode->z_ace_count == 0)
 					continue;
 				dmu_write(zfsvfs->z_os, aoid, off,
 				    aclnode->z_size, aclnode->z_acldata, tx);
 				off += aclnode->z_size;
 			}
 		} else {
 			void *start = acl_phys.z_ace_data;
 			/*
 			 * Migrating back embedded?
 			 */
 			if (acl_phys.z_acl_extern_obj) {
 				error = dmu_object_free(zfsvfs->z_os,
 				    acl_phys.z_acl_extern_obj, tx);
 				if (error)
 					return (error);
 				acl_phys.z_acl_extern_obj = 0;
 			}
 
 			/* Pack the non-empty nodes into the embedded area. */
 			for (aclnode = list_head(&aclp->z_acl); aclnode;
 			    aclnode = list_next(&aclp->z_acl, aclnode)) {
 				if (aclnode->z_ace_count == 0)
 					continue;
 				bcopy(aclnode->z_acldata, start,
 				    aclnode->z_size);
 				start = (caddr_t)start + aclnode->z_size;
 			}
 		}
 		/*
 		 * If Old version then swap count/bytes to match old
 		 * layout of znode_acl_phys_t.
 		 */
 		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
 			acl_phys.z_acl_size = aclp->z_acl_count;
 			acl_phys.z_acl_count = aclp->z_acl_bytes;
 		} else {
 			acl_phys.z_acl_size = aclp->z_acl_bytes;
 			acl_phys.z_acl_count = aclp->z_acl_count;
 		}
 		acl_phys.z_acl_version = aclp->z_version;
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
 		    &acl_phys, sizeof (acl_phys));
 	}
 
 	/*
 	 * Replace ACL wide bits, but first clear them.
 	 */
 	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;
 
 	zp->z_pflags |= aclp->z_hints;
 
 	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
 		zp->z_pflags |= ZFS_ACL_TRIVIAL;
 
 	/* Fill in ctime[], which the CTIME bulk entry above points at. */
 	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
 	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
 }
 
 /*
  * Rewrite aclp in place so that it reflects "mode".
  *
  * A new node is built containing, in order:
  *   - up to three leading entries (allow0 and deny1 for owner@, deny2 for
  *     group@), each emitted only if its mask from
  *     acl_trivial_access_masks() is non-zero;
  *   - every existing entry except owner@/group@/everyone@ entries that
  *     are not inherit-only, which are dropped;
  *   - three trailing entries for owner@, group@ and everyone@.
  *
  * When "trim" is set, the access mask of each retained ALLOW entry that
  * is not inherit-only is reduced to masks.group.
  *
  * All previous nodes are released and replaced by the new one;
  * z_acl_count, z_acl_bytes and z_hints are updated to match.
  */
 static void
 zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
 {
 	void		*acep = NULL;
 	uint64_t	who;
 	int		new_count, new_bytes;
 	int		ace_size;
 	int 		entry_type;
 	uint16_t	iflags, type;
 	uint32_t	access_mask;
 	zfs_acl_node_t	*newnode;
 	size_t 		abstract_size = aclp->z_ops.ace_abstract_size();
 	void 		*zacep;
 	boolean_t	isdir;
 	trivial_acl_t	masks;
 
 	new_count = new_bytes = 0;
 
 	isdir = (vtype == VDIR);
 
 	acl_trivial_access_masks((mode_t)mode, isdir, &masks);
 
 	/*
 	 * Worst case: every existing byte is kept, plus the three leading
 	 * and three trailing abstract-size entries added here.
 	 */
 	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
 
 	/* zacep is the write cursor into the new node. */
 	zacep = newnode->z_acldata;
 	if (masks.allow0) {
 		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
 		zacep = (void *)((uintptr_t)zacep + abstract_size);
 		new_count++;
 		new_bytes += abstract_size;
 	}
 	if (masks.deny1) {
 		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
 		zacep = (void *)((uintptr_t)zacep + abstract_size);
 		new_count++;
 		new_bytes += abstract_size;
 	}
 	if (masks.deny2) {
 		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
 		zacep = (void *)((uintptr_t)zacep + abstract_size);
 		new_count++;
 		new_bytes += abstract_size;
 	}
 
 	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 	    &iflags, &type)) {
 		uint16_t inherit_flags;
 
 		entry_type = (iflags & ACE_TYPE_FLAGS);
 		inherit_flags = (iflags & ALL_INHERIT);
 
 		/*
 		 * Existing owner@/group@/everyone@ entries that apply to
 		 * this object are superseded by the ones generated from
 		 * the mode, so they are not carried over.
 		 */
 		if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
 		    (entry_type == OWNING_GROUP)) &&
 		    ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) {
 			continue;
 		}
 
 		/*
 		 * If this ACL has any inheritable ACEs, mark that in
 		 * the hints (which are later masked into the pflags)
 		 * so create knows to do inheritance.
 		 */
 		if (isdir && (inherit_flags &
 		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
 			aclp->z_hints |= ZFS_INHERIT_ACE;
 
 		if ((type != ALLOW && type != DENY) ||
 		    (inherit_flags & ACE_INHERIT_ONLY_ACE)) {
 			/* Only the object-ACE hint is recorded here. */
 			switch (type) {
 			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
 				break;
 			}
 		} else {
 
 			/*
 			 * Limit permissions to be no greater than
 			 * group permissions.
 			 * The "aclinherit" and "aclmode" properties
 			 * affect policy for create and chmod(2),
 			 * respectively.
 			 */
 			if ((type == ALLOW) && trim)
 				access_mask &= masks.group;
 		}
 		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
 		/* Advance by the size of the source entry, not abstract_size. */
 		ace_size = aclp->z_ops.ace_size(acep);
 		zacep = (void *)((uintptr_t)zacep + ace_size);
 		new_count++;
 		new_bytes += ace_size;
 	}
 	/*
 	 * Trailing owner@/group@/everyone@ entries.  The access type is
 	 * passed as a literal 0 here where the leading entries use the
 	 * ALLOW constant -- presumably ALLOW is 0; confirm.
 	 */
 	zfs_set_ace(aclp, zacep, masks.owner, 0, -1, ACE_OWNER);
 	zacep = (void *)((uintptr_t)zacep + abstract_size);
 	zfs_set_ace(aclp, zacep, masks.group, 0, -1, OWNING_GROUP);
 	zacep = (void *)((uintptr_t)zacep + abstract_size);
 	zfs_set_ace(aclp, zacep, masks.everyone, 0, -1, ACE_EVERYONE);
 
 	new_count += 3;
 	new_bytes += abstract_size * 3;
 	/* Swap the old nodes for the single new one. */
 	zfs_acl_release_nodes(aclp);
 	aclp->z_acl_count = new_count;
 	aclp->z_acl_bytes = new_bytes;
 	newnode->z_ace_count = new_count;
 	newnode->z_size = new_bytes;
 	list_insert_tail(&aclp->z_acl, newnode);
 }
 
 /*
  * Build the ACL that results from applying "mode" to zp.
  *
  * With the ZFS_ACL_DISCARD acl mode an empty ACL is allocated; otherwise
  * a private copy of the znode's ACL is read.  Either way the ACL is then
  * run through zfs_acl_chmod(), trimming ALLOW entries only in
  * ZFS_ACL_GROUPMASK mode.  z_acl_lock is taken for the duration.
  *
  * Returns 0 with *aclp set, or the error from reading the ACL.
  */
 int
 zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
 {
 	int err = 0;
 
 	mutex_enter(&zp->z_acl_lock);
 	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 
 	if (zp->z_zfsvfs->z_acl_mode != ZFS_ACL_DISCARD) {
 		err = zfs_acl_node_read(zp, aclp, B_TRUE);
 	} else {
 		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
 	}
 
 	if (err == 0) {
 		zfs_acl_t *acl = *aclp;
 
 		/* Carry the existing ACL-wide flags over as hints. */
 		acl->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
 		zfs_acl_chmod(ZTOV(zp)->v_type, mode,
 		    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), acl);
 	}
 
 	mutex_exit(&zp->z_acl_lock);
 	return (err);
 }
 
 /*
  * When the file system's aclinherit setting is ZFS_ACL_RESTRICTED, clear
  * the RESTRICTED_CLEAR bits from the access mask of an ALLOW entry.
  * Entries of any other type, and any other aclinherit setting, are left
  * untouched.
  */
 static void
 zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
 {
 	uint32_t perms = aclp->z_ops.ace_mask_get(acep);
 
 	if (zfsvfs->z_acl_inherit != ZFS_ACL_RESTRICTED)
 		return;
 	if (aclp->z_ops.ace_type_get(acep) != ALLOW)
 		return;
 
 	perms &= ~RESTRICTED_CLEAR;
 	aclp->z_ops.ace_mask_set(acep, perms);
 }
 
 /*
  * Should ACE be inherited?
  *
  * Only the low four bits of acep_flags are examined.  Returns 1 when:
  *   - the new object is a directory and the ACE is directory-inheritable;
  *   - or the ACE is file-inheritable, unless the new object is a directory
  *     and the ACE is marked no-propagate.
  * Returns 0 otherwise.
  */
 static int
 zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
 {
 	const int inherit = (acep_flags & 0xf);
 	const boolean_t is_dir = (vtype == VDIR);
 
 	if (is_dir && (inherit & ACE_DIRECTORY_INHERIT_ACE))
 		return (1);
 
 	if (!(inherit & ACE_FILE_INHERIT_ACE))
 		return (0);
 
 	/* File-inheritable only: a directory may still pass it along. */
 	if (is_dir && (inherit & ACE_NO_PROPAGATE_INHERIT_ACE))
 		return (0);
 
 	return (1);
 }
 
 /*
  * inherit inheritable ACEs from parent
  *
  * Builds and returns a new ACL (same version as paclp) for an object of
  * type vtype.  Each inherited ACE is placed in its own node and marked
  * ACE_INHERITED_ACE.  An empty ACL is returned when aclinherit is
  * ZFS_ACL_DISCARD or the new object is a symlink.
  *
  * *need_chmod is set to B_TRUE on entry and cleared only when, in one of
  * the passthrough modes, the owner@/group@/everyone@ test below matches an
  * ACE that applies to the new object.
  */
 static zfs_acl_t *
 zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
     uint64_t mode, boolean_t *need_chmod)
 {
 	void		*pacep;
 	void		*acep;
 	zfs_acl_node_t  *aclnode;
 	zfs_acl_t	*aclp = NULL;
 	uint64_t	who;
 	uint32_t	access_mask;
 	uint16_t	iflags, newflags, type;
 	size_t		ace_size;
 	void		*data1, *data2;
 	size_t		data1sz, data2sz;
 	boolean_t	vdir = vtype == VDIR;
 	boolean_t	vreg = vtype == VREG;
 	boolean_t	passthrough, passthrough_x, noallow;
 
 	/* passthrough-x is treated as a special case of passthrough. */
 	passthrough_x =
 	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
 	passthrough = passthrough_x ||
 	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
 	noallow =
 	    zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW;
 
 	*need_chmod = B_TRUE;
 	pacep = NULL;
 	aclp = zfs_acl_alloc(paclp->z_version);
 	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || vtype == VLNK)
 		return (aclp);
 	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
 	    &access_mask, &iflags, &type)) {
 
 		/*
 		 * don't inherit bogus ACEs
 		 */
 		if (!zfs_acl_valid_ace_type(type, iflags))
 			continue;
 
 		/* In noallow mode only non-ALLOW entries are inherited. */
 		if (noallow && type == ALLOW)
 			continue;
 
 		ace_size = aclp->z_ops.ace_size(pacep);
 
 		if (!zfs_ace_can_use(vtype, iflags))
 			continue;
 
 		/*
 		 * If owner@, group@, or everyone@ inheritable
 		 * then zfs_acl_chmod() isn't needed.
 		 */
 		if (passthrough &&
 		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
 		    ((iflags & OWNING_GROUP) ==
 		    OWNING_GROUP)) && (vreg || (vdir && (iflags &
 		    ACE_DIRECTORY_INHERIT_ACE)))) {
 			*need_chmod = B_FALSE;
 		}
 
 		/*
 		 * passthrough-x: a non-directory only keeps execute if the
 		 * requested mode has at least one execute bit set.
 		 */
 		if (!vdir && passthrough_x &&
 		    ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
 			access_mask &= ~ACE_EXECUTE;
 		}
 
 		/* One node per inherited ACE, appended in parent order. */
 		aclnode = zfs_acl_node_alloc(ace_size);
 		list_insert_tail(&aclp->z_acl, aclnode);
 		acep = aclnode->z_acldata;
 
 		zfs_set_ace(aclp, acep, access_mask, type,
 		    who, iflags|ACE_INHERITED_ACE);
 
 		/*
 		 * Copy special opaque data if any
 		 */
 		if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
 			VERIFY((data2sz = aclp->z_ops.ace_data(acep,
 			    &data2)) == data1sz);
 			bcopy(data1, data2, data2sz);
 		}
 
 		aclp->z_acl_count++;
 		aclnode->z_ace_count++;
 		aclp->z_acl_bytes += aclnode->z_size;
 		newflags = aclp->z_ops.ace_flags_get(acep);
 
 		if (vdir)
 			aclp->z_hints |= ZFS_INHERIT_ACE;
 
 		/*
 		 * Non-directories, and ACEs marked no-propagate, get all
 		 * inheritance flags stripped; that is also the only path
 		 * on which the restricted-mode mask clearing is applied.
 		 */
 		if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) {
 			newflags &= ~ALL_INHERIT;
 			aclp->z_ops.ace_flags_set(acep,
 			    newflags|ACE_INHERITED_ACE);
 			zfs_restricted_update(zfsvfs, aclp, acep);
 			continue;
 		}
 
 		ASSERT(vdir);
 
 		/*
 		 * If only FILE_INHERIT is set then turn on
 		 * inherit_only
 		 */
 		if ((iflags & (ACE_FILE_INHERIT_ACE |
 		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
 			newflags |= ACE_INHERIT_ONLY_ACE;
 			aclp->z_ops.ace_flags_set(acep,
 			    newflags|ACE_INHERITED_ACE);
 		} else {
 			newflags &= ~ACE_INHERIT_ONLY_ACE;
 			aclp->z_ops.ace_flags_set(acep,
 			    newflags|ACE_INHERITED_ACE);
 		}
 	}
 	return (aclp);
 }
 
 /*
  * Create file system object initial permissions
  * including inheritable ACEs.
  *
  * Fills in acl_ids (zeroed first) with the mode, owner and group FUIDs,
  * FUID info and ACL for a new object being created in directory dzp.
  *
  * The ACL comes from vsecp when one is supplied; otherwise it is
  * inherited from dzp (when dzp has ZFS_INHERIT_ACE set, is not an xattr
  * directory and this is not the root node) or starts out empty, and is
  * then run through zfs_acl_chmod() unless inheritance cleared need_chmod.
  *
  * Returns 0, or the error from converting vsecp.
  */
 int
 zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
     vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
 {
 	int		error;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zfs_acl_t	*paclp;
 	gid_t		gid;
 	boolean_t	need_chmod = B_TRUE;
 	boolean_t	inherited = B_FALSE;
 
-	ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+	if ((flag & IS_ROOT_NODE) == 0)
+		ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__);
+	else
+		ASSERT(dzp->z_vnode == NULL);
 	bzero(acl_ids, sizeof (zfs_acl_ids_t));
 	acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
 
 	/* An explicit ACL from the caller takes precedence. */
 	if (vsecp)
 		if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
 		    &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
 			return (error);
 	/*
 	 * Determine uid and gid.
 	 */
 	/*
 	 * For the root node, during replay, and for xattr directories the
 	 * ids in vap are used as given; otherwise the owner comes from the
 	 * credential.
 	 */
 	if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
 	    ((flag & IS_XATTR) && (vap->va_type == VDIR))) {
 		acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
 		    (uint64_t)vap->va_uid, cr,
 		    ZFS_OWNER, &acl_ids->z_fuidp);
 		acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
 		    (uint64_t)vap->va_gid, cr,
 		    ZFS_GROUP, &acl_ids->z_fuidp);
 		gid = vap->va_gid;
 	} else {
 		acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
 		    cr, &acl_ids->z_fuidp);
 		acl_ids->z_fgid = 0;
 		if (vap->va_mask & AT_GID)  {
 			acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_gid,
 			    cr, ZFS_GROUP, &acl_ids->z_fuidp);
 			gid = vap->va_gid;
 			/*
 			 * A requested group that differs from the parent's,
 			 * that the caller is not a member of and has no
 			 * privilege to assign, is discarded; the fallback
 			 * below then picks the group.
 			 */
 			if (acl_ids->z_fgid != dzp->z_gid &&
 			    !groupmember(vap->va_gid, cr) &&
 			    secpolicy_vnode_create_gid(cr) != 0)
 				acl_ids->z_fgid = 0;
 		}
 		if (acl_ids->z_fgid == 0) {
 			/* A set-GID parent passes its group on. */
 			if (dzp->z_mode & S_ISGID) {
 				char		*domain;
 				uint32_t	rid;
 
 				acl_ids->z_fgid = dzp->z_gid;
 				gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
 				    cr, ZFS_GROUP);
 
 				if (zfsvfs->z_use_fuids &&
 				    IS_EPHEMERAL(acl_ids->z_fgid)) {
 					domain = zfs_fuid_idx_domain(
 					    &zfsvfs->z_fuid_idx,
 					    FUID_INDEX(acl_ids->z_fgid));
 					rid = FUID_RID(acl_ids->z_fgid);
 					zfs_fuid_node_add(&acl_ids->z_fuidp,
 					    domain, rid,
 					    FUID_INDEX(acl_ids->z_fgid),
 					    acl_ids->z_fgid, ZFS_GROUP);
 				}
 			} else {
 				acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
 				    ZFS_GROUP, cr, &acl_ids->z_fuidp);
 				/*
 				 * On FreeBSD the value just computed is
 				 * overwritten with the parent directory's
 				 * group.
 				 */
 #ifdef __FreeBSD_kernel__
 				gid = acl_ids->z_fgid = dzp->z_gid;
 #else
 				gid = crgetgid(cr);
 #endif
 			}
 		}
 	}
 
 	/*
 	 * If we're creating a directory, and the parent directory has the
 	 * set-GID bit set, set in on the new directory.
 	 * Otherwise, if the user is neither privileged nor a member of the
 	 * file's new group, clear the file's set-GID bit.
 	 */
 
 	if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) &&
 	    (vap->va_type == VDIR)) {
 		acl_ids->z_mode |= S_ISGID;
 	} else {
 		if ((acl_ids->z_mode & S_ISGID) &&
 		    secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0)
 			acl_ids->z_mode &= ~S_ISGID;
 	}
 
 	/* No ACL supplied by the caller: inherit one or start empty. */
 	if (acl_ids->z_aclp == NULL) {
 		mutex_enter(&dzp->z_acl_lock);
 		if (!(flag & IS_ROOT_NODE) &&
 		    (dzp->z_pflags & ZFS_INHERIT_ACE) &&
 		    !(dzp->z_pflags & ZFS_XATTR)) {
 			VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
 			acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
 			    vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
 			inherited = B_TRUE;
 		} else {
 			acl_ids->z_aclp =
 			    zfs_acl_alloc(zfs_acl_version_zp(dzp));
 			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
 		}
 		mutex_exit(&dzp->z_acl_lock);
 		if (need_chmod) {
 			acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ?
 			    ZFS_ACL_AUTO_INHERIT : 0;
 			zfs_acl_chmod(vap->va_type, acl_ids->z_mode,
 			    (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED),
 			    acl_ids->z_aclp);
 		}
 	}
 
 	/*
 	 * An inherited or caller-supplied ACL determines the final mode,
 	 * so recompute it and re-evaluate whether the ACL is trivial.
 	 */
 	if (inherited || vsecp) {
 		acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode,
 		    acl_ids->z_aclp, &acl_ids->z_aclp->z_hints,
 		    acl_ids->z_fuid, acl_ids->z_fgid);
 		if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0)
 			acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL;
 	}
 
 	return (0);
 }
 
 /*
  * Release the ACL and the FUID info hanging off acl_ids and reset both
  * pointers to NULL.  The zfs_acl_ids_t itself is not freed.
  */
 void
 zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
 {
 	if (acl_ids->z_aclp != NULL) {
 		zfs_acl_free(acl_ids->z_aclp);
 		acl_ids->z_aclp = NULL;
 	}
 
 	if (acl_ids->z_fuidp != NULL) {
 		zfs_fuid_info_free(acl_ids->z_fuidp);
 		acl_ids->z_fuidp = NULL;
 	}
 }
 
 /*
  * Report whether either the owner (z_fuid) or the group (z_fgid) recorded
  * in acl_ids is over quota according to zfs_fuid_overquota().  The group
  * is only checked when the owner is not over quota.
  */
 boolean_t
 zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
 {
 	if (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid))
 		return (B_TRUE);
 
 	if (zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid))
 		return (B_TRUE);
 
 	return (B_FALSE);
 }
 
 /*
  * Retrieve a file's ACL
  *
  * vsecp->vsa_mask selects what is returned: the entry count (VSA_ACECNT),
  * the entries themselves (VSA_ACE) and/or the ACL-wide flags
  * (VSA_ACE_ACLFLAGS).  Object ACEs are only reported when
  * VSA_ACE_ALLTYPES is also requested.
  *
  * For VSA_ACE a buffer is allocated here with kmem_alloc() and returned
  * in vsa_aclentp, with its size in vsa_aclentsz; nothing in this function
  * frees it, so that is presumably the caller's job.
  *
  * Returns 0 on success, ENOSYS if none of the supported mask bits is set,
  * or an error from the access check or from reading the ACL.
  */
 int
 zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 {
 	zfs_acl_t	*aclp;
 	ulong_t		mask;
 	int		error;
 	int 		count = 0;
 	int		largeace = 0;
 
 	mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT |
 	    VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES);
 
 	if (mask == 0)
 		return (SET_ERROR(ENOSYS));
 
 	if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr))
 		return (error);
 
 	mutex_enter(&zp->z_acl_lock);
 
 	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
 	if (error != 0) {
 		mutex_exit(&zp->z_acl_lock);
 		return (error);
 	}
 
 	/*
 	 * Scan ACL to determine number of ACEs
 	 */
 	/*
 	 * A scan is only needed when the ACL is flagged as containing
 	 * object ACEs and the caller did not ask for all types; otherwise
 	 * the stored count is used as is.
 	 */
 	if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) {
 		void *zacep = NULL;
 		uint64_t who;
 		uint32_t access_mask;
 		uint16_t type, iflags;
 
 		while (zacep = zfs_acl_next_ace(aclp, zacep,
 		    &who, &access_mask, &iflags, &type)) {
 			switch (type) {
 			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
 			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
 			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
 			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
 				largeace++;
 				continue;
 			default:
 				count++;
 			}
 		}
 		/* Note: stored here even when VSA_ACECNT was not requested. */
 		vsecp->vsa_aclcnt = count;
 	} else
 		count = (int)aclp->z_acl_count;
 
 	if (mask & VSA_ACECNT) {
 		vsecp->vsa_aclcnt = count;
 	}
 
 	if (mask & VSA_ACE) {
 		size_t aclsz;
 
 		/*
 		 * NOTE(review): largeace is only non-zero on the filtered
 		 * path above, where the copy below skips object ACEs, so
 		 * the second term looks like over-allocation; on the
 		 * unfiltered path every entry is budgeted at
 		 * sizeof (ace_t).  Confirm both are intended.
 		 */
 		aclsz = count * sizeof (ace_t) +
 		    sizeof (ace_object_t) * largeace;
 
 		vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP);
 		vsecp->vsa_aclentsz = aclsz;
 
 		if (aclp->z_version == ZFS_ACL_VERSION_FUID)
 			zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr,
 			    vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES));
 		else {
 			zfs_acl_node_t *aclnode;
 			void *start = vsecp->vsa_aclentp;
 
 			/* Other versions: copy the node data verbatim. */
 			for (aclnode = list_head(&aclp->z_acl); aclnode;
 			    aclnode = list_next(&aclp->z_acl, aclnode)) {
 				bcopy(aclnode->z_acldata, start,
 				    aclnode->z_size);
 				start = (caddr_t)start + aclnode->z_size;
 			}
 			ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp ==
 			    aclp->z_acl_bytes);
 		}
 	}
 	if (mask & VSA_ACE_ACLFLAGS) {
 		/* Translate the znode's ACL-wide flags to their ACL_* form. */
 		vsecp->vsa_aclflags = 0;
 		if (zp->z_pflags & ZFS_ACL_DEFAULTED)
 			vsecp->vsa_aclflags |= ACL_DEFAULTED;
 		if (zp->z_pflags & ZFS_ACL_PROTECTED)
 			vsecp->vsa_aclflags |= ACL_PROTECTED;
 		if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT)
 			vsecp->vsa_aclflags |= ACL_AUTO_INHERIT;
 	}
 
 	mutex_exit(&zp->z_acl_lock);
 
 	return (0);
 }
 
 /*
  * Convert the ACL described by a vsecattr_t into a newly allocated
  * zfs_acl_t.
  *
  * The entry count must be between 1 and MAX_ACL_ENTRIES.  The entries are
  * copied into a single node using the old or the FUID layout, depending
  * on the ACL version chosen for the file system's version.  If the caller
  * supplied ACL-wide flags (VSA_ACE_ACLFLAGS) they are recorded in z_hints.
  *
  * Returns 0 with *zaclp set, or EINVAL for a bad count or an invalid
  * entry; on failure nothing is left allocated and *zaclp is not written.
  */
 int
 zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
     vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
 {
 	zfs_acl_t *acl;
 	zfs_acl_node_t *node;
 	int nentries = vsecp->vsa_aclcnt;
 	int err;
 
 	if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
 		return (SET_ERROR(EINVAL));
 
 	acl = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
 	acl->z_hints = 0;
 
 	/* Sized for the largest entry layout, whichever one is used. */
 	node = zfs_acl_node_alloc(nentries * sizeof (zfs_object_ace_t));
 
 	if (acl->z_version == ZFS_ACL_VERSION_INITIAL) {
 		err = zfs_copy_ace_2_oldace(obj_type, acl,
 		    (ace_t *)vsecp->vsa_aclentp, node->z_acldata,
 		    nentries, &node->z_size);
 	} else {
 		err = zfs_copy_ace_2_fuid(zfsvfs, obj_type, acl,
 		    vsecp->vsa_aclentp, node->z_acldata, nentries,
 		    &node->z_size, fuidp, cr);
 	}
 
 	if (err != 0) {
 		/* The node is not linked into the ACL yet: free both. */
 		zfs_acl_free(acl);
 		zfs_acl_node_free(node);
 		return (err);
 	}
 
 	acl->z_acl_bytes = node->z_size;
 	node->z_ace_count = nentries;
 	acl->z_acl_count = nentries;
 	list_insert_head(&acl->z_acl, node);
 
 	/*
 	 * If flags are being set then add them to z_hints
 	 */
 	if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) {
 		if (vsecp->vsa_aclflags & ACL_PROTECTED)
 			acl->z_hints |= ZFS_ACL_PROTECTED;
 		if (vsecp->vsa_aclflags & ACL_DEFAULTED)
 			acl->z_hints |= ZFS_ACL_DEFAULTED;
 		if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT)
 			acl->z_hints |= ZFS_ACL_AUTO_INHERIT;
 	}
 
 	*zaclp = acl;
 
 	return (0);
 }
 
 /*
  * Set a file's ACL.
  *
  * The ACL described by vsecp is converted with zfs_vsec_2_aclp(), written
  * with zfs_aclset_common() inside a DMU transaction, logged with
  * zfs_log_acl() and then installed as zp->z_acl_cached.
  *
  * Returns 0 on success; ENOSYS if vsecp->vsa_mask has neither VSA_ACE nor
  * VSA_ACECNT set; EPERM if the znode is ZFS_IMMUTABLE; otherwise the error
  * from zfs_zaccess(), zfs_vsec_2_aclp(), dmu_tx_assign() or
  * zfs_aclset_common().
  */
 int
 zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
 {
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	zilog_t		*zilog = zfsvfs->z_log;
 	ulong_t		mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
 	dmu_tx_t	*tx;
 	int		error;
 	zfs_acl_t	*aclp;
 	zfs_fuid_info_t	*fuidp = NULL;
 	boolean_t	fuid_dirtied;
 	uint64_t	acl_obj;
 
 	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 	if (mask == 0)
 		return (SET_ERROR(ENOSYS));
 
 	if (zp->z_pflags & ZFS_IMMUTABLE)
 		return (SET_ERROR(EPERM));
 
 	if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) != 0)
 		return (error);
 
 	error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
 	    &aclp);
 	if (error)
 		return (error);
 
 	/*
 	 * If ACL wide flags aren't being set then preserve any
 	 * existing flags.
 	 */
 	if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) {
 		aclp->z_hints |=
 		    (zp->z_pflags & V4_ACL_WIDE_FLAGS);
 	}
 top:
 	mutex_enter(&zp->z_acl_lock);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 
 	/*
 	 * If old version and ACL won't fit in bonus and we aren't
 	 * upgrading then take out necessary DMU holds
 	 */
 
 	if ((acl_obj = zfs_external_acl(zp)) != 0) {
 		if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
 		    zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
 			dmu_tx_hold_free(tx, acl_obj, 0,
 			    DMU_OBJECT_END);
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 			    aclp->z_acl_bytes);
 		} else {
 			dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes);
 		}
 	} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
 	}
 
 	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (error) {
 		mutex_exit(&zp->z_acl_lock);
 
 		/* ERESTART: wait for the tx, then rebuild it and retry. */
 		if (error == ERESTART) {
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zfs_acl_free(aclp);
 		/*
 		 * fuidp may have been filled in through zfs_vsec_2_aclp();
 		 * the success path below frees it, so this failure path
 		 * must do the same or the structure is leaked.
 		 */
 		if (fuidp)
 			zfs_fuid_info_free(fuidp);
 		return (error);
 	}
 
 	error = zfs_aclset_common(zp, aclp, cr, tx);
 	ASSERT(error == 0);
 	ASSERT(zp->z_acl_cached == NULL);
 	/* The znode now owns aclp as its cached ACL. */
 	zp->z_acl_cached = aclp;
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
 
 	if (fuidp)
 		zfs_fuid_info_free(fuidp);
 	dmu_tx_commit(tx);
 	mutex_exit(&zp->z_acl_lock);
 
 	return (error);
 }
 
 /*
  * Check accesses of interest (AoI) against attributes of the dataset
  * such as read-only.  Returns zero if no AoI conflict with dataset
  * attributes, otherwise an appropriate errno is returned.
  */
 static int
 zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
 {
 	/*
 	 * Write-type requests on a read-only filesystem fail with EROFS.
 	 * For device vnodes only requests that include WRITE_MASK_ATTRS
 	 * bits are refused.
 	 */
 	if ((v4_mode & WRITE_MASK) &&
 	    (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 	    (!IS_DEVVP(ZTOV(zp)) || (v4_mode & WRITE_MASK_ATTRS))) {
 		return (SET_ERROR(EROFS));
 	}
 
 	/*
 	 * Only check for READONLY on non-directories.
 	 */
 	if ((v4_mode & WRITE_MASK_DATA) &&
 	    (((ZTOV(zp)->v_type != VDIR) &&
 	    (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
 	    (ZTOV(zp)->v_type == VDIR &&
 	    (zp->z_pflags & ZFS_IMMUTABLE)))) {
 		return (SET_ERROR(EPERM));
 	}
 
 #ifdef illumos
 	if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
 	    (zp->z_pflags & ZFS_NOUNLINK)) {
 		return (SET_ERROR(EPERM));
 	}
 #else
 	/*
 	 * In FreeBSD we allow modifying a directory's content if
 	 * ZFS_NOUNLINK (sunlnk) is set. We just don't allow directory
 	 * removal, which is handled in zfs_zaccess_delete().
 	 */
 	if ((v4_mode & ACE_DELETE) &&
 	    (zp->z_pflags & ZFS_NOUNLINK)) {
 		return (SET_ERROR(EPERM));
 	}
 #endif
 
 	/* Quarantined files may be neither read nor executed. */
 	if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED))) {
 		return (SET_ERROR(EACCES));
 	}
 
 	return (0);
 }
 
 /*
  * The primary usage of this function is to loop through all of the
  * ACEs in the znode, determining what accesses of interest (AoI) to
  * the caller are allowed or denied.  The AoI are expressed as bits in
  * the working_mode parameter.  As each ACE is processed, bits covered
  * by that ACE are removed from the working_mode.  This removal
  * facilitates two things.  The first is that when the working mode is
  * empty (= 0), we know we've looked at all the AoI. The second is
  * that the ACE interpretation rules don't allow a later ACE to undo
  * something granted or denied by an earlier ACE.  Removing the
  * discovered access or denial enforces this rule.  At the end of
  * processing the ACEs, all AoI that were found to be denied are
  * placed into the working_mode, giving the caller a mask of denied
  * accesses.  Returns:
  *	0		if all AoI granted
  *	EACCES		if the denied mask is non-zero
  *	-1		if nothing was denied but some AoI were not granted
  *	other error	if abnormal failure (e.g., IO error)
  *
  * A secondary usage of the function is to determine if any of the
  * AoI are granted.  If an ACE grants any access in
  * the working_mode, we immediately short circuit out of the function.
  * This mode is chosen by setting anyaccess to B_TRUE.  The
  * working_mode is not a denied access mask upon exit if the function
  * is used in this manner.
  */
 static int
 zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
     boolean_t anyaccess, cred_t *cr)
 {
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	zfs_acl_t	*aclp;
 	int		error;
 	uid_t		uid = crgetuid(cr);
 	uint64_t 	who;
 	uint16_t	type, iflags;
 	uint16_t	entry_type;
 	uint32_t	access_mask;
 	uint32_t	deny_mask = 0;
 	zfs_ace_hdr_t	*acep = NULL;
 	boolean_t	checkit;
 	uid_t		gowner;
 	uid_t		fowner;
 
 	/* fowner/gowner are compared against the caller in the loop below. */
 	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
 
 	/* z_acl_lock is held for the whole walk; every exit path drops it. */
 	mutex_enter(&zp->z_acl_lock);
 
 	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
 	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
 	if (error != 0) {
 		mutex_exit(&zp->z_acl_lock);
 		return (error);
 	}
 
 	ASSERT(zp->z_acl_cached);
 
 	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
 	    &iflags, &type)) {
 		uint32_t mask_matched;
 
 		if (!zfs_acl_valid_ace_type(type, iflags))
 			continue;
 
 		/* Inherit-only ACEs on a directory do not apply to it. */
 		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
 			continue;
 
 		/* Skip ACE if it does not affect any AoI */
 		mask_matched = (access_mask & *working_mode);
 		if (!mask_matched)
 			continue;
 
 		entry_type = (iflags & ACE_TYPE_FLAGS);
 
 		/* checkit becomes B_TRUE when this ACE applies to the caller. */
 		checkit = B_FALSE;
 
 		switch (entry_type) {
 		case ACE_OWNER:
 			if (uid == fowner)
 				checkit = B_TRUE;
 			break;
 		case OWNING_GROUP:
 			/* Test membership in the file's owning group. */
 			who = gowner;
 			/*FALLTHROUGH*/
 		case ACE_IDENTIFIER_GROUP:
 			checkit = zfs_groupmember(zfsvfs, who, cr);
 			break;
 		case ACE_EVERYONE:
 			checkit = B_TRUE;
 			break;
 
 		/* USER Entry */
 		default:
 			if (entry_type == 0) {
 				uid_t newid;
 
 				newid = zfs_fuid_map_id(zfsvfs, who, cr,
 				    ZFS_ACE_USER);
 				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
 				    uid == newid)
 					checkit = B_TRUE;
 				break;
 			} else {
 				/* Unrecognized entry type bits: fail with EIO. */
 				mutex_exit(&zp->z_acl_lock);
 				return (SET_ERROR(EIO));
 			}
 		}
 
 		if (checkit) {
 			if (type == DENY) {
 				DTRACE_PROBE3(zfs__ace__denies,
 				    znode_t *, zp,
 				    zfs_ace_hdr_t *, acep,
 				    uint32_t, mask_matched);
 				deny_mask |= mask_matched;
 			} else {
 				DTRACE_PROBE3(zfs__ace__allows,
 				    znode_t *, zp,
 				    zfs_ace_hdr_t *, acep,
 				    uint32_t, mask_matched);
 				/* One applicable allow is enough in this mode. */
 				if (anyaccess) {
 					mutex_exit(&zp->z_acl_lock);
 					return (0);
 				}
 			}
 			/* These bits are decided; later ACEs cannot change them. */
 			*working_mode &= ~mask_matched;
 		}
 
 		/* Are we done? */
 		if (*working_mode == 0)
 			break;
 	}
 
 	mutex_exit(&zp->z_acl_lock);
 
 	/* Put the found 'denies' back on the working mode */
 	if (deny_mask) {
 		*working_mode |= deny_mask;
 		return (SET_ERROR(EACCES));
 	} else if (*working_mode) {
 		/*
 		 * No ACE denied anything, but some requested bits were never
 		 * granted; they remain set in *working_mode.
 		 */
 		return (-1);
 	}
 
 	return (0);
 }
 
 /*
  * Return true if any access whatsoever granted, we don't actually
  * care what access is granted.
  */
 boolean_t
 zfs_has_access(znode_t *zp, cred_t *cr)
 {
 	uint32_t wanted = ACE_ALL_PERMS;
 	uid_t file_owner;
 
 	/* Ask the ACL, in "any access" mode, about every permission bit. */
 	if (zfs_zaccess_aces_check(zp, &wanted, B_TRUE, cr) == 0)
 		return (B_TRUE);
 
 	/* The ACL check did not succeed; let the policy layer decide. */
 	file_owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
 	return (secpolicy_vnode_any_access(cr, ZTOV(zp), file_owner) == 0);
 }
 
 /*
  * Common front end for the ACL access checks.
  *
  * *working_mode starts out as v4_mode and *check_privs as B_TRUE.  Returns
  * 0 with *working_mode cleared when nothing is requested, when the
  * filesystem has z_replay set, or when the caller asked to skip the ACL
  * check.  A dataset-level refusal is returned with *check_privs set to
  * B_FALSE.  Otherwise the result of zfs_zaccess_aces_check() is returned.
  */
 static int
 zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
     boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
 {
 	int error;
 
 	*check_privs = B_TRUE;
 	*working_mode = v4_mode;
 
 	/*
 	 * Short circuit empty requests
 	 */
 	if (v4_mode == 0 || zp->z_zfsvfs->z_replay) {
 		*working_mode = 0;
 		return (0);
 	}
 
 	error = zfs_zaccess_dataset_check(zp, v4_mode);
 	if (error != 0) {
 		*check_privs = B_FALSE;
 		return (error);
 	}
 
 	if (!skipaclchk)
 		return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
 
 	/*
 	 * The caller requested that the ACL check be skipped.  This
 	 * would only happen if the caller checked VOP_ACCESS() with a
 	 * 32 bit ACE mask and already had the appropriate permissions.
 	 */
 	*working_mode = 0;
 	return (0);
 }
 
 /*
  * Retry a failed check as an append: if the only bit still outstanding is
  * ACE_WRITE_DATA, re-run the common check asking for ACE_APPEND_DATA;
  * anything else is refused with EACCES.
  */
 static int
 zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
     cred_t *cr)
 {
 	if (*working_mode == ACE_WRITE_DATA) {
 		return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
 		    check_privs, B_FALSE, cr));
 	}
 
 	return (SET_ERROR(EACCES));
 }
 
 /*
  * Quick execute-permission check on zdp.
  *
  * Quarantined znodes are refused with EACCES.  Unless zdp is an extended
  * attribute directory, the mode bits are consulted under z_acl_lock: the
  * check succeeds at once if ZFS_NO_EXECS_DENIED is set, or if neither the
  * owner nor the group id has a FUID index and the execute bit of the class
  * the caller falls into (owner, then group, then other) is set.  Every
  * other case falls back to a full zfs_zaccess(ACE_EXECUTE) check.
  */
 int
 zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
 {
 	boolean_t fast_ok = B_FALSE;
 	uid_t uid = crgetuid(cr);
 	int error;
 
 	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
 		return (SET_ERROR(EACCES));
 
 	/* Attribute directories always take the slow path. */
 	if (!((zdp->z_pflags & ZFS_XATTR) && (ZTOV(zdp)->v_type == VDIR))) {
 		mutex_enter(&zdp->z_acl_lock);
 
 		if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
 			fast_ok = B_TRUE;
 		} else if (FUID_INDEX(zdp->z_uid) == 0 &&
 		    FUID_INDEX(zdp->z_gid) == 0) {
 			int want_bit;
 
 			/* Pick the one mode bit that applies to the caller. */
 			if (uid == zdp->z_uid)
 				want_bit = S_IXUSR;
 			else if (groupmember(zdp->z_gid, cr))
 				want_bit = S_IXGRP;
 			else
 				want_bit = S_IXOTH;
 
 			if (zdp->z_mode & want_bit)
 				fast_ok = B_TRUE;
 		}
 
 		mutex_exit(&zdp->z_acl_lock);
 
 		if (fast_ok)
 			return (0);
 	}
 
 	/* Fast path could not grant access; do the full check. */
 	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
 	ZFS_ENTER(zdp->z_zfsvfs);
 	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
 	ZFS_EXIT(zdp->z_zfsvfs);
 	return (error);
 }
 
 /*
  * Determine whether Access should be granted/denied.
  *
  * The least priv subsystem is always consulted as a basic privilege
  * can define any form of access.
  */
 int
 zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
 {
 	uint32_t	working_mode;
 	int		error;
 	int		is_attr;
 	boolean_t 	check_privs;
 	znode_t		*xzp;
 	znode_t 	*check_zp = zp;
 	mode_t		needed_bits;
 	uid_t		owner;
 
 	/* True only for an extended attribute directory. */
 	is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));
 
 #ifdef __FreeBSD_kernel__
 	/*
 	 * In FreeBSD, we don't care about permissions of individual ADS.
 	 * Note that not checking them is not just an optimization - without
 	 * this shortcut, EA operations may bogusly fail with EACCES.
 	 */
 	if (zp->z_pflags & ZFS_XATTR)
 		return (0);
 #else
 	/*
 	 * If attribute then validate against base file
 	 */
 	if (is_attr) {
 		uint64_t	parent;
 
 		if ((error = sa_lookup(zp->z_sa_hdl,
 		    SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
 		    sizeof (parent))) != 0)
 			return (error);
 
 		/* xzp is released with VN_RELE() on every later return path. */
 		if ((error = zfs_zget(zp->z_zfsvfs,
 		    parent, &xzp)) != 0)	{
 			return (error);
 		}
 
 		check_zp = xzp;
 
 		/*
 		 * fixup mode to map to xattr perms
 		 */
 
 		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
 			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
 			mode |= ACE_WRITE_NAMED_ATTRS;
 		}
 
 		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
 			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
 			mode |= ACE_READ_NAMED_ATTRS;
 		}
 	}
 #endif
 
 	/* Note: the owner is taken from zp itself, not from check_zp. */
 	owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
 	/*
 	 * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
 	 * in needed_bits.  Map the bits mapped by working_mode (currently
 	 * missing) in missing_bits.
 	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
 	 * needed_bits.
 	 */
 	needed_bits = 0;
 
 	/*
 	 * When the caller is the owner, READ_ACL/READ_ATTRIBUTES are left
 	 * out of the needed_bits computation.
 	 */
 	working_mode = mode;
 	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
 	    owner == crgetuid(cr))
 		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
 
 	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
 	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
 		needed_bits |= VREAD;
 	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
 	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
 		needed_bits |= VWRITE;
 	if (working_mode & ACE_EXECUTE)
 		needed_bits |= VEXEC;
 
 	/*
 	 * The common check succeeded: the policy call is still made, with
 	 * needed_bits passed as both mode arguments.
 	 */
 	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
 	    &check_privs, skipaclchk, cr)) == 0) {
 		if (is_attr)
 			VN_RELE(ZTOV(xzp));
 		return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
 		    needed_bits, needed_bits));
 	}
 
 	/*
 	 * check_privs is cleared by zfs_zaccess_common() when the dataset
 	 * level check failed; that error is returned as is.
 	 */
 	if (error && !check_privs) {
 		if (is_attr)
 			VN_RELE(ZTOV(xzp));
 		return (error);
 	}
 
 	/* With V_APPEND, retry the request as an append. */
 	if (error && (flags & V_APPEND)) {
 		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
 	}
 
 	if (error && check_privs) {
 		mode_t		checkmode = 0;
 
 		/*
 		 * First check for implicit owner permission on
 		 * read_acl/read_attributes
 		 */
 
 		error = 0;
 		ASSERT(working_mode != 0);
 
 		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
 		    owner == crgetuid(cr)))
 			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);
 
 		/*
 		 * checkmode collects the VREAD/VWRITE/VEXEC classes of the
 		 * bits still left in working_mode.
 		 */
 		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
 		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
 			checkmode |= VREAD;
 		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
 		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
 			checkmode |= VWRITE;
 		if (working_mode & ACE_EXECUTE)
 			checkmode |= VEXEC;
 
 		error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
 		    needed_bits & ~checkmode, needed_bits);
 
 		/* Each remaining special bit has its own policy check. */
 		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
 			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
 		if (error == 0 && (working_mode & ACE_WRITE_ACL))
 			error = secpolicy_vnode_setdac(ZTOV(check_zp), cr, owner);
 
 		if (error == 0 && (working_mode &
 		    (ACE_DELETE|ACE_DELETE_CHILD)))
 			error = secpolicy_vnode_remove(ZTOV(check_zp), cr);
 
 		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
 			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
 		}
 		if (error == 0) {
 			/*
 			 * See if any bits other than those already checked
 			 * for are still present.  If so then return EACCES
 			 */
 			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
 				error = SET_ERROR(EACCES);
 			}
 		}
 	} else if (error == 0) {
 		/* Reached when the append retry above succeeded. */
 		error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
 		    needed_bits, needed_bits);
 	}
 
 
 	if (is_attr)
 		VN_RELE(ZTOV(xzp));
 
 	return (error);
 }
 
 /*
  * Translate traditional unix VREAD/VWRITE/VEXEC mode into
  * native ACL format and call zfs_zaccess()
  */
 int
 zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
 {
 	/* Bits 6..8 of mode are handed to zfs_unix_to_v4() for conversion. */
 	int v4_mode = zfs_unix_to_v4(mode >> 6);
 
 	return (zfs_zaccess(zp, v4_mode, flags, B_FALSE, cr));
 }
 
 /*
  * Access function for secpolicy_vnode_setattr
  */
 int
 zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
 {
 	/* Same conversion as zfs_zaccess_rwx(), always with flags == 0. */
 	return (zfs_zaccess(zp, (int)zfs_unix_to_v4(mode >> 6), 0,
 	    B_FALSE, cr));
 }
 
 /*
  * Last step of a delete check: ask the policy layer for VWRITE|VEXEC on
  * the directory dzp, passing available_perms as the bits already held,
  * and if that succeeds apply the sticky-bit rule for removing zp from dzp.
  */
 static int
 zfs_delete_final_check(znode_t *zp, znode_t *dzp,
     mode_t available_perms, cred_t *cr)
 {
 	uid_t dir_owner;
 	int rc;
 
 	dir_owner = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER);
 
 	rc = secpolicy_vnode_access2(cr, ZTOV(dzp), dir_owner,
 	    available_perms, VWRITE|VEXEC);
 	if (rc != 0)
 		return (rc);
 
 	return (zfs_sticky_remove_access(dzp, zp, cr));
 }
 
 /*
  * Determine whether access should be granted or denied, without
  * consulting the least privilege subsystem.
  *
  * The following chart is the recommended NFSv4 enforcement for
  * ability to delete an object.
  *
  *      -------------------------------------------------------
  *      |   Parent Dir  |           Target Object Permissions |
  *      |  permissions  |                                     |
  *      -------------------------------------------------------
  *      |               | ACL Allows | ACL Denies| Delete     |
  *      |               |  Delete    |  Delete   | unspecified|
  *      -------------------------------------------------------
  *      |  ACL Allows   | Permit     | Permit    | Permit     |
  *      |  DELETE_CHILD |                                     |
  *      -------------------------------------------------------
  *      |  ACL Denies   | Permit     | Deny      | Deny       |
  *      |  DELETE_CHILD |            |           |            |
  *      -------------------------------------------------------
  *      | ACL specifies |            |           |            |
  *      | only allow    | Permit     | Permit    | Permit     |
  *      | write and     |            |           |            |
  *      | execute       |            |           |            |
  *      -------------------------------------------------------
  *      | ACL denies    |            |           |            |
  *      | write and     | Permit     | Deny      | Deny       |
  *      | execute       |            |           |            |
  *      -------------------------------------------------------
  *         ^
  *         |
  *         No search privilege, can't even look up file?
  *
  */
 /*
  * Check whether cr may remove zp from the directory dzp.
  * Returns 0 if the delete is permitted, otherwise an errno value.
  */
 int
 zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr)
 {
 	uint32_t dzp_working_mode = 0;
 	uint32_t zp_working_mode = 0;
 	int dzp_error, zp_error;
 	mode_t available_perms;
 	boolean_t dzpcheck_privs = B_TRUE;
 	boolean_t zpcheck_privs = B_TRUE;
 
 	/*
 	 * We want specific DELETE permissions to
 	 * take precedence over WRITE/EXECUTE.  We don't
 	 * want an ACL such as this to mess us up.
 	 * user:joe:write_data:deny,user:joe:delete:allow
 	 *
 	 * However, deny permissions may ultimately be overridden
 	 * by secpolicy_vnode_access().
 	 *
 	 * We will ask for all of the necessary permissions and then
 	 * look at the working modes from the directory and target object
 	 * to determine what was found.
 	 */
 
 	/* Immutable or no-unlink targets can never be removed. */
 	if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK))
 		return (SET_ERROR(EPERM));
 
 	/*
 	 * First row
 	 * If the directory permissions allow the delete, we are done.
 	 */
 	if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD,
 	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0)
 		return (0);
 
 	/*
 	 * If target object has delete permission then we are done
 	 */
 	if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode,
 	    &zpcheck_privs, B_FALSE, cr)) == 0)
 		return (0);
 
 	ASSERT(dzp_error && zp_error);
 
 	/*
 	 * A cleared check_privs flag means the dataset-level check refused
 	 * the request; return that error without consulting privileges.
 	 */
 	if (!dzpcheck_privs)
 		return (dzp_error);
 	if (!zpcheck_privs)
 		return (zp_error);
 
 	/*
 	 * Second row
 	 *
 	 * If directory returns EACCES then delete_child was denied
 	 * due to deny delete_child.  In this case send the request through
 	 * secpolicy_vnode_remove().  We don't use zfs_delete_final_check()
 	 * since that *could* allow the delete based on write/execute permission
 	 * and we want delete permissions to override write/execute.
 	 */
 
 	if (dzp_error == EACCES)
 		return (secpolicy_vnode_remove(ZTOV(dzp), cr));	/* XXXPJD: s/dzp/zp/ ? */
 
 	/*
 	 * Third Row
 	 * only need to see if we have write/execute on directory.
 	 */
 
 	dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA,
 	    &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr);
 
 	if (dzp_error != 0 && !dzpcheck_privs)
 		return (dzp_error);
 
 	/*
 	 * Fourth row
 	 */
 
 	/*
 	 * A bit still set in dzp_working_mode was not granted by the ACL,
 	 * so the matching VWRITE/VEXEC bit is left out of available_perms.
 	 */
 	available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE;
 	available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC;
 
 	return (zfs_delete_final_check(zp, dzp, available_perms, cr));
 
 }
 
 /*
  * Check whether cr may rename szp (in directory sdzp) into directory tdzp,
  * replacing tzp if it is non-NULL.  Returns 0 or the first error hit.
  */
 int
 zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp,
     znode_t *tzp, cred_t *cr)
 {
 	int src_is_dir;
 	int error;
 
 	if (szp->z_pflags & ZFS_AV_QUARANTINED)
 		return (SET_ERROR(EACCES));
 
 	src_is_dir = (ZTOV(szp)->v_type == VDIR);
 
 	/*
 	 * Rename permissions are combination of delete permission +
 	 * add file/subdir permission.
 	 *
 	 * BSD operating systems also require write permission
 	 * on the directory being moved from one parent directory
 	 * to another.
 	 */
 	if (src_is_dir && ZTOV(sdzp) != ZTOV(tdzp)) {
 		error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr);
 		if (error != 0)
 			return (error);
 	}
 
 	/* The source must be removable from its current directory. */
 	error = zfs_zaccess_delete(sdzp, szp, cr);
 	if (error != 0)
 		return (error);
 
 	/* An existing target must be removable as well. */
 	if (tzp != NULL) {
 		error = zfs_zaccess_delete(tdzp, tzp, cr);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Finally, the target directory must accept the new entry. */
 	return (zfs_zaccess(tdzp,
 	    src_is_dir ? ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE,
 	    0, B_FALSE, cr));
 }
Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
===================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	(revision 324004)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c	(revision 324005)
@@ -1,2218 +1,2217 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org> */
 
 #ifdef _KERNEL
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sysmacros.h>
 #include <sys/resource.h>
 #include <sys/mntent.h>
 #include <sys/u8_textprep.h>
 #include <sys/dsl_dataset.h>
 #include <sys/vfs.h>
 #include <sys/vnode.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/errno.h>
 #include <sys/unistd.h>
 #include <sys/atomic.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_rlock.h>
 #include <sys/zfs_fuid.h>
 #include <sys/dnode.h>
 #include <sys/fs/zfs.h>
 #include <sys/kidmap.h>
 #endif /* _KERNEL */
 
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/refcount.h>
 #include <sys/stat.h>
 #include <sys/zap.h>
 #include <sys/zfs_znode.h>
 #include <sys/sa.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_stat.h>
 #include <sys/refcount.h>
 
 #include "zfs_prop.h"
 #include "zfs_comutil.h"
 
 /* Used by fstat(1). */
 SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)");
 
 /*
  * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
  * turned on when DEBUG is also defined.
  */
 #ifdef	DEBUG
 #define	ZNODE_STATS
 #endif	/* DEBUG */
 
 #ifdef	ZNODE_STATS
 #define	ZNODE_STAT_ADD(stat)			((stat)++)
 #else
 #define	ZNODE_STAT_ADD(stat)			/* nothing */
 #endif	/* ZNODE_STATS */
 
 /*
  * Functions needed for userland (ie: libzpool) are not put under
  * #ifdef_KERNEL; the rest of the functions have dependencies
  * (such as VFS logic) that will not compile easily in userland.
  */
 #ifdef _KERNEL
 /*
  * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
  * be freed before it can be safely accessed.
  */
 krwlock_t zfsvfs_lock;
 
 static kmem_cache_t *znode_cache = NULL;
 
 /*
  * Eviction callback that must never actually run: it panics, reporting the
  * user pointer it was handed.  dbuf is unused.
  */
 /*ARGSUSED*/
 static void
 znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
 {
 	/*
 	 * Getting here means all dbuf references were dropped while the
 	 * eviction callback was still registered, which should never happen.
 	 */
 	panic("evicting znode %p\n", user_ptr);
 }
 
 extern struct vop_vector zfs_vnodeops;
 extern struct vop_vector zfs_fifoops;
 extern struct vop_vector zfs_shareops;
 
 static int
 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 {
 	znode_t *zp = buf;
 
 	POINTER_INVALIDATE(&zp->z_zfsvfs);
 
 	list_link_init(&zp->z_link_node);
 
 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
 	avl_create(&zp->z_range_avl, zfs_range_compare,
 	    sizeof (rl_t), offsetof(rl_t, r_node));
 
 	zp->z_acl_cached = NULL;
 	zp->z_vnode = NULL;
 	zp->z_moved = 0;
 	return (0);
 }
 
 /*
  * kmem cache destructor for znode_t buffers.  Asserts that the znode has
  * been detached (invalid zfsvfs pointer, no vnode, not on a znode list, no
  * cached ACL) and destroys the locks and range tree set up by
  * zfs_znode_cache_constructor().  arg is unused.
  */
 /*ARGSUSED*/
 static void
 zfs_znode_cache_destructor(void *buf, void *arg)
 {
 	znode_t *zp = buf;
 
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
-	ASSERT(ZTOV(zp) == NULL);
-	vn_free(ZTOV(zp));
+	ASSERT3P(zp->z_vnode, ==, NULL);
 	ASSERT(!list_link_active(&zp->z_link_node));
 	mutex_destroy(&zp->z_acl_lock);
 	avl_destroy(&zp->z_range_avl);
 	mutex_destroy(&zp->z_range_lock);
 
 	ASSERT(zp->z_acl_cached == NULL);
 }
 
 #ifdef	ZNODE_STATS
 /*
  * Counters bumped via ZNODE_STAT_ADD() in zfs_znode_move(), one per reason
  * a move was refused or deferred.
  */
 static struct {
 	/* z_zfsvfs failed POINTER_IS_VALID() */
 	uint64_t zms_zfsvfs_invalid;
 	/* z_zfsvfs changed before zfsvfs_lock was acquired */
 	uint64_t zms_zfsvfs_recheck1;
 	/* filesystem had z_unmounted set */
 	uint64_t zms_zfsvfs_unmounted;
 	/* z_zfsvfs changed before z_znodes_lock was acquired */
 	uint64_t zms_zfsvfs_recheck2;
 	/* ZFS_OBJ_HOLD_TRYENTER() failed */
 	uint64_t zms_obj_held;
 	/* the vnode's v_lock could not be taken */
 	uint64_t zms_vnode_locked;
 	/* v_count != 1 or the vnode is not in the DNLC */
 	uint64_t zms_not_only_dnlc;
 } znode_move_stats;
 #endif	/* ZNODE_STATS */
 
 #ifdef illumos
 /*
  * Transfer the identity of znode ozp to nzp: copy the fields, swap the
  * vnodes, point the SA handle's user pointer at nzp, drop ozp's cached ACL
  * and finally invalidate ozp.  Called from zfs_znode_move() with the
  * necessary locks held.
  */
 static void
 zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
 {
 	vnode_t *vp;
 
 	/* Copy fields. */
 	nzp->z_zfsvfs = ozp->z_zfsvfs;
 
 	/* Swap vnodes. */
 	vp = nzp->z_vnode;
 	nzp->z_vnode = ozp->z_vnode;
 	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
 	/* Each vnode's v_data must point back at its new znode. */
 	ZTOV(ozp)->v_data = ozp;
 	ZTOV(nzp)->v_data = nzp;
 
 	nzp->z_id = ozp->z_id;
 	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
 	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
 	nzp->z_unlinked = ozp->z_unlinked;
 	nzp->z_atime_dirty = ozp->z_atime_dirty;
 	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
 	nzp->z_blksz = ozp->z_blksz;
 	nzp->z_seq = ozp->z_seq;
 	nzp->z_mapcnt = ozp->z_mapcnt;
 	nzp->z_gen = ozp->z_gen;
 	nzp->z_sync_cnt = ozp->z_sync_cnt;
 	nzp->z_is_sa = ozp->z_is_sa;
 	nzp->z_sa_hdl = ozp->z_sa_hdl;
 	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
 	nzp->z_links = ozp->z_links;
 	nzp->z_size = ozp->z_size;
 	nzp->z_pflags = ozp->z_pflags;
 	nzp->z_uid = ozp->z_uid;
 	nzp->z_gid = ozp->z_gid;
 	nzp->z_mode = ozp->z_mode;
 
 	/*
 	 * Since this is just an idle znode and kmem is already dealing with
 	 * memory pressure, release any cached ACL.
 	 */
 	if (ozp->z_acl_cached) {
 		zfs_acl_free(ozp->z_acl_cached);
 		ozp->z_acl_cached = NULL;
 	}
 
 	/* The SA handle now belongs to nzp. */
 	sa_set_userp(nzp->z_sa_hdl, nzp);
 
 	/*
 	 * Invalidate the original znode by clearing fields that provide a
 	 * pointer back to the znode. Set the low bit of the vfs pointer to
 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
 	 * subsequent callback.
 	 */
 	ozp->z_sa_hdl = NULL;
 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
 
 	/*
 	 * Mark the znode.
 	 */
 	nzp->z_moved = 1;
 	ozp->z_moved = (uint8_t)-1;
 }
 
 /*
  * Move callback for the znode cache: try to relocate the znode in buf to
  * newbuf.
  *
  * Returns KMEM_CBRC_DONT_KNOW if the znode cannot be validated (invalid or
  * changed zfsvfs pointer, or filesystem unmounted), KMEM_CBRC_LATER if a
  * required lock cannot be taken right now or the vnode is referenced by
  * anything other than the DNLC, and KMEM_CBRC_YES once the znode has been
  * moved.  size and arg are unused.
  */
 /*ARGSUSED*/
 static kmem_cbrc_t
 zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
 {
 	znode_t *ozp = buf, *nzp = newbuf;
 	zfsvfs_t *zfsvfs;
 	vnode_t *vp;
 
 	/*
 	 * The znode is on the file system's list of known znodes if the vfs
 	 * pointer is valid. We set the low bit of the vfs pointer when freeing
 	 * the znode to invalidate it, and the memory patterns written by kmem
 	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
 	 * created znode sets the vfs pointer last of all to indicate that the
 	 * znode is known and in a valid state to be moved by this function.
 	 */
 	zfsvfs = ozp->z_zfsvfs;
 	if (!POINTER_IS_VALID(zfsvfs)) {
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * Close a small window in which it's possible that the filesystem could
 	 * be unmounted and freed, and zfsvfs, though valid in the previous
 	 * statement, could point to unrelated memory by the time we try to
 	 * prevent the filesystem from being unmounted.
 	 */
 	rw_enter(&zfsvfs_lock, RW_WRITER);
 	if (zfsvfs != ozp->z_zfsvfs) {
 		rw_exit(&zfsvfs_lock);
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * If the znode is still valid, then so is the file system. We know that
 	 * no valid file system can be freed while we hold zfsvfs_lock, so we
 	 * can safely ensure that the filesystem is not and will not be
 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
 	 */
 	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
 	if (zfsvfs->z_unmounted) {
 		ZFS_EXIT(zfsvfs);
 		rw_exit(&zfsvfs_lock);
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 	/* The teardown lock is now held, so zfsvfs_lock can be dropped. */
 	rw_exit(&zfsvfs_lock);
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	/*
 	 * Recheck the vfs pointer in case the znode was removed just before
 	 * acquiring the lock.
 	 */
 	if (zfsvfs != ozp->z_zfsvfs) {
 		mutex_exit(&zfsvfs->z_znodes_lock);
 		ZFS_EXIT(zfsvfs);
 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * At this point we know that as long as we hold z_znodes_lock, the
 	 * znode cannot be freed and fields within the znode can be safely
 	 * accessed. Now, prevent a race with zfs_zget().
 	 */
 	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
 		mutex_exit(&zfsvfs->z_znodes_lock);
 		ZFS_EXIT(zfsvfs);
 		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/* Only a try-lock is attempted on the vnode; defer if it is busy. */
 	vp = ZTOV(ozp);
 	if (mutex_tryenter(&vp->v_lock) == 0) {
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
 		mutex_exit(&zfsvfs->z_znodes_lock);
 		ZFS_EXIT(zfsvfs);
 		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/* Only move znodes that are referenced _only_ by the DNLC. */
 	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
 		mutex_exit(&vp->v_lock);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
 		mutex_exit(&zfsvfs->z_znodes_lock);
 		ZFS_EXIT(zfsvfs);
 		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/*
 	 * The znode is known and in a valid state to move. We're holding the
 	 * locks needed to execute the critical section.
 	 */
 	zfs_znode_move_impl(ozp, nzp);
 	mutex_exit(&vp->v_lock);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
 
 	/* nzp takes ozp's place on the filesystem's znode list. */
 	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
 	mutex_exit(&zfsvfs->z_znodes_lock);
 	ZFS_EXIT(zfsvfs);
 
 	return (KMEM_CBRC_YES);
 }
 #endif /* illumos */
 
 /*
  * Module setup for znodes: initialize zfsvfs_lock, create the znode kmem
  * cache and register zfs_znode_move as its move callback.
  */
 void
 zfs_znode_init(void)
 {
 	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
 
 	/* The cache must not already exist. */
 	ASSERT(znode_cache == NULL);
 	znode_cache = kmem_cache_create("zfs_znode_cache",
 	    sizeof (znode_t),
 	    0,
 	    zfs_znode_cache_constructor,
 	    zfs_znode_cache_destructor,
 	    NULL, NULL, NULL, 0);
 
 	kmem_cache_set_move(znode_cache, zfs_znode_move);
 }
 
 /*
  * Module teardown for znodes: undo zfs_znode_init().
  */
 void
 zfs_znode_fini(void)
 {
 #ifdef illumos
 	/* Release the vfs and vnode operation tables. */
 	zfs_remove_op_tables();
 #endif
 
 	/* Destroy the znode cache if it was ever created. */
 	if (znode_cache != NULL) {
 		kmem_cache_destroy(znode_cache);
 		znode_cache = NULL;
 	}
 
 	rw_destroy(&zfsvfs_lock);
 }
 
 #ifdef illumos
 /* Vnode operation vectors, one per kind of ZFS vnode. */
 struct vnodeops *zfs_dvnodeops;
 struct vnodeops *zfs_fvnodeops;
 struct vnodeops *zfs_symvnodeops;
 struct vnodeops *zfs_xdvnodeops;
 struct vnodeops *zfs_evnodeops;
 struct vnodeops *zfs_sharevnodeops;
 
 /*
  * Release the vfs operations registered for zfsfstype and every vnode
  * operation vector that is currently set, leaving all of them cleared.
  */
 void
 zfs_remove_op_tables()
 {
 	/* vfs ops first */
 	ASSERT(zfsfstype);
 	(void) vfs_freevfsops_by_type(zfsfstype);
 	zfsfstype = 0;
 
 	/* then each vnode ops vector: free it if present and forget it */
 	if (zfs_dvnodeops != NULL) {
 		vn_freevnodeops(zfs_dvnodeops);
 		zfs_dvnodeops = NULL;
 	}
 	if (zfs_fvnodeops != NULL) {
 		vn_freevnodeops(zfs_fvnodeops);
 		zfs_fvnodeops = NULL;
 	}
 	if (zfs_symvnodeops != NULL) {
 		vn_freevnodeops(zfs_symvnodeops);
 		zfs_symvnodeops = NULL;
 	}
 	if (zfs_xdvnodeops != NULL) {
 		vn_freevnodeops(zfs_xdvnodeops);
 		zfs_xdvnodeops = NULL;
 	}
 	if (zfs_evnodeops != NULL) {
 		vn_freevnodeops(zfs_evnodeops);
 		zfs_evnodeops = NULL;
 	}
 	if (zfs_sharevnodeops != NULL) {
 		vn_freevnodeops(zfs_sharevnodeops);
 		zfs_sharevnodeops = NULL;
 	}
 }
 
 extern const fs_operation_def_t zfs_dvnodeops_template[];
 extern const fs_operation_def_t zfs_fvnodeops_template[];
 extern const fs_operation_def_t zfs_xdvnodeops_template[];
 extern const fs_operation_def_t zfs_symvnodeops_template[];
 extern const fs_operation_def_t zfs_evnodeops_template[];
 extern const fs_operation_def_t zfs_sharevnodeops_template[];
 
 int
 zfs_create_op_tables()
 {
 	int error;
 
 	/*
 	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
 	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
 	 * In this case we just return as the ops vectors are already set up.
 	 */
 	if (zfs_dvnodeops)
 		return (0);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
 	    &zfs_dvnodeops);
 	if (error)
 		return (error);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
 	    &zfs_fvnodeops);
 	if (error)
 		return (error);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
 	    &zfs_symvnodeops);
 	if (error)
 		return (error);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
 	    &zfs_xdvnodeops);
 	if (error)
 		return (error);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
 	    &zfs_evnodeops);
 	if (error)
 		return (error);
 
 	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
 	    &zfs_sharevnodeops);
 
 	return (error);
 }
 #endif	/* illumos */
 
 /*
  * Create the shares directory object and register it.
  *
  * A scratch znode taken from znode_cache stands in for the "parent" while
  * zfs_mknode() creates a directory object with IS_ROOT_NODE.  The new
  * object number is added to the master node under ZFS_SHARES_DIR and
  * stored in zfsvfs->z_shares_dir.  The scratch znode, its SA handle and
  * the ACL ids are released before returning.
  *
  * Returns the result of zap_add().
  */
 int
 zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
 {
 	zfs_acl_ids_t ids;
 	vattr_t va;
 	znode_t *scratch;
 	znode_t *created;
 	int rc;
 
 	/* A mode 0555 directory owned by kcred's uid/gid. */
 	va.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
 	va.va_type = VDIR;
 	va.va_mode = S_IFDIR|0555;
 	va.va_uid = crgetuid(kcred);
 	va.va_gid = crgetgid(kcred);
 
 	/* Minimal in-core state needed by the ACL and mknode code. */
 	scratch = kmem_cache_alloc(znode_cache, KM_SLEEP);
 	ASSERT(!POINTER_IS_VALID(scratch->z_zfsvfs));
 	scratch->z_moved = 0;
 	scratch->z_unlinked = 0;
 	scratch->z_atime_dirty = 0;
 	scratch->z_zfsvfs = zfsvfs;
 	scratch->z_is_sa = zfsvfs->z_use_sa;
 
 	VERIFY(0 == zfs_acl_ids_create(scratch, IS_ROOT_NODE, &va,
 	    kcred, NULL, &ids));
 	zfs_mknode(scratch, &va, tx, kcred, IS_ROOT_NODE, &created, &ids);
 	ASSERT3P(created, ==, scratch);
 
 	/* The scratch znode goes back to the cache; invalidate it first. */
 	POINTER_INVALIDATE(&scratch->z_zfsvfs);
 
 	rc = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
 	    ZFS_SHARES_DIR, 8, 1, &scratch->z_id, tx);
 	zfsvfs->z_shares_dir = scratch->z_id;
 
 	zfs_acl_ids_free(&ids);
 	sa_handle_destroy(scratch->z_sa_hdl);
 	kmem_cache_free(znode_cache, scratch);
 
 	return (rc);
 }
 
 /*
  * define a couple of values we need available
  * for both 64 and 32 bit environments.
  */
 #ifndef NBITSMINOR64
 #define	NBITSMINOR64	32
 #endif
 #ifndef MAXMAJ64
 #define	MAXMAJ64	0xffffffffUL
 #endif
 #ifndef	MAXMIN64
 #define	MAXMIN64	0xffffffffUL
 #endif
 
 /*
  * Create special expldev for ZFS private use.
  * Can't use standard expldev since it doesn't do
  * what we want.  The standard expldev() takes a
  * dev32_t in LP64 and expands it to a long dev_t.
  * We need an interface that takes a dev32_t in ILP32
  * and expands it to a long dev_t.
  *
  * The result holds the major number in the upper 32 bits and the minor
  * number in the lower 32 bits.  Each half is masked to 32 bits before the
  * two are combined: where major()/minor() yield a signed int, a negative
  * value would otherwise sign-extend across the whole 64-bit result and
  * clobber the other half.
  */
 static uint64_t
 zfs_expldev(dev_t dev)
 {
 	uint64_t maj64 = (uint64_t)major(dev) & MAXMAJ64;
 	uint64_t min64 = (uint64_t)minor(dev) & MAXMIN64;
 
 	return ((maj64 << NBITSMINOR64) | min64);
 }
 /*
  * Special cmpldev for ZFS private use.
  * Can't use standard cmpldev since it takes
  * a long dev_t and compresses it to dev32_t in
  * LP64.  We need to do a compaction of a long dev_t
  * to a dev32_t in ILP32.
  *
  * Inverse of zfs_expldev(): the upper 32 bits of "dev" are taken as the
  * major number and the lower 32 bits as the minor number.
  */
 dev_t
 zfs_cmpldev(uint64_t dev)
 {
 	return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64)));
 }
 
 /*
  * Give a znode its SA handle and derive the state that depends on it.
  *
  * If the caller supplies "sa_hdl" it is adopted and its user pointer is
  * set to the znode; otherwise a shared handle is obtained from "db".
  * z_is_sa is set from "obj_type", and the vnode is flagged VROOT when the
  * znode is the root of a file system that is its own parent.
  *
  * The caller must hold the object mutex for zp->z_id.
  */
 static void
 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
 {
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
 	ASSERT(zp->z_sa_hdl == NULL);
 	ASSERT(zp->z_acl_cached == NULL);
 
 	if (sa_hdl != NULL) {
 		/* Adopt the caller's handle and point it back at us. */
 		zp->z_sa_hdl = sa_hdl;
 		sa_set_userp(sa_hdl, zp);
 	} else {
 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
 		    SA_HDL_SHARED, &zp->z_sa_hdl));
 	}
 
 	if (obj_type == DMU_OT_SA)
 		zp->z_is_sa = B_TRUE;
 	else
 		zp->z_is_sa = B_FALSE;
 
 	/*
 	 * The root znode gets VROOT, except for the root node of a
 	 * snapshot mounted under .zfs (whose z_parent is another zfsvfs).
 	 */
 	if (zfsvfs->z_parent == zfsvfs && zp->z_id == zfsvfs->z_root)
 		ZTOV(zp)->v_flag |= VROOT;
 
 	vn_exists(ZTOV(zp));
 }
 
 /*
  * Detach a znode from the DMU: destroy its SA handle and clear z_sa_hdl.
  *
  * Asserts that the caller either holds the object mutex for zp->z_id,
  * is operating on an unlinked znode, or holds z_teardown_inactive_lock
  * as writer.
  */
 void
 zfs_znode_dmu_fini(znode_t *zp)
 {
 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
 	    zp->z_unlinked ||
 	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
 
 	sa_handle_destroy(zp->z_sa_hdl);
 	zp->z_sa_hdl = NULL;
 }
 
 /*
  * Dispose of a vnode that never became a usable ZFS vnode: clear its
  * private data, switch it to dead_vnodeops, then vgone() and vput() it.
  */
 static void
 zfs_vnode_forget(vnode_t *vp)
 {
 
 	/* copied from insmntque_stddtr */
 	vp->v_data = NULL;
 	vp->v_op = &dead_vnodeops;
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * Construct a new znode/vnode and initialize.
  *
  * This does not do a call to dmu_set_user() that is
  * up to the caller to do, in case you don't want to
  * return the znode
  *
  * Returns the new znode with its vnode exclusively locked, or NULL if a
  * vnode could not be obtained, the SA attribute lookup failed, or the
  * object's generation number is zero.  When "hdl" is NULL the SA handle
  * is created here from "db" (and destroyed again on failure); otherwise
  * the caller's handle is adopted.
  */
 static znode_t *
 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
     dmu_object_type_t obj_type, sa_handle_t *hdl)
 {
 	znode_t	*zp;
 	vnode_t *vp;
 	uint64_t mode;
 	uint64_t parent;
 	sa_bulk_attr_t bulk[9];
 	int count = 0;
 	int error;
 
 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 
 	/* The caller is expected to have reserved a vnode beforehand. */
 	KASSERT(curthread->td_vp_reserv > 0,
 	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
 	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
 	if (error != 0) {
 		kmem_cache_free(znode_cache, zp);
 		return (NULL);
 	}
 	/* Link the znode and vnode to each other. */
 	zp->z_vnode = vp;
 	vp->v_data = zp;
 
 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
 	zp->z_moved = 0;
 
 	/*
 	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
 	 * the zfs_znode_move() callback.
 	 */
 	zp->z_sa_hdl = NULL;
 	zp->z_unlinked = 0;
 	zp->z_atime_dirty = 0;
 	zp->z_mapcnt = 0;
 	zp->z_id = db->db_object;
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;	/* the bytes 'z', 'F', 'S' in ASCII */
 	zp->z_sync_cnt = 0;
 
 	vp = ZTOV(zp);
 
 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
 
 	/*
 	 * Fetch the attributes cached in the znode in a single bulk lookup.
 	 * "mode" and "parent" are read into locals; the rest go straight
 	 * into the znode.
 	 */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
 	    &zp->z_links, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 	    &zp->z_atime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 	    &zp->z_uid, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
 	    &zp->z_gid, 8);
 
 	/*
 	 * A failed lookup or a zero generation means there is no usable
 	 * object here: undo everything done so far.  The SA handle is only
 	 * destroyed if it was created by this function.
 	 */
 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
 		if (hdl == NULL)
 			sa_handle_destroy(zp->z_sa_hdl);
 		zfs_vnode_forget(vp);
 		zp->z_vnode = NULL;
 		kmem_cache_free(znode_cache, zp);
 		return (NULL);
 	}
 
 	zp->z_mode = mode;
 
 	vp->v_type = IFTOVT((mode_t)mode);
 
 	/* Per-type setup; some types use a different operations vector. */
 	switch (vp->v_type) {
 	case VDIR:
 		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
 		break;
 #ifdef illumos
 	case VBLK:
 	case VCHR:
 		{
 			uint64_t rdev;
 			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
 			    &rdev, sizeof (rdev)) == 0);
 
 			vp->v_rdev = zfs_cmpldev(rdev);
 		}
 		break;
 #endif
 	case VFIFO:
 #ifdef illumos
 	case VSOCK:
 	case VDOOR:
 #endif
 		vp->v_op = &zfs_fifoops;
 		break;
 	case VREG:
 		/* Regular files directly under the shares directory. */
 		if (parent == zfsvfs->z_shares_dir) {
 			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
 			vp->v_op = &zfs_shareops;
 		}
 		break;
 #ifdef illumos
 	case VLNK:
 		vn_setops(vp, zfs_symvnodeops);
 		break;
 	default:
 		vn_setops(vp, zfs_evnodeops);
 		break;
 #endif
 	}
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
 	membar_producer();
 	/*
 	 * Everything else must be valid before assigning z_zfsvfs makes the
 	 * znode eligible for zfs_znode_move().
 	 */
 	zp->z_zfsvfs = zfsvfs;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	/*
 	 * Acquire vnode lock before making it available to the world.
 	 */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VN_LOCK_AREC(vp);
 	if (vp->v_type != VFIFO)
 		VN_LOCK_ASHARE(vp);
 
 #ifdef illumos
 	VFS_HOLD(zfsvfs->z_vfs);
 #endif
 	return (zp);
 }
 
 /*
  * Statically zero-initialized placeholders; zfs_mknode() writes them as
  * the XATTR, PAD and ZNODE_ACL attributes of DMU_OT_ZNODE objects.
  */
 static uint64_t empty_xattr;
 static uint64_t pad[4];
 static zfs_acl_phys_t acl_phys;
 /*
  * Create a new DMU object to hold a zfs znode.
  *
  *	IN:	dzp	- parent directory for new znode
  *		vap	- file attributes for new znode
  *		tx	- dmu transaction id for zap operations
  *		cr	- credentials of caller
  *		flag	- flags:
  *			  IS_ROOT_NODE	- new object will be root
  *			  IS_XATTR	- new object is an attribute
  *		acl_ids	- initial ACL, mode and owner/group FUIDs for the
  *			  new object
  *
  *	OUT:	zpp	- allocated znode
  *
  */
 void
 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
 {
 	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
 	uint64_t	mode, size, links, parent, pflags;
 	uint64_t	dzp_pflags = 0;
 	uint64_t	rdev = 0;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	dmu_buf_t	*db;
 	timestruc_t	now;
 	uint64_t	gen, obj;
 	int		err;
 	int		bonuslen;
 	sa_handle_t	*sa_hdl;
 	dmu_object_type_t obj_type;
 	sa_bulk_attr_t	sa_attrs[ZPL_END];
 	int		cnt = 0;
 	zfs_acl_locator_cb_t locate = { 0 };
 
 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
 
 	/*
 	 * During replay the object number, timestamp and generation are
 	 * carried in the vattr; otherwise they are generated here.
 	 */
 	if (zfsvfs->z_replay) {
 		obj = vap->va_nodeid;
 		now = vap->va_ctime;		/* see zfs_replay_create() */
 		gen = vap->va_nblocks;		/* ditto */
 	} else {
 		obj = 0;
 		vfs_timestamp(&now);
 		gen = dmu_tx_get_txg(tx);
 	}
 
 	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
 	bonuslen = (obj_type == DMU_OT_SA) ?
 	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
 
 	/*
 	 * Create a new DMU object.
 	 */
 	/*
 	 * There's currently no mechanism for pre-reading the blocks that will
 	 * be needed to allocate a new object, so we accept the small chance
 	 * that there will be an i/o error and we will fail one of the
 	 * assertions below.
 	 */
 	if (vap->va_type == VDIR) {
 		if (zfsvfs->z_replay) {
 			VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    obj_type, bonuslen, tx));
 		} else {
 			obj = zap_create_norm(zfsvfs->z_os,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    obj_type, bonuslen, tx);
 		}
 	} else {
 		if (zfsvfs->z_replay) {
 			VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    obj_type, bonuslen, tx));
 		} else {
 			obj = dmu_object_alloc(zfsvfs->z_os,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    obj_type, bonuslen, tx);
 		}
 	}
 
 	/* Held until the end of the function. */
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
 	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
 
 	/*
 	 * If this is the root, fix up the half-initialized parent pointer
 	 * to reference the just-allocated physical data area.
 	 */
 	if (flag & IS_ROOT_NODE) {
 		dzp->z_id = obj;
 	} else {
 		dzp_pflags = dzp->z_pflags;
 	}
 
 	/*
 	 * If parent is an xattr, so am I.
 	 */
 	if (dzp_pflags & ZFS_XATTR) {
 		flag |= IS_XATTR;
 	}
 
 	if (zfsvfs->z_use_fuids)
 		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
 	else
 		pflags = 0;
 
 	if (vap->va_type == VDIR) {
 		size = 2;		/* contents ("." and "..") */
 		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
 	} else {
 		size = links = 0;
 	}
 
 	if (vap->va_type == VBLK || vap->va_type == VCHR) {
 		rdev = zfs_expldev(vap->va_rdev);
 	}
 
 	parent = dzp->z_id;
 	mode = acl_ids->z_mode;
 	if (flag & IS_XATTR)
 		pflags |= ZFS_XATTR;
 
 	/*
 	 * No execs denied will be determined when zfs_mode_compute() is called.
 	 */
 	pflags |= acl_ids->z_aclp->z_hints &
 	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
 	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
 
 	ZFS_TIME_ENCODE(&now, crtime);
 	ZFS_TIME_ENCODE(&now, ctime);
 
 	/* Caller-supplied atime/mtime take precedence over "now". */
 	if (vap->va_mask & AT_ATIME) {
 		ZFS_TIME_ENCODE(&vap->va_atime, atime);
 	} else {
 		ZFS_TIME_ENCODE(&now, atime);
 	}
 
 	if (vap->va_mask & AT_MTIME) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 	} else {
 		ZFS_TIME_ENCODE(&now, mtime);
 	}
 
 	/* Now add in all of the "SA" attributes */
 	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
 	    &sa_hdl));
 
 	/*
 	 * Setup the array of attributes to be replaced/set on the new file
 	 *
 	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
 	 * in the old znode_phys_t format.  Don't change this ordering
 	 */
 
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
 		    NULL, &atime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
 		    NULL, &mtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
 		    NULL, &ctime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
 		    NULL, &crtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
 		    NULL, &gen, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
 		    NULL, &mode, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
 		    NULL, &size, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
 		    NULL, &parent, 8);
 	} else {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
 		    NULL, &mode, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
 		    NULL, &size, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
 		    NULL, &gen, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
 		    &acl_ids->z_fuid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
 		    &acl_ids->z_fgid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
 		    NULL, &parent, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &pflags, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
 		    NULL, &atime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
 		    NULL, &mtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
 		    NULL, &ctime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
 		    NULL, &crtime, 16);
 	}
 
 	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
 
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
 		    &empty_xattr, 8);
 	}
 	/* RDEV is always present in the old layout, else only for devices. */
 	if (obj_type == DMU_OT_ZNODE ||
 	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
 		    NULL, &rdev, 8);
 
 	}
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &pflags, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
 		    &acl_ids->z_fuid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
 		    &acl_ids->z_fgid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
 		    sizeof (uint64_t) * 4);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
 		    &acl_phys, sizeof (zfs_acl_phys_t));
 	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
 		/*
 		 * The ACL is stored with the other attributes; the mode is
 		 * recomputed from it (pflags is passed by reference).
 		 */
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
 		    &acl_ids->z_aclp->z_acl_count, 8);
 		locate.cb_aclp = acl_ids->z_aclp;
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
 		    zfs_acl_data_locator, &locate,
 		    acl_ids->z_aclp->z_acl_bytes);
 		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
 		    acl_ids->z_fuid, acl_ids->z_fgid);
 	}
 
 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
 
 	if (!(flag & IS_ROOT_NODE)) {
 		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
 		ASSERT(*zpp != NULL);
 	} else {
 		/*
 		 * If we are creating the root node, the "parent" we
 		 * passed in is the znode for the root.
 		 */
 		*zpp = dzp;
 
 		(*zpp)->z_sa_hdl = sa_hdl;
 	}
 
 	(*zpp)->z_pflags = pflags;
 	(*zpp)->z_mode = mode;
 
 	if (vap->va_mask & AT_XVATTR)
 		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
 
 	/* Cases where the ACL was not written with the attributes above. */
 	if (obj_type == DMU_OT_ZNODE ||
 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
 		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
 	}
 	if (!(flag & IS_ROOT_NODE)) {
 		vnode_t *vp;
 
 		/* VV_FORCEINSMQ is set only for the insmntque() call. */
 		vp = ZTOV(*zpp);
 		vp->v_vflag |= VV_FORCEINSMQ;
 		err = insmntque(vp, zfsvfs->z_vfs);
 		vp->v_vflag &= ~VV_FORCEINSMQ;
 		KASSERT(err == 0, ("insmntque() failed: error %d", err));
 	}
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 }
 
 /*
  * Apply the optional attributes requested in "xvap" to the znode's
  * in-core state, marking each one handled as returned.  The caller is
  * assumed to follow up with an sa_bulk_update to push the changes out.
  *
  * Most attributes are single bits in zp->z_pflags and share one pattern,
  * captured by the helper macro below; the creation time and the
  * anti-virus scanstamp are handled separately.
  */
 #define	ZFS_XVA_APPLY_FLAG(xat, zflag, member)				\
 	do {								\
 		if (XVA_ISSET_REQ(xvap, xat)) {				\
 			ZFS_ATTR_SET(zp, zflag, opt->member,		\
 			    zp->z_pflags, tx);				\
 			XVA_SET_RTN(xvap, xat);				\
 		}							\
 	} while (0)
 
 void
 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
 {
 	xoptattr_t *opt;
 
 	opt = xva_getxoptattr(xvap);
 	ASSERT(opt);
 
 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
 		uint64_t crtime[2];
 
 		ZFS_TIME_ENCODE(&opt->xoa_createtime, crtime);
 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
 		    &crtime, sizeof (crtime), tx);
 		XVA_SET_RTN(xvap, XAT_CREATETIME);
 	}
 
 	ZFS_XVA_APPLY_FLAG(XAT_READONLY, ZFS_READONLY, xoa_readonly);
 	ZFS_XVA_APPLY_FLAG(XAT_HIDDEN, ZFS_HIDDEN, xoa_hidden);
 	ZFS_XVA_APPLY_FLAG(XAT_SYSTEM, ZFS_SYSTEM, xoa_system);
 	ZFS_XVA_APPLY_FLAG(XAT_ARCHIVE, ZFS_ARCHIVE, xoa_archive);
 	ZFS_XVA_APPLY_FLAG(XAT_IMMUTABLE, ZFS_IMMUTABLE, xoa_immutable);
 	ZFS_XVA_APPLY_FLAG(XAT_NOUNLINK, ZFS_NOUNLINK, xoa_nounlink);
 	ZFS_XVA_APPLY_FLAG(XAT_APPENDONLY, ZFS_APPENDONLY, xoa_appendonly);
 	ZFS_XVA_APPLY_FLAG(XAT_NODUMP, ZFS_NODUMP, xoa_nodump);
 	ZFS_XVA_APPLY_FLAG(XAT_OPAQUE, ZFS_OPAQUE, xoa_opaque);
 	ZFS_XVA_APPLY_FLAG(XAT_AV_QUARANTINED, ZFS_AV_QUARANTINED,
 	    xoa_av_quarantined);
 	ZFS_XVA_APPLY_FLAG(XAT_AV_MODIFIED, ZFS_AV_MODIFIED, xoa_av_modified);
 
 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
 		zfs_sa_set_scanstamp(zp, xvap, tx);
 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
 	}
 
 	ZFS_XVA_APPLY_FLAG(XAT_REPARSE, ZFS_REPARSE, xoa_reparse);
 	ZFS_XVA_APPLY_FLAG(XAT_OFFLINE, ZFS_OFFLINE, xoa_offline);
 	ZFS_XVA_APPLY_FLAG(XAT_SPARSE, ZFS_SPARSE, xoa_sparse);
 }
 #undef	ZFS_XVA_APPLY_FLAG
 
 /*
  * Look up the znode for object "obj_num", instantiating a new
  * znode/vnode pair if none is currently attached to the object.
  *
  * On success returns 0 and stores the znode in *zpp; for an existing
  * znode a hold has been taken on its vnode.  On failure *zpp is NULL
  * and the return value is the error from sa_buf_hold(), EINVAL if the
  * object's bonus buffer is not a znode/SA bonus, ENOENT if a znode could
  * not be constructed, or the error from insmntque().
  *
  * A vnode is reserved for the duration of the call and the reservation
  * is dropped on every return path.
  */
 int
 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
 {
 	dmu_object_info_t doi;
 	dmu_buf_t	*db;
 	znode_t		*zp;
 	vnode_t		*vp;
 	sa_handle_t	*hdl;
 	int locked;
 	int err;
 
 	getnewvnode_reserve(1);
 again:
 	*zpp = NULL;
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
 
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		getnewvnode_drop_reserve();
 		return (err);
 	}
 
 	/* Reject objects whose bonus buffer cannot hold znode data. */
 	dmu_object_info_from_db(db, &doi);
 	if (doi.doi_bonus_type != DMU_OT_SA &&
 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 #ifdef __FreeBSD__
 		getnewvnode_drop_reserve();
 #endif
 		return (SET_ERROR(EINVAL));
 	}
 
 	hdl = dmu_buf_get_user(db);
 	if (hdl != NULL) {
 		zp  = sa_get_userdata(hdl);
 
 		/*
 		 * Since "SA" does immediate eviction we
 		 * should never find a sa handle that doesn't
 		 * know about the znode.
 		 */
 		ASSERT3P(zp, !=, NULL);
 		ASSERT3U(zp->z_id, ==, obj_num);
 		*zpp = zp;
 		vp = ZTOV(zp);
 
 		/* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
 		VN_HOLD(vp);
 
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 
 		locked = VOP_ISLOCKED(vp);
 		VI_LOCK(vp);
 		if ((vp->v_iflag & VI_DOOMED) != 0 &&
 		    locked != LK_EXCLUSIVE) {
 			/*
 			 * The vnode is doomed and this thread doesn't
 			 * hold the exclusive lock on it, so the vnode
 			 * must be being reclaimed by another thread.
 			 * Otherwise the doomed vnode is being reclaimed
 			 * by this thread and zfs_zget is called from
 			 * ZIL internals.
 			 */
 			VI_UNLOCK(vp);
 
 			/*
 			 * XXX vrele() locks the vnode when the last reference
 			 * is dropped.  Although in this case the vnode is
 			 * doomed / dead and so no inactivation is required,
 			 * the vnode lock is still acquired.  That could result
 			 * in a LOR with z_teardown_lock if another thread holds
 			 * the vnode's lock and tries to take z_teardown_lock.
 			 * But that is only possible if the other thread performs
 			 * a ZFS vnode operation on the vnode.  That either
 			 * should not happen if the vnode is dead or the thread
 			 * should also have a reference to the vnode and thus
 			 * our reference is not last.
 			 */
 			VN_RELE(vp);
 			goto again;
 		}
 		VI_UNLOCK(vp);
 		getnewvnode_drop_reserve();
 		return (0);
 	}
 
 	/*
 	 * Not found create new znode/vnode
 	 * but only if file exists.
 	 *
 	 * There is a small window where zfs_vget() could
 	 * find this object while a file create is still in
 	 * progress.  This is checked for in zfs_znode_alloc()
 	 *
 	 * if zfs_znode_alloc() fails it will drop the hold on the
 	 * bonus buffer.
 	 */
 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
 	    doi.doi_bonus_type, NULL);
 	if (zp == NULL) {
 		err = SET_ERROR(ENOENT);
 	} else {
 		*zpp = zp;
 	}
 	if (err == 0) {
 		vp = ZTOV(zp);
 
 		err = insmntque(vp, zfsvfs->z_vfs);
 		if (err == 0) {
 			vp->v_hash = obj_num;
 			VOP_UNLOCK(vp, 0);
 		} else {
 			/* Tear down the znode that was just built. */
 			zp->z_vnode = NULL;
 			zfs_znode_dmu_fini(zp);
 			zfs_znode_free(zp);
 			*zpp = NULL;
 		}
 	}
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 	getnewvnode_drop_reserve();
 	return (err);
 }
 
 /*
  * Re-attach an existing in-core znode to its on-disk object and reload
  * the attribute values cached in the znode.
  *
  * Any cached ACL is discarded first.  Returns 0 on success.  Returns the
  * error from sa_buf_hold(), EINVAL if the object's bonus buffer is not a
  * znode/SA bonus, or EIO if the attributes cannot be read, the
  * generation number no longer matches, or the object's file type differs
  * from the vnode's.  On the EIO paths the znode is left without an SA
  * handle.
  */
 int
 zfs_rezget(znode_t *zp)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	vnode_t *vp;
 	uint64_t obj_num = zp->z_id;
 	uint64_t mode, size;
 	sa_bulk_attr_t bulk[8];
 	int err;
 	int count = 0;
 	uint64_t gen;
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
 
 	mutex_enter(&zp->z_acl_lock);
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 
 	mutex_exit(&zp->z_acl_lock);
 	ASSERT(zp->z_sa_hdl == NULL);
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (err);
 	}
 
 	dmu_object_info_from_db(db, &doi);
 	if (doi.doi_bonus_type != DMU_OT_SA &&
 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (SET_ERROR(EINVAL));
 	}
 
 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
 
 	/* Remember the old size so a change can be detected below. */
 	size = zp->z_size;
 
 	/* reload cached values */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
 	    &gen, sizeof (gen));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, sizeof (zp->z_size));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
 	    &zp->z_links, sizeof (zp->z_links));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 	    &zp->z_atime, sizeof (zp->z_atime));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 	    &zp->z_uid, sizeof (zp->z_uid));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
 	    &zp->z_gid, sizeof (zp->z_gid));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 	    &mode, sizeof (mode));
 
 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (SET_ERROR(EIO));
 	}
 
 	zp->z_mode = mode;
 
 	/* A different generation means this is no longer the same file. */
 	if (gen != zp->z_gen) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		return (SET_ERROR(EIO));
 	}
 
 	/*
 	 * It is highly improbable but still quite possible that two
 	 * objects in different datasets are created with the same
 	 * object numbers and in transaction groups with the same
 	 * numbers.  znodes corresponding to those objects would
 	 * have the same z_id and z_gen, but their other attributes
 	 * may be different.
 	 * zfs recv -F may replace one of such objects with the other.
 	 * As a result file properties recorded in the replaced
 	 * object's vnode may no longer match the received object's
 	 * properties.  At present the only cached property is the
 	 * files type recorded in v_type.
 	 * So, handle this case by leaving the old vnode and znode
 	 * disassociated from the actual object.  A new vnode and a
 	 * znode will be created if the object is accessed
 	 * (e.g. via a look-up).  The old vnode and znode will be
 	 * recycled when the last vnode reference is dropped.
 	 */
 	vp = ZTOV(zp);
 	if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 		/* Use SET_ERROR() like the other failure paths above. */
 		return (SET_ERROR(EIO));
 	}
 
 	zp->z_unlinked = (zp->z_links == 0);
 	zp->z_blksz = doi.doi_data_block_size;
 	vn_pages_remove(vp, 0, 0);
 	if (zp->z_size != size)
 		vnode_pager_setsize(vp, zp->z_size);
 
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
 
 	return (0);
 }
 
 /*
  * Free the on-disk object backing a znode, together with its external
  * ACL object if it has one, then release the in-core znode.
  *
  * The DMU frees and the SA handle teardown happen under the object hold
  * for zp->z_id; the znode itself is freed after the hold is dropped.
  */
 void
 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
 {
 	/* Captured up front: zp is freed at the end of this function. */
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	objset_t *os = zfsvfs->z_os;
 	uint64_t obj = zp->z_id;
 	uint64_t acl_obj = zfs_external_acl(zp);
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
 	if (acl_obj) {
 		/* An external ACL object is not expected on SA znodes. */
 		VERIFY(!zp->z_is_sa);
 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
 	}
 	VERIFY(0 == dmu_object_free(os, obj, tx));
 	zfs_znode_dmu_fini(zp);
 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 	zfs_znode_free(zp);
 }
 
 /*
  * Release a znode that is going inactive.
  *
  * An unlinked znode is handed to zfs_rmnode() to be removed from the
  * file system; any other znode has its SA handle torn down and is freed.
  * The object hold is taken so zfs_zget() cannot run concurrently for
  * this object while the decision is made.
  */
 void
 zfs_zinactive(znode_t *zp)
 {
 	/* Saved now because zp may be freed before the hold is dropped. */
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t obj = zp->z_id;
 
 	ASSERT(zp->z_sa_hdl);
 
 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
 
 	if (!zp->z_unlinked) {
 		zfs_znode_dmu_fini(zp);
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 		zfs_znode_free(zp);
 	} else {
 		/*
 		 * Last reference to a file with no links: remove the file
 		 * from the file system.
 		 */
 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
 		zfs_rmnode(zp);
 	}
 }
 
 /*
  * Release an in-core znode whose SA handle has already been destroyed:
  * unlink it from the file system's list of znodes, free any cached ACL
  * and return the znode to znode_cache.
  */
 void
 zfs_znode_free(znode_t *zp)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
 	ASSERT(zp->z_sa_hdl == NULL);
 	zp->z_vnode = NULL;
 	/* z_zfsvfs is invalidated and the list updated under z_znodes_lock. */
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	POINTER_INVALIDATE(&zp->z_zfsvfs);
 	list_remove(&zfsvfs->z_all_znodes, zp);
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 
 	kmem_cache_free(znode_cache, zp);
 
 #ifdef illumos
 	VFS_RELE(zfsvfs->z_vfs);
 #endif
 }
 
 /*
  * Prepare timestamp updates for a znode using the current time.
  *
  * "flag" selects which timestamps to refresh (AT_ATIME, AT_MTIME,
  * AT_CTIME).  The access time is written into the znode; the modify and
  * change times are written into the caller's "mtime" and "ctime" arrays.
  * "have_tx" tells whether the caller has a transaction: if so the atime
  * is considered clean and z_seq is bumped, otherwise the atime is marked
  * dirty.  When FUIDs are in use, mtime/ctime updates also set the
  * archive (and, for mtime, AV-modified) flags in z_pflags.
  */
 void
 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
     uint64_t ctime[2], boolean_t have_tx)
 {
 	timestruc_t	stamp;
 
 	vfs_timestamp(&stamp);
 
 	/* will sa_bulk_update happen really soon? */
 	if (!have_tx) {
 		zp->z_atime_dirty = 1;
 	} else {
 		zp->z_atime_dirty = 0;
 		zp->z_seq++;
 	}
 
 	if (flag & AT_ATIME) {
 		ZFS_TIME_ENCODE(&stamp, zp->z_atime);
 	}
 
 	if (flag & AT_MTIME) {
 		ZFS_TIME_ENCODE(&stamp, mtime);
 		if (zp->z_zfsvfs->z_use_fuids)
 			zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED);
 	}
 
 	if (flag & AT_CTIME) {
 		ZFS_TIME_ENCODE(&stamp, ctime);
 		if (zp->z_zfsvfs->z_use_fuids)
 			zp->z_pflags |= ZFS_ARCHIVE;
 	}
 }
 
 /*
  * Grow the block size for a file.
  *
  *	IN:	zp	- znode of the file whose block size should grow.
  *		size	- requested block size
  *		tx	- open transaction.
  *
  * Nothing happens if the request would not enlarge the block size, if
  * the file already extends past its current block, or if the DMU
  * reports ENOTSUP.  Otherwise zp->z_blksz is refreshed with the block
  * size the object actually ended up with.
  *
  * NOTE: this function assumes that the znode is write locked.
  */
 void
 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
 {
 	u_longlong_t	nblk_unused;
 	int		rc;
 
 	/*
 	 * Never shrink.  And if the file size is already greater than the
 	 * current blocksize we will not grow: once there is more than one
 	 * block in a file, the blocksize cannot change.
 	 */
 	if (size <= zp->z_blksz ||
 	    (zp->z_blksz && zp->z_size > zp->z_blksz))
 		return;
 
 	rc = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
 	    size, 0, tx);
 
 	if (rc != ENOTSUP) {
 		ASSERT0(rc);
 
 		/* What blocksize did we actually get? */
 		dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl),
 		    &zp->z_blksz, &nblk_unused);
 	}
 }
 
 #ifdef illumos
 /*
  * This is a dummy interface used when pvn_vplist_dirty() should *not*
  * be calling back into the fs for a putpage().  E.g.: when truncating
  * a file, the pages being "thrown away" don't need to be written out.
  *
  * It must never actually be called: it asserts unconditionally and
  * otherwise returns 0.
  */
 /* ARGSUSED */
 static int
 zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
     int flags, cred_t *cr)
 {
 	ASSERT(0);
 	return (0);
 }
 #endif
 
 /*
  * Increase the file length
  *
  *	IN:	zp	- znode of file to extend.
  *		end	- new end-of-file
  *
  *	RETURN:	0 on success (including when the file is already at least
  *		"end" bytes long), error code on failure
  */
 static int
 zfs_extend(znode_t *zp, uint64_t end)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t newblksz = 0;
 	dmu_tx_t *tx;
 	rl_t *rl;
 	int error;
 
 	/* z_size is about to change, so take the whole file as writer. */
 	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
 
 	/* Already long enough: nothing to do. */
 	if (end <= zp->z_size) {
 		zfs_range_unlock(rl);
 		return (0);
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	/*
 	 * If the file grows past its current block size and that block
 	 * size may still change, pick the new block size and hold the
 	 * range that will be rewritten.
 	 */
 	if (end > zp->z_blksz &&
 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
 		if (zp->z_blksz <= zfsvfs->z_max_blksz) {
 			newblksz = MIN(end, zfsvfs->z_max_blksz);
 		} else {
 			/*
 			 * File's blocksize is already larger than the
 			 * "recordsize" property.  Only let it grow to
 			 * the next power of 2.
 			 */
 			ASSERT(!ISP2(zp->z_blksz));
 			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
 		}
 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
 	}
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		zfs_range_unlock(rl);
 		return (error);
 	}
 
 	if (newblksz)
 		zfs_grow_blocksize(zp, newblksz, tx);
 
 	/* Record the new size in core, in the SA, and with the pager. */
 	zp->z_size = end;
 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 	    &zp->z_size, sizeof (zp->z_size), tx));
 	vnode_pager_setsize(ZTOV(zp), end);
 
 	zfs_range_unlock(rl);
 	dmu_tx_commit(tx);
 
 	return (0);
 }
 
 /*
  * Free a range of a file's data.
  *
  *	IN:	zp	- znode of file to free data in.
  *		off	- start of section to free.
  *		len	- length of section to free; clamped so the section
  *			  does not extend past the current end-of-file.
  *
  *	RETURN:	0 on success (also when "off" is at or beyond end-of-file),
  *		error code on failure
  */
 static int
 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 {
 	rl_t *range;
 	int err = 0;
 
 	/*
 	 * Take a writer lock on the section that is being freed.
 	 */
 	range = zfs_range_lock(zp, off, len, RL_WRITER);
 
 	/*
 	 * Only act when the section starts inside the file; otherwise
 	 * there is no data to free.
 	 */
 	if (off < zp->z_size) {
 		/* Do not free past the current end-of-file. */
 		if (off + len > zp->z_size)
 			len = zp->z_size - off;
 
 		err = dmu_free_long_range(zp->z_zfsvfs->z_os, zp->z_id,
 		    off, len);
 
 		if (err == 0) {
 			/*
 			 * In FreeBSD we cannot free block in the middle of
 			 * a file, but only at the end of a file, so this
 			 * code path should never happen.
 			 */
 			vnode_pager_setsize(ZTOV(zp), off);
 		}
 	}
 
 	zfs_range_unlock(range);
 
 	return (err);
 }
 
 /*
  * Truncate a file to a shorter length.
  *
  *	IN:	zp	- znode of the file being truncated.
  *		end	- new end-of-file.
  *
  *	RETURN:	0 on success (also when the file is already no longer
  *		than "end"), error code on failure
  */
 static int
 zfs_trunc(znode_t *zp, uint64_t end)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	vnode_t *vp = ZTOV(zp);
 	sa_bulk_attr_t attrs[2];
 	int nattrs = 0;
 	rl_t *whole_file;
 	dmu_tx_t *tx;
 	int err;
 
 	/*
 	 * z_size is about to change, so take a writer lock on the
 	 * entire file.
 	 */
 	whole_file = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * The file is already short enough; there is nothing to cut.
 	 */
 	if (zp->z_size <= end) {
 		err = 0;
 		goto unlock;
 	}
 
 	/* Release everything from the new end-of-file onwards. */
 	err = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1);
 	if (err != 0)
 		goto unlock;
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	dmu_tx_mark_netfree(tx);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
 		dmu_tx_abort(tx);
 		goto unlock;
 	}
 
 	/* The size attribute is always rewritten. */
 	zp->z_size = end;
 	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_SIZE(zfsvfs),
 	    NULL, &zp->z_size, sizeof (zp->z_size));
 
 	/* Truncating to zero length also clears the sparse flag. */
 	if (end == 0) {
 		zp->z_pflags &= ~ZFS_SPARSE;
 		SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &zp->z_pflags, 8);
 	}
 	VERIFY(sa_bulk_update(zp->z_sa_hdl, attrs, nattrs, tx) == 0);
 
 	dmu_tx_commit(tx);
 
 	/*
 	 * Clear any mapped pages in the truncated region.  This has to
 	 * happen outside of the transaction to avoid the possibility of
 	 * a deadlock with someone trying to push a page that we are
 	 * about to invalidate.
 	 */
 	vnode_pager_setsize(vp, end);
 
 unlock:
 	zfs_range_unlock(whole_file);
 
 	return (err);
 }
 
 /*
  * Free space in a file, extending or truncating it as required.
  *
  *	IN:	zp	- znode of file to free data in.
  *		off	- start of range
  *		len	- length of range (0 => truncate the file at "off")
  *		flag	- current file open mode flags.
  *		log	- TRUE if this action should be logged
  *
  * When "off" lies beyond the current end-of-file the file is simply
  * extended to off + len.  Otherwise the range is freed (or the file is
  * truncated when len is 0), and the file is extended afterwards if the
  * range reaches past end-of-file.
  *
  *	RETURN:	0 on success, error code on failure
  */
 int
 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	zilog_t *zilog = zfsvfs->z_log;
 	vnode_t *vp = ZTOV(zp);
 	uint64_t mtime[2], ctime[2];
 	sa_bulk_attr_t attrs[3];
 	int nattrs = 0;
 	uint64_t mode;
 	dmu_tx_t *tx;
 	int error;
 
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
 	    sizeof (mode));
 	if (error != 0)
 		return (error);
 
 	if (off > zp->z_size) {
 		/* The range starts past end-of-file: just grow the file. */
 		error = zfs_extend(zp, off + len);
 	} else {
 		/*
 		 * Check for any locks in the region to be freed.
 		 */
 		if (MANDLOCK(vp, (mode_t)mode)) {
 			uint64_t length = (len != 0) ? len : zp->z_size - off;
 
 			error = chklock(vp, FWRITE, off, length, flag, NULL);
 			if (error != 0)
 				return (error);
 		}
 
 		if (len == 0) {
 			error = zfs_trunc(zp, off);
 		} else {
 			error = zfs_free_range(zp, off, len);
 			if (error == 0 && off + len > zp->z_size)
 				error = zfs_extend(zp, off + len);
 		}
 	}
 
 	/* Stop here on failure, or when the caller asked for no logging. */
 	if (error != 0 || !log)
 		return (error);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error != 0) {
 		dmu_tx_abort(tx);
 		return (error);
 	}
 
 	/*
 	 * Queue the timestamp and flag attributes; the timestamp buffers
 	 * are filled in by zfs_tstamp_update_setup() before the update.
 	 */
 	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
 	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
 	SA_ADD_BULK_ATTR(attrs, nattrs, SA_ZPL_FLAGS(zfsvfs),
 	    NULL, &zp->z_pflags, 8);
 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
 	error = sa_bulk_update(zp->z_sa_hdl, attrs, nattrs, tx);
 	ASSERT(error == 0);
 
 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
 
 	dmu_tx_commit(tx);
 	return (0);
 }
 
 /*
  * Populate a new objset with the on-disk objects of a ZPL file system.
  *
  *	IN:	os	 - objset to populate.
  *		cr	 - credentials; supply the uid/gid of the root
  *			   directory.
  *		zplprops - nvlist of uint64 properties.  The version
  *			   property can only lower the version derived from
  *			   the pool; every other property is stored in the
  *			   master node.
  *		tx	 - transaction in which all objects are created.
  *
  * The steps are: create the master node ZAP, record the properties and
  * the ZPL version, create the SA attribute registration object (only for
  * versions that use SA), create the unlinked set, create the root
  * directory using a temporary zfsvfs_t/znode_t pair, and finally create
  * the shares directory.  The temporary structures are freed before
  * returning.  Failures are only checked through ASSERT()/VERIFY().
  */
 void
 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 {
 	uint64_t	moid, obj, sa_obj, version;
 	uint64_t	sense = ZFS_CASE_SENSITIVE;
 	uint64_t	norm = 0;
 	nvpair_t	*elem;
 	int		error;
 	int		i;
 	znode_t		*rootzp = NULL;
 	zfsvfs_t	*zfsvfs;
 	vattr_t		vattr;
 	znode_t		*zp;
 	zfs_acl_ids_t	acl_ids;
 
 	/*
 	 * First attempt to create master node.
 	 */
 	/*
 	 * In an empty objset, there are no blocks to read and thus
 	 * there can be no i/o errors (which we assert below).
 	 */
 	moid = MASTER_NODE_OBJ;
 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
 	    DMU_OT_NONE, 0, tx);
 	ASSERT(error == 0);
 
 	/*
 	 * Set starting attributes.
 	 */
 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
 		/* For the moment we expect all zpl props to be uint64_ts */
 		uint64_t val;
 		char *name;
 
 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
 		name = nvpair_name(elem);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
 			/* A requested version may only lower the default. */
 			if (val < version)
 				version = val;
 		} else {
 			error = zap_update(os, moid, name, 8, 1, &val, tx);
 		}
 		ASSERT(error == 0);
 		/* Remember the values needed to set up z_norm below. */
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
 			norm = val;
 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
 			sense = val;
 	}
 	ASSERT(version != 0);
 	/* NOTE(review): this zap_update() result is not asserted. */
 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
 
 	/*
 	 * Create zap object used for SA attribute registration
 	 */
 
 	if (version >= ZPL_VERSION_SA) {
 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
 		    DMU_OT_NONE, 0, tx);
 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 		ASSERT(error == 0);
 	} else {
 		/* No registration object; sa_setup() is given object 0. */
 		sa_obj = 0;
 	}
 	/*
 	 * Create a delete queue.
 	 */
 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
 
 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
 	ASSERT(error == 0);
 
 	/*
 	 * Create root znode.  Create minimal znode/vnode/zfsvfs
 	 * to allow zfs_mknode to work.
 	 */
 	VATTR_NULL(&vattr);
 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
 	vattr.va_type = VDIR;
 	vattr.va_mode = S_IFDIR|0755;
 	vattr.va_uid = crgetuid(cr);
 	vattr.va_gid = crgetgid(cr);
 
 	/* Temporary zfsvfs, zero-filled; freed at the end of this function. */
 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
 	/* Temporary root znode; only the fields set below are initialized. */
 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
 	rootzp->z_moved = 0;
 	rootzp->z_unlinked = 0;
 	rootzp->z_atime_dirty = 0;
 	rootzp->z_is_sa = USE_SA(version, os);
 
 	zfsvfs->z_os = os;
 	zfsvfs->z_parent = zfsvfs;
 	zfsvfs->z_version = version;
 	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
 	zfsvfs->z_use_sa = USE_SA(version, os);
 	zfsvfs->z_norm = norm;
 
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
 	    &zfsvfs->z_attr_table);
 
 	ASSERT(error == 0);
 
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
 	 */
 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
 	/*
 	 * Create the root directory.  With IS_ROOT_NODE, zfs_mknode() is
 	 * expected to hand back rootzp itself (asserted below).
 	 */
 	rootzp->z_zfsvfs = zfsvfs;
 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
 	    cr, NULL, &acl_ids));
 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, rootzp);
 	/* Record the root directory's object number in the master node. */
 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
 	ASSERT(error == 0);
 	zfs_acl_ids_free(&acl_ids);
 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
 
 	/* The temporary root znode is no longer needed. */
 	sa_handle_destroy(rootzp->z_sa_hdl);
 	kmem_cache_free(znode_cache, rootzp);
 
 	/*
 	 * Create shares directory
 	 */
 
 	error = zfs_create_share_dir(zfsvfs, tx);
 
 	ASSERT(error == 0);
 
 	/*
 	 * Tear down the temporary zfsvfs.
 	 * NOTE(review): z_znodes_lock and z_all_znodes are initialized
 	 * above but not destroyed here -- confirm whether that is intended.
 	 */
 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 }
 #endif /* _KERNEL */
 
 /*
  * Set up the SA attribute table for an objset.
  *
  * The SA registration object is looked up in the master node.  If the
  * entry does not exist (ENOENT), sa_setup() is still called, with
  * object number 0.  Any other lookup error is returned unchanged.
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
 {
 	uint64_t attr_obj = 0;
 	int err;
 
 	err = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &attr_obj);
 	if (err == 0 || err == ENOENT)
 		err = sa_setup(osp, attr_obj, zfs_attr_table, ZPL_END,
 		    sa_table);
 
 	return (err);
 }
 
 /*
  * Hold an object's bonus buffer and obtain a private SA handle for it.
  *
  *	IN:	osp	- objset containing the object.
  *		obj	- object number.
  *		tag	- hold tag; the same tag must be used for release.
  *	OUT:	hdlp	- SA handle.
  *		db	- held buffer.
  *
  * Only objects whose bonus type is DMU_OT_SA, or DMU_OT_ZNODE with a
  * bonus buffer of at least sizeof (znode_phys_t), are accepted; anything
  * else yields ENOTSUP.  On failure the buffer hold has been dropped.
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
     dmu_buf_t **db, void *tag)
 {
 	dmu_object_info_t doi;
 	int err;
 
 	err = sa_buf_hold(osp, obj, tag, db);
 	if (err != 0)
 		return (err);
 
 	dmu_object_info_from_db(*db, &doi);
 	if (doi.doi_bonus_type == DMU_OT_ZNODE) {
 		/* A znode bonus must be big enough for a znode_phys_t. */
 		if (doi.doi_bonus_size < sizeof (znode_phys_t))
 			err = SET_ERROR(ENOTSUP);
 	} else if (doi.doi_bonus_type != DMU_OT_SA) {
 		err = SET_ERROR(ENOTSUP);
 	}
 
 	if (err == 0)
 		err = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
 
 	/* Every failure after the hold gives the buffer back. */
 	if (err != 0)
 		sa_buf_rele(*db, tag);
 
 	return (err);
 }
 
 /*
  * Undo zfs_grab_sa_handle(): destroy the SA handle, then drop the hold
  * on the buffer.  "tag" must be the tag the buffer was held with.
  */
 void
 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
 {
 	sa_handle_destroy(hdl);
 	sa_buf_rele(db, tag);
 }
 
 /*
  * Given an object's SA handle, return its parent object number and
  * whether or not the object is an extended attribute directory.
  *
  *	IN:	osp	    - objset containing the object.
  *		hdl	    - SA handle of the object.
  *		sa_table    - attribute table of the objset; maps ZPL_*
  *			      indices to registered attribute numbers.
  *	OUT:	pobjp	    - parent object number.
  *		is_xattrdir - non-zero if the object is a directory that
  *			      has ZFS_XATTR set in its flags.
  *
  *	RETURN:	0 on success, EINVAL if the recorded parent is not a
  *		directory (and the object is not an xattr directory),
  *		other error code on lookup failure.  The OUT parameters
  *		are only both valid on success.
  */
 static int
 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
     uint64_t *pobjp, int *is_xattrdir)
 {
 	uint64_t parent;
 	uint64_t pflags;
 	uint64_t mode;
 	uint64_t parent_mode;
 	sa_bulk_attr_t bulk[3];
 	sa_handle_t *sa_hdl;
 	dmu_buf_t *sa_db;
 	int count = 0;
 	int error;
 
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
 	    &parent, sizeof (parent));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
 	    &pflags, sizeof (pflags));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
 	    &mode, sizeof (mode));
 
 	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
 		return (error);
 
 	/*
 	 * When a link is removed its parent pointer is not changed and will
 	 * be invalid.  There are two cases where a link is removed but the
 	 * file stays around, when it goes to the delete queue and when there
 	 * are additional links.  So the recorded parent is validated by
 	 * reading its mode before it is trusted.
 	 */
 	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Translate ZPL_MODE through sa_table, as the lookups above do,
 	 * rather than passing the raw table index as an attribute number.
 	 */
 	error = sa_lookup(sa_hdl, sa_table[ZPL_MODE], &parent_mode,
 	    sizeof (parent_mode));
 	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
 	if (error != 0)
 		return (error);
 
 	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
 
 	/*
 	 * Extended attributes can be applied to files, directories, etc.
 	 * Otherwise the parent must be a directory.
 	 */
 	if (!*is_xattrdir && !S_ISDIR(parent_mode))
 		return (SET_ERROR(EINVAL));
 
 	*pobjp = parent;
 
 	return (0);
 }
 
 /*
  * Given an object's SA handle, fill in some zpl level statistics:
  * the mode, generation, link count and ctime fields of "sb" are read
  * in a single bulk lookup.
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
     zfs_stat_t *sb)
 {
 	sa_bulk_attr_t attrs[4];
 	int nattrs = 0;
 	int err;
 
 	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_MODE], NULL,
 	    &sb->zs_mode, sizeof (sb->zs_mode));
 	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_GEN], NULL,
 	    &sb->zs_gen, sizeof (sb->zs_gen));
 	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_LINKS], NULL,
 	    &sb->zs_links, sizeof (sb->zs_links));
 	SA_ADD_BULK_ATTR(attrs, nattrs, sa_table[ZPL_CTIME], NULL,
 	    &sb->zs_ctime, sizeof (sb->zs_ctime));
 
 	err = sa_bulk_lookup(hdl, attrs, nattrs);
 
 	return (err);
 }
 
 /*
  * Build the path of an object by walking its parent pointers up to the
  * root of the objset.
  *
  *	IN:	osp	 - objset containing the object.
  *		obj	 - object number to resolve.
  *		hdl	 - caller-owned SA handle of "obj"; never released
  *			   here.
  *		sa_table - attribute table of the objset.
  *		len	 - size of "buf" in bytes.
  *	OUT:	buf	 - NUL-terminated path, only valid on success.
  *
  * The path is assembled right to left at the end of "buf", one
  * "/component" per level, and moved to the start of the buffer once the
  * root (an object that is its own parent) is reached.  Extended
  * attribute directories appear as the component "<xattrdir>".
  *
  *	RETURN:	0 on success, ENAMETOOLONG if the path does not fit in
  *		"buf", other error code on lookup failure
  */
 static int
 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
     sa_attr_type_t *sa_table, char *buf, int len)
 {
 	sa_handle_t *sa_hdl;
 	sa_handle_t *prevhdl = NULL;
 	dmu_buf_t *prevdb = NULL;
 	dmu_buf_t *sa_db = NULL;
 	char *path = buf + len - 1;
 	int error;
 
 	*path = '\0';
 	sa_hdl = hdl;
 
 	for (;;) {
 		uint64_t pobj;
 		char component[MAXNAMELEN + 2];
 		size_t complen;
 		int is_xattrdir;
 
 		/* Drop the handle of the level below the current one. */
 		if (prevdb)
 			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
 
 		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
 		    &is_xattrdir)) != 0)
 			break;
 
 		/* An object that is its own parent is the root. */
 		if (pobj == obj) {
 			if (path[0] != '/') {
 				/* The lone "/" needs one byte of room. */
 				if (path == buf) {
 					error = SET_ERROR(ENAMETOOLONG);
 					break;
 				}
 				*--path = '/';
 			}
 			break;
 		}
 
 		component[0] = '/';
 		if (is_xattrdir) {
 			(void) sprintf(component + 1, "<xattrdir>");
 		} else {
 			error = zap_value_search(osp, pobj, obj,
 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
 			if (error != 0)
 				break;
 		}
 
 		/*
 		 * Refuse to prepend a component that does not fit in front
 		 * of what has been assembled so far; an assertion alone
 		 * would let the copy run past the start of buf whenever
 		 * ASSERT() is compiled out.
 		 */
 		complen = strlen(component);
 		if (complen > (size_t)(path - buf)) {
 			error = SET_ERROR(ENAMETOOLONG);
 			break;
 		}
 		path -= complen;
 		ASSERT(path >= buf);
 		bcopy(component, path, complen);
 		obj = pobj;
 
 		/*
 		 * Handles grabbed by this loop are released one iteration
 		 * late; the caller's handle is never queued for release.
 		 */
 		if (sa_hdl != hdl) {
 			prevhdl = sa_hdl;
 			prevdb = sa_db;
 		}
 		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
 		if (error != 0) {
 			/* Fall back so the pending handle is released below. */
 			sa_hdl = prevhdl;
 			sa_db = prevdb;
 			break;
 		}
 	}
 
 	/* Release the last handle this function grabbed, if any. */
 	if (sa_hdl != NULL && sa_hdl != hdl) {
 		ASSERT(sa_db != NULL);
 		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
 	}
 
 	/* Slide the finished path (including its NUL) to the front. */
 	if (error == 0)
 		(void) memmove(buf, path, buf + len - path);
 
 	return (error);
 }
 
 /*
  * Resolve an object number to its path within the objset.
  *
  *	IN:	osp	- objset containing the object.
  *		obj	- object number to resolve.
  *		len	- size of "buf" in bytes.
  *	OUT:	buf	- resulting path on success.
  *
  *	RETURN:	0 on success, error code on failure
  */
 int
 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
 {
 	sa_attr_type_t *sa_table;
 	sa_handle_t *hdl;
 	dmu_buf_t *db;
 	int error;
 
 	if ((error = zfs_sa_setup(osp, &sa_table)) != 0)
 		return (error);
 
 	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
 		return (error);
 
 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
 	/* The handle is released whether or not the path was built. */
 	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
 /*
  * Resolve an object number to its zpl statistics and its path.
  *
  *	IN:	osp	- objset containing the object.
  *		obj	- object number to resolve.
  *		len	- size of "buf" in bytes.
  *	OUT:	sb	- statistics of the object.
  *		buf	- resulting path on success; its last byte is set
  *			  to NUL before anything else is attempted.
  *
  * The path is only looked up once the statistics have been read
  * successfully.
  *
  *	RETURN:	0 on success, error code on failure
  */
 int
 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
     char *buf, int len)
 {
 	sa_attr_type_t *sa_table;
 	sa_handle_t *hdl;
 	dmu_buf_t *db;
 	int error;
 
 	buf[len - 1] = '\0';
 
 	if ((error = zfs_sa_setup(osp, &sa_table)) != 0)
 		return (error);
 
 	if ((error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG)) != 0)
 		return (error);
 
 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
 	if (error == 0)
 		error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
 	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
 #ifdef _KERNEL
 /*
  * Find the parent directory of a znode and the name the znode has in it.
  *
  *	IN:	zp	- znode to look up.
  *	OUT:	dzpp	- parent znode, obtained with zfs_zget().
  *		buf	- name of "zp" in the parent directory.
  *
  *	RETURN:	0 on success, EINVAL for extended attributes and for the
  *		root object (which has no name), other error code on
  *		lookup failure
  */
 int
 zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf)
 {
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	uint64_t parent_obj;
 	int is_xattrdir;
 	int error;
 
 	/* Extended attributes should not be visible as regular files. */
 	if ((zp->z_pflags & ZFS_XATTR) != 0)
 		return (SET_ERROR(EINVAL));
 
 	error = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl,
 	    zfsvfs->z_attr_table, &parent_obj, &is_xattrdir);
 	if (error != 0)
 		return (error);
 	ASSERT0(is_xattrdir);
 
 	/* No name as this is a root object. */
 	if (parent_obj == zp->z_id)
 		return (SET_ERROR(EINVAL));
 
 	/* Search the parent directory for the entry that points at zp. */
 	error = zap_value_search(zfsvfs->z_os, parent_obj, zp->z_id,
 	    ZFS_DIRENT_OBJ(-1ULL), buf);
 	if (error != 0)
 		return (error);
 
 	return (zfs_zget(zfsvfs, parent_obj, dzpp));
 }
 #endif /* _KERNEL */
Index: stable/10
===================================================================
--- stable/10	(revision 324004)
+++ stable/10	(revision 324005)

Property changes on: stable/10
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r323479,323491