diff --git a/config/kernel-inode-times.m4 b/config/kernel-inode-times.m4
index aae95abf1720..4d861596ed0b 100644
--- a/config/kernel-inode-times.m4
+++ b/config/kernel-inode-times.m4
@@ -1,93 +1,171 @@
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
 
 	dnl #
 	dnl # 5.6 API change
 	dnl # timespec64_trunc() replaced by timestamp_truncate() interface.
 	dnl #
 	ZFS_LINUX_TEST_SRC([timestamp_truncate], [
 		#include <linux/fs.h>
 	],[
 		struct timespec64 ts;
 		struct inode ip;
 
 		memset(&ts, 0, sizeof(ts));
 		ts = timestamp_truncate(ts, &ip);
 	])
 
 	dnl #
 	dnl # 4.18 API change
 	dnl # i_atime, i_mtime, and i_ctime changed from timespec to timespec64.
 	dnl #
 	ZFS_LINUX_TEST_SRC([inode_times], [
 		#include <linux/fs.h>
 	],[
 		struct inode ip;
 		struct timespec ts;
 
 		memset(&ip, 0, sizeof(ip));
 		ts = ip.i_mtime;
 	])
 
 	dnl #
 	dnl # 6.6 API change
 	dnl # i_ctime no longer directly accessible, must use
 	dnl # inode_get_ctime(ip), inode_set_ctime*(ip) to
 	dnl # read/write.
 	dnl #
 	ZFS_LINUX_TEST_SRC([inode_get_ctime], [
 		#include <linux/fs.h>
 	],[
 		struct inode ip;
 
 		memset(&ip, 0, sizeof(ip));
 		inode_get_ctime(&ip);
 	])
 
 	ZFS_LINUX_TEST_SRC([inode_set_ctime_to_ts], [
 		#include <linux/fs.h>
 	],[
 		struct inode ip;
 		struct timespec64 ts = {0};
 
 		memset(&ip, 0, sizeof(ip));
 		inode_set_ctime_to_ts(&ip, ts);
 	])
+
+	dnl #
+	dnl # 6.7 API change
+	dnl # i_atime/i_mtime no longer directly accessible, must use
+	dnl # inode_get_atime(ip)/inode_get_mtime(ip) and
+	dnl # inode_set_atime*(ip)/inode_set_mtime*(ip) to read/write.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_get_atime], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+
+		memset(&ip, 0, sizeof(ip));
+		inode_get_atime(&ip);
+	])
+
+	ZFS_LINUX_TEST_SRC([inode_get_mtime], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+
+		memset(&ip, 0, sizeof(ip));
+		inode_get_mtime(&ip);
+	])
+
+	ZFS_LINUX_TEST_SRC([inode_set_atime_to_ts], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+		struct timespec64 ts = {0};
+
+		memset(&ip, 0, sizeof(ip));
+		inode_set_atime_to_ts(&ip, ts);
+	])
+
+	ZFS_LINUX_TEST_SRC([inode_set_mtime_to_ts], [
+		#include <linux/fs.h>
+	],[
+		struct inode ip;
+		struct timespec64 ts = {0};
+
+		memset(&ip, 0, sizeof(ip));
+		inode_set_mtime_to_ts(&ip, ts);
+	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
 	AC_MSG_CHECKING([whether timestamp_truncate() exists])
 	ZFS_LINUX_TEST_RESULT([timestamp_truncate], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_TIMESTAMP_TRUNCATE, 1,
 		    [timestamp_truncate() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
 
 	AC_MSG_CHECKING([whether inode->i_*time's are timespec64])
 	ZFS_LINUX_TEST_RESULT([inode_times], [
 		AC_MSG_RESULT(no)
 	],[
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1,
 		    [inode->i_*time's are timespec64])
 	])
 
 	AC_MSG_CHECKING([whether inode_get_ctime() exists])
 	ZFS_LINUX_TEST_RESULT([inode_get_ctime], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_GET_CTIME, 1,
 		    [inode_get_ctime() exists in linux/fs.h])
 	],[
 		AC_MSG_RESULT(no)
 	])
 
 	AC_MSG_CHECKING([whether inode_set_ctime_to_ts() exists])
 	ZFS_LINUX_TEST_RESULT([inode_set_ctime_to_ts], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_INODE_SET_CTIME_TO_TS, 1,
 		    [inode_set_ctime_to_ts() exists in linux/fs.h])
 	],[
 		AC_MSG_RESULT(no)
 	])
+
+	AC_MSG_CHECKING([whether inode_get_atime() exists])
+	ZFS_LINUX_TEST_RESULT([inode_get_atime], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_GET_ATIME, 1,
+		    [inode_get_atime() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_set_atime_to_ts() exists])
+	ZFS_LINUX_TEST_RESULT([inode_set_atime_to_ts], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_SET_ATIME_TO_TS, 1,
+		    [inode_set_atime_to_ts() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_get_mtime() exists])
+	ZFS_LINUX_TEST_RESULT([inode_get_mtime], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_GET_MTIME, 1,
+		    [inode_get_mtime() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
+
+	AC_MSG_CHECKING([whether inode_set_mtime_to_ts() exists])
+	ZFS_LINUX_TEST_RESULT([inode_set_mtime_to_ts], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_INODE_SET_MTIME_TO_TS, 1,
+		    [inode_set_mtime_to_ts() exists in linux/fs.h])
+	],[
+		AC_MSG_RESULT(no)
+	])
 ])
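
For reference, the configure machinery compiles each ZFS_LINUX_TEST_SRC body
above as a throwaway kernel source and defines the matching HAVE_* symbol only
if it builds.  Conceptually, the inode_get_mtime probe reduces to a compile
test like the sketch below (the wrapping generated by the build macros is
omitted; this rendering is illustrative, not part of the patch):

	/*
	 * Hypothetical stand-alone rendering of the inode_get_mtime probe.
	 * This compiles only on kernels (6.7+) that provide the accessor;
	 * when it fails to build, HAVE_INODE_GET_MTIME is left undefined.
	 */
	#include <linux/fs.h>

	static void
	probe_inode_get_mtime(void)
	{
		struct inode ip;

		memset(&ip, 0, sizeof (ip));
		(void) inode_get_mtime(&ip);
	}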
diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h
index bd1462b16c4b..69b6c1185bb1 100644
--- a/include/os/linux/zfs/sys/zpl.h
+++ b/include/os/linux/zfs/sys/zpl.h
@@ -1,221 +1,241 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  */
 
 #ifndef	_SYS_ZPL_H
 #define	_SYS_ZPL_H
 
 #include <sys/mntent.h>
 #include <sys/vfs.h>
 #include <linux/aio.h>
 #include <linux/dcache_compat.h>
 #include <linux/exportfs.h>
 #include <linux/falloc.h>
 #include <linux/parser.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/vfs_compat.h>
 #include <linux/writeback.h>
 #include <linux/xattr_compat.h>
 
 /* zpl_inode.c */
 extern void zpl_vap_init(vattr_t *vap, struct inode *dir,
     umode_t mode, cred_t *cr);
 
 extern const struct inode_operations zpl_inode_operations;
 extern const struct inode_operations zpl_dir_inode_operations;
 extern const struct inode_operations zpl_symlink_inode_operations;
 extern const struct inode_operations zpl_special_inode_operations;
 
 /* zpl_file.c */
 extern const struct address_space_operations zpl_address_space_operations;
 extern const struct file_operations zpl_file_operations;
 extern const struct file_operations zpl_dir_file_operations;
 
 /* zpl_super.c */
 extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
 
 extern const struct super_operations zpl_super_operations;
 extern const struct export_operations zpl_export_operations;
 extern struct file_system_type zpl_fs_type;
 
 /* zpl_xattr.c */
 extern ssize_t zpl_xattr_list(struct dentry *dentry, char *buf, size_t size);
 extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
     const struct qstr *qstr);
 #if defined(CONFIG_FS_POSIX_ACL)
 #if defined(HAVE_SET_ACL)
 #if defined(HAVE_SET_ACL_IDMAP_DENTRY)
 extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
     struct posix_acl *acl, int type);
 #elif defined(HAVE_SET_ACL_USERNS)
 extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
     struct posix_acl *acl, int type);
 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
 extern int zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
     struct posix_acl *acl, int type);
 #else
 extern int zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type);
 #endif /* HAVE_SET_ACL_USERNS */
 #endif /* HAVE_SET_ACL */
 #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
 extern struct posix_acl *zpl_get_acl(struct inode *ip, int type, bool rcu);
 #elif defined(HAVE_GET_ACL)
 extern struct posix_acl *zpl_get_acl(struct inode *ip, int type);
 #endif
 extern int zpl_init_acl(struct inode *ip, struct inode *dir);
 extern int zpl_chmod_acl(struct inode *ip);
 #else
 static inline int
 zpl_init_acl(struct inode *ip, struct inode *dir)
 {
 	return (0);
 }
 
 static inline int
 zpl_chmod_acl(struct inode *ip)
 {
 	return (0);
 }
 #endif /* CONFIG_FS_POSIX_ACL */
 
 extern xattr_handler_t *zpl_xattr_handlers[];
 
 /* zpl_ctldir.c */
 extern const struct file_operations zpl_fops_root;
 extern const struct inode_operations zpl_ops_root;
 
 extern const struct file_operations zpl_fops_snapdir;
 extern const struct inode_operations zpl_ops_snapdir;
 extern const struct dentry_operations zpl_dops_snapdirs;
 
 extern const struct file_operations zpl_fops_shares;
 extern const struct inode_operations zpl_ops_shares;
 
 #if defined(HAVE_VFS_ITERATE) || defined(HAVE_VFS_ITERATE_SHARED)
 
 #define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
 	.actor = _actor,				\
 	.pos = _pos,					\
 }
 
 typedef struct dir_context zpl_dir_context_t;
 
 #define	zpl_dir_emit		dir_emit
 #define	zpl_dir_emit_dot	dir_emit_dot
 #define	zpl_dir_emit_dotdot	dir_emit_dotdot
 #define	zpl_dir_emit_dots	dir_emit_dots
 
 #else
 
 typedef struct zpl_dir_context {
 	void *dirent;
 	const filldir_t actor;
 	loff_t pos;
 } zpl_dir_context_t;
 
 #define	ZPL_DIR_CONTEXT_INIT(_dirent, _actor, _pos) {	\
 	.dirent = _dirent,				\
 	.actor = _actor,				\
 	.pos = _pos,					\
 }
 
 static inline bool
 zpl_dir_emit(zpl_dir_context_t *ctx, const char *name, int namelen,
     uint64_t ino, unsigned type)
 {
 	return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type));
 }
 
 static inline bool
 zpl_dir_emit_dot(struct file *file, zpl_dir_context_t *ctx)
 {
 	return (ctx->actor(ctx->dirent, ".", 1, ctx->pos,
 	    file_inode(file)->i_ino, DT_DIR) == 0);
 }
 
 static inline bool
 zpl_dir_emit_dotdot(struct file *file, zpl_dir_context_t *ctx)
 {
 	return (ctx->actor(ctx->dirent, "..", 2, ctx->pos,
 	    parent_ino(file_dentry(file)), DT_DIR) == 0);
 }
 
 static inline bool
 zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
 {
 	if (ctx->pos == 0) {
 		if (!zpl_dir_emit_dot(file, ctx))
 			return (false);
 		ctx->pos = 1;
 	}
 	if (ctx->pos == 1) {
 		if (!zpl_dir_emit_dotdot(file, ctx))
 			return (false);
 		ctx->pos = 2;
 	}
 	return (true);
 }
 #endif /* HAVE_VFS_ITERATE */
 
 #if defined(HAVE_INODE_TIMESTAMP_TRUNCATE)
 #define	zpl_inode_timestamp_truncate(ts, ip)	timestamp_truncate(ts, ip)
 #elif defined(HAVE_INODE_TIMESPEC64_TIMES)
 #define	zpl_inode_timestamp_truncate(ts, ip)	\
 	timespec64_trunc(ts, (ip)->i_sb->s_time_gran)
 #else
 #define	zpl_inode_timestamp_truncate(ts, ip)	\
 	timespec_trunc(ts, (ip)->i_sb->s_time_gran)
 #endif
 
 #if defined(HAVE_INODE_OWNER_OR_CAPABLE)
 #define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ip)
 #elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS)
 #define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ns, ip)
 #elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP)
 #define	zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip)
 #else
 #error "Unsupported kernel"
 #endif
 
 #if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
 #define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(ns, dentry, ia)
 #else
 /*
  * Use kernel-provided version, or our own from
  * linux/vfs_compat.h
  */
 #define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(dentry, ia)
 #endif
 
 #ifdef HAVE_INODE_GET_CTIME
 #define	zpl_inode_get_ctime(ip)	inode_get_ctime(ip)
 #else
 #define	zpl_inode_get_ctime(ip)	(ip->i_ctime)
 #endif
 #ifdef HAVE_INODE_SET_CTIME_TO_TS
 #define	zpl_inode_set_ctime_to_ts(ip, ts)	inode_set_ctime_to_ts(ip, ts)
 #else
 #define	zpl_inode_set_ctime_to_ts(ip, ts)	(ip->i_ctime = ts)
 #endif
+#ifdef HAVE_INODE_GET_ATIME
+#define	zpl_inode_get_atime(ip)	inode_get_atime(ip)
+#else
+#define	zpl_inode_get_atime(ip)	(ip->i_atime)
+#endif
+#ifdef HAVE_INODE_SET_ATIME_TO_TS
+#define	zpl_inode_set_atime_to_ts(ip, ts)	inode_set_atime_to_ts(ip, ts)
+#else
+#define	zpl_inode_set_atime_to_ts(ip, ts)	(ip->i_atime = ts)
+#endif
+#ifdef HAVE_INODE_GET_MTIME
+#define	zpl_inode_get_mtime(ip)	inode_get_mtime(ip)
+#else
+#define	zpl_inode_get_mtime(ip)	(ip->i_mtime)
+#endif
+#ifdef HAVE_INODE_SET_MTIME_TO_TS
+#define	zpl_inode_set_mtime_to_ts(ip, ts)	inode_set_mtime_to_ts(ip, ts)
+#else
+#define	zpl_inode_set_mtime_to_ts(ip, ts)	(ip->i_mtime = ts)
+#endif
 
 #endif	/* _SYS_ZPL_H */
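
Taken together, these shims give callers one way to read and write inode
timestamps across kernel versions: on 6.7+ they resolve to the new accessor
functions, and on older kernels to direct i_atime/i_mtime/i_ctime field
access.  A minimal usage sketch (the helper below is hypothetical and only
illustrates the pattern the later hunks apply):

	/* Hypothetical helper: stamp all three timestamps on an inode. */
	static void
	example_touch(struct inode *ip)
	{
		inode_timespec_t now = current_time(ip);

		zpl_inode_set_atime_to_ts(ip, now);
		zpl_inode_set_mtime_to_ts(ip, now);
		zpl_inode_set_ctime_to_ts(ip, now);
	}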
diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c
index 3ce18bbfacc0..b51df6a2ebe5 100644
--- a/module/os/linux/zfs/zfs_ctldir.c
+++ b/module/os/linux/zfs/zfs_ctldir.c
@@ -1,1294 +1,1294 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  *
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * LLNL-CODE-403049.
  * Rewritten for Linux by:
  *   Rohan Puri <rohan.puri15@gmail.com>
  *   Brian Behlendorf <behlendorf1@llnl.gov>
  * Copyright (c) 2013 by Delphix. All rights reserved.
  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  * Copyright (c) 2018 George Melikov. All Rights Reserved.
  * Copyright (c) 2019 Datto, Inc. All rights reserved.
  * Copyright (c) 2020 The MathWorks, Inc. All rights reserved.
  */
 
 /*
  * ZFS control directory (a.k.a. ".zfs")
  *
  * This directory provides a common location for all ZFS meta-objects.
  * Currently, this is only the 'snapshot' and 'shares' directory, but this may
  * expand in the future.  The elements are built dynamically, as the hierarchy
  * does not actually exist on disk.
  *
  * For 'snapshot', we don't want to have all snapshots always mounted, because
  * this would take up a huge amount of space in /etc/mnttab.  We have three
  * types of objects:
  *
  *	ctldir ------> snapshotdir -------> snapshot
  *                                             |
  *                                             |
  *                                             V
  *                                         mounted fs
  *
  * The 'snapshot' node contains just enough information to lookup '..' and act
  * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
  * perform an automount of the underlying filesystem and return the
  * corresponding inode.
  *
  * All mounts are handled automatically by a user mode helper which invokes
  * the mount procedure.  Unmounts are handled by allowing the mount
  * point to expire so the kernel may automatically unmount it.
  *
  * The '.zfs', '.zfs/snapshot', and all directories created under
  * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') all share the same
  * zfsvfs_t as the head filesystem (what '.zfs' lives under).
  *
  * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
  * (i.e. snapshots) are complete ZFS filesystems and have their own unique
  * zfsvfs_t.  However, the fsid reported by these mounts will be the same
  * as that used by the parent zfsvfs_t to make NFS happy.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/pathname.h>
 #include <sys/vfs.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
 #include <sys/stat.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_deleg.h>
 #include <sys/zpl.h>
 #include <sys/mntent.h>
 #include "zfs_namecheck.h"
 
 /*
  * Two AVL trees are maintained which contain all currently automounted
  * snapshots.  Every automounted snapshot maps to a single zfs_snapentry_t
  * entry which MUST:
  *
  *   - be attached to both trees, and
  *   - be unique, no duplicate entries are allowed.
  *
  * The zfs_snapshots_by_name tree is indexed by the full dataset name
  * while the zfs_snapshots_by_objsetid tree is indexed by the unique
  * objsetid.  This allows for fast lookups either by name or objsetid.
  */
 static avl_tree_t zfs_snapshots_by_name;
 static avl_tree_t zfs_snapshots_by_objsetid;
 static krwlock_t zfs_snapshot_lock;
 
 /*
  * Control Directory Tunables (.zfs)
  */
 int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
 int zfs_admin_snapshot = 0;
 
 typedef struct {
 	char		*se_name;	/* full snapshot name */
 	char		*se_path;	/* full mount path */
 	spa_t		*se_spa;	/* pool spa */
 	uint64_t	se_objsetid;	/* snapshot objset id */
 	struct dentry   *se_root_dentry; /* snapshot root dentry */
 	krwlock_t	se_taskqid_lock;  /* scheduled unmount taskqid lock */
 	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
 	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
 	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
 	zfs_refcount_t	se_refcount;	/* reference count */
 } zfs_snapentry_t;
 
 static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
 
 /*
  * Allocate a new zfs_snapentry_t, being careful to make a copy of the
  * snapshot name and the provided mount point.  No reference is taken.
  */
 static zfs_snapentry_t *
 zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
     uint64_t objsetid, struct dentry *root_dentry)
 {
 	zfs_snapentry_t *se;
 
 	se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
 
 	se->se_name = kmem_strdup(full_name);
 	se->se_path = kmem_strdup(full_path);
 	se->se_spa = spa;
 	se->se_objsetid = objsetid;
 	se->se_root_dentry = root_dentry;
 	se->se_taskqid = TASKQID_INVALID;
 	rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL);
 
 	zfs_refcount_create(&se->se_refcount);
 
 	return (se);
 }
 
 /*
  * Free a zfs_snapentry_t.  The caller must ensure there are no active
  * references.
  */
 static void
 zfsctl_snapshot_free(zfs_snapentry_t *se)
 {
 	zfs_refcount_destroy(&se->se_refcount);
 	kmem_strfree(se->se_name);
 	kmem_strfree(se->se_path);
 	rw_destroy(se->se_taskqid_lock);
 
 	kmem_free(se, sizeof (zfs_snapentry_t));
 }
 
 /*
  * Hold a reference on the zfs_snapentry_t.
  */
 static void
 zfsctl_snapshot_hold(zfs_snapentry_t *se)
 {
 	zfs_refcount_add(&se->se_refcount, NULL);
 }
 
 /*
  * Release a reference on the zfs_snapentry_t.  When the number of
  * references drops to zero the structure will be freed.
  */
 static void
 zfsctl_snapshot_rele(zfs_snapentry_t *se)
 {
 	if (zfs_refcount_remove(&se->se_refcount, NULL) == 0)
 		zfsctl_snapshot_free(se);
 }
 
 /*
  * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
  * zfs_snapshots_by_objsetid trees.  While the zfs_snapentry_t is part
  * of the trees, a reference is held.
  */
 static void
 zfsctl_snapshot_add(zfs_snapentry_t *se)
 {
 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
 	zfsctl_snapshot_hold(se);
 	avl_add(&zfs_snapshots_by_name, se);
 	avl_add(&zfs_snapshots_by_objsetid, se);
 }
 
 /*
  * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
  * zfs_snapshots_by_objsetid trees.  Upon removal a reference is dropped;
  * this can result in the structure being freed if that was the last
  * remaining reference.
  */
 static void
 zfsctl_snapshot_remove(zfs_snapentry_t *se)
 {
 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
 	avl_remove(&zfs_snapshots_by_name, se);
 	avl_remove(&zfs_snapshots_by_objsetid, se);
 	zfsctl_snapshot_rele(se);
 }
 
 /*
  * Snapshot name comparison function for the zfs_snapshots_by_name tree.
  */
 static int
 snapentry_compare_by_name(const void *a, const void *b)
 {
 	const zfs_snapentry_t *se_a = a;
 	const zfs_snapentry_t *se_b = b;
 	int ret;
 
 	ret = strcmp(se_a->se_name, se_b->se_name);
 
 	if (ret < 0)
 		return (-1);
 	else if (ret > 0)
 		return (1);
 	else
 		return (0);
 }
 
 /*
  * Snapshot objsetid comparison function for the zfs_snapshots_by_objsetid
  * tree.
  */
 static int
 snapentry_compare_by_objsetid(const void *a, const void *b)
 {
 	const zfs_snapentry_t *se_a = a;
 	const zfs_snapentry_t *se_b = b;
 
 	if (se_a->se_spa != se_b->se_spa)
 		return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
 
 	if (se_a->se_objsetid < se_b->se_objsetid)
 		return (-1);
 	else if (se_a->se_objsetid > se_b->se_objsetid)
 		return (1);
 	else
 		return (0);
 }
 
 /*
  * Find a zfs_snapentry_t in zfs_snapshots_by_name.  If the snapname
  * is found, a pointer to the zfs_snapentry_t is returned and a reference
  * is taken on the structure.  The caller is responsible for dropping the
  * reference with zfsctl_snapshot_rele().  If the snapname is not found,
  * NULL will be returned.
  */
 static zfs_snapentry_t *
 zfsctl_snapshot_find_by_name(const char *snapname)
 {
 	zfs_snapentry_t *se, search;
 
 	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
 
 	search.se_name = (char *)snapname;
 	se = avl_find(&zfs_snapshots_by_name, &search, NULL);
 	if (se)
 		zfsctl_snapshot_hold(se);
 
 	return (se);
 }
 
 /*
  * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id
  * rather than the snapname.  In all other respects it behaves the same
  * as zfsctl_snapshot_find_by_name().
  */
 static zfs_snapentry_t *
 zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
 {
 	zfs_snapentry_t *se, search;
 
 	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
 
 	search.se_spa = spa;
 	search.se_objsetid = objsetid;
 	se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL);
 	if (se)
 		zfsctl_snapshot_hold(se);
 
 	return (se);
 }
 
 /*
  * Rename a zfs_snapentry_t in the zfs_snapshots_by_name tree.  The structure
  * is removed, renamed, and added back at its new location in the tree.
  */
 static int
 zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
 {
 	zfs_snapentry_t *se;
 
 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
 
 	se = zfsctl_snapshot_find_by_name(old_snapname);
 	if (se == NULL)
 		return (SET_ERROR(ENOENT));
 
 	zfsctl_snapshot_remove(se);
 	kmem_strfree(se->se_name);
 	se->se_name = kmem_strdup(new_snapname);
 	zfsctl_snapshot_add(se);
 	zfsctl_snapshot_rele(se);
 
 	return (0);
 }
 
 /*
  * Delayed task responsible for unmounting an expired automounted snapshot.
  */
 static void
 snapentry_expire(void *data)
 {
 	zfs_snapentry_t *se = (zfs_snapentry_t *)data;
 	spa_t *spa = se->se_spa;
 	uint64_t objsetid = se->se_objsetid;
 
 	if (zfs_expire_snapshot <= 0) {
 		zfsctl_snapshot_rele(se);
 		return;
 	}
 
 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
 	se->se_taskqid = TASKQID_INVALID;
 	rw_exit(&se->se_taskqid_lock);
 	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
 	zfsctl_snapshot_rele(se);
 
 	/*
 	 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
 	 * This can occur when the snapshot is busy.
 	 */
 	rw_enter(&zfs_snapshot_lock, RW_READER);
 	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
 		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
 		zfsctl_snapshot_rele(se);
 	}
 	rw_exit(&zfs_snapshot_lock);
 }
 
 /*
  * Cancel an automatic unmount of a snapname.  This callback is responsible
  * for dropping the reference on the zfs_snapentry_t which was taken
  * during dispatch.
  */
 static void
 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
 {
 	int err = 0;
 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
 	err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
 	/*
 	 * If we get ENOENT, the task couldn't be found to be
 	 * canceled, so we can just mark it as invalid because
 	 * it's already gone.  If we get EBUSY, then we already
 	 * blocked until it was gone _anyway_, so we don't care.
 	 */
 	se->se_taskqid = TASKQID_INVALID;
 	rw_exit(&se->se_taskqid_lock);
 	if (err == 0) {
 		zfsctl_snapshot_rele(se);
 	}
 }
 
 /*
  * Dispatch the unmount task for delayed handling with a hold protecting it.
  */
 static void
 zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
 {
 
 	if (delay <= 0)
 		return;
 
 	zfsctl_snapshot_hold(se);
 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
 	/*
 	 * If this condition happens, we managed to:
 	 * - dispatch once
 	 * - want to dispatch _again_ before it returned
 	 *
 	 * So let's just return - if that task fails at unmounting,
 	 * we'll eventually dispatch again, and if it succeeds,
 	 * no problem.
 	 */
 	if (se->se_taskqid != TASKQID_INVALID) {
 		rw_exit(&se->se_taskqid_lock);
 		zfsctl_snapshot_rele(se);
 		return;
 	}
 	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
 	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
 	rw_exit(&se->se_taskqid_lock);
 }
 
 /*
  * Schedule an automatic unmount of the objset id to occur in delay seconds
  * from now.  Any previous delayed unmount will be cancelled in favor of the
  * updated deadline.  A reference is taken by zfsctl_snapshot_find_by_objsetid()
  * and held until the outstanding task is handled or cancelled.
  */
 int
 zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
 {
 	zfs_snapentry_t *se;
 	int error = ENOENT;
 
 	rw_enter(&zfs_snapshot_lock, RW_READER);
 	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
 		zfsctl_snapshot_unmount_cancel(se);
 		zfsctl_snapshot_unmount_delay_impl(se, delay);
 		zfsctl_snapshot_rele(se);
 		error = 0;
 	}
 	rw_exit(&zfs_snapshot_lock);
 
 	return (error);
 }
 
 /*
  * Check if snapname is currently mounted.  Returns B_TRUE when mounted
  * and B_FALSE when unmounted.
  */
 static boolean_t
 zfsctl_snapshot_ismounted(const char *snapname)
 {
 	zfs_snapentry_t *se;
 	boolean_t ismounted = B_FALSE;
 
 	rw_enter(&zfs_snapshot_lock, RW_READER);
 	if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
 		zfsctl_snapshot_rele(se);
 		ismounted = B_TRUE;
 	}
 	rw_exit(&zfs_snapshot_lock);
 
 	return (ismounted);
 }
 
 /*
  * Check if the given inode is a part of the virtual .zfs directory.
  */
 boolean_t
 zfsctl_is_node(struct inode *ip)
 {
 	return (ITOZ(ip)->z_is_ctldir);
 }
 
 /*
  * Check if the given inode is a .zfs/snapshot/<snapname> directory.
  */
 boolean_t
 zfsctl_is_snapdir(struct inode *ip)
 {
 	return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
 }
 
 /*
  * Allocate a new inode with the passed id and ops.
  */
 static struct inode *
 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
     const struct file_operations *fops, const struct inode_operations *ops)
 {
 	inode_timespec_t now;
 	struct inode *ip;
 	znode_t *zp;
 
 	ip = new_inode(zfsvfs->z_sb);
 	if (ip == NULL)
 		return (NULL);
 
 	now = current_time(ip);
 	zp = ITOZ(ip);
 	ASSERT3P(zp->z_dirlocks, ==, NULL);
 	ASSERT3P(zp->z_acl_cached, ==, NULL);
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_id = id;
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_zn_prefetch = B_FALSE;
 	zp->z_is_sa = B_FALSE;
 #if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
 #endif
 	zp->z_is_ctldir = B_TRUE;
 	zp->z_sa_hdl = NULL;
 	zp->z_blksz = 0;
 	zp->z_seq = 0;
 	zp->z_mapcnt = 0;
 	zp->z_size = 0;
 	zp->z_pflags = 0;
 	zp->z_mode = 0;
 	zp->z_sync_cnt = 0;
 	zp->z_sync_writes_cnt = 0;
 	zp->z_async_writes_cnt = 0;
 	ip->i_generation = 0;
 	ip->i_ino = id;
 	ip->i_mode = (S_IFDIR | S_IRWXUGO);
 	ip->i_uid = SUID_TO_KUID(0);
 	ip->i_gid = SGID_TO_KGID(0);
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
-	ip->i_atime = now;
-	ip->i_mtime = now;
+	zpl_inode_set_atime_to_ts(ip, now);
+	zpl_inode_set_mtime_to_ts(ip, now);
 	zpl_inode_set_ctime_to_ts(ip, now);
 	ip->i_fop = fops;
 	ip->i_op = ops;
 #if defined(IOP_XATTR)
 	ip->i_opflags &= ~IOP_XATTR;
 #endif
 
 	if (insert_inode_locked(ip)) {
 		unlock_new_inode(ip);
 		iput(ip);
 		return (NULL);
 	}
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
 	zfsvfs->z_nr_znodes++;
 	membar_producer();
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	unlock_new_inode(ip);
 
 	return (ip);
 }
 
 /*
  * Look up the inode with the given id; it will be allocated if needed.
  */
 static struct inode *
 zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
     const struct file_operations *fops, const struct inode_operations *ops)
 {
 	struct inode *ip = NULL;
 
 	while (ip == NULL) {
 		ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
 		if (ip)
 			break;
 
 		/* May fail due to concurrent zfsctl_inode_alloc() */
 		ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
 	}
 
 	return (ip);
 }
 
 /*
  * Create the '.zfs' directory.  This directory is cached as part of the VFS
  * structure.  This results in a hold on the zfsvfs_t.  The code in zfs_umount()
  * therefore checks against a vfs_count of 2 instead of 1.  This reference
  * is removed when the ctldir is destroyed in the unmount.  All other entities
  * under the '.zfs' directory are created dynamically as needed.
  *
  * Because the dynamically created '.zfs' directory entries assume the use
  * of 64-bit inode numbers, this support must be disabled on 32-bit systems.
  */
 int
 zfsctl_create(zfsvfs_t *zfsvfs)
 {
 	ASSERT(zfsvfs->z_ctldir == NULL);
 
 	zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
 	    &zpl_fops_root, &zpl_ops_root);
 	if (zfsvfs->z_ctldir == NULL)
 		return (SET_ERROR(ENOENT));
 
 	return (0);
 }
 
 /*
  * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name.
  * Only called when the filesystem is unmounted.
  */
 void
 zfsctl_destroy(zfsvfs_t *zfsvfs)
 {
 	if (zfsvfs->z_issnap) {
 		zfs_snapentry_t *se;
 		spa_t *spa = zfsvfs->z_os->os_spa;
 		uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
 
 		rw_enter(&zfs_snapshot_lock, RW_WRITER);
 		se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
 		if (se != NULL)
 			zfsctl_snapshot_remove(se);
 		rw_exit(&zfs_snapshot_lock);
 		if (se != NULL) {
 			zfsctl_snapshot_unmount_cancel(se);
 			zfsctl_snapshot_rele(se);
 		}
 	} else if (zfsvfs->z_ctldir) {
 		iput(zfsvfs->z_ctldir);
 		zfsvfs->z_ctldir = NULL;
 	}
 }
 
 /*
  * Given a root znode, retrieve the associated .zfs directory.
  * Add a hold to the vnode and return it.
  */
 struct inode *
 zfsctl_root(znode_t *zp)
 {
 	ASSERT(zfs_has_ctldir(zp));
 	/* Must have an existing ref, so igrab() cannot return NULL */
 	VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL);
 	return (ZTOZSB(zp)->z_ctldir);
 }
 
 /*
  * Generate a long fid to indicate a snapdir.  We encode whether the snapdir
  * is already mounted in the gen field.  We do this because an nfsd lookup
  * will not trigger an automount.  The next time nfsd does fh_to_dentry, we
  * will notice this, do the automount, and return ESTALE to force nfsd to
  * revalidate and follow the mount.
  */
 static int
 zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
 {
 	zfid_short_t *zfid = (zfid_short_t *)fidp;
 	zfid_long_t *zlfid = (zfid_long_t *)fidp;
 	uint32_t gen = 0;
 	uint64_t object;
 	uint64_t objsetid;
 	int i;
 	struct dentry *dentry;
 
 	if (fidp->fid_len < LONG_FID_LEN) {
 		fidp->fid_len = LONG_FID_LEN;
 		return (SET_ERROR(ENOSPC));
 	}
 
 	object = ip->i_ino;
 	objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
 	zfid->zf_len = LONG_FID_LEN;
 
 	dentry = d_obtain_alias(igrab(ip));
 	if (!IS_ERR(dentry)) {
 		gen = !!d_mountpoint(dentry);
 		dput(dentry);
 	}
 
 	for (i = 0; i < sizeof (zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
 	for (i = 0; i < sizeof (zlfid->zf_setid); i++)
 		zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
 
 	for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 		zlfid->zf_setgen[i] = 0;
 
 	return (0);
 }
 
 /*
  * Generate an appropriate fid for an entry in the .zfs directory.
  */
 int
 zfsctl_fid(struct inode *ip, fid_t *fidp)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
 	int		i;
 
 	ZFS_ENTER(zfsvfs);
 
 	if (zfsctl_is_snapdir(ip)) {
 		ZFS_EXIT(zfsvfs);
 		return (zfsctl_snapdir_fid(ip, fidp));
 	}
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENOSPC));
 	}
 
 	zfid = (zfid_short_t *)fidp;
 
 	zfid->zf_len = SHORT_FID_LEN;
 
 	for (i = 0; i < sizeof (zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
 	/* .zfs znodes always have a generation number of 0 */
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = 0;
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * Construct a full dataset name in full_name: "pool/dataset@snap_name"
  */
 static int
 zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
     char *full_name)
 {
 	objset_t *os = zfsvfs->z_os;
 
 	if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
 		return (SET_ERROR(EILSEQ));
 
 	dmu_objset_name(os, full_name);
 	if ((strlen(full_name) + 1 + strlen(snap_name)) >= len)
 		return (SET_ERROR(ENAMETOOLONG));
 
 	(void) strcat(full_name, "@");
 	(void) strcat(full_name, snap_name);
 
 	return (0);
 }
 
 /*
  * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
  */
 static int
 zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
     int path_len, char *full_path)
 {
 	objset_t *os = zfsvfs->z_os;
 	fstrans_cookie_t cookie;
 	char *snapname;
 	boolean_t case_conflict;
 	uint64_t id, pos = 0;
 	int error = 0;
 
 	if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
 		return (SET_ERROR(ENOENT));
 
 	cookie = spl_fstrans_mark();
 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 
 	while (error == 0) {
 		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 		error = dmu_snapshot_list_next(zfsvfs->z_os,
 		    ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
 		    &case_conflict);
 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 		if (error)
 			goto out;
 
 		if (id == objsetid)
 			break;
 	}
 
 	snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
 	    zfsvfs->z_vfs->vfs_mntpoint, snapname);
 out:
 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
 	spl_fstrans_unmark(cookie);
 
 	return (error);
 }
 
 /*
  * Special case the handling of "..".
  */
 int
 zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	int error = 0;
 
 	ZFS_ENTER(zfsvfs);
 
 	if (strcmp(name, "..") == 0) {
 		*ipp = dip->i_sb->s_root->d_inode;
 	} else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
 		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
 		    &zpl_fops_snapdir, &zpl_ops_snapdir);
 	} else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
 		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
 		    &zpl_fops_shares, &zpl_ops_shares);
 	} else {
 		*ipp = NULL;
 	}
 
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Lookup entry point for the 'snapshot' directory.  Try to open the
  * snapshot if it exists, creating the pseudo filesystem inode as necessary.
  */
 int
 zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	uint64_t id;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 
 	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
 	if (error) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id,
 	    &simple_dir_operations, &simple_dir_inode_operations);
 	if (*ipp == NULL)
 		error = SET_ERROR(ENOENT);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Renaming a directory under '.zfs/snapshot' will automatically trigger
  * a rename of the snapshot to the new given name.  The rename is confined
  * to the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere.
  */
 int
 zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
     struct inode *tdip, const char *tnm, cred_t *cr, int flags)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(sdip);
 	char *to, *from, *real, *fsname;
 	int error;
 
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
 	ZFS_ENTER(zfsvfs);
 
 	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
 		    ZFS_MAX_DATASET_NAME_LEN, NULL);
 		if (error == 0) {
 			snm = real;
 		} else if (error != ENOTSUP) {
 			goto out;
 		}
 	}
 
 	dmu_objset_name(zfsvfs->z_os, fsname);
 
 	error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
 	    ZFS_MAX_DATASET_NAME_LEN, from);
 	if (error == 0)
 		error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
 		    ZFS_MAX_DATASET_NAME_LEN, to);
 	if (error == 0)
 		error = zfs_secpolicy_rename_perms(from, to, cr);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Cannot move snapshots out of the snapdir.
 	 */
 	if (sdip != tdip) {
 		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	/*
 	 * No-op when names are identical.
 	 */
 	if (strcmp(snm, tnm) == 0) {
 		error = 0;
 		goto out;
 	}
 
 	rw_enter(&zfs_snapshot_lock, RW_WRITER);
 
 	error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
 	if (error == 0)
 		(void) zfsctl_snapshot_rename(snm, tnm);
 
 	rw_exit(&zfs_snapshot_lock);
 out:
 	kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Removing a directory under '.zfs/snapshot' will automatically trigger
  * the removal of the snapshot with the given name.
  */
 int
 zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
     int flags)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	char *snapname, *real;
 	int error;
 
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
 	ZFS_ENTER(zfsvfs);
 
 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		error = dmu_snapshot_realname(zfsvfs->z_os, name, real,
 		    ZFS_MAX_DATASET_NAME_LEN, NULL);
 		if (error == 0) {
 			name = real;
 		} else if (error != ENOTSUP) {
 			goto out;
 		}
 	}
 
 	error = zfsctl_snapshot_name(ITOZSB(dip), name,
 	    ZFS_MAX_DATASET_NAME_LEN, snapname);
 	if (error == 0)
 		error = zfs_secpolicy_destroy_perms(snapname, cr);
 	if (error != 0)
 		goto out;
 
 	error = zfsctl_snapshot_unmount(snapname, MNT_FORCE);
 	if ((error == 0) || (error == ENOENT))
 		error = dsl_destroy_snapshot(snapname, B_FALSE);
 out:
 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Creating a directory under '.zfs/snapshot' will automatically trigger
  * the creation of a new snapshot with the given name.
  */
 int
 zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap,
     struct inode **ipp, cred_t *cr, int flags)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	char *dsname;
 	int error;
 
 	if (!zfs_admin_snapshot)
 		return (SET_ERROR(EACCES));
 
 	dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 
 	if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
 		error = SET_ERROR(EILSEQ);
 		goto out;
 	}
 
 	dmu_objset_name(zfsvfs->z_os, dsname);
 
 	error = zfs_secpolicy_snapshot_perms(dsname, cr);
 	if (error != 0)
 		goto out;
 
 	if (error == 0) {
 		error = dmu_objset_snapshot_one(dsname, dirname);
 		if (error != 0)
 			goto out;
 
 		error = zfsctl_snapdir_lookup(dip, dirname, ipp,
 		    0, cr, NULL, NULL);
 	}
 out:
 	kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
 
 	return (error);
 }
 
 /*
  * Flush everything out of the kernel's export table and such.
  * This is needed as once the snapshot is used over NFS, its
  * entries in the svc_export and svc_expkey caches hold a reference
  * to the snapshot mount point. There is no known way of flushing
  * only the entries related to the snapshot.
  */
 static void
 exportfs_flush(void)
 {
 	char *argv[] = { "/usr/sbin/exportfs", "-f", NULL };
 	char *envp[] = { NULL };
 
 	(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 }
 
 /*
  * Attempt to unmount a snapshot by making a call to user space.
  * There is no assurance that this can or will succeed; it is just a
  * best effort.  In the case where it does fail, perhaps because
  * it's in use, the unmount will fail harmlessly.
  */
 int
 zfsctl_snapshot_unmount(const char *snapname, int flags)
 {
 	char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL,
 	    NULL };
 	char *envp[] = { NULL };
 	zfs_snapentry_t *se;
 	int error;
 
 	rw_enter(&zfs_snapshot_lock, RW_READER);
 	if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) {
 		rw_exit(&zfs_snapshot_lock);
 		return (SET_ERROR(ENOENT));
 	}
 	rw_exit(&zfs_snapshot_lock);
 
 	exportfs_flush();
 
 	if (flags & MNT_FORCE)
 		argv[4] = "-fn";
 	argv[5] = se->se_path;
 	dprintf("unmount; path=%s\n", se->se_path);
 	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 	zfsctl_snapshot_rele(se);
 
 
 	/*
 	 * The umount system utility will return 256 on error.  We must
 	 * assume this error is because the file system is busy so it is
 	 * converted to the more sensible EBUSY.
 	 */
 	if (error)
 		error = SET_ERROR(EBUSY);
 
 	return (error);
 }
 
 int
 zfsctl_snapshot_mount(struct path *path, int flags)
 {
 	struct dentry *dentry = path->dentry;
 	struct inode *ip = dentry->d_inode;
 	zfsvfs_t *zfsvfs;
 	zfsvfs_t *snap_zfsvfs;
 	zfs_snapentry_t *se;
 	char *full_name, *full_path;
 	char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL,
 	    NULL };
 	char *envp[] = { NULL };
 	int error;
 	struct path spath;
 
 	if (ip == NULL)
 		return (SET_ERROR(EISDIR));
 
 	zfsvfs = ITOZSB(ip);
 	ZFS_ENTER(zfsvfs);
 
 	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
 	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 
 	error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
 	    ZFS_MAX_DATASET_NAME_LEN, full_name);
 	if (error)
 		goto error;
 
 	/*
 	 * Construct a mount point path from sb of the ctldir inode and dirent
  * name, instead of from d_path(), so that a chroot'd process doesn't fail
 	 * on mount.zfs(8).
 	 */
 	snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
 	    zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
 	    dname(dentry));
 
 	/*
 	 * Multiple concurrent automounts of a snapshot are never allowed.
 	 * The snapshot may be manually mounted as many times as desired.
 	 */
 	if (zfsctl_snapshot_ismounted(full_name)) {
 		error = 0;
 		goto error;
 	}
 
 	/*
 	 * Attempt to mount the snapshot from user space.  Normally this
 	 * would be done using the vfs_kern_mount() function, however that
  * function is marked GPL-only and cannot be used.  On error we are
  * careful to log the real error to the console and return EISDIR
 	 * to safely abort the automount.  This should be very rare.
 	 *
 	 * If the user mode helper happens to return EBUSY, a concurrent
 	 * mount is already in progress in which case the error is ignored.
 	 * Take note that if the program was executed successfully the return
 	 * value from call_usermodehelper() will be (exitcode << 8 + signal).
 	 */
 	dprintf("mount; name=%s path=%s\n", full_name, full_path);
 	argv[5] = full_name;
 	argv[6] = full_path;
 	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
 	if (error) {
 		if (!(error & MOUNT_BUSY << 8)) {
 			zfs_dbgmsg("Unable to automount %s error=%d",
 			    full_path, error);
 			error = SET_ERROR(EISDIR);
 		} else {
 			/*
 			 * EBUSY: this could mean a concurrent mount, or that
 			 * the snapshot has already been mounted at a completely
 			 * different place.  We return 0 so the VFS will retry.
 			 * In the latter case the VFS will retry several times
 			 * and return ELOOP, which is probably not a very good
 			 * behavior.
 			 */
 			error = 0;
 		}
 		goto error;
 	}
 
 	/*
 	 * Follow down into the mounted snapshot and set MNT_SHRINKABLE
 	 * to identify this as an automounted filesystem.
 	 */
 	spath = *path;
 	path_get(&spath);
 	if (follow_down_one(&spath)) {
 		snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
 		snap_zfsvfs->z_parent = zfsvfs;
 		dentry = spath.dentry;
 		spath.mnt->mnt_flags |= MNT_SHRINKABLE;
 
 		rw_enter(&zfs_snapshot_lock, RW_WRITER);
 		se = zfsctl_snapshot_alloc(full_name, full_path,
 		    snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
 		    dentry);
 		zfsctl_snapshot_add(se);
 		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
 		rw_exit(&zfs_snapshot_lock);
 	}
 	path_put(&spath);
 error:
 	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
 	kmem_free(full_path, MAXPATHLEN);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Get the snapdir inode from the fid
  */
 int
 zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
     struct inode **ipp)
 {
 	int error;
 	struct path path;
 	char *mnt;
 	struct dentry *dentry;
 
 	mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 
 	error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
 	    MAXPATHLEN, mnt);
 	if (error)
 		goto out;
 
 	/* Trigger automount */
 	error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
 	if (error)
 		goto out;
 
 	path_put(&path);
 	/*
 	 * Get the snapdir inode. Note, we don't want to use the above
 	 * path because it contains the root of the snapshot rather
 	 * than the snapdir.
 	 */
 	*ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
 	if (*ipp == NULL) {
 		error = SET_ERROR(ENOENT);
 		goto out;
 	}
 
 	/* check gen, see zfsctl_snapdir_fid */
 	dentry = d_obtain_alias(igrab(*ipp));
 	if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
 		iput(*ipp);
 		*ipp = NULL;
 		error = SET_ERROR(ENOENT);
 	}
 	if (!IS_ERR(dentry))
 		dput(dentry);
 out:
 	kmem_free(mnt, MAXPATHLEN);
 	return (error);
 }
 
 int
 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(dip);
 	znode_t *zp;
 	znode_t *dzp;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 
 	if (zfsvfs->z_shares_dir == 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENOTSUP));
 	}
 
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 		error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL);
 		zrele(dzp);
 	}
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Initialize the various pieces we'll need to create and manipulate .zfs
  * directories.  Currently this is unused but available.
  */
 void
 zfsctl_init(void)
 {
 	avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name,
 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
 	    se_node_name));
 	avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid,
 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
 	    se_node_objsetid));
 	rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
 }
 
 /*
  * Clean up the various pieces we needed for .zfs directories.  In particular
  * ensure the expiry timer is canceled safely.
  */
 void
 zfsctl_fini(void)
 {
 	avl_destroy(&zfs_snapshots_by_name);
 	avl_destroy(&zfs_snapshots_by_objsetid);
 	rw_destroy(&zfs_snapshot_lock);
 }
 
 module_param(zfs_admin_snapshot, int, 0644);
 MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");
 
 module_param(zfs_expire_snapshot, int, 0644);
 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");
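
The hunk above converts the direct stores to i_atime/i_mtime in
zfsctl_inode_alloc() to the new shims; reads are handled the same way through
the matching zpl_inode_get_*() macros.  A minimal sketch of the read path
(hypothetical helper, for illustration only):

	/* Hypothetical helper: fetch an inode's mtime portably. */
	static inode_timespec_t
	example_get_mtime(struct inode *ip)
	{
		/* inode_get_mtime(ip) on 6.7+, ip->i_mtime on older kernels. */
		return (zpl_inode_get_mtime(ip));
	}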
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index 991d1aa1c22c..294a8e6314d2 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -1,4089 +1,4096 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2010 Robert Milkowski */
 
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/vfs.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/taskq.h>
 #include <sys/uio.h>
 #include <sys/vmsystm.h>
 #include <sys/atomic.h>
 #include <sys/pathname.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/policy.h>
 #include <sys/sunddi.h>
 #include <sys/sid.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_quota.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_rlock.h>
 #include <sys/cred.h>
 #include <sys/zpl.h>
 #include <sys/zil.h>
 #include <sys/sa_impl.h>
 
 /*
  * Programming rules.
  *
  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
  * properly lock its in-core state, create a DMU transaction, do the work,
  * record this work in the intent log (ZIL), commit the DMU transaction,
  * and wait for the intent log to commit if it is a synchronous operation.
  * Moreover, the vnode ops must work in both normal and log replay context.
  * The ordering of events is important to avoid deadlocks and references
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1) A check must be made in each zfs thread for a mounted file system.
  *	This is done, avoiding races, using ZFS_ENTER(zfsvfs).
  *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
  *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
  *      can return EIO from the calling function.
  *
  *  (2) zrele() should always be the last thing except for zil_commit() (if
  *	necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
  *	last reference, the vnode/znode can be freed, so the zp may point to
  *	freed memory.  Second, the last reference will call zfs_zinactive(),
  *	which may induce a lot of work -- pushing cached pages (which acquires
  *	range locks) and syncing out cached atime changes.  Third,
  *	zfs_zinactive() may require a new tx, which could deadlock the system
  *	if you were already holding one. This deadlock occurs because the tx
  *	currently being operated on prevents a txg from syncing, which
  *	prevents the new tx from progressing, resulting in a deadlock.  If you
  *	must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
  *	is a synonym for zrele().
  *
  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
  *	as they can span dmu_tx_assign() calls.
  *
  *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
  *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
  *
  *      Note, in particular, that if a lock is sometimes acquired before
  *      the tx assigns, and sometimes after (e.g. z_lock), then failing
  *      to use a non-blocking assign can deadlock the system.  The scenario:
  *
  *	Thread A has grabbed a lock before calling dmu_tx_assign().
  *	Thread B is in an already-assigned tx, and blocks for this lock.
  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
  *	forever, because the previous txg can't quiesce until B's tx commits.
  *
  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
  *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
  *	to indicate that this operation has already called dmu_tx_wait().
  *	This will ensure that we don't retry forever, waiting a short bit
  *	each time.
  *
  *  (5)	If the operation succeeded, generate the intent log entry for it
  *	before dropping locks.  This ensures that the ordering of events
  *	in the intent log matches the order in which they actually occurred.
  *	During ZIL replay the zfs_log_* functions will update the sequence
  *	number to indicate the zil transaction has replayed.
  *
  *  (6)	At the end of each vnode op, the DMU tx must always commit,
  *	regardless of whether there were any errors.
  *
  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
  *	to ensure that synchronous semantics are provided when necessary.
  *
  * In general, this is how things should be ordered in each vnode op:
  *
  *	ZFS_ENTER(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
  *	rw_enter(...);			// grab any other locks you need
  *	tx = dmu_tx_create(...);	// get DMU tx
  *	dmu_tx_hold_*();		// hold each object you might modify
  *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
  *	if (error) {
  *		rw_exit(...);		// drop locks
  *		zfs_dirent_unlock(dl);	// unlock directory entry
  *		zrele(...);		// release held znodes
  *		if (error == ERESTART) {
  *			waited = B_TRUE;
  *			dmu_tx_wait(tx);
  *			dmu_tx_abort(tx);
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
  *		ZFS_EXIT(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
  *	if (error == 0)
  *		zfs_log_*(...);		// on success, make ZIL entry
  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
  *	rw_exit(...);			// drop locks
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	zrele(...);			// release held znodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
  *	ZFS_EXIT(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 
 /*
  * Virus scanning is unsupported.  It would be possible to add a hook
  * here to perform the required virus scan.  This could be done
  * entirely in the kernel or potentially as an update to invoke a
  * scanning utility.
  */
 static int
 zfs_vscan(struct inode *ip, cred_t *cr, int async)
 {
 	return (0);
 }
 
 /* ARGSUSED */
 int
 zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 {
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	/* Honor ZFS_APPENDONLY file attribute */
 	if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & O_APPEND) == 0)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* Virus scan eligible files on open */
 	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
 	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
 		if (zfs_vscan(ip, cr, 0) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EACCES));
 		}
 	}
 
 	/* Keep a count of the synchronous opens in the znode */
 	if (flag & O_SYNC)
 		atomic_inc_32(&zp->z_sync_cnt);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /* ARGSUSED */
 int
 zfs_close(struct inode *ip, int flag, cred_t *cr)
 {
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	/* Decrement the synchronous opens in the znode */
 	if (flag & O_SYNC)
 		atomic_dec_32(&zp->z_sync_cnt);
 
 	if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
 	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
 		VERIFY(zfs_vscan(ip, cr, 1) == 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 #if defined(_KERNEL)
 
 static int zfs_fillpage(struct inode *ip, struct page *pp);
 
 /*
  * When a file is memory mapped, we must keep the I/O data synchronized
  * between the DMU cache and the memory mapped pages.  Update all mapped
  * pages with the contents of the corresponding DMU buffer.
  */
 void
 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 {
 	struct address_space *mp = ZTOI(zp)->i_mapping;
 	int64_t off = start & (PAGE_SIZE - 1);
 
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
 		uint64_t nbytes = MIN(PAGE_SIZE - off, len);
 
 		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);
 
 			void *pb = kmap(pp);
 			int error = dmu_read(os, zp->z_id, start + off,
 			    nbytes, pb + off, DMU_READ_PREFETCH);
 			kunmap(pp);
 
 			if (error) {
 				SetPageError(pp);
 				ClearPageUptodate(pp);
 			} else {
 				ClearPageError(pp);
 				SetPageUptodate(pp);
 
 				if (mapping_writably_mapped(mp))
 					flush_dcache_page(pp);
 
 				mark_page_accessed(pp);
 			}
 
 			unlock_page(pp);
 			put_page(pp);
 		}
 
 		len -= nbytes;
 		off = 0;
 	}
 }
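 
 /*
  * Illustrative sketch (not part of the original change): update_pages()
  * is expected to be invoked from the common write path once the DMU
  * copy for a range has completed, roughly:
  *
  *	if (tx_bytes && zn_has_cached_data(zp, woff, woff + tx_bytes - 1))
  *		update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
  *
  * The names tx_bytes and woff belong to the generic write path and are
  * shown here only for illustration.
  */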
 
 /*
  * When a file is memory mapped, we must keep the I/O data synchronized
  * between the DMU cache and the memory mapped pages.  Preferentially read
  * from memory mapped pages, otherwise fall back to reading through the DMU.
  */
 int
 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
 	int64_t start = uio->uio_loffset;
 	int64_t off = start & (PAGE_SIZE - 1);
 	int len = nbytes;
 	int error = 0;
 
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
 		uint64_t bytes = MIN(PAGE_SIZE - off, len);
 
 		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			/*
 			 * If filemap_fault() retries, there exists a window
 			 * where the page will be unlocked and not up to date.
 			 * In this case we must try to fill the page.
 			 */
 			if (unlikely(!PageUptodate(pp))) {
 				error = zfs_fillpage(ip, pp);
 				if (error) {
 					unlock_page(pp);
 					put_page(pp);
 					return (error);
 				}
 			}
 
 			ASSERT(PageUptodate(pp) || PageDirty(pp));
 
 			unlock_page(pp);
 
 			void *pb = kmap(pp);
 			error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
 			kunmap(pp);
 
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);
 
 			mark_page_accessed(pp);
 			put_page(pp);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, bytes);
 		}
 
 		len -= bytes;
 		off = 0;
 
 		if (error)
 			break;
 	}
 
 	return (error);
 }
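 
 /*
  * Companion sketch (illustrative only): the common read path is
  * expected to choose between mappedread() and a plain DMU read based
  * on page cache residency, approximately:
  *
  *	if (zn_has_cached_data(zp, zfs_uio_offset(uio),
  *	    zfs_uio_offset(uio) + nbytes - 1))
  *		error = mappedread(zp, nbytes, uio);
  *	else
  *		error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
  *		    uio, nbytes);
  */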
 #endif /* _KERNEL */
 
 unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
 
 /*
  * Write the bytes to a file.
  *
  *	IN:	zp	- znode of file to be written to
  *		data	- bytes to write
  *		len	- number of bytes to write
  *		pos	- offset to start writing at
  *
  *	OUT:	residp	- remaining bytes to write, if not NULL
  *
  *	RETURN:	0 if success
  *		positive error code if failure.  EIO is returned
  *		for a short write when residp isn't provided.
  *
  * Timestamps:
  *	zp - ctime|mtime updated if byte count > 0
  */
 int
 zfs_write_simple(znode_t *zp, const void *data, size_t len,
     loff_t pos, size_t *residp)
 {
 	fstrans_cookie_t cookie;
 	int error;
 
 	struct iovec iov;
 	iov.iov_base = (void *)data;
 	iov.iov_len = len;
 
 	zfs_uio_t uio;
 	zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
 
 	cookie = spl_fstrans_mark();
 	error = zfs_write(zp, &uio, 0, kcred);
 	spl_fstrans_unmark(cookie);
 
 	if (error == 0) {
 		if (residp != NULL)
 			*residp = zfs_uio_resid(&uio);
 		else if (zfs_uio_resid(&uio) != 0)
 			error = SET_ERROR(EIO);
 	}
 
 	return (error);
 }
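 
 /*
  * Usage sketch for zfs_write_simple() (illustrative, not part of the
  * original change).  A caller that cares about short writes can pass a
  * residual pointer; one that doesn't gets EIO instead:
  *
  *	size_t resid;
  *	error = zfs_write_simple(zp, buf, len, 0, &resid);
  *	if (error == 0 && resid != 0)
  *		...			// only (len - resid) bytes written
  *
  *	error = zfs_write_simple(zp, buf, len, 0, NULL);
  *					// a short write surfaces as EIO
  */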
 
 static void
 zfs_rele_async_task(void *arg)
 {
 	iput(arg);
 }
 
 void
 zfs_zrele_async(znode_t *zp)
 {
 	struct inode *ip = ZTOI(zp);
 	objset_t *os = ITOZSB(ip)->z_os;
 
 	ASSERT(atomic_read(&ip->i_count) > 0);
 	ASSERT(os != NULL);
 
 	/*
 	 * If decrementing the count would put us at 0, we can't do it inline
 	 * here, because that would be synchronous. Instead, dispatch an iput
 	 * to run later.
 	 *
 	 * For more information on the dangers of a synchronous iput, see the
 	 * header comment of this file.
 	 */
 	if (!atomic_add_unless(&ip->i_count, -1, 1)) {
 		VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
 		    zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
 	}
 }
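 
 /*
  * Sketch of the dispatch test above: atomic_add_unless(&v, -1, 1) is a
  * conditional decrement that refuses to act when v == 1, so:
  *
  *	i_count == 3	// decremented to 2, nonzero return: inline drop
  *	i_count == 1	// unchanged, zero return: taskq does the final iput
  */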
 
 
 /*
  * Lookup an entry in a directory, or an extended attribute directory.
  * If it exists, return a held inode reference for it.
  *
  *	IN:	zdp	- znode of directory to search.
  *		nm	- name of entry to lookup.
  *		flags	- LOOKUP_XATTR set if looking for an attribute.
  *		cr	- credentials of caller.
  *		direntflags - directory lookup flags
  *		realpnp - returned pathname.
  *
  *	OUT:	zpp	- znode of located entry, NULL if not found.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	NA
  */
 /* ARGSUSED */
 int
 zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
     int *direntflags, pathname_t *realpnp)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zdp);
 	int error = 0;
 
 	/*
 	 * Fast path lookup; however, we must skip DNLC lookup
 	 * for case-folding or normalizing lookups because the
 	 * DNLC code only stores the passed-in name.  This means
 	 * creating 'a' and removing 'A' on a case-insensitive
 	 * file system would work, but DNLC still thinks 'a'
 	 * exists and won't let you create it again on the next
 	 * pass through the fast path.
 	 */
 	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
 
 		if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
 			return (SET_ERROR(ENOTDIR));
 		} else if (zdp->z_sa_hdl == NULL) {
 			return (SET_ERROR(EIO));
 		}
 
 		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
 			error = zfs_fastaccesschk_execute(zdp, cr);
 			if (!error) {
 				*zpp = zdp;
 				zhold(*zpp);
 				return (0);
 			}
 			return (error);
 		}
 	}
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zdp);
 
 	*zpp = NULL;
 
 	if (flags & LOOKUP_XATTR) {
 		/*
 		 * We don't allow recursive attributes.
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 
 		/*
 		 * Do we have permission to get into attribute directory?
 		 */
 
 		if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
 		    B_FALSE, cr))) {
 			zrele(*zpp);
 			*zpp = NULL;
 		}
 
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENOTDIR));
 	}
 
 	/*
 	 * Check accessibility of directory.
 	 */
 
 	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
 	if ((error == 0) && (*zpp))
 		zfs_znode_update_vfs(*zpp);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Attempt to create a new entry in a directory.  If the entry
  * already exists, truncate the file if permissible, else return
  * an error.  Return the ip of the created or truncated file.
  *
  *	IN:	dzp	- znode of directory to put new file entry in.
  *		name	- name of new file entry.
  *		vap	- attributes of new file.
  *		excl	- flag indicating exclusive or non-exclusive mode.
  *		mode	- mode to open file with.
  *		cr	- credentials of caller.
  *		flag	- file flag.
  *		vsecp	- ACL to be set
  *
  *	OUT:	zpp	- znode of created or truncated entry.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dzp - ctime|mtime updated if new entry created
  *	 zp - ctime|mtime always, atime if new
  */
 
 /* ARGSUSED */
 int
 zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
     int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	objset_t	*os;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid;
 	gid_t		gid;
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 	boolean_t	have_acl = B_FALSE;
 	boolean_t	waited = B_FALSE;
 	boolean_t	skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure the file system is at the proper version.
 	 */
 
 	gid = crgetgid(cr);
 	uid = crgetuid(cr);
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 top:
 	*zpp = NULL;
 	if (*name == '\0') {
 		/*
 		 * Null component name refers to the directory itself.
 		 */
 		zhold(dzp);
 		zp = dzp;
 		dl = NULL;
 		error = 0;
 	} else {
 		/* possible igrab(zp) */
 		int zflg = 0;
 
 		if (flag & FIGNORECASE)
 			zflg |= ZCILOOK;
 
 		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 		    NULL, NULL);
 		if (error) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			if (strcmp(name, "..") == 0)
 				error = SET_ERROR(EISDIR);
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	if (zp == NULL) {
 		uint64_t txtype;
 		uint64_t projid = ZFS_DEFAULT_PROJID;
 
 		/*
 		 * Create a new file object and update the directory
 		 * to reference it.
 		 */
 		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr))) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			goto out;
 		}
 
 		/*
 		 * We only support the creation of regular files in
 		 * extended attribute directories.
 		 */
 
 		if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			error = SET_ERROR(EINVAL);
 			goto out;
 		}
 
 		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
 		    cr, vsecp, &acl_ids)) != 0)
 			goto out;
 		have_acl = B_TRUE;
 
 		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
 			projid = zfs_inherit_projid(dzp);
 		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
 			zfs_acl_ids_free(&acl_ids);
 			error = SET_ERROR(EDQUOT);
 			goto out;
 		}
 
 		tx = dmu_tx_create(os);
 
 		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 		    ZFS_SA_BASE_ATTR_SIZE);
 
 		fuid_dirtied = zfsvfs->z_fuid_dirty;
 		if (fuid_dirtied)
 			zfs_fuid_txhold(zfsvfs, tx);
 		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 		if (!zfsvfs->z_use_sa &&
 		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 
 		error = dmu_tx_assign(tx,
 		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 		if (error) {
 			zfs_dirent_unlock(dl);
 			if (error == ERESTART) {
 				waited = B_TRUE;
 				dmu_tx_wait(tx);
 				dmu_tx_abort(tx);
 				goto top;
 			}
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_abort(tx);
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 		error = zfs_link_create(dl, zp, tx, ZNEW);
 		if (error != 0) {
 			/*
 			 * Since we failed to add the directory entry for it,
 			 * delete the newly created dnode.
 			 */
 			zfs_znode_delete(zp, tx);
 			remove_inode_hash(ZTOI(zp));
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_commit(tx);
 			goto out;
 		}
 
 		if (fuid_dirtied)
 			zfs_fuid_sync(zfsvfs, tx);
 
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
 		    vsecp, acl_ids.z_fuidp, vap);
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_commit(tx);
 	} else {
 		int aflags = (flag & O_APPEND) ? V_APPEND : 0;
 
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
 		have_acl = B_FALSE;
 
 		/*
 		 * A directory entry already exists for this name.
 		 */
 		/*
 		 * Can't truncate an existing file if in exclusive mode.
 		 */
 		if (excl) {
 			error = SET_ERROR(EEXIST);
 			goto out;
 		}
 		/*
 		 * Can't open a directory for writing.
 		 */
 		if (S_ISDIR(ZTOI(zp)->i_mode)) {
 			error = SET_ERROR(EISDIR);
 			goto out;
 		}
 		/*
 		 * Verify requested access to file.
 		 */
 		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
 			goto out;
 		}
 
 		mutex_enter(&dzp->z_lock);
 		dzp->z_seq++;
 		mutex_exit(&dzp->z_lock);
 
 		/*
 		 * Truncate regular files if requested.
 		 */
 		if (S_ISREG(ZTOI(zp)->i_mode) &&
 		    (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
 			/* we can't hold any locks when calling zfs_freesp() */
 			if (dl) {
 				zfs_dirent_unlock(dl);
 				dl = NULL;
 			}
 			error = zfs_freesp(zp, 0, 0, mode, TRUE);
 		}
 	}
 out:
 
 	if (dl)
 		zfs_dirent_unlock(dl);
 
 	if (error) {
 		if (zp)
 			zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 		*zpp = zp;
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /* ARGSUSED */
 int
 zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
     int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
 {
 	znode_t		*zp = NULL, *dzp = ITOZ(dip);
 	zfsvfs_t	*zfsvfs = ITOZSB(dip);
 	objset_t	*os;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid;
 	gid_t		gid;
 	zfs_acl_ids_t   acl_ids;
 	uint64_t	projid = ZFS_DEFAULT_PROJID;
 	boolean_t	fuid_dirtied;
 	boolean_t	have_acl = B_FALSE;
 	boolean_t	waited = B_FALSE;
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure the file system is at the proper version.
 	 */
 
 	gid = crgetgid(cr);
 	uid = crgetuid(cr);
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	os = zfsvfs->z_os;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 top:
 	*ipp = NULL;
 
 	/*
 	 * Create a new file object and update the directory
 	 * to reference it.
 	 */
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
 		goto out;
 	}
 
 	if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
 	    cr, vsecp, &acl_ids)) != 0)
 		goto out;
 	have_acl = B_TRUE;
 
 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
 		projid = zfs_inherit_projid(dzp);
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
 		zfs_acl_ids_free(&acl_ids);
 		error = SET_ERROR(EDQUOT);
 		goto out;
 	}
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	if (!zfsvfs->z_use_sa &&
 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 		    0, acl_ids.z_aclp->z_acl_bytes);
 	}
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	/* Add to unlinked set */
 	zp->z_unlinked = B_TRUE;
 	zfs_unlinked_add(zp, tx);
 	zfs_acl_ids_free(&acl_ids);
 	dmu_tx_commit(tx);
 out:
 
 	if (error) {
 		if (zp)
 			zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 		*ipp = ZTOI(zp);
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Remove an entry from a directory.
  *
  *	IN:	dzp	- znode of directory to remove entry from.
  *		name	- name of entry to remove.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	dzp - ctime|mtime
  *	 ip - ctime (if nlink > 0)
  */
 
 uint64_t null_xattr = 0;
 
 /*ARGSUSED*/
 int
 zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 {
 	znode_t		*zp;
 	znode_t		*xzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	uint64_t	acl_obj, xattr_obj;
 	uint64_t	xattr_obj_unlinked = 0;
 	uint64_t	obj = 0;
 	uint64_t	links;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	boolean_t	may_delete_now, delete_now = FALSE;
 	boolean_t	unlinked, toobig = FALSE;
 	uint64_t	txtype;
 	pathname_t	*realnmp = NULL;
 	pathname_t	realnm;
 	int		error;
 	int		zflg = ZEXISTS;
 	boolean_t	waited = B_FALSE;
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE) {
 		zflg |= ZCILOOK;
 		pn_alloc(&realnm);
 		realnmp = &realnm;
 	}
 
 top:
 	xattr_obj = 0;
 	xzp = NULL;
 	/*
 	 * Attempt to lock directory; fail if entry doesn't exist.
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, realnmp))) {
 		if (realnmp)
 			pn_free(realnmp);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
 		goto out;
 	}
 
 	/*
 	 * Need to use rmdir for removing directories.
 	 */
 	if (S_ISDIR(ZTOI(zp)->i_mode)) {
 		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
 	mutex_enter(&zp->z_lock);
 	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
 	    !zn_has_cached_data(zp, 0, LLONG_MAX);
 	mutex_exit(&zp->z_lock);
 
 	/*
 	 * We may delete the znode now, or we may put it in the unlinked set;
 	 * it depends on whether we're the last link, and on whether there are
 	 * other holds on the inode.  So we dmu_tx_hold() the right things to
 	 * allow for either case.
 	 */
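 	/*
 	 * As a sketch, the two outcomes prepared for here are:
 	 *
 	 *	last link, no other holds	// hold_free: destroy now
 	 *	otherwise			// unlinked set: defer destroy
 	 */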
 	obj = zp->z_id;
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	if (may_delete_now) {
 		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
 		/* if the file is too big, only hold_free a token amount */
 		dmu_tx_hold_free(tx, zp->z_id, 0,
 		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
 	}
 
 	/* are there any extended attributes? */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 	    &xattr_obj, sizeof (xattr_obj));
 	if (error == 0 && xattr_obj) {
 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
 		ASSERT0(error);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 
 	mutex_enter(&zp->z_lock);
 	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 	mutex_exit(&zp->z_lock);
 
 	/* charge as an update -- would be nice not to charge at all */
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	/*
 	 * Mark this transaction as typically resulting in a net free of space
 	 */
 	dmu_tx_mark_netfree(tx);
 
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(zp);
 			if (xzp)
 				zrele(xzp);
 			goto top;
 		}
 		if (realnmp)
 			pn_free(realnmp);
 		dmu_tx_abort(tx);
 		zrele(zp);
 		if (xzp)
 			zrele(xzp);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Remove the directory entry.
 	 */
 	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
 
 	if (error) {
 		dmu_tx_commit(tx);
 		goto out;
 	}
 
 	if (unlinked) {
 		/*
 		 * Hold z_lock so that we can make sure that the ACL obj
 		 * hasn't changed.  Could have been deleted due to
 		 * zfs_sa_upgrade().
 		 */
 		mutex_enter(&zp->z_lock);
 		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
 		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
 		    xattr_obj == xattr_obj_unlinked &&
 		    zfs_external_acl(zp) == acl_obj;
 	}
 
 	if (delete_now) {
 		if (xattr_obj_unlinked) {
 			ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
 			mutex_enter(&xzp->z_lock);
 			xzp->z_unlinked = B_TRUE;
 			clear_nlink(ZTOI(xzp));
 			links = 0;
 			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
 			    &links, sizeof (links), tx);
 			ASSERT3U(error, ==, 0);
 			mutex_exit(&xzp->z_lock);
 			zfs_unlinked_add(xzp, tx);
 
 			if (zp->z_is_sa)
 				error = sa_remove(zp->z_sa_hdl,
 				    SA_ZPL_XATTR(zfsvfs), tx);
 			else
 				error = sa_update(zp->z_sa_hdl,
 				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
 				    sizeof (uint64_t), tx);
 			ASSERT0(error);
 		}
 		/*
 		 * Add to the unlinked set because a new reference could be
 		 * taken concurrently resulting in a deferred destruction.
 		 */
 		zfs_unlinked_add(zp, tx);
 		mutex_exit(&zp->z_lock);
 	} else if (unlinked) {
 		mutex_exit(&zp->z_lock);
 		zfs_unlinked_add(zp, tx);
 	}
 
 	txtype = TX_REMOVE;
 	if (flags & FIGNORECASE)
 		txtype |= TX_CI;
 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
 
 	dmu_tx_commit(tx);
 out:
 	if (realnmp)
 		pn_free(realnmp);
 
 	zfs_dirent_unlock(dl);
 	zfs_znode_update_vfs(dzp);
 	zfs_znode_update_vfs(zp);
 
 	if (delete_now)
 		zrele(zp);
 	else
 		zfs_zrele_async(zp);
 
 	if (xzp) {
 		zfs_znode_update_vfs(xzp);
 		zfs_zrele_async(xzp);
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Create a new directory and insert it into dzp using the name
  * provided.  Return a pointer to the inserted directory.
  *
  *	IN:	dzp	- znode of directory to add subdir to.
  *		dirname	- name of new directory.
  *		vap	- attributes of new directory.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *		vsecp	- ACL to be set
  *
  *	OUT:	zpp	- znode of created directory.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	dzp - ctime|mtime updated
  *	zpp - ctime|mtime|atime updated
  */
 /*ARGSUSED*/
 int
 zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
     cred_t *cr, int flags, vsecattr_t *vsecp)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	uint64_t	txtype;
 	dmu_tx_t	*tx;
 	int		error;
 	int		zf = ZNEW;
 	uid_t		uid;
 	gid_t		gid = crgetgid(cr);
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 	boolean_t	waited = B_FALSE;
 
 	ASSERT(S_ISDIR(vap->va_mode));
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure the file system is at the proper version.
 	 */
 
 	uid = crgetuid(cr);
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	if (dirname == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
 	    vsecp, &acl_ids)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	/*
 	 * First make sure the new directory doesn't exist.
 	 *
 	 * Existence is checked first to make sure we don't return
 	 * EACCES instead of EEXIST, which can cause some applications
 	 * to fail.
 	 */
 top:
 	*zpp = NULL;
 
 	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
 	    NULL, NULL))) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EDQUOT));
 	}
 
 	/*
 	 * Add a new entry to the directory.
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Create new node.
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	/*
 	 * Now put new name in parent dir.
 	 */
 	error = zfs_link_create(dl, zp, tx, ZNEW);
 	if (error != 0) {
 		zfs_znode_delete(zp, tx);
 		remove_inode_hash(ZTOI(zp));
 		goto out;
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	*zpp = zp;
 
 	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
 	if (flags & FIGNORECASE)
 		txtype |= TX_CI;
 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
 	    acl_ids.z_fuidp, vap);
 
 out:
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	if (error != 0) {
 		zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Remove a directory subdir entry.  If the current working
  * directory is the same as the subdir to be removed, the
  * remove will fail.
  *
  *	IN:	dzp	- znode of directory to remove from.
  *		name	- name of directory to be removed.
  *		cwd	- inode of current working directory.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dzp - ctime|mtime updated
  */
 /*ARGSUSED*/
 int
 zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
     int flags)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	int		zflg = ZEXISTS;
 	boolean_t	waited = B_FALSE;
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 top:
 	zp = NULL;
 
 	/*
 	 * Attempt to lock directory; fail if entry doesn't exist.
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, NULL))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
 		goto out;
 	}
 
 	if (!S_ISDIR(ZTOI(zp)->i_mode)) {
 		error = SET_ERROR(ENOTDIR);
 		goto out;
 	}
 
 	if (zp == cwd) {
 		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	/*
 	 * Grab a lock on the directory to make sure that no one is
 	 * trying to add (or look up) entries while we are removing it.
 	 */
 	rw_enter(&zp->z_name_lock, RW_WRITER);
 
 	/*
 	 * Grab a lock on the parent pointer to make sure we play well
 	 * with the treewalk and directory rename code.
 	 */
 	rw_enter(&zp->z_parent_lock, RW_WRITER);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		rw_exit(&zp->z_parent_lock);
 		rw_exit(&zp->z_name_lock);
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(zp);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zrele(zp);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
 
 	if (error == 0) {
 		uint64_t txtype = TX_RMDIR;
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
 		    B_FALSE);
 	}
 
 	dmu_tx_commit(tx);
 
 	rw_exit(&zp->z_parent_lock);
 	rw_exit(&zp->z_name_lock);
 out:
 	zfs_dirent_unlock(dl);
 
 	zfs_znode_update_vfs(dzp);
 	zfs_znode_update_vfs(zp);
 	zrele(zp);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Read directory entries from the given directory cursor position and emit
  * name and position for each entry.
  *
  *	IN:	ip	- inode of directory to read.
  *		ctx	- directory entry context.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - atime updated
  *
  * Note that the low 4 bits of the cookie returned by zap are always zero.
  * This allows us to use the low range for "special" directory entries:
  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
  * we use the offset 2 for the '.zfs' directory.
  */
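 
 /*
  * Cursor values as seen by zfs_readdir(), a sketch of the scheme
  * described above:
  *
  *	ctx->pos == 0	// "."   (this directory)
  *	ctx->pos == 1	// ".."  (parent directory)
  *	ctx->pos == 2	// ".zfs", when visible at the root
  *	larger values	// real entries; a zap_cursor_serialize() cookie
  */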
 /* ARGSUSED */
 int
 zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	objset_t	*os;
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	int		error;
 	uint8_t		prefetch;
 	uint8_t		type;
 	int		done = 0;
 	uint64_t	parent;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0)
 		goto out;
 
 	/*
 	 * Quit if directory has been removed (posix)
 	 */
 	if (zp->z_unlinked)
 		goto out;
 
 	error = 0;
 	os = zfsvfs->z_os;
 	offset = ctx->pos;
 	prefetch = zp->z_zn_prefetch;
 
 	/*
 	 * Initialize the iterator cursor.
 	 */
 	if (offset <= 3) {
 		/*
 		 * Start iteration from the beginning of the directory.
 		 */
 		zap_cursor_init(&zc, os, zp->z_id);
 	} else {
 		/*
 		 * The offset is a serialized cursor.
 		 */
 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
 	}
 
 	/*
 	 * Transform to file-system independent format
 	 */
 	while (!done) {
 		uint64_t objnum;
 		/*
 		 * Special case `.', `..', and `.zfs'.
 		 */
 		if (offset == 0) {
 			(void) strcpy(zap.za_name, ".");
 			zap.za_normalization_conflict = 0;
 			objnum = zp->z_id;
 			type = DT_DIR;
 		} else if (offset == 1) {
 			(void) strcpy(zap.za_name, "..");
 			zap.za_normalization_conflict = 0;
 			objnum = parent;
 			type = DT_DIR;
 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
 			zap.za_normalization_conflict = 0;
 			objnum = ZFSCTL_INO_ROOT;
 			type = DT_DIR;
 		} else {
 			/*
 			 * Grab next entry.
 			 */
 			if ((error = zap_cursor_retrieve(&zc, &zap))) {
 				if (error == ENOENT)
 					break;
 				else
 					goto update;
 			}
 
 			/*
 			 * Allow multiple entries provided the first entry is
 			 * the object id.  Non-zpl consumers may safely make
 			 * use of the additional space.
 			 *
 			 * XXX: This should be a feature flag for compatibility
 			 */
 			if (zap.za_integer_length != 8 ||
 			    zap.za_num_integers == 0) {
 				cmn_err(CE_WARN, "zap_readdir: bad directory "
 				    "entry, obj = %lld, offset = %lld, "
 				    "length = %d, num = %lld\n",
 				    (u_longlong_t)zp->z_id,
 				    (u_longlong_t)offset,
 				    zap.za_integer_length,
 				    (u_longlong_t)zap.za_num_integers);
 				error = SET_ERROR(ENXIO);
 				goto update;
 			}
 
 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 		}
 
 		done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name),
 		    objnum, type);
 		if (done)
 			break;
 
 		/* Prefetch znode */
 		if (prefetch) {
 			dmu_prefetch(os, objnum, 0, 0, 0,
 			    ZIO_PRIORITY_SYNC_READ);
 		}
 
 		/*
 		 * Move to the next entry, fill in the previous offset.
 		 */
 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
 			zap_cursor_advance(&zc);
 			offset = zap_cursor_serialize(&zc);
 		} else {
 			offset += 1;
 		}
 		ctx->pos = offset;
 	}
 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
 
 update:
 	zap_cursor_fini(&zc);
 	if (error == ENOENT)
 		error = 0;
 out:
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Get the basic file attributes and place them in the provided kstat
  * structure.  The inode is assumed to be the authoritative source
  * for most of the attributes.  However, the znode currently has the
  * authoritative atime, blksize, and block count.
  *
  *	IN:	ip	- inode of file.
  *
  *	OUT:	sp	- kstat values.
  *
  *	RETURN:	0 (always succeeds)
  */
 /* ARGSUSED */
 int
 #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
 zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
     struct kstat *sp)
 #else
 zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
 #endif
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint32_t blksize;
 	u_longlong_t nblocks;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	mutex_enter(&zp->z_lock);
 
 #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
 	zpl_generic_fillattr(user_ns, request_mask, ip, sp);
 #else
 	zpl_generic_fillattr(user_ns, ip, sp);
 #endif
 	/*
 	 * +1 link count for root inode with visible '.zfs' directory.
 	 */
 	if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
 		if (sp->nlink < ZFS_LINK_MAX)
 			sp->nlink++;
 
 	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
 	sp->blksize = blksize;
 	sp->blocks = nblocks;
 
 	if (unlikely(zp->z_blksz == 0)) {
 		/*
 		 * Block size hasn't been set; suggest maximal I/O transfers.
 		 */
 		sp->blksize = zfsvfs->z_max_blksz;
 	}
 
 	mutex_exit(&zp->z_lock);
 
 	/*
 	 * Required to prevent the NFS client from detecting different inode
 	 * numbers of the snapshot root dentry before and after snapshot mount.
 	 */
 	if (zfsvfs->z_issnap) {
 		if (ip->i_sb->s_root->d_inode == ip)
 			sp->ino = ZFSCTL_INO_SNAPDIRS -
 			    dmu_objset_id(zfsvfs->z_os);
 	}
 
 	ZFS_EXIT(zfsvfs);
 
 	return (0);
 }
 
 /*
  * When changing a file's user/group/project, we need to handle not only
  * the main object that is assigned to the file directly, but also the
  * objects that are used by the file via its hidden xattr directory.
  *
  * Because the xattr directory may contain many EA entries, it may be
  * impossible to change all of them within the single transaction that
  * changes the main object's user/group/project attributes.  We therefore
  * have to change them via multiple independent transactions, one by one.
  * This may not be an ideal solution, but we have no better idea yet.
  */
 static int
 zfs_setattr_dir(znode_t *dzp)
 {
 	struct inode	*dxip = ZTOI(dzp);
 	struct inode	*xip = NULL;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	objset_t	*os = zfsvfs->z_os;
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	zfs_dirlock_t	*dl;
 	znode_t		*zp = NULL;
 	dmu_tx_t	*tx = NULL;
 	uint64_t	uid, gid;
 	sa_bulk_attr_t	bulk[4];
 	int		count;
 	int		err;
 
 	zap_cursor_init(&zc, os, dzp->z_id);
 	while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) {
 		count = 0;
 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
 			err = ENXIO;
 			break;
 		}
 
 		err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp,
 		    ZEXISTS, NULL, NULL);
 		if (err == ENOENT)
 			goto next;
 		if (err)
 			break;
 
 		xip = ZTOI(zp);
 		if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) &&
 		    KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) &&
 		    zp->z_projid == dzp->z_projid)
 			goto next;
 
 		tx = dmu_tx_create(os);
 		if (!(zp->z_pflags & ZFS_PROJID))
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		else
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 
 		err = dmu_tx_assign(tx, TXG_WAIT);
 		if (err)
 			break;
 
 		mutex_enter(&dzp->z_lock);
 
 		if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) {
 			xip->i_uid = dxip->i_uid;
 			uid = zfs_uid_read(dxip);
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 			    &uid, sizeof (uid));
 		}
 
 		if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) {
 			xip->i_gid = dxip->i_gid;
 			gid = zfs_gid_read(dxip);
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
 			    &gid, sizeof (gid));
 		}
 
 		if (zp->z_projid != dzp->z_projid) {
 			if (!(zp->z_pflags & ZFS_PROJID)) {
 				zp->z_pflags |= ZFS_PROJID;
 				SA_ADD_BULK_ATTR(bulk, count,
 				    SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags,
 				    sizeof (zp->z_pflags));
 			}
 
 			zp->z_projid = dzp->z_projid;
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs),
 			    NULL, &zp->z_projid, sizeof (zp->z_projid));
 		}
 
 		mutex_exit(&dzp->z_lock);
 
 		if (likely(count > 0)) {
 			err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 			dmu_tx_commit(tx);
 		} else {
 			dmu_tx_abort(tx);
 		}
 		tx = NULL;
 		if (err != 0 && err != ENOENT)
 			break;
 
 next:
 		if (zp) {
 			zrele(zp);
 			zp = NULL;
 			zfs_dirent_unlock(dl);
 		}
 		zap_cursor_advance(&zc);
 	}
 
 	if (tx)
 		dmu_tx_abort(tx);
 	if (zp) {
 		zrele(zp);
 		zfs_dirent_unlock(dl);
 	}
 	zap_cursor_fini(&zc);
 
 	return (err == ENOENT ? 0 : err);
 }
 
 /*
  * Set the file attributes to the values contained in the
  * vattr structure.
  *
  *	IN:	zp	- znode of file to be modified.
  *		vap	- new attribute values.
  *			  If ATTR_XVATTR set, then optional attrs are being set
  *		flags	- ATTR_UTIME set if non-default time values provided.
  *			- ATTR_NOACLCHECK (CIFS context only).
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - ctime updated, mtime updated if size changed.
  */
 /* ARGSUSED */
 int
 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 {
 	struct inode	*ip;
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	objset_t	*os = zfsvfs->z_os;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	vattr_t		oldva;
 	xvattr_t	*tmpxvattr;
 	uint_t		mask = vap->va_mask;
 	uint_t		saved_mask = 0;
 	int		trim_mask = 0;
 	uint64_t	new_mode;
 	uint64_t	new_kuid = 0, new_kgid = 0, new_uid, new_gid;
 	uint64_t	xattr_obj;
 	uint64_t	mtime[2], ctime[2], atime[2];
 	uint64_t	projid = ZFS_INVALID_PROJID;
 	znode_t		*attrzp;
 	int		need_policy = FALSE;
 	int		err, err2 = 0;
 	zfs_fuid_info_t *fuidp = NULL;
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t	*xoap;
 	zfs_acl_t	*aclp;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	boolean_t	fuid_dirtied = B_FALSE;
 	boolean_t	handle_eadir = B_FALSE;
 	sa_bulk_attr_t	*bulk, *xattr_bulk;
 	int		count = 0, xattr_count = 0, bulks = 8;
 
 	if (mask == 0)
 		return (0);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	ip = ZTOI(zp);
 
 	/*
 	 * If this is a xvattr_t, then get a pointer to the structure of
 	 * optional attributes.  If this is NULL, then we have a vattr_t.
 	 */
 	xoap = xva_getxoptattr(xvap);
 	if (xoap != NULL && (mask & ATTR_XVATTR)) {
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
 				ZFS_EXIT(zfsvfs);
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
 				ZFS_EXIT(zfsvfs);
 				return (SET_ERROR(EINVAL));
 			}
 
 			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
 				projid = ZFS_INVALID_PROJID;
 			else
 				need_policy = TRUE;
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
 		    (xoap->xoa_projinherit !=
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(ENOTSUP));
 		}
 	}
 
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * Make sure that if we have ephemeral uid/gid or xvattr specified
 	 * that the file system is at the proper version level.
 	 */
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & ATTR_XVATTR))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP);
 	xva_init(tmpxvattr);
 
 	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
 	xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
 
 	/*
 	 * Immutable files can only alter immutable bit and atime
 	 */
 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
 	    ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) ||
 	    ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
 		err = SET_ERROR(EPERM);
 		goto out3;
 	}
 
 	if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
 		err = SET_ERROR(EPERM);
 		goto out3;
 	}
 
 	/*
 	 * Verify that the timestamps don't overflow 32 bits.
 	 * ZFS can handle large timestamps, but 32-bit syscalls can't
 	 * handle times greater than 2039.  This check should be removed
 	 * once large timestamps are fully supported.
 	 */
 	if (mask & (ATTR_ATIME | ATTR_MTIME)) {
 		if (((mask & ATTR_ATIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_atime)) ||
 		    ((mask & ATTR_MTIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_mtime))) {
 			err = SET_ERROR(EOVERFLOW);
 			goto out3;
 		}
 	}
 
 top:
 	attrzp = NULL;
 	aclp = NULL;
 
 	/* Can this be moved to before the top label? */
 	if (zfs_is_readonly(zfsvfs)) {
 		err = SET_ERROR(EROFS);
 		goto out3;
 	}
 
 	/*
 	 * First validate permissions
 	 */
 
 	if (mask & ATTR_SIZE) {
 		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
 		if (err)
 			goto out3;
 
 		/*
 		 * XXX - Note, we are not providing any open
 		 * mode flags here (like FNDELAY), so we may
 		 * block if there are locks present... this
 		 * should be addressed in openat().
 		 */
 		/* XXX - would it be OK to generate a log record here? */
 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
 		if (err)
 			goto out3;
 	}
 
 	if (mask & (ATTR_ATIME|ATTR_MTIME) ||
 	    ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
 		    skipaclchk, cr);
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID)) {
 		int	idmask = (mask & (ATTR_UID|ATTR_GID));
 		int	take_owner;
 		int	take_group;
 
 		/*
 		 * NOTE: even if a new mode is being set,
 		 * we may clear S_ISUID/S_ISGID bits.
 		 */
 
 		if (!(mask & ATTR_MODE))
 			vap->va_mode = zp->z_mode;
 
 		/*
 		 * Take ownership or chgrp to group we are a member of
 		 */
 
 		take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
 		take_group = (mask & ATTR_GID) &&
 		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
 
 		/*
 		 * If both ATTR_UID and ATTR_GID are set then take_owner and
 		 * take_group must both be set in order to allow taking
 		 * ownership.
 		 *
 		 * Otherwise, send the check through secpolicy_vnode_setattr()
 		 *
 		 */
 
 		if (((idmask == (ATTR_UID|ATTR_GID)) &&
 		    take_owner && take_group) ||
 		    ((idmask == ATTR_UID) && take_owner) ||
 		    ((idmask == ATTR_GID) && take_group)) {
 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
 			    skipaclchk, cr) == 0) {
 				/*
 				 * Remove setuid/setgid for non-privileged users
 				 */
 				(void) secpolicy_setid_clear(vap, cr);
 				trim_mask = (mask & (ATTR_UID|ATTR_GID));
 			} else {
 				need_policy = TRUE;
 			}
 		} else {
 			need_policy = TRUE;
 		}
 	}
 
 	mutex_enter(&zp->z_lock);
 	oldva.va_mode = zp->z_mode;
 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
 	if (mask & ATTR_XVATTR) {
 		/*
 		 * Update xvattr mask to include only those attributes
 		 * that are actually changing.
 		 *
 		 * The bits will be restored prior to actually setting
 		 * the attributes, so the caller thinks they were set.
 		 */
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			if (xoap->xoa_appendonly !=
 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
 				XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
 			if (xoap->xoa_projinherit !=
 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
 				XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			if (xoap->xoa_nounlink !=
 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
 				XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			if (xoap->xoa_immutable !=
 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
 				XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			if (xoap->xoa_nodump !=
 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NODUMP);
 				XVA_SET_REQ(tmpxvattr, XAT_NODUMP);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			if (xoap->xoa_av_modified !=
 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
 				XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 			if ((!S_ISREG(ip->i_mode) &&
 			    xoap->xoa_av_quarantined) ||
 			    xoap->xoa_av_quarantined !=
 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
 				XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 			mutex_exit(&zp->z_lock);
 			err = SET_ERROR(EPERM);
 			goto out3;
 		}
 
 		if (need_policy == FALSE &&
 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
 			need_policy = TRUE;
 		}
 	}
 
 	mutex_exit(&zp->z_lock);
 
 	if (mask & ATTR_MODE) {
 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
 			err = secpolicy_setid_setsticky_clear(ip, vap,
 			    &oldva, cr);
 			if (err)
 				goto out3;
 
 			trim_mask |= ATTR_MODE;
 		} else {
 			need_policy = TRUE;
 		}
 	}
 
 	if (need_policy) {
 		/*
 		 * If trim_mask is set then take ownership has been
 		 * granted, or write_acl is present and the user has the
 		 * ability to modify the mode.  In that case remove
 		 * UID|GID and/or MODE from the mask so that
 		 * secpolicy_vnode_setattr() doesn't revoke it.
 		 */
 
 		if (trim_mask) {
 			saved_mask = vap->va_mask;
 			vap->va_mask &= ~trim_mask;
 		}
 		err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
 		if (err)
 			goto out3;
 
 		if (trim_mask)
 			vap->va_mask |= saved_mask;
 	}
 
 	/*
 	 * secpolicy_vnode_setattr() or take ownership may have
 	 * changed va_mask.
 	 */
 	mask = vap->va_mask;
 
 	if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) {
 		handle_eadir = B_TRUE;
 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 		    &xattr_obj, sizeof (xattr_obj));
 
 		if (err == 0 && xattr_obj) {
 			err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp);
 			if (err)
 				goto out2;
 		}
 		if (mask & ATTR_UID) {
 			new_kuid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
 			if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
 			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
 			    new_kuid)) {
 				if (attrzp)
 					zrele(attrzp);
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (mask & ATTR_GID) {
 			new_kgid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
 			if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
 			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 			    new_kgid)) {
 				if (attrzp)
 					zrele(attrzp);
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (projid != ZFS_INVALID_PROJID &&
 		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
 			if (attrzp)
 				zrele(attrzp);
 			err = EDQUOT;
 			goto out2;
 		}
 	}
 	tx = dmu_tx_create(os);
 
 	if (mask & ATTR_MODE) {
 		uint64_t pmode = zp->z_mode;
 		uint64_t acl_obj;
 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
 
 		if (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_RESTRICTED &&
 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
 			err = EPERM;
 			goto out;
 		}
 
 		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
 			goto out;
 
 		mutex_enter(&zp->z_lock);
 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
 			/*
 			 * Are we upgrading ACL from old V0 format
 			 * to V1 format?
 			 */
 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
 			    zfs_znode_acl_version(zp) ==
 			    ZFS_ACL_VERSION_INITIAL) {
 				dmu_tx_hold_free(tx, acl_obj, 0,
 				    DMU_OBJECT_END);
 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 				    0, aclp->z_acl_bytes);
 			} else {
 				dmu_tx_hold_write(tx, acl_obj, 0,
 				    aclp->z_acl_bytes);
 			}
 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, aclp->z_acl_bytes);
 		}
 		mutex_exit(&zp->z_lock);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 	} else {
 		if (((mask & ATTR_XVATTR) &&
 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
 		    (projid != ZFS_INVALID_PROJID &&
 		    !(zp->z_pflags & ZFS_PROJID)))
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		else
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	}
 
 	if (attrzp) {
 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
 	}
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err)
 		goto out;
 
 	count = 0;
 	/*
 	 * Set each attribute requested.
 	 * We group settings according to the locks they need to acquire.
 	 *
 	 * Note: you cannot set ctime directly, although it will be
 	 * updated as a side-effect of calling this function.
 	 */
 
 	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
 		/*
 		 * For an existing object upgraded from an old system, the
 		 * on-disk layout has no slot for the project ID attribute.
 		 * But the quota accounting logic needs to access related
 		 * slots directly by offset, so we need to adjust the old
 		 * object's layout to put the project ID at a unified,
 		 * fixed offset.
 		 */
 		if (attrzp)
 			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
 		if (err == 0)
 			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
 
 		if (unlikely(err == EEXIST))
 			err = 0;
 		else if (err != 0)
 			goto out;
 		else
 			projid = ZFS_INVALID_PROJID;
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 		mutex_enter(&zp->z_acl_lock);
 	mutex_enter(&zp->z_lock);
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 
 	if (attrzp) {
 		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 			mutex_enter(&attrzp->z_acl_lock);
 		mutex_enter(&attrzp->z_lock);
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
 		    sizeof (attrzp->z_pflags));
 		if (projid != ZFS_INVALID_PROJID) {
 			attrzp->z_projid = projid;
 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
 			    sizeof (attrzp->z_projid));
 		}
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID)) {
 
 		if (mask & ATTR_UID) {
 			ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
 			new_uid = zfs_uid_read(ZTOI(zp));
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 			    &new_uid, sizeof (new_uid));
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
 				    sizeof (new_uid));
 				ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
 			}
 		}
 
 		if (mask & ATTR_GID) {
 			ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
 			new_gid = zfs_gid_read(ZTOI(zp));
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
 			    NULL, &new_gid, sizeof (new_gid));
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
 				    sizeof (new_gid));
 				ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
 			}
 		}
 		if (!(mask & ATTR_MODE)) {
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
 			    NULL, &new_mode, sizeof (new_mode));
 			new_mode = zp->z_mode;
 		}
 		err = zfs_acl_chown_setattr(zp);
 		ASSERT(err == 0);
 		if (attrzp) {
 			err = zfs_acl_chown_setattr(attrzp);
 			ASSERT(err == 0);
 		}
 	}
 
 	if (mask & ATTR_MODE) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 		    &new_mode, sizeof (new_mode));
 		zp->z_mode = ZTOI(zp)->i_mode = new_mode;
 		ASSERT3P(aclp, !=, NULL);
 		err = zfs_aclset_common(zp, aclp, cr, tx);
 		ASSERT0(err);
 		if (zp->z_acl_cached)
 			zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = aclp;
 		aclp = NULL;
 	}
 
 	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
 		zp->z_atime_dirty = B_FALSE;
-		ZFS_TIME_ENCODE(&ip->i_atime, atime);
+		inode_timespec_t tmp_atime = zpl_inode_get_atime(ip);
+		ZFS_TIME_ENCODE(&tmp_atime, atime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 		    &atime, sizeof (atime));
 	}
 
 	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-		ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
-		    vap->va_mtime, ZTOI(zp));
+		zpl_inode_set_mtime_to_ts(ZTOI(zp),
+		    zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    mtime, sizeof (mtime));
 	}
 
 	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
 		zpl_inode_set_ctime_to_ts(ZTOI(zp),
 		    zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    ctime, sizeof (ctime));
 	}
 
 	if (projid != ZFS_INVALID_PROJID) {
 		zp->z_projid = projid;
 		SA_ADD_BULK_ATTR(bulk, count,
 		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
 		    sizeof (zp->z_projid));
 	}
 
 	if (attrzp && mask) {
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
 		    sizeof (ctime));
 	}
 
 	/*
 	 * Do this after setting timestamps to prevent timestamp
 	 * update from toggling the bit.
 	 */
 
 	if (xoap && (mask & ATTR_XVATTR)) {
 
 		/*
 		 * Restore the trimmed-off masks so that return masks
 		 * can be set for the caller.
 		 */
 
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) {
 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) {
 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) {
 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) {
 			XVA_SET_REQ(xvap, XAT_NODUMP);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) {
 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) {
 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) {
 			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
 			ASSERT(S_ISREG(ip->i_mode));
 
 		zfs_xvattr_set(zp, xvap, tx);
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	if (mask != 0)
 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
 
 	mutex_exit(&zp->z_lock);
 	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 		mutex_exit(&zp->z_acl_lock);
 
 	if (attrzp) {
 		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 			mutex_exit(&attrzp->z_acl_lock);
 		mutex_exit(&attrzp->z_lock);
 	}
 out:
 	if (err == 0 && xattr_count > 0) {
 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
 		    xattr_count, tx);
 		ASSERT(err2 == 0);
 	}
 
 	if (aclp)
 		zfs_acl_free(aclp);
 
 	if (fuidp) {
 		zfs_fuid_info_free(fuidp);
 		fuidp = NULL;
 	}
 
 	if (err) {
 		dmu_tx_abort(tx);
 		if (attrzp)
 			zrele(attrzp);
 		if (err == ERESTART)
 			goto top;
 	} else {
 		if (count > 0)
 			err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		dmu_tx_commit(tx);
 		if (attrzp) {
 			if (err2 == 0 && handle_eadir)
 				err2 = zfs_setattr_dir(attrzp);
 			zrele(attrzp);
 		}
 		zfs_znode_update_vfs(zp);
 	}
 
 out2:
 	if (os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 out3:
 	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(tmpxvattr, sizeof (xvattr_t));
 	ZFS_EXIT(zfsvfs);
 	return (err);
 }
 
 typedef struct zfs_zlock {
 	krwlock_t	*zl_rwlock;	/* lock we acquired */
 	znode_t		*zl_znode;	/* znode we held */
 	struct zfs_zlock *zl_next;	/* next in list */
 } zfs_zlock_t;
 
 /*
  * Drop locks and release vnodes that were held by zfs_rename_lock().
  */
 static void
 zfs_rename_unlock(zfs_zlock_t **zlpp)
 {
 	zfs_zlock_t *zl;
 
 	while ((zl = *zlpp) != NULL) {
 		if (zl->zl_znode != NULL)
 			zfs_zrele_async(zl->zl_znode);
 		rw_exit(zl->zl_rwlock);
 		*zlpp = zl->zl_next;
 		kmem_free(zl, sizeof (*zl));
 	}
 }
 
 /*
  * Search back through the directory tree, using the ".." entries.
  * Lock each directory in the chain to prevent concurrent renames.
  * Fail any attempt to move a directory into one of its own descendants.
  * XXX - z_parent_lock can overlap with map or grow locks
  */
 static int
 zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
 {
 	zfs_zlock_t	*zl;
 	znode_t		*zp = tdzp;
 	uint64_t	rootid = ZTOZSB(zp)->z_root;
 	uint64_t	oidp = zp->z_id;
 	krwlock_t	*rwlp = &szp->z_parent_lock;
 	krw_t		rw = RW_WRITER;
 
 	/*
 	 * First pass write-locks szp and compares to zp->z_id.
 	 * Later passes read-lock zp and compare to zp->z_parent.
 	 */
 	do {
 		if (!rw_tryenter(rwlp, rw)) {
 			/*
 			 * Another thread is renaming in this path.
 			 * Note that if we are a WRITER, we don't have any
 			 * parent_locks held yet.
 			 */
 			if (rw == RW_READER && zp->z_id > szp->z_id) {
 				/*
 				 * Drop our locks and restart
 				 */
 				zfs_rename_unlock(&zl);
 				*zlpp = NULL;
 				zp = tdzp;
 				oidp = zp->z_id;
 				rwlp = &szp->z_parent_lock;
 				rw = RW_WRITER;
 				continue;
 			} else {
 				/*
 				 * Wait for other thread to drop its locks
 				 */
 				rw_enter(rwlp, rw);
 			}
 		}
 
 		zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
 		zl->zl_rwlock = rwlp;
 		zl->zl_znode = NULL;
 		zl->zl_next = *zlpp;
 		*zlpp = zl;
 
 		if (oidp == szp->z_id)		/* We're a descendant of szp */
 			return (SET_ERROR(EINVAL));
 
 		if (oidp == rootid)		/* We've hit the top */
 			return (0);
 
 		if (rw == RW_READER) {		/* i.e. not the first pass */
 			int error = zfs_zget(ZTOZSB(zp), oidp, &zp);
 			if (error)
 				return (error);
 			zl->zl_znode = zp;
 		}
 		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(zp)),
 		    &oidp, sizeof (oidp));
 		rwlp = &zp->z_parent_lock;
 		rw = RW_READER;
 
 	} while (zp->z_id != sdzp->z_id);
 
 	return (0);
 }
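
Ignoring the locking and restart logic, the ancestry walk above reduces to the following
(a simplified sketch; zfs_is_descendant_sketch is a hypothetical helper, not part of this
patch, and it walks all the way to the root rather than stopping at sdzp):

static int
zfs_is_descendant_sketch(zfsvfs_t *zfsvfs, uint64_t src_obj, uint64_t tdir_obj)
{
	uint64_t obj = tdir_obj;

	while (obj != zfsvfs->z_root) {
		znode_t *zp;
		int error;

		if (obj == src_obj)	/* target dir lives inside the subtree */
			return (SET_ERROR(EINVAL));
		if ((error = zfs_zget(zfsvfs, obj, &zp)) != 0)
			return (error);
		/* Follow the ".." entry stored in the PARENT attribute. */
		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
		    &obj, sizeof (obj));
		zrele(zp);
	}
	return (0);
}
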
 
 /*
  * Move an entry from the provided source directory to the target
  * directory.  Change the entry name as indicated.
  *
  *	IN:	sdzp	- Source directory containing the "old entry".
  *		snm	- Old entry name.
  *		tdzp	- Target directory to contain the "new entry".
  *		tnm	- New entry name.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	sdzp,tdzp - ctime|mtime updated
  */
 /*ARGSUSED*/
 int
 zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
     cred_t *cr, int flags)
 {
 	znode_t		*szp, *tzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(sdzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*sdl, *tdl;
 	dmu_tx_t	*tx;
 	zfs_zlock_t	*zl;
 	int		cmp, serr, terr;
 	int		error = 0;
 	int		zflg = 0;
 	boolean_t	waited = B_FALSE;
 
 	if (snm == NULL || tnm == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(sdzp);
 	zilog = zfsvfs->z_log;
 
 	ZFS_VERIFY_ZP(tdzp);
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
 	 * super blocks.
 	 */
 	if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
 	    zfsctl_is_node(ZTOI(tdzp))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 top:
 	szp = NULL;
 	tzp = NULL;
 	zl = NULL;
 
 	/*
 	 * This is to prevent the creation of links into attribute space
 	 * by renaming a linked file into/out of an attribute directory.
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Lock source and target directory entries.  To prevent deadlock,
 	 * a lock ordering must be defined.  We lock the directory with
 	 * the smallest object id first, or if it's a tie, the one with
 	 * the lexically first name.
 	 */
 	if (sdzp->z_id < tdzp->z_id) {
 		cmp = -1;
 	} else if (sdzp->z_id > tdzp->z_id) {
 		cmp = 1;
 	} else {
 		/*
 		 * First compare the two name arguments without
 		 * considering any case folding.
 		 */
 		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
 
 		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
 		ASSERT(error == 0 || !zfsvfs->z_utf8);
 		if (cmp == 0) {
 			/*
 			 * POSIX: "If the old argument and the new argument
 			 * both refer to links to the same existing file,
 			 * the rename() function shall return successfully
 			 * and perform no other action."
 			 */
 			ZFS_EXIT(zfsvfs);
 			return (0);
 		}
 		/*
 		 * If the file system is case-folding, then we may
 		 * have some more checking to do.  A case-folding file
 		 * system is either supporting mixed case sensitivity
 		 * access or is completely case-insensitive.  Note
 		 * that the file system is always case preserving.
 		 *
 		 * In mixed sensitivity mode case sensitive behavior
 		 * is the default.  FIGNORECASE must be used to
 		 * explicitly request case insensitive behavior.
 		 *
 		 * If the source and target names provided differ only
 		 * by case (e.g., a request to rename 'tim' to 'Tim'),
 		 * we will treat this as a special case in the
 		 * case-insensitive mode: as long as the source name
 		 * is an exact match, we will allow this to proceed as
 		 * a name-change request.
 		 */
 		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
 		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
 		    flags & FIGNORECASE)) &&
 		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
 		    &error) == 0) {
 			/*
 			 * case preserving rename request, require exact
 			 * name matches
 			 */
 			zflg |= ZCIEXACT;
 			zflg &= ~ZCILOOK;
 		}
 	}
 
 	/*
 	 * If the source and destination directories are the same, we should
 	 * grab the z_name_lock of that directory only once.
 	 */
 	if (sdzp == tdzp) {
 		zflg |= ZHAVELOCK;
 		rw_enter(&sdzp->z_name_lock, RW_READER);
 	}
 
 	if (cmp < 0) {
 		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
 		    ZEXISTS | zflg, NULL, NULL);
 		terr = zfs_dirent_lock(&tdl,
 		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
 	} else {
 		terr = zfs_dirent_lock(&tdl,
 		    tdzp, tnm, &tzp, zflg, NULL, NULL);
 		serr = zfs_dirent_lock(&sdl,
 		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
 		    NULL, NULL);
 	}
 
 	if (serr) {
 		/*
 		 * Source entry invalid or not there.
 		 */
 		if (!terr) {
 			zfs_dirent_unlock(tdl);
 			if (tzp)
 				zrele(tzp);
 		}
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (strcmp(snm, "..") == 0)
 			serr = EINVAL;
 		ZFS_EXIT(zfsvfs);
 		return (serr);
 	}
 	if (terr) {
 		zfs_dirent_unlock(sdl);
 		zrele(szp);
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (strcmp(tnm, "..") == 0)
 			terr = EINVAL;
 		ZFS_EXIT(zfsvfs);
 		return (terr);
 	}
 
 	/*
 	 * If we are using project inheritance, meaning the directory has
 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
 	 * not only the project ID but also the ZFS_PROJINHERIT flag.  In
 	 * that case, we only allow renames into our tree when the project
 	 * IDs are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	/*
 	 * Must have write access at the source to remove the old entry
 	 * and write access at the target to create the new entry.
 	 * Note that if target and source are the same, this can be
 	 * done in a single check.
 	 */
 
 	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
 		goto out;
 
 	if (S_ISDIR(ZTOI(szp)->i_mode)) {
 		/*
 		 * Check to make sure rename is valid.
 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
 		 */
 		if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl)))
 			goto out;
 	}
 
 	/*
 	 * Does target exist?
 	 */
 	if (tzp) {
 		/*
 		 * Source and target must be the same type.
 		 */
 		if (S_ISDIR(ZTOI(szp)->i_mode)) {
 			if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
 				error = SET_ERROR(ENOTDIR);
 				goto out;
 			}
 		} else {
 			if (S_ISDIR(ZTOI(tzp)->i_mode)) {
 				error = SET_ERROR(EISDIR);
 				goto out;
 			}
 		}
 		/*
 		 * POSIX dictates that when the source and target
 		 * entries refer to the same file object, rename
 		 * must do nothing and exit without error.
 		 */
 		if (szp->z_id == tzp->z_id) {
 			error = 0;
 			goto out;
 		}
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
 	if (sdzp != tdzp) {
 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tdzp);
 	}
 	if (tzp) {
 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tzp);
 	}
 
 	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		if (zl != NULL)
 			zfs_rename_unlock(&zl);
 		zfs_dirent_unlock(sdl);
 		zfs_dirent_unlock(tdl);
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(szp);
 			if (tzp)
 				zrele(tzp);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zrele(szp);
 		if (tzp)
 			zrele(tzp);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (tzp)	/* Attempt to remove the existing target */
 		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
 
 	if (error == 0) {
 		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
 		if (error == 0) {
 			szp->z_pflags |= ZFS_AV_MODIFIED;
 			if (tdzp->z_pflags & ZFS_PROJINHERIT)
 				szp->z_pflags |= ZFS_PROJINHERIT;
 
 			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
 			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
 			ASSERT0(error);
 
 			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
 			if (error == 0) {
 				zfs_log_rename(zilog, tx, TX_RENAME |
 				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
 				    sdl->dl_name, tdzp, tdl->dl_name, szp);
 			} else {
 				/*
 				 * At this point, we have successfully created
 				 * the target name, but have failed to remove
 				 * the source name.  Since the create was done
 				 * with the ZRENAMING flag, there are
 				 * complications; for one, the link count is
 				 * wrong.  The easiest way to deal with this
 				 * is to remove the newly created target, and
 				 * return the original error.  This must
 				 * succeed; fortunately, it is very unlikely to
 				 * fail, since we just created it.
 				 */
 				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
 				    ZRENAMING, NULL), ==, 0);
 			}
 		} else {
 			/*
 				 * If we had removed the existing target, a
 				 * subsequent call to zfs_link_create() to add
 				 * back the same entry, but with the new dnode
 				 * (szp), should not fail.
 			 */
 			ASSERT(tzp == NULL);
 		}
 	}
 
 	dmu_tx_commit(tx);
 out:
 	if (zl != NULL)
 		zfs_rename_unlock(&zl);
 
 	zfs_dirent_unlock(sdl);
 	zfs_dirent_unlock(tdl);
 
 	zfs_znode_update_vfs(sdzp);
 	if (sdzp == tdzp)
 		rw_exit(&sdzp->z_name_lock);
 
 	if (sdzp != tdzp)
 		zfs_znode_update_vfs(tdzp);
 
 	zfs_znode_update_vfs(szp);
 	zrele(szp);
 	if (tzp) {
 		zfs_znode_update_vfs(tzp);
 		zrele(tzp);
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Insert the indicated symbolic reference entry into the directory.
  *
  *	IN:	dzp	- Directory to contain new symbolic link.
  *		name	- Name of directory entry in dip.
  *		vap	- Attributes of new entry.
  *		link	- Name for new symlink entry.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *
  *	OUT:	zpp	- Znode for new symbolic link.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dip - ctime|mtime updated
  */
 /*ARGSUSED*/
 int
 zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
     znode_t **zpp, cred_t *cr, int flags)
 {
 	znode_t		*zp;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	uint64_t	len = strlen(link);
 	int		error;
 	int		zflg = ZNEW;
 	zfs_acl_ids_t	acl_ids;
 	boolean_t	fuid_dirtied;
 	uint64_t	txtype = TX_SYMLINK;
 	boolean_t	waited = B_FALSE;
 
 	ASSERT(S_ISLNK(vap->va_mode));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 	if (len > MAXPATHLEN) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
 	    vap, cr, NULL, &acl_ids)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 top:
 	*zpp = NULL;
 
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EDQUOT));
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE + len);
 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Create a new object for the symlink.
 	 * For version 4 ZPL datasets the symlink will be an SA attribute.
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	mutex_enter(&zp->z_lock);
 	if (zp->z_is_sa)
 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
 		    link, len, tx);
 	else
 		zfs_sa_symlink(zp, link, len, tx);
 	mutex_exit(&zp->z_lock);
 
 	zp->z_size = len;
 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 	    &zp->z_size, sizeof (zp->z_size), tx);
 	/*
 	 * Insert the new object into the directory.
 	 */
 	error = zfs_link_create(dl, zp, tx, ZNEW);
 	if (error != 0) {
 		zfs_znode_delete(zp, tx);
 		remove_inode_hash(ZTOI(zp));
 	} else {
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
 
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (error == 0) {
 		*zpp = zp;
 
 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 			zil_commit(zilog, 0);
 	} else {
 		zrele(zp);
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Return, in the buffer contained in the provided uio structure,
  * the symbolic path referred to by ip.
  *
  *	IN:	ip	- inode of symbolic link
  *		uio	- structure to contain the link path.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - atime updated
  */
 /* ARGSUSED */
 int
 zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	mutex_enter(&zp->z_lock);
 	if (zp->z_is_sa)
 		error = sa_lookup_uio(zp->z_sa_hdl,
 		    SA_ZPL_SYMLINK(zfsvfs), uio);
 	else
 		error = zfs_sa_readlink(zp, uio);
 	mutex_exit(&zp->z_lock);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Insert a new entry into directory tdzp referencing szp.
  *
  *	IN:	tdzp	- Directory to contain new entry.
  *		szp	- znode of new entry.
  *		name	- name of new entry.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	tdzp - ctime|mtime updated
  *	 szp - ctime updated
  */
 /* ARGSUSED */
 int
 zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
     int flags)
 {
 	struct inode *sip = ZTOI(szp);
 	znode_t		*tzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(tdzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	int		zf = ZNEW;
 	uint64_t	parent;
 	uid_t		owner;
 	boolean_t	waited = B_FALSE;
 	boolean_t	is_tmpfile = 0;
 	uint64_t	txg;
 #ifdef HAVE_TMPFILE
 	is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
 #endif
 	ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(tdzp);
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * POSIX dictates that we return EPERM here.
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (S_ISDIR(sip->i_mode)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	ZFS_VERIFY_ZP(szp);
 
 	/*
 	 * If we are using project inheritance, meaning the directory has
 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
 	 * not only the project ID but also the ZFS_PROJINHERIT flag.  In
 	 * that case, we only allow hard link creation in our tree when the
 	 * project IDs are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EXDEV));
 	}
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
 	 * super blocks.
 	 */
 	if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EXDEV));
 	}
 
 	/* Prevent links to .zfs/shares files */
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
 
 	/*
 	 * We do not support links between attributes and non-attributes
 	 * because of the potential security risk of creating links
 	 * into "normal" file space in order to circumvent restrictions
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
 	    cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 top:
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
 	if (error) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
 	if (is_tmpfile)
 		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	zfs_sa_upgrade_txholds(tx, szp);
 	zfs_sa_upgrade_txholds(tx, tdzp);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	/* unmark z_unlinked so zfs_link_create() will not reject it */
 	if (is_tmpfile)
 		szp->z_unlinked = B_FALSE;
 	error = zfs_link_create(dl, szp, tx, 0);
 
 	if (error == 0) {
 		uint64_t txtype = TX_LINK;
 		/*
 		 * A tmpfile is created in z_unlinkedobj, so remove it.
 		 * Also, we don't log to the ZIL, because all previous file
 		 * operations on the tmpfile are ignored by the ZIL.  Instead
 		 * we always wait for the txg to sync to make sure all
 		 * previous operations are sync safe.
 		 */
 		if (is_tmpfile) {
 			VERIFY(zap_remove_int(zfsvfs->z_os,
 			    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
 		} else {
 			if (flags & FIGNORECASE)
 				txtype |= TX_CI;
 			zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
 		}
 	} else if (is_tmpfile) {
 		/* restore z_unlinked since linking failed */
 		szp->z_unlinked = B_TRUE;
 	}
 	txg = dmu_tx_get_txg(tx);
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
 
 	zfs_znode_update_vfs(tdzp);
 	zfs_znode_update_vfs(szp);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 static void
 zfs_putpage_sync_commit_cb(void *arg)
 {
 	struct page *pp = arg;
 
 	ClearPageError(pp);
 	end_page_writeback(pp);
 }
 
 static void
 zfs_putpage_async_commit_cb(void *arg)
 {
 	struct page *pp = arg;
 	znode_t *zp = ITOZ(pp->mapping->host);
 
 	ClearPageError(pp);
 	end_page_writeback(pp);
 	atomic_dec_32(&zp->z_async_writes_cnt);
 }
 
 /*
  * Push a page out to disk.  Once the page is on stable storage the
  * registered commit callback will be run as notification of completion.
  *
  *	IN:	ip	 - page mapped for inode.
  *		pp	 - page to push (page is locked)
  *		wbc	 - writeback control data
  *		for_sync - does the caller intend to wait synchronously for the
  *			   page writeback to complete?
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - ctime|mtime updated
  */
 /* ARGSUSED */
 int
 zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
     boolean_t for_sync)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	loff_t		offset;
 	loff_t		pgoff;
 	unsigned int	pglen;
 	dmu_tx_t	*tx;
 	caddr_t		va;
 	int		err = 0;
 	uint64_t	mtime[2], ctime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t	bulk[3];
 	int		cnt = 0;
 	struct address_space *mapping;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	ASSERT(PageLocked(pp));
 
 	pgoff = page_offset(pp);	/* Page byte-offset in file */
 	offset = i_size_read(ip);	/* File length in bytes */
 	pglen = MIN(PAGE_SIZE,		/* Page length in bytes */
 	    P2ROUNDUP(offset, PAGE_SIZE)-pgoff);
 
 	/* Page is beyond end of file */
 	if (pgoff >= offset) {
 		unlock_page(pp);
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	/* Truncate page length to end of file */
 	if (pgoff + pglen > offset)
 		pglen = offset - pgoff;
 
 #if 0
 	/*
 	 * FIXME: Allow mmap writes past its quota.  The correct fix
 	 * is to register a page_mkwrite() handler to count the page
 	 * against its quota when it is about to be dirtied.
 	 */
 	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
 	    KUID_TO_SUID(ip->i_uid)) ||
 	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 	    KGID_TO_SGID(ip->i_gid)) ||
 	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
 	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 	    zp->z_projid))) {
 		err = EDQUOT;
 	}
 #endif
 
 	/*
 	 * The ordering here is critical and must adhere to the following
 	 * rules in order to avoid deadlocking in either zfs_read() or
 	 * zfs_free_range() due to a lock inversion.
 	 *
 	 * 1) The page must be unlocked prior to acquiring the range lock.
 	 *    This is critical because zfs_read() calls find_lock_page()
 	 *    which may block on the page lock while holding the range lock.
 	 *
 	 * 2) Before setting or clearing write back on a page the range lock
 	 *    must be held in order to prevent a lock inversion with the
 	 *    zfs_free_range() function.
 	 *
 	 * This presents a problem because upon entering this function the
 	 * page lock is already held.  To safely acquire the range lock the
 	 * page lock must be dropped.  This creates a window where another
 	 * process could truncate, invalidate, dirty, or write out the page.
 	 *
 	 * Therefore, after successfully reacquiring the range and page locks
 	 * the current page state is checked.  In the common case everything
 	 * will be as is expected and it can be written out.  However, if
 	 * the page state has changed it must be handled accordingly.
 	 */
 	mapping = pp->mapping;
 	redirty_page_for_writepage(wbc, pp);
 	unlock_page(pp);
 
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
 	    pgoff, pglen, RL_WRITER);
 	lock_page(pp);
 
 	/* Page mapping changed or it was no longer dirty; we're done */
 	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	/* Another process started writeback; block if required */
 	if (PageWriteback(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 
 		if (wbc->sync_mode != WB_SYNC_NONE) {
 			/*
 			 * Speed up any non-sync page writebacks since
 			 * they may take several seconds to complete.
 			 * Refer to the comment in zpl_fsync() (when
 			 * HAVE_FSYNC_RANGE is defined) for details.
 			 */
 			if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
 				zil_commit(zfsvfs->z_log, zp->z_id);
 			}
 
 			if (PageWriteback(pp))
 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
 				folio_wait_bit(page_folio(pp), PG_writeback);
 #else
 				wait_on_page_bit(pp, PG_writeback);
 #endif
 		}
 
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	/* Clear the dirty flag now that the required locks are held */
 	if (!clear_page_dirty_for_io(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	/*
 	 * Counterpart for redirty_page_for_writepage() above.  This page
 	 * was in fact not skipped and should not be counted as if it were.
 	 */
 	wbc->pages_skipped--;
 	if (!for_sync)
 		atomic_inc_32(&zp->z_async_writes_cnt);
 	set_page_writeback(pp);
 	unlock_page(pp);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_NOWAIT);
 	if (err != 0) {
 		if (err == ERESTART)
 			dmu_tx_wait(tx);
 
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
 #else
 		__set_page_dirty_nobuffers(pp);
 #endif
 		ClearPageError(pp);
 		end_page_writeback(pp);
 		if (!for_sync)
 			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (err);
 	}
 
 	va = kmap(pp);
 	ASSERT3U(pglen, <=, PAGE_SIZE);
 	dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
 	kunmap(pp);
 
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 
 	/* Preserve the mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	ZFS_TIME_ENCODE(&tmp_ctime, ctime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_seq++;
 
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 
 	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
 	    for_sync ? zfs_putpage_sync_commit_cb :
 	    zfs_putpage_async_commit_cb, pp);
 
 	dmu_tx_commit(tx);
 
 	zfs_rangelock_exit(lr);
 
 	if (wbc->sync_mode != WB_SYNC_NONE) {
 		/*
 		 * Note that this is rarely called under writepages(), because
 		 * writepages() normally handles the entire commit for
 		 * performance reasons.
 		 */
 		zil_commit(zfsvfs->z_log, zp->z_id);
 	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
 		/*
 		 * If the caller does not intend to wait synchronously
 		 * for this page writeback to complete and there are active
 		 * synchronous calls on this file, do a commit so that
 		 * the latter don't accidentally end up waiting for
 		 * our writeback to complete. Refer to the comment in
 		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
 		 */
 		zil_commit(zfsvfs->z_log, zp->z_id);
 	}
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
 
 	ZFS_EXIT(zfsvfs);
 	return (err);
 }
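
Condensed, the lock-ordering rules documented in the function above play out as the
following sequence (a recap sketch of the steps zfs_putpage() performs, not additional
logic):

	unlock_page(pp);			/* 1) drop the page lock */
	lr = zfs_rangelock_enter(&zp->z_rangelock, pgoff, pglen, RL_WRITER);
	lock_page(pp);				/* 2) retake it under the range lock */
	/* 3) revalidate: mapping, PageDirty(), PageWriteback() may have changed */
	set_page_writeback(pp);
	unlock_page(pp);
	/* 4) dmu_write() and sa_bulk_update() proceed under the range lock */
	zfs_rangelock_exit(lr);
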
 
 /*
  * Update the system attributes when the inode has been dirtied.  For the
  * moment we only update the mode, atime, mtime, and ctime.
  */
 int
 zfs_dirty_inode(struct inode *ip, int flags)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	dmu_tx_t	*tx;
 	uint64_t	mode, atime[2], mtime[2], ctime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t	bulk[4];
 	int		error = 0;
 	int		cnt = 0;
 
 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
 		return (0);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 #ifdef I_DIRTY_TIME
 	/*
 	 * This is the lazytime semantic introduced in Linux 4.0.
 	 * This flag is only passed in from update_time() when lazytime
 	 * is set.  (Note, I_DIRTY_SYNC will also be set if not lazytime.)
 	 * Fortunately mtime and ctime are managed within ZFS itself, so we
 	 * only need to dirty atime.
 	 */
 	if (flags == I_DIRTY_TIME) {
 		zp->z_atime_dirty = B_TRUE;
 		goto out;
 	}
 #endif
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		goto out;
 	}
 
 	mutex_enter(&zp->z_lock);
 	zp->z_atime_dirty = B_FALSE;
 
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 
 	/* Preserve the mode, mtime and ctime provided by the inode */
-	ZFS_TIME_ENCODE(&ip->i_atime, atime);
-	ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	ZFS_TIME_ENCODE(&tmp_ctime, ctime);
+	tmp_ts = zpl_inode_get_atime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, atime);
+	tmp_ts = zpl_inode_get_mtime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, mtime);
+	tmp_ts = zpl_inode_get_ctime(ip);
+	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	mode = ip->i_mode;
 
 	zp->z_mode = mode;
 
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 	mutex_exit(&zp->z_lock);
 
 	dmu_tx_commit(tx);
 out:
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
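
For reference, the atime/mtime/ctime SA values handled above are two-element uint64_t
arrays; the ZFS_TIME_ENCODE()/ZFS_TIME_DECODE() pairs convert between that on-disk layout
and inode_timespec_t. A sketch of the encode direction (the authoritative macros live in
sys/zfs_znode.h; the _SKETCH name is illustrative only):

/* Pack an inode_timespec_t into the two-word SA timestamp format. */
#define	ZFS_TIME_ENCODE_SKETCH(tp, stmp)	\
{						\
	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
}
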
 
 /*ARGSUSED*/
 void
 zfs_inactive(struct inode *ip)
 {
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint64_t atime[2];
 	int error;
 	int need_unlock = 0;
 
 	/* Only read lock if we haven't already write locked, e.g. rollback */
 	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
 		need_unlock = 1;
 		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
 	}
 	if (zp->z_sa_hdl == NULL) {
 		if (need_unlock)
 			rw_exit(&zfsvfs->z_teardown_inactive_lock);
 		return;
 	}
 
 	if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) {
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
-			ZFS_TIME_ENCODE(&ip->i_atime, atime);
+			inode_timespec_t tmp_atime;
+			tmp_atime = zpl_inode_get_atime(ip);
+			ZFS_TIME_ENCODE(&tmp_atime, atime);
 			mutex_enter(&zp->z_lock);
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
 			    (void *)&atime, sizeof (atime), tx);
 			zp->z_atime_dirty = B_FALSE;
 			mutex_exit(&zp->z_lock);
 			dmu_tx_commit(tx);
 		}
 	}
 
 	zfs_zinactive(zp);
 	if (need_unlock)
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 }
 
 /*
  * Fill pages with data from the disk.
  */
 static int
 zfs_fillpage(struct inode *ip, struct page *pp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	loff_t i_size = i_size_read(ip);
 	u_offset_t io_off = page_offset(pp);
 	size_t io_len = PAGE_SIZE;
 
 	ASSERT3U(io_off, <, i_size);
 
 	if (io_off + io_len > i_size)
 		io_len = i_size - io_off;
 
 	void *va = kmap(pp);
 	int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
 	    io_len, va, DMU_READ_PREFETCH);
 	if (io_len != PAGE_SIZE)
 		memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
 	kunmap(pp);
 
 	if (error) {
 		/* convert checksum errors into IO errors */
 		if (error == ECKSUM)
 			error = SET_ERROR(EIO);
 
 		SetPageError(pp);
 		ClearPageUptodate(pp);
 	} else {
 		ClearPageError(pp);
 		SetPageUptodate(pp);
 	}
 
 	return (error);
 }
 
 /*
  * Uses zfs_fillpage to read data from the file and fill the page.
  *
  *	IN:	ip	 - inode of file to get data from.
  *		pp	 - page to read
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	ip - atime updated
  */
 /* ARGSUSED */
 int
 zfs_getpage(struct inode *ip, struct page *pp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	znode_t *zp = ITOZ(ip);
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	error = zfs_fillpage(ip, pp);
 	if (error == 0)
 		dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 /*
  * Check ZFS specific permissions to memory map a section of a file.
  *
  *	IN:	ip	- inode of the file to mmap
  *		off	- file offset
  *		addrp	- start address in memory region
  *		len	- length of memory region
  *		vm_flags- address flags
  *
  *	RETURN:	0 if success
  *		error code if failure
  */
 /*ARGSUSED*/
 int
 zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
     unsigned long vm_flags)
 {
 	znode_t  *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
 	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((vm_flags & (VM_READ | VM_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EACCES));
 	}
 
 	if (off < 0 || len > MAXOFFSET_T - off) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENXIO));
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * Free or allocate space in a file.  Currently, this function only
  * supports the `F_FREESP' command.  However, this command is somewhat
  * misnamed, as its functionality includes the ability to allocate as
  * well as free space.
  *
  *	IN:	zp	- znode of file to free data in.
  *		cmd	- action to take (only F_FREESP supported).
  *		bfp	- section of file to free/alloc.
  *		flag	- current file open mode flags.
  *		offset	- current file offset.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	zp - ctime|mtime updated
  */
 /* ARGSUSED */
 int
 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
     offset_t offset, cred_t *cr)
 {
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	uint64_t	off, len;
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if (cmd != F_FREESP) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 
 	if (bfp->l_len < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Permissions aren't checked on Solaris because on this OS
 	 * zfs_space() can only be called with an opened file handle.
 	 * On Linux we can get here through truncate_range() which
 	 * operates directly on inodes, so we need to check access rights.
 	 */
 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	off = bfp->l_start;
 	len = bfp->l_len; /* 0 means from off to end of file */
 
 	error = zfs_freesp(zp, off, len, flag, TRUE);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*ARGSUSED*/
 int
 zfs_fid(struct inode *ip, fid_t *fidp)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	uint32_t	gen;
 	uint64_t	gen64;
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
 	int		size, i, error;
 
 	ZFS_ENTER(zfsvfs);
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENOSPC));
 	}
 
 	ZFS_VERIFY_ZP(zp);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 	    &gen64, sizeof (uint64_t))) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	gen = (uint32_t)gen64;
 
 	size = SHORT_FID_LEN;
 
 	zfid = (zfid_short_t *)fidp;
 
 	zfid->zf_len = size;
 
 	for (i = 0; i < sizeof (zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
 	/* Must have a non-zero generation number to distinguish from .zfs */
 	if (gen == 0)
 		gen = 1;
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
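
The two loops above serialize the object number and generation least-significant byte
first. A worked example (illustration only, assuming an object number small enough to
fit the short fid):

	/* object = 0x11223344 packs as: */
	zfid->zf_object[0] = 0x44;
	zfid->zf_object[1] = 0x33;
	zfid->zf_object[2] = 0x22;
	zfid->zf_object[3] = 0x11;
	/* remaining zf_object[] bytes are zero */
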
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_open);
 EXPORT_SYMBOL(zfs_close);
 EXPORT_SYMBOL(zfs_lookup);
 EXPORT_SYMBOL(zfs_create);
 EXPORT_SYMBOL(zfs_tmpfile);
 EXPORT_SYMBOL(zfs_remove);
 EXPORT_SYMBOL(zfs_mkdir);
 EXPORT_SYMBOL(zfs_rmdir);
 EXPORT_SYMBOL(zfs_readdir);
 EXPORT_SYMBOL(zfs_getattr_fast);
 EXPORT_SYMBOL(zfs_setattr);
 EXPORT_SYMBOL(zfs_rename);
 EXPORT_SYMBOL(zfs_symlink);
 EXPORT_SYMBOL(zfs_readlink);
 EXPORT_SYMBOL(zfs_link);
 EXPORT_SYMBOL(zfs_inactive);
 EXPORT_SYMBOL(zfs_space);
 EXPORT_SYMBOL(zfs_fid);
 EXPORT_SYMBOL(zfs_getpage);
 EXPORT_SYMBOL(zfs_putpage);
 EXPORT_SYMBOL(zfs_dirty_inode);
 EXPORT_SYMBOL(zfs_map);
 
 /* BEGIN CSTYLED */
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 /* END CSTYLED */
 
 #endif
diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c
index 41ccae5ff954..d1e64d0a5cbb 100644
--- a/module/os/linux/zfs/zfs_znode.c
+++ b/module/os/linux/zfs/zfs_znode.c
@@ -1,2268 +1,2275 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 
 #ifdef _KERNEL
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/mntent.h>
 #include <sys/u8_textprep.h>
 #include <sys/dsl_dataset.h>
 #include <sys/vfs.h>
 #include <sys/vnode.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/errno.h>
 #include <sys/atomic.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_rlock.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/dnode.h>
 #include <sys/fs/zfs.h>
 #include <sys/zpl.h>
 #endif /* _KERNEL */
 
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_tx.h>
 #include <sys/zfs_refcount.h>
 #include <sys/stat.h>
 #include <sys/zap.h>
 #include <sys/zfs_znode.h>
 #include <sys/sa.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_stat.h>
 
 #include "zfs_prop.h"
 #include "zfs_comutil.h"
 
 /*
  * Functions needed for userland (i.e., libzpool) are not put under
  * #ifdef _KERNEL; the rest of the functions have dependencies
  * (such as VFS logic) that will not compile easily in userland.
  */
 #ifdef _KERNEL
 
 static kmem_cache_t *znode_cache = NULL;
 static kmem_cache_t *znode_hold_cache = NULL;
 unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
 
 /*
  * This is used by the test suite so that it can delay znodes from being
  * freed in order to inspect the unlinked set.
  */
 int zfs_unlink_suspend_progress = 0;
 
 /*
  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
  * z_rangelock. It will modify the offset and length of the lock to reflect
  * znode-specific information, and convert RL_APPEND to RL_WRITER.  This is
  * called with the rangelock_t's rl_lock held, which avoids races.
  */
 static void
 zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
 {
 	znode_t *zp = arg;
 
 	/*
 	 * If in append mode, convert to writer and lock starting at the
 	 * current end of file.
 	 */
 	if (new->lr_type == RL_APPEND) {
 		new->lr_offset = zp->z_size;
 		new->lr_type = RL_WRITER;
 	}
 
 	/*
 	 * If we need to grow the block size then lock the whole file range.
 	 */
 	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
 	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
 	    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
 		new->lr_offset = 0;
 		new->lr_length = UINT64_MAX;
 	}
 }
 
 /*ARGSUSED*/
 static int
 zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
 {
 	znode_t *zp = buf;
 
 	inode_init_once(ZTOI(zp));
 	list_link_init(&zp->z_link_node);
 
 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
 	rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
 	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
 
 	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
 
 	zp->z_dirlocks = NULL;
 	zp->z_acl_cached = NULL;
 	zp->z_xattr_cached = NULL;
 	zp->z_xattr_parent = 0;
 	zp->z_sync_writes_cnt = 0;
 	zp->z_async_writes_cnt = 0;
 
 	return (0);
 }
 
 /*ARGSUSED*/
 static void
 zfs_znode_cache_destructor(void *buf, void *arg)
 {
 	znode_t *zp = buf;
 
 	ASSERT(!list_link_active(&zp->z_link_node));
 	mutex_destroy(&zp->z_lock);
 	rw_destroy(&zp->z_parent_lock);
 	rw_destroy(&zp->z_name_lock);
 	mutex_destroy(&zp->z_acl_lock);
 	rw_destroy(&zp->z_xattr_lock);
 	zfs_rangelock_fini(&zp->z_rangelock);
 
 	ASSERT3P(zp->z_dirlocks, ==, NULL);
 	ASSERT3P(zp->z_acl_cached, ==, NULL);
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 
 	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
 	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
 }
 
 static int
 zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
 {
 	znode_hold_t *zh = buf;
 
 	mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
 	zh->zh_refcount = 0;
 
 	return (0);
 }
 
 static void
 zfs_znode_hold_cache_destructor(void *buf, void *arg)
 {
 	znode_hold_t *zh = buf;
 
 	mutex_destroy(&zh->zh_lock);
 }
 
 void
 zfs_znode_init(void)
 {
 	/*
 	 * Initialize the znode cache.  The KMC_SLAB hint is used so that
 	 * it is backed by kmalloc() when on the Linux slab, ensuring that
 	 * any wait_on_bit() operations on the related inode operate properly.
 	 */
 	ASSERT(znode_cache == NULL);
 	znode_cache = kmem_cache_create("zfs_znode_cache",
 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
 	    zfs_znode_cache_destructor, NULL, NULL, NULL, KMC_SLAB);
 
 	ASSERT(znode_hold_cache == NULL);
 	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
 	    sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
 	    zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
 }
 
 void
 zfs_znode_fini(void)
 {
 	/*
 	 * Cleanup zcache
 	 */
 	if (znode_cache)
 		kmem_cache_destroy(znode_cache);
 	znode_cache = NULL;
 
 	if (znode_hold_cache)
 		kmem_cache_destroy(znode_hold_cache);
 	znode_hold_cache = NULL;
 }
 
 /*
  * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
  * serialize access to a znode and its SA buffer while the object is being
  * created or destroyed.  This kind of locking would normally reside in the
  * znode itself but in this case that's impossible because the znode and SA
  * buffer may not yet exist.  Therefore the locking is handled externally
  * with an array of mutexes and AVL trees which contain per-object locks.
  *
  * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
  * into the correct AVL tree and finally the per-object lock is held.  In
  * zfs_znode_hold_exit() the process is reversed.  The per-object lock is
  * released, removed from the AVL tree and destroyed if there are no waiters.
  *
  * This scheme has two important properties:
  *
  * 1) No memory allocations are performed while holding one of the z_hold_locks.
  *    This ensures evict(), which can be called from direct memory reclaim, will
  *    never block waiting on a z_hold_locks which just happens to have hashed
  *    to the same index.
  *
  * 2) All locks used to serialize access to an object are per-object and never
  *    shared.  This minimizes lock contention without creating a large number
  *    of dedicated locks.
  *
  * On the downside it does require znode_hold_t structures to be frequently
  * allocated and freed.  However, because these are backed by a kmem cache
  * and very short lived this cost is minimal.
  */
 int
 zfs_znode_hold_compare(const void *a, const void *b)
 {
 	const znode_hold_t *zh_a = (const znode_hold_t *)a;
 	const znode_hold_t *zh_b = (const znode_hold_t *)b;
 
 	return (TREE_CMP(zh_a->zh_obj, zh_b->zh_obj));
 }
 
 static boolean_t __maybe_unused
 zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
 {
 	znode_hold_t *zh, search;
 	int i = ZFS_OBJ_HASH(zfsvfs, obj);
 	boolean_t held;
 
 	search.zh_obj = obj;
 
 	mutex_enter(&zfsvfs->z_hold_locks[i]);
 	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
 	held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
 	mutex_exit(&zfsvfs->z_hold_locks[i]);
 
 	return (held);
 }
 
 static znode_hold_t *
 zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
 {
 	znode_hold_t *zh, *zh_new, search;
 	int i = ZFS_OBJ_HASH(zfsvfs, obj);
 	boolean_t found = B_FALSE;
 
 	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
 	search.zh_obj = obj;
 
 	mutex_enter(&zfsvfs->z_hold_locks[i]);
 	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
 	if (likely(zh == NULL)) {
 		zh = zh_new;
 		zh->zh_obj = obj;
 		avl_add(&zfsvfs->z_hold_trees[i], zh);
 	} else {
 		ASSERT3U(zh->zh_obj, ==, obj);
 		found = B_TRUE;
 	}
 	zh->zh_refcount++;
 	ASSERT3S(zh->zh_refcount, >, 0);
 	mutex_exit(&zfsvfs->z_hold_locks[i]);
 
 	if (found == B_TRUE)
 		kmem_cache_free(znode_hold_cache, zh_new);
 
 	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
 	mutex_enter(&zh->zh_lock);
 
 	return (zh);
 }
 
 static void
 zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
 {
 	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
 	boolean_t remove = B_FALSE;
 
 	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
 	mutex_exit(&zh->zh_lock);
 
 	mutex_enter(&zfsvfs->z_hold_locks[i]);
 	ASSERT3S(zh->zh_refcount, >, 0);
 	if (--zh->zh_refcount == 0) {
 		avl_remove(&zfsvfs->z_hold_trees[i], zh);
 		remove = B_TRUE;
 	}
 	mutex_exit(&zfsvfs->z_hold_locks[i]);
 
 	if (remove == B_TRUE)
 		kmem_cache_free(znode_hold_cache, zh);
 }
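
Callers elsewhere in this file (zfs_zget(), for example) use the pair in the pattern
below (a sketch):

	znode_hold_t *zh = zfs_znode_hold_enter(zfsvfs, obj_num);
	/* safely create, look up, or destroy the object's znode / SA buffer */
	zfs_znode_hold_exit(zfsvfs, zh);
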
 
 dev_t
 zfs_cmpldev(uint64_t dev)
 {
 	return (dev);
 }
 
 static void
 zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
 {
 	ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
 
 	mutex_enter(&zp->z_lock);
 
 	ASSERT(zp->z_sa_hdl == NULL);
 	ASSERT(zp->z_acl_cached == NULL);
 	if (sa_hdl == NULL) {
 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
 		    SA_HDL_SHARED, &zp->z_sa_hdl));
 	} else {
 		zp->z_sa_hdl = sa_hdl;
 		sa_set_userp(sa_hdl, zp);
 	}
 
 	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
 
 	mutex_exit(&zp->z_lock);
 }
 
 void
 zfs_znode_dmu_fini(znode_t *zp)
 {
 	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || zp->z_unlinked ||
 	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
 
 	sa_handle_destroy(zp->z_sa_hdl);
 	zp->z_sa_hdl = NULL;
 }
 
 /*
  * Called by new_inode() to allocate a new inode.
  */
 int
 zfs_inode_alloc(struct super_block *sb, struct inode **ip)
 {
 	znode_t *zp;
 
 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 	*ip = ZTOI(zp);
 
 	return (0);
 }
 
 /*
  * Called in multiple places when an inode should be destroyed.
  */
 void
 zfs_inode_destroy(struct inode *ip)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	if (list_link_active(&zp->z_link_node)) {
 		list_remove(&zfsvfs->z_all_znodes, zp);
 		zfsvfs->z_nr_znodes--;
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 
 	if (zp->z_xattr_cached) {
 		nvlist_free(zp->z_xattr_cached);
 		zp->z_xattr_cached = NULL;
 	}
 
 	kmem_cache_free(znode_cache, zp);
 }
 
 static void
 zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
 {
 	uint64_t rdev = 0;
 
 	switch (ip->i_mode & S_IFMT) {
 	case S_IFREG:
 		ip->i_op = &zpl_inode_operations;
 		ip->i_fop = &zpl_file_operations;
 		ip->i_mapping->a_ops = &zpl_address_space_operations;
 		break;
 
 	case S_IFDIR:
 		ip->i_op = &zpl_dir_inode_operations;
 		ip->i_fop = &zpl_dir_file_operations;
 		ITOZ(ip)->z_zn_prefetch = B_TRUE;
 		break;
 
 	case S_IFLNK:
 		ip->i_op = &zpl_symlink_inode_operations;
 		break;
 
 	/*
 	 * rdev is only stored in an SA for device files.
 	 */
 	case S_IFCHR:
 	case S_IFBLK:
 		(void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
 		    sizeof (rdev));
 		fallthrough;
 	case S_IFIFO:
 	case S_IFSOCK:
 		init_special_inode(ip, ip->i_mode, rdev);
 		ip->i_op = &zpl_special_inode_operations;
 		break;
 
 	default:
 		zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
 		    (u_longlong_t)ip->i_ino, ip->i_mode);
 
 		/* Assume the inode is a file and attempt to continue */
 		ip->i_mode = S_IFREG | 0644;
 		ip->i_op = &zpl_inode_operations;
 		ip->i_fop = &zpl_file_operations;
 		ip->i_mapping->a_ops = &zpl_address_space_operations;
 		break;
 	}
 }
 
 static void
 zfs_set_inode_flags(znode_t *zp, struct inode *ip)
 {
 	/*
 	 * Linux and Solaris have different sets of file attributes, so we
 	 * restrict this conversion to the intersection of the two.
 	 */
 #ifdef HAVE_INODE_SET_FLAGS
 	unsigned int flags = 0;
 	if (zp->z_pflags & ZFS_IMMUTABLE)
 		flags |= S_IMMUTABLE;
 	if (zp->z_pflags & ZFS_APPENDONLY)
 		flags |= S_APPEND;
 
 	inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
 #else
 	if (zp->z_pflags & ZFS_IMMUTABLE)
 		ip->i_flags |= S_IMMUTABLE;
 	else
 		ip->i_flags &= ~S_IMMUTABLE;
 
 	if (zp->z_pflags & ZFS_APPENDONLY)
 		ip->i_flags |= S_APPEND;
 	else
 		ip->i_flags &= ~S_APPEND;
 #endif
 }
 
 /*
  * Update the embedded inode given the znode.
  */
 void
 zfs_znode_update_vfs(znode_t *zp)
 {
 	zfsvfs_t	*zfsvfs;
 	struct inode	*ip;
 	uint32_t	blksize;
 	u_longlong_t	i_blocks;
 
 	ASSERT(zp != NULL);
 	zfsvfs = ZTOZSB(zp);
 	ip = ZTOI(zp);
 
 	/* Skip .zfs control nodes which do not exist on disk. */
 	if (zfsctl_is_node(ip))
 		return;
 
 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
 
 	spin_lock(&ip->i_lock);
 	ip->i_mode = zp->z_mode;
 	ip->i_blocks = i_blocks;
 	i_size_write(ip, zp->z_size);
 	spin_unlock(&ip->i_lock);
 }
 
 
 /*
  * Construct a znode+inode and initialize.
  *
  * This does not do a call to dmu_set_user(); that is
  * up to the caller to do, in case you don't want to
  * return the znode.
  */
 static znode_t *
 zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
     dmu_object_type_t obj_type, sa_handle_t *hdl)
 {
 	znode_t	*zp;
 	struct inode *ip;
 	uint64_t mode;
 	uint64_t parent;
 	uint64_t tmp_gen;
 	uint64_t links;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	sa_bulk_attr_t bulk[12];
 	int count = 0;
 
 	ASSERT(zfsvfs != NULL);
 
 	ip = new_inode(zfsvfs->z_sb);
 	if (ip == NULL)
 		return (NULL);
 
 	zp = ITOZ(ip);
 	ASSERT(zp->z_dirlocks == NULL);
 	ASSERT3P(zp->z_acl_cached, ==, NULL);
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
 #if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
 #endif
 	zp->z_is_ctldir = B_FALSE;
 	zp->z_suspended = B_FALSE;
 	zp->z_sa_hdl = NULL;
 	zp->z_mapcnt = 0;
 	zp->z_id = db->db_object;
 	zp->z_blksz = blksz;
 	zp->z_seq = 0x7A4653;
 	zp->z_sync_cnt = 0;
 	zp->z_sync_writes_cnt = 0;
 	zp->z_async_writes_cnt = 0;
 
 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
 	    &parent, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
 
 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
 	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
 	    (zp->z_pflags & ZFS_PROJID) &&
 	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
 		if (hdl == NULL)
 			sa_handle_destroy(zp->z_sa_hdl);
 		zp->z_sa_hdl = NULL;
 		goto error;
 	}
 
 	zp->z_projid = projid;
 	zp->z_mode = ip->i_mode = mode;
 	ip->i_generation = (uint32_t)tmp_gen;
 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
 	set_nlink(ip, (uint32_t)links);
 	zfs_uid_write(ip, z_uid);
 	zfs_gid_write(ip, z_gid);
 	zfs_set_inode_flags(zp, ip);
 
 	/* Cache the xattr parent id */
 	if (zp->z_pflags & ZFS_XATTR)
 		zp->z_xattr_parent = parent;
 
-	ZFS_TIME_DECODE(&ip->i_atime, atime);
-	ZFS_TIME_DECODE(&ip->i_mtime, mtime);
-	ZFS_TIME_DECODE(&tmp_ctime, ctime);
-	zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ip, tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ip, tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	ip->i_ino = zp->z_id;
 	zfs_znode_update_vfs(zp);
 	zfs_inode_set_ops(zfsvfs, ip);
 
 	/*
 	 * The only way insert_inode_locked() can fail is if the ip->i_ino
 	 * number is already hashed for this super block.  This can never
 	 * happen because the inode numbers map 1:1 with the object numbers.
 	 *
 	 * Exceptions include rolling back a mounted file system, either
 	 * from the zfs rollback or zfs recv command.
 	 *
 	 * Active inodes are unhashed during the rollback, but since zrele
 	 * can happen asynchronously, we can't guarantee they've been
 	 * unhashed.  This can cause hash collisions in unlinked drain
 	 * processing so do not hash unlinked znodes.
 	 */
 	if (links > 0)
 		VERIFY3S(insert_inode_locked(ip), ==, 0);
 
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
 	zfsvfs->z_nr_znodes++;
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	if (links > 0)
 		unlock_new_inode(ip);
 	return (zp);
 
 error:
 	iput(ip);
 	return (NULL);
 }
 
 /*
  * Safely mark an inode dirty.  Inodes which are part of a read-only
  * file system or snapshot may not be dirtied.
  */
 void
 zfs_mark_inode_dirty(struct inode *ip)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 
 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
 		return;
 
 	mark_inode_dirty(ip);
 }
 
 static uint64_t empty_xattr;
 static uint64_t pad[4];
 static zfs_acl_phys_t acl_phys;
 /*
  * Create a new DMU object to hold a zfs znode.
  *
  *	IN:	dzp	- parent directory for new znode
  *		vap	- file attributes for new znode
  *		tx	- dmu transaction id for zap operations
  *		cr	- credentials of caller
  *		flag	- flags:
  *			  IS_ROOT_NODE	- new object will be root
  *			  IS_TMPFILE	- new object is of O_TMPFILE
  *			  IS_XATTR	- new object is an attribute
  *		acl_ids	- ACL related attributes
  *
  *	OUT:	zpp	- allocated znode (set to dzp if IS_ROOT_NODE)
  *
  */
 void
 zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
 {
 	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
 	uint64_t	mode, size, links, parent, pflags;
 	uint64_t	projid = ZFS_DEFAULT_PROJID;
 	uint64_t	rdev = 0;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	dmu_buf_t	*db;
 	inode_timespec_t now;
 	uint64_t	gen, obj;
 	int		bonuslen;
 	int		dnodesize;
 	sa_handle_t	*sa_hdl;
 	dmu_object_type_t obj_type;
 	sa_bulk_attr_t	*sa_attrs;
 	int		cnt = 0;
 	zfs_acl_locator_cb_t locate = { 0 };
 	znode_hold_t	*zh;
 
 	if (zfsvfs->z_replay) {
 		obj = vap->va_nodeid;
 		now = vap->va_ctime;		/* see zfs_replay_create() */
 		gen = vap->va_nblocks;		/* ditto */
 		dnodesize = vap->va_fsid;	/* ditto */
 	} else {
 		obj = 0;
 		gethrestime(&now);
 		gen = dmu_tx_get_txg(tx);
 		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
 	}
 
 	if (dnodesize == 0)
 		dnodesize = DNODE_MIN_SIZE;
 
 	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
 
 	bonuslen = (obj_type == DMU_OT_SA) ?
 	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
 
 	/*
 	 * Create a new DMU object.
 	 */
 	/*
 	 * There's currently no mechanism for pre-reading the blocks that will
 	 * be needed to allocate a new object, so we accept the small chance
 	 * that there will be an i/o error and we will fail one of the
 	 * assertions below.
 	 */
 	if (S_ISDIR(vap->va_mode)) {
 		if (zfsvfs->z_replay) {
 			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    obj_type, bonuslen, dnodesize, tx));
 		} else {
 			obj = zap_create_norm_dnsize(zfsvfs->z_os,
 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
 			    obj_type, bonuslen, dnodesize, tx);
 		}
 	} else {
 		if (zfsvfs->z_replay) {
 			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    obj_type, bonuslen, dnodesize, tx));
 		} else {
 			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
 			    obj_type, bonuslen, dnodesize, tx);
 		}
 	}
 
 	zh = zfs_znode_hold_enter(zfsvfs, obj);
 	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
 
 	/*
 	 * If this is the root, fix up the half-initialized parent pointer
 	 * to reference the just-allocated physical data area.
 	 */
 	if (flag & IS_ROOT_NODE) {
 		dzp->z_id = obj;
 	}
 
 	/*
 	 * If parent is an xattr, so am I.
 	 */
 	if (dzp->z_pflags & ZFS_XATTR) {
 		flag |= IS_XATTR;
 	}
 
 	if (zfsvfs->z_use_fuids)
 		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
 	else
 		pflags = 0;
 
 	if (S_ISDIR(vap->va_mode)) {
 		size = 2;		/* contents ("." and "..") */
 		links = 2;
 	} else {
 		size = 0;
 		links = (flag & IS_TMPFILE) ? 0 : 1;
 	}
 
 	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
 		rdev = vap->va_rdev;
 
 	parent = dzp->z_id;
 	mode = acl_ids->z_mode;
 	if (flag & IS_XATTR)
 		pflags |= ZFS_XATTR;
 
 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) {
 		/*
 		 * With ZFS_PROJID flag, we can easily know whether there is
 		 * project ID stored on disk or not. See zfs_space_delta_cb().
 		 */
 		if (obj_type != DMU_OT_ZNODE &&
 		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
 			pflags |= ZFS_PROJID;
 
 		/*
 		 * Inherit project ID from parent if required.
 		 */
 		projid = zfs_inherit_projid(dzp);
 		if (dzp->z_pflags & ZFS_PROJINHERIT)
 			pflags |= ZFS_PROJINHERIT;
 	}
 
 	/*
 	 * No execs denied will be determined when zfs_mode_compute() is called.
 	 */
 	pflags |= acl_ids->z_aclp->z_hints &
 	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
 	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
 
 	ZFS_TIME_ENCODE(&now, crtime);
 	ZFS_TIME_ENCODE(&now, ctime);
 
 	if (vap->va_mask & ATTR_ATIME) {
 		ZFS_TIME_ENCODE(&vap->va_atime, atime);
 	} else {
 		ZFS_TIME_ENCODE(&now, atime);
 	}
 
 	if (vap->va_mask & ATTR_MTIME) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 	} else {
 		ZFS_TIME_ENCODE(&now, mtime);
 	}
 
 	/* Now add in all of the "SA" attributes */
 	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
 	    &sa_hdl));
 
 	/*
 	 * Setup the array of attributes to be replaced/set on the new file
 	 *
 	 * The order for DMU_OT_ZNODE is critical since it needs to be
 	 * constructed in the old znode_phys_t format.  Don't change this
 	 * ordering.
 	 */
 	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
 
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
 		    NULL, &atime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
 		    NULL, &mtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
 		    NULL, &ctime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
 		    NULL, &crtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
 		    NULL, &gen, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
 		    NULL, &mode, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
 		    NULL, &size, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
 		    NULL, &parent, 8);
 	} else {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
 		    NULL, &mode, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
 		    NULL, &size, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
 		    NULL, &gen, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
 		    NULL, &acl_ids->z_fuid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
 		    NULL, &acl_ids->z_fgid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
 		    NULL, &parent, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &pflags, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
 		    NULL, &atime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
 		    NULL, &mtime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
 		    NULL, &ctime, 16);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
 		    NULL, &crtime, 16);
 	}
 
 	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
 
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
 		    &empty_xattr, 8);
 	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
 	    pflags & ZFS_PROJID) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
 		    NULL, &projid, 8);
 	}
 	if (obj_type == DMU_OT_ZNODE ||
 	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
 		    NULL, &rdev, 8);
 	}
 	if (obj_type == DMU_OT_ZNODE) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &pflags, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
 		    &acl_ids->z_fuid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
 		    &acl_ids->z_fgid, 8);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
 		    sizeof (uint64_t) * 4);
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
 		    &acl_phys, sizeof (zfs_acl_phys_t));
 	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
 		    &acl_ids->z_aclp->z_acl_count, 8);
 		locate.cb_aclp = acl_ids->z_aclp;
 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
 		    zfs_acl_data_locator, &locate,
 		    acl_ids->z_aclp->z_acl_bytes);
 		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
 		    acl_ids->z_fuid, acl_ids->z_fgid);
 	}
 
 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
 
 	if (!(flag & IS_ROOT_NODE)) {
 		/*
 		 * The call to zfs_znode_alloc() may fail if memory is low
 		 * via the call path: alloc_inode() -> inode_init_always() ->
 		 * security_inode_alloc() -> inode_alloc_security().  Since
 		 * the existing code is written such that zfs_mknode() cannot
 		 * fail, retry until sufficient memory has been reclaimed.
 		 */
 		do {
 			*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
 		} while (*zpp == NULL);
 
 		VERIFY(*zpp != NULL);
 		VERIFY(dzp != NULL);
 	} else {
 		/*
 		 * If we are creating the root node, the "parent" we
 		 * passed in is the znode for the root.
 		 */
 		*zpp = dzp;
 
 		(*zpp)->z_sa_hdl = sa_hdl;
 	}
 
 	(*zpp)->z_pflags = pflags;
 	(*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
 	(*zpp)->z_dnodesize = dnodesize;
 	(*zpp)->z_projid = projid;
 
 	if (obj_type == DMU_OT_ZNODE ||
 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
 		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
 	}
 	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
 	zfs_znode_hold_exit(zfsvfs, zh);
 }
 
 /*
  * Update in-core attributes.  It is assumed the caller will be doing an
  * sa_bulk_update to push the changes out.
  */
 void
 zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
 {
 	xoptattr_t *xoap;
 	boolean_t update_inode = B_FALSE;
 
 	xoap = xva_getxoptattr(xvap);
 	ASSERT(xoap);
 
 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
 		uint64_t times[2];
 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
 		    &times, sizeof (times), tx);
 		XVA_SET_RTN(xvap, XAT_CREATETIME);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_READONLY);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_HIDDEN);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_SYSTEM);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
 
 		update_inode = B_TRUE;
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
 
 		update_inode = B_TRUE;
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_NODUMP);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_OPAQUE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
 		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
 		zfs_sa_set_scanstamp(zp, xvap, tx);
 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_REPARSE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
 		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_OFFLINE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
 		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_SPARSE);
 	}
 	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
 		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
 		    zp->z_pflags, tx);
 		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
 	}
 
 	if (update_inode)
 		zfs_set_inode_flags(zp, ZTOI(zp));
 }
 
 int
 zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
 {
 	dmu_object_info_t doi;
 	dmu_buf_t	*db;
 	znode_t		*zp;
 	znode_hold_t	*zh;
 	int err;
 	sa_handle_t	*hdl;
 
 	*zpp = NULL;
 
 again:
 	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
 
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (err);
 	}
 
 	dmu_object_info_from_db(db, &doi);
 	if (doi.doi_bonus_type != DMU_OT_SA &&
 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (SET_ERROR(EINVAL));
 	}
 
 	hdl = dmu_buf_get_user(db);
 	if (hdl != NULL) {
 		zp = sa_get_userdata(hdl);
 		/*
 		 * Since "SA" does immediate eviction we
 		 * should never find an SA handle that doesn't
 		 * know about the znode.
 		 */
 
 		ASSERT3P(zp, !=, NULL);
 
 		mutex_enter(&zp->z_lock);
 		ASSERT3U(zp->z_id, ==, obj_num);
 		/*
 		 * If zp->z_unlinked is set, the znode is already marked
 		 * for deletion and should not be discovered. Check this
 		 * after checking igrab() due to fsetxattr() & O_TMPFILE.
 		 *
 		 * If igrab() returns NULL the VFS has independently
 		 * determined the inode should be evicted and has
 		 * called iput_final() to start the eviction process.
 		 * The SA handle is still valid but because the VFS
 		 * requires that the eviction succeed we must drop
 		 * our locks and references to allow the eviction to
 		 * complete.  The zfs_zget() may then be retried.
 		 *
 		 * This unlikely case could be optimized by registering
 		 * a sops->drop_inode() callback.  The callback would
 		 * need to detect the active SA hold thereby informing
 		 * the VFS that this inode should not be evicted.
 		 */
 		if (igrab(ZTOI(zp)) == NULL) {
 			if (zp->z_unlinked)
 				err = SET_ERROR(ENOENT);
 			else
 				err = SET_ERROR(EAGAIN);
 		} else {
 			*zpp = zp;
 			err = 0;
 		}
 
 		mutex_exit(&zp->z_lock);
 		sa_buf_rele(db, NULL);
 		zfs_znode_hold_exit(zfsvfs, zh);
 
 		if (err == EAGAIN) {
 			/* inode might need this to finish evict */
 			cond_resched();
 			goto again;
 		}
 		return (err);
 	}
 
 	/*
 	 * Not found; create a new znode/vnode, but only if the file exists.
 	 *
 	 * There is a small window where zfs_vget() could
 	 * find this object while a file create is still in
 	 * progress.  This is checked for in zfs_znode_alloc().
 	 *
 	 * If zfs_znode_alloc() fails it will drop the hold on the
 	 * bonus buffer.
 	 */
 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
 	    doi.doi_bonus_type, NULL);
 	if (zp == NULL) {
 		err = SET_ERROR(ENOENT);
 	} else {
 		*zpp = zp;
 	}
 	zfs_znode_hold_exit(zfsvfs, zh);
 	return (err);
 }
 
 int
 zfs_rezget(znode_t *zp)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	uint64_t obj_num = zp->z_id;
 	uint64_t mode;
 	uint64_t links;
 	sa_bulk_attr_t bulk[11];
 	int err;
 	int count = 0;
 	uint64_t gen;
 	uint64_t z_uid, z_gid;
 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
-	inode_timespec_t tmp_ctime;
+	inode_timespec_t tmp_ts;
 	uint64_t projid = ZFS_DEFAULT_PROJID;
 	znode_hold_t *zh;
 
 	/*
 	 * Skip ctldir znodes, otherwise they will always get invalidated.
 	 * This causes odd behaviour for mounted snapdirs, especially on
 	 * Linux >= 3.18 where d_invalidate will detach the mountpoint and
 	 * prevent anyone from automounting it again as long as someone is
 	 * still using the detached mount.
 	 */
 	if (zp->z_is_ctldir)
 		return (0);
 
 	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
 
 	mutex_enter(&zp->z_acl_lock);
 	if (zp->z_acl_cached) {
 		zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = NULL;
 	}
 	mutex_exit(&zp->z_acl_lock);
 
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 	if (zp->z_xattr_cached) {
 		nvlist_free(zp->z_xattr_cached);
 		zp->z_xattr_cached = NULL;
 	}
 	rw_exit(&zp->z_xattr_lock);
 
 	ASSERT(zp->z_sa_hdl == NULL);
 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
 	if (err) {
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (err);
 	}
 
 	dmu_object_info_from_db(db, &doi);
 	if (doi.doi_bonus_type != DMU_OT_SA &&
 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
 		sa_buf_rele(db, NULL);
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (SET_ERROR(EINVAL));
 	}
 
 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
 
 	/* reload cached values */
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
 	    &gen, sizeof (gen));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, sizeof (zp->z_size));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
 	    &links, sizeof (links));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 	    &z_uid, sizeof (z_uid));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
 	    &z_gid, sizeof (z_gid));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 	    &mode, sizeof (mode));
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 	    &atime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 	    &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 	    &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
 
 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
 		zfs_znode_dmu_fini(zp);
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (SET_ERROR(EIO));
 	}
 
 	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
 		    &projid, 8);
 		if (err != 0 && err != ENOENT) {
 			zfs_znode_dmu_fini(zp);
 			zfs_znode_hold_exit(zfsvfs, zh);
 			return (SET_ERROR(err));
 		}
 	}
 
 	zp->z_projid = projid;
 	zp->z_mode = ZTOI(zp)->i_mode = mode;
 	zfs_uid_write(ZTOI(zp), z_uid);
 	zfs_gid_write(ZTOI(zp), z_gid);
 
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
-	ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
-	ZFS_TIME_DECODE(&tmp_ctime, ctime);
-	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
+	ZFS_TIME_DECODE(&tmp_ts, atime);
+	zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, mtime);
+	zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
+	ZFS_TIME_DECODE(&tmp_ts, ctime);
+	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 	ZFS_TIME_DECODE(&zp->z_btime, btime);
 
 	if ((uint32_t)gen != ZTOI(zp)->i_generation) {
 		zfs_znode_dmu_fini(zp);
 		zfs_znode_hold_exit(zfsvfs, zh);
 		return (SET_ERROR(EIO));
 	}
 
 	set_nlink(ZTOI(zp), (uint32_t)links);
 	zfs_set_inode_flags(zp, ZTOI(zp));
 
 	zp->z_blksz = doi.doi_data_block_size;
 	zp->z_atime_dirty = B_FALSE;
 	zfs_znode_update_vfs(zp);
 
 	/*
 	 * If the file has zero links, then it has been unlinked on the send
 	 * side and it must be in the received unlinked set.
 	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
 	 * stale data and to prevent automatic removal of the file in
 	 * zfs_zinactive().  The file will be removed either when it is removed
 	 * on the send side and the next incremental stream is received or
 	 * when the unlinked set gets processed.
 	 */
 	zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
 	if (zp->z_unlinked)
 		zfs_znode_dmu_fini(zp);
 
 	zfs_znode_hold_exit(zfsvfs, zh);
 
 	return (0);
 }
 
 void
 zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	objset_t *os = zfsvfs->z_os;
 	uint64_t obj = zp->z_id;
 	uint64_t acl_obj = zfs_external_acl(zp);
 	znode_hold_t *zh;
 
 	zh = zfs_znode_hold_enter(zfsvfs, obj);
 	if (acl_obj) {
 		VERIFY(!zp->z_is_sa);
 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
 	}
 	VERIFY(0 == dmu_object_free(os, obj, tx));
 	zfs_znode_dmu_fini(zp);
 	zfs_znode_hold_exit(zfsvfs, zh);
 }
 
 void
 zfs_zinactive(znode_t *zp)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	uint64_t z_id = zp->z_id;
 	znode_hold_t *zh;
 
 	ASSERT(zp->z_sa_hdl);
 
 	/*
 	 * Don't allow a zfs_zget() while we're trying to release this znode.
 	 */
 	zh = zfs_znode_hold_enter(zfsvfs, z_id);
 
 	mutex_enter(&zp->z_lock);
 
 	/*
 	 * If this was the last reference to a file with no links, remove
 	 * the file from the file system unless the file system is mounted
 	 * read-only.  That can happen, for example, if the file system was
 	 * originally read-write, the file was opened, then unlinked and
 	 * the file system was made read-only before the file was finally
 	 * closed.  The file will remain in the unlinked set.
 	 */
 	if (zp->z_unlinked) {
 		ASSERT(!zfsvfs->z_issnap);
 		if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
 			mutex_exit(&zp->z_lock);
 			zfs_znode_hold_exit(zfsvfs, zh);
 			zfs_rmnode(zp);
 			return;
 		}
 	}
 
 	mutex_exit(&zp->z_lock);
 	zfs_znode_dmu_fini(zp);
 
 	zfs_znode_hold_exit(zfsvfs, zh);
 }
 
 #if defined(HAVE_INODE_TIMESPEC64_TIMES)
 #define	zfs_compare_timespec timespec64_compare
 #else
 #define	zfs_compare_timespec timespec_compare
 #endif
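
 /*
  * For reference: the zpl_inode_get_*() and zpl_inode_set_*_to_ts() helpers
  * used below are compat shims selected by the configure checks for the
  * 6.6/6.7 inode time accessor APIs.  A minimal sketch of the assumed shape;
  * the authoritative definitions live in the ZFS Linux compat headers:
  *
  *	#ifdef HAVE_INODE_GET_ATIME			(Linux >= 6.7)
  *	#define	zpl_inode_get_atime(ip)	inode_get_atime(ip)
  *	#define	zpl_inode_set_atime_to_ts(ip, ts) inode_set_atime_to_ts(ip, ts)
  *	#else						(direct member access)
  *	#define	zpl_inode_get_atime(ip)	((ip)->i_atime)
  *	#define	zpl_inode_set_atime_to_ts(ip, ts) ((ip)->i_atime = (ts))
  *	#endif
  *
  * The mtime and ctime helpers follow the same pattern.
  */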
 
 /*
  * Determine whether the znode's atime must be updated.  The logic mostly
  * duplicates the Linux kernel's relatime_need_update() functionality.
  * This function is only called if the underlying filesystem actually has
  * atime updates enabled.
  */
 boolean_t
 zfs_relatime_need_update(const struct inode *ip)
 {
-	inode_timespec_t now, tmp_ctime;
+	inode_timespec_t now, tmp_atime, tmp_ts;
 
 	gethrestime(&now);
+	tmp_atime = zpl_inode_get_atime(ip);
 	/*
 	 * In relatime mode, only update the atime if the previous atime
 	 * is earlier than either the ctime or mtime or if at least a day
 	 * has passed since the last update of atime.
 	 */
-	if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_mtime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	tmp_ctime = zpl_inode_get_ctime(ip);
-	if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
+	tmp_ts = zpl_inode_get_ctime(ip);
+	if (zfs_compare_timespec(&tmp_ts, &tmp_atime) >= 0)
 		return (B_TRUE);
 
-	if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
+	if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
 		return (B_TRUE);
 
 	return (B_FALSE);
 }
 
 /*
  * Prepare to update znode time stamps.
  *
  *	IN:	zp	- znode requiring timestamp update
  *		flag	- ATTR_MTIME, ATTR_CTIME flags
  *
  *	OUT:	zp	- z_seq
  *		mtime	- new mtime
  *		ctime	- new ctime
  *
  *	Note: We don't update atime here; we rely on the Linux VFS
  *	to handle atime updates.
  */
 void
 zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
     uint64_t ctime[2])
 {
-	inode_timespec_t now, tmp_ctime;
+	inode_timespec_t now, tmp_ts;
 
 	gethrestime(&now);
 
 	zp->z_seq++;
 
 	if (flag & ATTR_MTIME) {
 		ZFS_TIME_ENCODE(&now, mtime);
-		ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
+		ZFS_TIME_DECODE(&tmp_ts, mtime);
+		zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids) {
 			zp->z_pflags |= (ZFS_ARCHIVE |
 			    ZFS_AV_MODIFIED);
 		}
 	}
 
 	if (flag & ATTR_CTIME) {
 		ZFS_TIME_ENCODE(&now, ctime);
-		ZFS_TIME_DECODE(&tmp_ctime, ctime);
-		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
+		ZFS_TIME_DECODE(&tmp_ts, ctime);
+		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
 		if (ZTOZSB(zp)->z_use_fuids)
 			zp->z_pflags |= ZFS_ARCHIVE;
 	}
 }
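
 /*
  * For context: ZFS_TIME_ENCODE()/ZFS_TIME_DECODE() convert between an
  * inode_timespec_t and the two-uint64 on-disk SA representation, which is
  * why every SA_ZPL_*TIME attribute is registered as 16 bytes.  A sketch of
  * the assumed shape (see sys/zfs_znode.h for the real definitions):
  *
  *	ZFS_TIME_ENCODE(tp, wire):  wire[0] = tp->tv_sec; wire[1] = tp->tv_nsec;
  *	ZFS_TIME_DECODE(tp, wire):  tp->tv_sec = wire[0]; tp->tv_nsec = wire[1];
  */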
 
 /*
  * Grow the block size for a file.
  *
  *	IN:	zp	- znode of file to free data in.
  *		size	- requested block size
  *		tx	- open transaction.
  *
  * NOTE: this function assumes that the znode is write locked.
  */
 void
 zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
 {
 	int		error;
 	u_longlong_t	dummy;
 
 	if (size <= zp->z_blksz)
 		return;
 	/*
 	 * If the file size is already greater than the current blocksize,
 	 * we will not grow.  If there is more than one block in a file,
 	 * the blocksize cannot change.
 	 */
 	if (zp->z_blksz && zp->z_size > zp->z_blksz)
 		return;
 
 	error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
 	    size, 0, tx);
 
 	if (error == ENOTSUP)
 		return;
 	ASSERT0(error);
 
 	/* What blocksize did we actually get? */
 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
 }
 
 /*
  * Increase the file length
  *
  *	IN:	zp	- znode of file to free data in.
  *		end	- new end-of-file
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_extend(znode_t *zp, uint64_t end)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	dmu_tx_t *tx;
 	zfs_locked_range_t *lr;
 	uint64_t newblksz;
 	int error;
 
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end <= zp->z_size) {
 		zfs_rangelock_exit(lr);
 		return (0);
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	if (end > zp->z_blksz &&
 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
 		/*
 		 * We are growing the file past the current block size.
 		 */
 		if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
 			/*
 			 * File's blocksize is already larger than the
 			 * "recordsize" property.  Only let it grow to
 			 * the next power of 2.
 			 */
 			ASSERT(!ISP2(zp->z_blksz));
 			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
 		} else {
 			newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
 		}
 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
 	} else {
 		newblksz = 0;
 	}
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		zfs_rangelock_exit(lr);
 		return (error);
 	}
 
 	if (newblksz)
 		zfs_grow_blocksize(zp, newblksz, tx);
 
 	zp->z_size = end;
 
 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
 	    &zp->z_size, sizeof (zp->z_size), tx));
 
 	zfs_rangelock_exit(lr);
 
 	dmu_tx_commit(tx);
 
 	return (0);
 }
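
 /*
  * Worked example for the block-size growth above: assume recordsize was
  * lowered to 64K while a single-block file's z_blksz is a non-power-of-two
  * 96K (0x18000).  Since 96K > z_max_blksz and !ISP2(96K), the block may
  * only grow to the next power of two: highbit64(98304) == 17, so
  * newblksz == MIN(end, 1 << 17) == 128K.  A file still within recordsize
  * simply grows to MIN(end, z_max_blksz).
  */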
 
 /*
  * zfs_zero_partial_page - Modeled after update_pages() but
  * with different arguments and semantics for use by zfs_freesp().
  *
  * Zeroes a piece of a single page cache entry for zp at offset
  * start and length len.
  *
  * Caller must acquire a range lock on the file for the region
  * being zeroed in order that the ARC and page cache stay in sync.
  */
 static void
 zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
 {
 	struct address_space *mp = ZTOI(zp)->i_mapping;
 	struct page *pp;
 	int64_t	off;
 	void *pb;
 
 	ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
 
 	off = start & (PAGE_SIZE - 1);
 	start &= PAGE_MASK;
 
 	pp = find_lock_page(mp, start >> PAGE_SHIFT);
 	if (pp) {
 		if (mapping_writably_mapped(mp))
 			flush_dcache_page(pp);
 
 		pb = kmap(pp);
 		bzero(pb + off, len);
 		kunmap(pp);
 
 		if (mapping_writably_mapped(mp))
 			flush_dcache_page(pp);
 
 		mark_page_accessed(pp);
 		SetPageUptodate(pp);
 		ClearPageError(pp);
 		unlock_page(pp);
 		put_page(pp);
 	}
 }
 
 /*
  * Free space in a file.
  *
  *	IN:	zp	- znode of file to free data in.
  *		off	- start of section to free.
  *		len	- length of section to free.
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	zfs_locked_range_t *lr;
 	int error;
 
 	/*
 	 * Lock the range being freed.
 	 */
 	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (off >= zp->z_size) {
 		zfs_rangelock_exit(lr);
 		return (0);
 	}
 
 	if (off + len > zp->z_size)
 		len = zp->z_size - off;
 
 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
 
 	/*
 	 * Zero partial page cache entries.  This must be done under a
 	 * range lock in order to keep the ARC and page cache in sync.
 	 */
 	if (zn_has_cached_data(zp, off, off + len - 1)) {
 		loff_t first_page, last_page, page_len;
 		loff_t first_page_offset, last_page_offset;
 
 		/* first possible full page in hole */
 		first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		/* last page of hole */
 		last_page = (off + len) >> PAGE_SHIFT;
 
 		/* offset of first_page */
 		first_page_offset = first_page << PAGE_SHIFT;
 		/* offset of last_page */
 		last_page_offset = last_page << PAGE_SHIFT;
 
 		/* truncate whole pages */
 		if (last_page_offset > first_page_offset) {
 			truncate_inode_pages_range(ZTOI(zp)->i_mapping,
 			    first_page_offset, last_page_offset - 1);
 		}
 
 		/* truncate sub-page ranges */
 		if (first_page > last_page) {
 			/* entire punched area within a single page */
 			zfs_zero_partial_page(zp, off, len);
 		} else {
 			/* beginning of punched area at the end of a page */
 			page_len = first_page_offset - off;
 			if (page_len > 0)
 				zfs_zero_partial_page(zp, off, page_len);
 
 			/* end of punched area at the beginning of a page */
 			page_len = off + len - last_page_offset;
 			if (page_len > 0)
 				zfs_zero_partial_page(zp, last_page_offset,
 				    page_len);
 		}
 	}
 	zfs_rangelock_exit(lr);
 
 	return (error);
 }
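
 /*
  * Worked example for the page arithmetic above, assuming PAGE_SIZE == 4096:
  * punching off == 1000, len == 10000 yields first_page == 1 and
  * last_page == 2, so the fully-covered page [4096, 8192) is truncated,
  * while the partial ranges [1000, 4096) and [8192, 11000) are zeroed via
  * zfs_zero_partial_page().
  */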
 
 /*
  * Truncate a file
  *
  *	IN:	zp	- znode of file to free data in.
  *		end	- new end-of-file.
  *
  *	RETURN:	0 on success, error code on failure
  */
 static int
 zfs_trunc(znode_t *zp, uint64_t end)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	dmu_tx_t *tx;
 	zfs_locked_range_t *lr;
 	int error;
 	sa_bulk_attr_t bulk[2];
 	int count = 0;
 
 	/*
 	 * We will change zp_size, lock the whole file.
 	 */
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
 
 	/*
 	 * Nothing to do if file already at desired length.
 	 */
 	if (end >= zp->z_size) {
 		zfs_rangelock_exit(lr);
 		return (0);
 	}
 
 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
 	    DMU_OBJECT_END);
 	if (error) {
 		zfs_rangelock_exit(lr);
 		return (error);
 	}
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		zfs_rangelock_exit(lr);
 		return (error);
 	}
 
 	zp->z_size = end;
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
 	    NULL, &zp->z_size, sizeof (zp->z_size));
 
 	if (end == 0) {
 		zp->z_pflags &= ~ZFS_SPARSE;
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
 		    NULL, &zp->z_pflags, 8);
 	}
 	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
 
 	dmu_tx_commit(tx);
 	zfs_rangelock_exit(lr);
 
 	return (0);
 }
 
 /*
  * Free space in a file
  *
  *	IN:	zp	- znode of file to free data in.
  *		off	- start of range
  *		len	- end of range (0 => EOF)
  *		flag	- current file open mode flags.
  *		log	- TRUE if this action should be logged
  *
  *	RETURN:	0 on success, error code on failure
  */
 int
 zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
 {
 	dmu_tx_t *tx;
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	zilog_t *zilog = zfsvfs->z_log;
 	uint64_t mode;
 	uint64_t mtime[2], ctime[2];
 	sa_bulk_attr_t bulk[3];
 	int count = 0;
 	int error;
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
 	    sizeof (mode))) != 0)
 		return (error);
 
 	if (off > zp->z_size) {
 		error = zfs_extend(zp, off+len);
 		if (error == 0 && log)
 			goto log;
 		goto out;
 	}
 
 	if (len == 0) {
 		error = zfs_trunc(zp, off);
 	} else {
 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
 		    off + len > zp->z_size)
 			error = zfs_extend(zp, off+len);
 	}
 	if (error || !log)
 		goto out;
 log:
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		goto out;
 	}
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
 	    NULL, &zp->z_pflags, 8);
 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 	ASSERT(error == 0);
 
 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
 
 	dmu_tx_commit(tx);
 
 	zfs_znode_update_vfs(zp);
 	error = 0;
 
 out:
 	/*
 	 * Truncate the page cache.  For file truncate operations, use
 	 * the purpose-built API for truncations.  For punching operations,
 	 * the truncation is handled under a range lock in zfs_free_range.
 	 */
 	if (len == 0)
 		truncate_setsize(ZTOI(zp), off);
 	return (error);
 }
 
 void
 zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
 {
 	struct super_block *sb;
 	zfsvfs_t	*zfsvfs;
 	uint64_t	moid, obj, sa_obj, version;
 	uint64_t	sense = ZFS_CASE_SENSITIVE;
 	uint64_t	norm = 0;
 	nvpair_t	*elem;
 	int		size;
 	int		error;
 	int		i;
 	znode_t		*rootzp = NULL;
 	vattr_t		vattr;
 	znode_t		*zp;
 	zfs_acl_ids_t	acl_ids;
 
 	/*
 	 * First attempt to create master node.
 	 */
 	/*
 	 * In an empty objset, there are no blocks to read and thus
 	 * there can be no i/o errors (which we assert below).
 	 */
 	moid = MASTER_NODE_OBJ;
 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
 	    DMU_OT_NONE, 0, tx);
 	ASSERT(error == 0);
 
 	/*
 	 * Set starting attributes.
 	 */
 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
 		/* For the moment we expect all zpl props to be uint64_ts */
 		uint64_t val;
 		char *name;
 
 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
 		name = nvpair_name(elem);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
 			if (val < version)
 				version = val;
 		} else {
 			error = zap_update(os, moid, name, 8, 1, &val, tx);
 		}
 		ASSERT(error == 0);
 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
 			norm = val;
 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
 			sense = val;
 	}
 	ASSERT(version != 0);
 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
 
 	/*
 	 * Create zap object used for SA attribute registration
 	 */
 
 	if (version >= ZPL_VERSION_SA) {
 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
 		    DMU_OT_NONE, 0, tx);
 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 		ASSERT(error == 0);
 	} else {
 		sa_obj = 0;
 	}
 	/*
 	 * Create a delete queue.
 	 */
 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
 
 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
 	ASSERT(error == 0);
 
 	/*
 	 * Create root znode.  Create minimal znode/inode/zfsvfs/sb
 	 * to allow zfs_mknode to work.
 	 */
 	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
 	vattr.va_mode = S_IFDIR|0755;
 	vattr.va_uid = crgetuid(cr);
 	vattr.va_gid = crgetgid(cr);
 
 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
 	rootzp->z_unlinked = B_FALSE;
 	rootzp->z_atime_dirty = B_FALSE;
 	rootzp->z_is_sa = USE_SA(version, os);
 	rootzp->z_pflags = 0;
 
 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 	zfsvfs->z_os = os;
 	zfsvfs->z_parent = zfsvfs;
 	zfsvfs->z_version = version;
 	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
 	zfsvfs->z_use_sa = USE_SA(version, os);
 	zfsvfs->z_norm = norm;
 
 	sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
 	sb->s_fs_info = zfsvfs;
 
 	ZTOI(rootzp)->i_sb = sb;
 
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
 	    &zfsvfs->z_attr_table);
 
 	ASSERT(error == 0);
 
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
 	 */
 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 
 	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
 	zfsvfs->z_hold_size = size;
 	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
 	    KM_SLEEP);
 	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
 	for (i = 0; i != size; i++) {
 		avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
 		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
 		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
 	}
 
 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
 	    cr, NULL, &acl_ids));
 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
 	ASSERT3P(zp, ==, rootzp);
 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
 	ASSERT(error == 0);
 	zfs_acl_ids_free(&acl_ids);
 
 	atomic_set(&ZTOI(rootzp)->i_count, 0);
 	sa_handle_destroy(rootzp->z_sa_hdl);
 	kmem_cache_free(znode_cache, rootzp);
 
 	for (i = 0; i != size; i++) {
 		avl_destroy(&zfsvfs->z_hold_trees[i]);
 		mutex_destroy(&zfsvfs->z_hold_locks[i]);
 	}
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 
 	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
 	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
 	kmem_free(sb, sizeof (struct super_block));
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 }
 #endif /* _KERNEL */
 
 static int
 zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
 {
 	uint64_t sa_obj = 0;
 	int error;
 
 	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
 	if (error != 0 && error != ENOENT)
 		return (error);
 
 	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
 	return (error);
 }
 
 static int
 zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
     dmu_buf_t **db, void *tag)
 {
 	dmu_object_info_t doi;
 	int error;
 
 	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
 		return (error);
 
 	dmu_object_info_from_db(*db, &doi);
 	if ((doi.doi_bonus_type != DMU_OT_SA &&
 	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
 	    doi.doi_bonus_size < sizeof (znode_phys_t))) {
 		sa_buf_rele(*db, tag);
 		return (SET_ERROR(ENOTSUP));
 	}
 
 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
 	if (error != 0) {
 		sa_buf_rele(*db, tag);
 		return (error);
 	}
 
 	return (0);
 }
 
 static void
 zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
 {
 	sa_handle_destroy(hdl);
 	sa_buf_rele(db, tag);
 }
 
 /*
  * Given an object number, return its parent object number and whether
  * or not the object is an extended attribute directory.
  */
 static int
 zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
     uint64_t *pobjp, int *is_xattrdir)
 {
 	uint64_t parent;
 	uint64_t pflags;
 	uint64_t mode;
 	uint64_t parent_mode;
 	sa_bulk_attr_t bulk[3];
 	sa_handle_t *sa_hdl;
 	dmu_buf_t *sa_db;
 	int count = 0;
 	int error;
 
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
 	    &parent, sizeof (parent));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
 	    &pflags, sizeof (pflags));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
 	    &mode, sizeof (mode));
 
 	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
 		return (error);
 
 	/*
 	 * When a link is removed, its parent pointer is not changed and will
 	 * be invalid.  There are two cases where a link is removed but the
 	 * file stays around: when it goes to the delete queue and when there
 	 * are additional links.
 	 */
 	error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
 	zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
 	if (error != 0)
 		return (error);
 
 	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
 
 	/*
 	 * Extended attributes can be applied to files, directories, etc.,
 	 * so an xattrdir's parent may be of any type.  Otherwise the
 	 * parent must be a directory.
 	 */
 	if (!*is_xattrdir && !S_ISDIR(parent_mode))
 		return (SET_ERROR(EINVAL));
 
 	*pobjp = parent;
 
 	return (0);
 }
 
 /*
  * Given an object number, return some zpl level statistics
  */
 static int
 zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
     zfs_stat_t *sb)
 {
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
 
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
 	    &sb->zs_mode, sizeof (sb->zs_mode));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
 	    &sb->zs_gen, sizeof (sb->zs_gen));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
 	    &sb->zs_links, sizeof (sb->zs_links));
 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
 	    &sb->zs_ctime, sizeof (sb->zs_ctime));
 
 	return (sa_bulk_lookup(hdl, bulk, count));
 }
 
 static int
 zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
     sa_attr_type_t *sa_table, char *buf, int len)
 {
 	sa_handle_t *sa_hdl;
 	sa_handle_t *prevhdl = NULL;
 	dmu_buf_t *prevdb = NULL;
 	dmu_buf_t *sa_db = NULL;
 	char *path = buf + len - 1;
 	int error;
 
 	*path = '\0';
 	sa_hdl = hdl;
 
 	uint64_t deleteq_obj;
 	VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ,
 	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj));
 	error = zap_lookup_int(osp, deleteq_obj, obj);
 	if (error == 0) {
 		return (ESTALE);
 	} else if (error != ENOENT) {
 		return (error);
 	}
 	error = 0;
 
 	for (;;) {
 		uint64_t pobj = 0;
 		char component[MAXNAMELEN + 2];
 		size_t complen;
 		int is_xattrdir = 0;
 
 		if (prevdb) {
 			ASSERT(prevhdl != NULL);
 			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
 		}
 
 		if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
 		    &is_xattrdir)) != 0)
 			break;
 
 		if (pobj == obj) {
 			if (path[0] != '/')
 				*--path = '/';
 			break;
 		}
 
 		component[0] = '/';
 		if (is_xattrdir) {
 			(void) sprintf(component + 1, "<xattrdir>");
 		} else {
 			error = zap_value_search(osp, pobj, obj,
 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
 			if (error != 0)
 				break;
 		}
 
 		complen = strlen(component);
 		path -= complen;
 		ASSERT(path >= buf);
 		bcopy(component, path, complen);
 		obj = pobj;
 
 		if (sa_hdl != hdl) {
 			prevhdl = sa_hdl;
 			prevdb = sa_db;
 		}
 		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
 		if (error != 0) {
 			sa_hdl = prevhdl;
 			sa_db = prevdb;
 			break;
 		}
 	}
 
 	if (sa_hdl != NULL && sa_hdl != hdl) {
 		ASSERT(sa_db != NULL);
 		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
 	}
 
 	if (error == 0)
 		(void) memmove(buf, path, buf + len - path);
 
 	return (error);
 }
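
 /*
  * Example: when resolving the object for /a/b with a sufficiently large
  * buf, the loop above first writes "/b" against the end of the buffer,
  * then prepends "/a", and once the root is reached (pobj == obj) the
  * final memmove() shifts the assembled string to the front of buf.
  */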
 
 int
 zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
 {
 	sa_attr_type_t *sa_table;
 	sa_handle_t *hdl;
 	dmu_buf_t *db;
 	int error;
 
 	error = zfs_sa_setup(osp, &sa_table);
 	if (error != 0)
 		return (error);
 
 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
 	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
 int
 zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
     char *buf, int len)
 {
 	char *path = buf + len - 1;
 	sa_attr_type_t *sa_table;
 	sa_handle_t *hdl;
 	dmu_buf_t *db;
 	int error;
 
 	*path = '\0';
 
 	error = zfs_sa_setup(osp, &sa_table);
 	if (error != 0)
 		return (error);
 
 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
 	if (error != 0) {
 		zfs_release_sa_handle(hdl, db, FTAG);
 		return (error);
 	}
 
 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
 
 	zfs_release_sa_handle(hdl, db, FTAG);
 	return (error);
 }
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_create_fs);
 EXPORT_SYMBOL(zfs_obj_to_path);
 
 /* CSTYLED */
 module_param(zfs_object_mutex_size, uint, 0644);
 MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
 module_param(zfs_unlink_suspend_progress, int, 0644);
 MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
 "(debug - leaks space into the unlinked set)");
 #endif
diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c
index ce1524440ad0..4bd185baeb19 100644
--- a/module/os/linux/zfs/zpl_inode.c
+++ b/module/os/linux/zfs/zpl_inode.c
@@ -1,853 +1,854 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  */
 
 
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_znode.h>
 #include <sys/dmu_objset.h>
 #include <sys/vfs.h>
 #include <sys/zpl.h>
 #include <sys/file.h>
 
 
 static struct dentry *
 zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	cred_t *cr = CRED();
 	struct inode *ip;
 	znode_t *zp;
 	int error;
 	fstrans_cookie_t cookie;
 	pathname_t *ppn = NULL;
 	pathname_t pn;
 	int zfs_flags = 0;
 	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
 
 	if (dlen(dentry) >= ZAP_MAXNAMELEN)
 		return (ERR_PTR(-ENAMETOOLONG));
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 
 	/* If we are a case insensitive fs, we need the real name */
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		zfs_flags = FIGNORECASE;
 		pn_alloc(&pn);
 		ppn = &pn;
 	}
 
 	error = -zfs_lookup(ITOZ(dir), dname(dentry), &zp,
 	    zfs_flags, cr, NULL, ppn);
 	spl_fstrans_unmark(cookie);
 	ASSERT3S(error, <=, 0);
 	crfree(cr);
 
 	spin_lock(&dentry->d_lock);
 	dentry->d_time = jiffies;
 	spin_unlock(&dentry->d_lock);
 
 	if (error) {
 		/*
 		 * On a case insensitive fs we do not want to insert
 		 * negative entries, so return NULL for ENOENT.  Fall
 		 * through if the error is not ENOENT, and free the
 		 * pathname buffer either way.
 		 */
 		if (ppn) {
 			pn_free(ppn);
 			if (error == -ENOENT)
 				return (NULL);
 		}
 
 		if (error == -ENOENT)
 			return (d_splice_alias(NULL, dentry));
 		else
 			return (ERR_PTR(error));
 	}
 	ip = ZTOI(zp);
 
 	/*
 	 * If we are case insensitive, call the correct function
 	 * to install the name.
 	 */
 	if (ppn) {
 		struct dentry *new_dentry;
 		struct qstr ci_name;
 
 		if (strcmp(dname(dentry), pn.pn_buf) == 0) {
 			new_dentry = d_splice_alias(ip, dentry);
 		} else {
 			ci_name.name = pn.pn_buf;
 			ci_name.len = strlen(pn.pn_buf);
 			new_dentry = d_add_ci(dentry, ip, &ci_name);
 		}
 		pn_free(ppn);
 		return (new_dentry);
 	} else {
 		return (d_splice_alias(ip, dentry));
 	}
 }
 
 void
 zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
 {
 	vap->va_mask = ATTR_MODE;
 	vap->va_mode = mode;
 	vap->va_uid = crgetuid(cr);
 
 	if (dir && dir->i_mode & S_ISGID) {
 		vap->va_gid = KGID_TO_SGID(dir->i_gid);
 		if (S_ISDIR(mode))
 			vap->va_mode |= S_ISGID;
 	} else {
 		vap->va_gid = crgetgid(cr);
 	}
 }
 
 static int
 #ifdef HAVE_IOPS_CREATE_USERNS
 zpl_create(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode, bool flag)
 #elif defined(HAVE_IOPS_CREATE_IDMAP)
 zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode, bool flag)
 #else
 zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
 #endif
 {
 	cred_t *cr = CRED();
 	znode_t *zp;
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, mode, cr);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
 	    mode, &zp, cr, 0, NULL);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
 			error = zpl_init_acl(ZTOI(zp), dir);
 
 		if (error) {
 			(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
 			remove_inode_hash(ZTOI(zp));
 			iput(ZTOI(zp));
 		} else {
 			d_instantiate(dentry, ZTOI(zp));
 		}
 	}
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 static int
 #ifdef HAVE_IOPS_MKNOD_USERNS
 zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode,
 #elif defined(HAVE_IOPS_MKNOD_IDMAP)
 zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode,
 #else
 zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
 #endif
     dev_t rdev)
 {
 	cred_t *cr = CRED();
 	znode_t *zp;
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
 
 	/*
 	 * We currently expect Linux to supply rdev=0 for all sockets
 	 * and fifos, but we want to know if this behavior ever changes.
 	 */
 	if (S_ISSOCK(mode) || S_ISFIFO(mode))
 		ASSERT(rdev == 0);
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, mode, cr);
 	vap->va_rdev = rdev;
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_create(ITOZ(dir), dname(dentry), vap, 0,
 	    mode, &zp, cr, 0, NULL);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
 			error = zpl_init_acl(ZTOI(zp), dir);
 
 		if (error) {
 			(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
 			remove_inode_hash(ZTOI(zp));
 			iput(ZTOI(zp));
 		} else {
 			d_instantiate(dentry, ZTOI(zp));
 		}
 	}
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 #ifdef HAVE_TMPFILE
 static int
 #ifdef HAVE_TMPFILE_IDMAP
 zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
     struct file *file, umode_t mode)
 #elif !defined(HAVE_TMPFILE_DENTRY)
 zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
     struct file *file, umode_t mode)
 #else
 #ifdef HAVE_TMPFILE_USERNS
 zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
     struct dentry *dentry, umode_t mode)
 #else
 zpl_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 #endif
 #endif
 {
 	cred_t *cr = CRED();
 	struct inode *ip;
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	/*
 	 * The VFS does not apply the umask, therefore it is applied here
 	 * when POSIX ACLs are not enabled.
 	 */
 	if (!IS_POSIXACL(dir))
 		mode &= ~current_umask();
 	zpl_vap_init(vap, dir, mode, cr);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
 	if (error == 0) {
 		/* d_tmpfile will do drop_nlink, so we should set it first */
 		set_nlink(ip, 1);
 #ifndef HAVE_TMPFILE_DENTRY
 		d_tmpfile(file, ip);
 
 		error = zpl_xattr_security_init(ip, dir,
 		    &file->f_path.dentry->d_name);
 #else
 		d_tmpfile(dentry, ip);
 
 		error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
 #endif
 		if (error == 0)
 			error = zpl_init_acl(ip, dir);
 #ifndef HAVE_TMPFILE_DENTRY
 		error = finish_open_simple(file, error);
 #endif
 		/*
 		 * We don't need to handle the error here; the file is
 		 * already in the unlinked set.
 		 */
 	}
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 #endif
 
 static int
 zpl_unlink(struct inode *dir, struct dentry *dentry)
 {
 	cred_t *cr = CRED();
 	int error;
 	fstrans_cookie_t cookie;
 	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	error = -zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
 
 	/*
 	 * For a CI FS we must invalidate the dentry to prevent the
 	 * creation of negative entries.
 	 */
 	if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
 		d_invalidate(dentry);
 
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 static int
 #ifdef HAVE_IOPS_MKDIR_USERNS
 zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode)
 #elif defined(HAVE_IOPS_MKDIR_IDMAP)
 zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
     struct dentry *dentry, umode_t mode)
 #else
 zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 #endif
 {
 	cred_t *cr = CRED();
 	vattr_t *vap;
 	znode_t *zp;
 	int error;
 	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, mode | S_IFDIR, cr);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_mkdir(ITOZ(dir), dname(dentry), vap, &zp, cr, 0, NULL);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
 		if (error == 0)
 			error = zpl_init_acl(ZTOI(zp), dir);
 
 		if (error) {
 			(void) zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0);
 			remove_inode_hash(ZTOI(zp));
 			iput(ZTOI(zp));
 		} else {
 			d_instantiate(dentry, ZTOI(zp));
 		}
 	}
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 static int
 zpl_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	cred_t *cr = CRED();
 	int error;
 	fstrans_cookie_t cookie;
 	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	error = -zfs_rmdir(ITOZ(dir), dname(dentry), NULL, cr, 0);
 
 	/*
 	 * For a CI FS we must invalidate the dentry to prevent the
 	 * creation of negative entries.
 	 */
 	if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
 		d_invalidate(dentry);
 
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 static int
 #ifdef HAVE_USERNS_IOPS_GETATTR
 zpl_getattr_impl(struct user_namespace *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
 #elif defined(HAVE_IDMAP_IOPS_GETATTR)
 zpl_getattr_impl(struct mnt_idmap *user_ns,
     const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
 #else
 zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
     unsigned int query_flags)
 #endif
 {
 	int error;
 	fstrans_cookie_t cookie;
 	struct inode *ip = path->dentry->d_inode;
 	znode_t *zp __maybe_unused = ITOZ(ip);
 
 	cookie = spl_fstrans_mark();
 
 	/*
 	 * XXX: query_flags is currently ignored.
 	 */
 
 #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
 	error = -zfs_getattr_fast(user_ns, request_mask, ip, stat);
 #elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
 	error = -zfs_getattr_fast(user_ns, ip, stat);
 #else
 	error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
 #endif
 
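 	/* Fill in the optional statx(2) fields the kernel supports. */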
 #ifdef STATX_BTIME
 	if (request_mask & STATX_BTIME) {
 		stat->btime = zp->z_btime;
 		stat->result_mask |= STATX_BTIME;
 	}
 #endif
 
 #ifdef STATX_ATTR_IMMUTABLE
 	if (zp->z_pflags & ZFS_IMMUTABLE)
 		stat->attributes |= STATX_ATTR_IMMUTABLE;
 	stat->attributes_mask |= STATX_ATTR_IMMUTABLE;
 #endif
 
 #ifdef STATX_ATTR_APPEND
 	if (zp->z_pflags & ZFS_APPENDONLY)
 		stat->attributes |= STATX_ATTR_APPEND;
 	stat->attributes_mask |= STATX_ATTR_APPEND;
 #endif
 
 #ifdef STATX_ATTR_NODUMP
 	if (zp->z_pflags & ZFS_NODUMP)
 		stat->attributes |= STATX_ATTR_NODUMP;
 	stat->attributes_mask |= STATX_ATTR_NODUMP;
 #endif
 
 	spl_fstrans_unmark(cookie);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 ZPL_GETATTR_WRAPPER(zpl_getattr);
 
 static int
 #ifdef HAVE_USERNS_IOPS_SETATTR
 zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
     struct iattr *ia)
 #elif defined(HAVE_IDMAP_IOPS_SETATTR)
 zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
     struct iattr *ia)
 #else
 zpl_setattr(struct dentry *dentry, struct iattr *ia)
 #endif
 {
 	struct inode *ip = dentry->d_inode;
 	cred_t *cr = CRED();
 	vattr_t *vap;
 	int error;
 	fstrans_cookie_t cookie;
 
 #ifdef HAVE_SETATTR_PREPARE_USERNS
 	error = zpl_setattr_prepare(user_ns, dentry, ia);
 #elif defined(HAVE_SETATTR_PREPARE_IDMAP)
 	error = zpl_setattr_prepare(user_ns, dentry, ia);
 #else
 	error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
 #endif
 	if (error)
 		return (error);
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	vap->va_mask = ia->ia_valid & ATTR_IATTR_MASK;
 	vap->va_mode = ia->ia_mode;
 	vap->va_uid = KUID_TO_SUID(ia->ia_uid);
 	vap->va_gid = KGID_TO_SGID(ia->ia_gid);
 	vap->va_size = ia->ia_size;
 	vap->va_atime = ia->ia_atime;
 	vap->va_mtime = ia->ia_mtime;
 	vap->va_ctime = ia->ia_ctime;
 
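 	/*
 	 * Truncate the requested atime to the timestamp granularity
 	 * supported by this filesystem before storing it.
 	 */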
 	if (vap->va_mask & ATTR_ATIME)
-		ip->i_atime = zpl_inode_timestamp_truncate(ia->ia_atime, ip);
+		zpl_inode_set_atime_to_ts(ip,
+		    zpl_inode_timestamp_truncate(ia->ia_atime, ip));
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_setattr(ITOZ(ip), vap, 0, cr);
 	if (!error && (ia->ia_valid & ATTR_MODE))
 		error = zpl_chmod_acl(ip);
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 static int
 #ifdef HAVE_IOPS_RENAME_USERNS
 zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
     struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
     unsigned int rflags)
 #elif defined(HAVE_IOPS_RENAME_IDMAP)
 zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
     struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
     unsigned int rflags)
 #else
 zpl_rename2(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
 #endif
 {
 	cred_t *cr = CRED();
 	int error;
 	fstrans_cookie_t cookie;
 
 	/* We don't support any renameat2(2) flags, so reject them all */
 	if (rflags)
 		return (-EINVAL);
 
 	crhold(cr);
 	cookie = spl_fstrans_mark();
 	error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip),
 	    dname(tdentry), cr, 0);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 #if !defined(HAVE_IOPS_RENAME_USERNS) && \
 	!defined(HAVE_RENAME_WANTS_FLAGS) && \
 	!defined(HAVE_IOPS_RENAME_IDMAP)
 static int
 zpl_rename(struct inode *sdip, struct dentry *sdentry,
     struct inode *tdip, struct dentry *tdentry)
 {
 	return (zpl_rename2(sdip, sdentry, tdip, tdentry, 0));
 }
 #endif
 
 static int
 #ifdef HAVE_IOPS_SYMLINK_USERNS
 zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
     struct dentry *dentry, const char *name)
 #elif defined(HAVE_IOPS_SYMLINK_IDMAP)
 zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
     struct dentry *dentry, const char *name)
 #else
 zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
 #endif
 {
 	cred_t *cr = CRED();
 	vattr_t *vap;
 	znode_t *zp;
 	int error;
 	fstrans_cookie_t cookie;
 
 	crhold(cr);
 	vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
 	zpl_vap_init(vap, dir, S_IFLNK | S_IRWXUGO, cr);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_symlink(ITOZ(dir), dname(dentry), vap,
 	    (char *)name, &zp, cr, 0);
 	if (error == 0) {
 		error = zpl_xattr_security_init(ZTOI(zp), dir, &dentry->d_name);
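 		/* As in zpl_mkdir(), unwind the new symlink on failure. */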
 		if (error) {
 			(void) zfs_remove(ITOZ(dir), dname(dentry), cr, 0);
 			remove_inode_hash(ZTOI(zp));
 			iput(ZTOI(zp));
 		} else {
 			d_instantiate(dentry, ZTOI(zp));
 		}
 	}
 
 	spl_fstrans_unmark(cookie);
 	kmem_free(vap, sizeof (vattr_t));
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
 #if defined(HAVE_PUT_LINK_COOKIE)
 static void
 zpl_put_link(struct inode *unused, void *cookie)
 {
 	kmem_free(cookie, MAXPATHLEN);
 }
 #elif defined(HAVE_PUT_LINK_NAMEIDATA)
 static void
 zpl_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
 {
 	const char *link = nd_get_link(nd);
 
 	if (!IS_ERR(link))
 		kmem_free(link, MAXPATHLEN);
 }
 #elif defined(HAVE_PUT_LINK_DELAYED)
 static void
 zpl_put_link(void *ptr)
 {
 	kmem_free(ptr, MAXPATHLEN);
 }
 #endif
 
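 /*
  * Common helper: read the symlink target into a MAXPATHLEN buffer,
  * which the matching put_link/delayed call is expected to free.
  */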
 static int
 zpl_get_link_common(struct dentry *dentry, struct inode *ip, char **link)
 {
 	fstrans_cookie_t cookie;
 	cred_t *cr = CRED();
 	int error;
 
 	crhold(cr);
 	*link = NULL;
 
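 	/*
 	 * Read at most MAXPATHLEN - 1 bytes into a zeroed buffer so
 	 * the returned link is always NUL-terminated.
 	 */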
 	struct iovec iov;
 	iov.iov_len = MAXPATHLEN;
 	iov.iov_base = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 
 	zfs_uio_t uio;
 	zfs_uio_iovec_init(&uio, &iov, 1, 0, UIO_SYSSPACE, MAXPATHLEN - 1, 0);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_readlink(ip, &uio, cr);
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 
 	if (error)
 		kmem_free(iov.iov_base, MAXPATHLEN);
 	else
 		*link = iov.iov_base;
 
 	return (error);
 }
 
 #if defined(HAVE_GET_LINK_DELAYED)
 static const char *
 zpl_get_link(struct dentry *dentry, struct inode *inode,
     struct delayed_call *done)
 {
 	char *link = NULL;
 	int error;
 
 	if (!dentry)
 		return (ERR_PTR(-ECHILD));
 
 	error = zpl_get_link_common(dentry, inode, &link);
 	if (error)
 		return (ERR_PTR(error));
 
 	set_delayed_call(done, zpl_put_link, link);
 
 	return (link);
 }
 #elif defined(HAVE_GET_LINK_COOKIE)
 static const char *
 zpl_get_link(struct dentry *dentry, struct inode *inode, void **cookie)
 {
 	char *link = NULL;
 	int error;
 
 	if (!dentry)
 		return (ERR_PTR(-ECHILD));
 
 	error = zpl_get_link_common(dentry, inode, &link);
 	if (error)
 		return (ERR_PTR(error));
 
 	return (*cookie = link);
 }
 #elif defined(HAVE_FOLLOW_LINK_COOKIE)
 static const char *
 zpl_follow_link(struct dentry *dentry, void **cookie)
 {
 	char *link = NULL;
 	int error;
 
 	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
 	if (error)
 		return (ERR_PTR(error));
 
 	return (*cookie = link);
 }
 #elif defined(HAVE_FOLLOW_LINK_NAMEIDATA)
 static void *
 zpl_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char *link = NULL;
 	int error;
 
 	error = zpl_get_link_common(dentry, dentry->d_inode, &link);
 	if (error)
 		nd_set_link(nd, ERR_PTR(error));
 	else
 		nd_set_link(nd, link);
 
 	return (NULL);
 }
 #endif
 
 static int
 zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
 	cred_t *cr = CRED();
 	struct inode *ip = old_dentry->d_inode;
 	int error;
 	fstrans_cookie_t cookie;
 
 	if (ip->i_nlink >= ZFS_LINK_MAX)
 		return (-EMLINK);
 
 	crhold(cr);
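 	/* link(2) updates the ctime of the inode being linked */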
 	zpl_inode_set_ctime_to_ts(ip, current_time(ip));
 	/* Must have an existing ref, so igrab() cannot return NULL */
 	VERIFY3P(igrab(ip), !=, NULL);
 
 	cookie = spl_fstrans_mark();
 	error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0);
 	if (error) {
 		iput(ip);
 		goto out;
 	}
 
 	d_instantiate(dentry, ip);
 out:
 	spl_fstrans_unmark(cookie);
 	crfree(cr);
 	ASSERT3S(error, <=, 0);
 
 	return (error);
 }
 
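 /*
  * Inode operation tables for regular files, directories, symlinks,
  * and special files.  Entries vary with the detected kernel APIs.
  */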
 const struct inode_operations zpl_inode_operations = {
 	.setattr	= zpl_setattr,
 	.getattr	= zpl_getattr,
 #ifdef HAVE_GENERIC_SETXATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= zpl_xattr_list,
 #if defined(CONFIG_FS_POSIX_ACL)
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
 #if defined(HAVE_GET_INODE_ACL)
 	.get_inode_acl	= zpl_get_acl,
 #else
 	.get_acl	= zpl_get_acl,
 #endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
 };
 
 const struct inode_operations zpl_dir_inode_operations = {
 	.create		= zpl_create,
 	.lookup		= zpl_lookup,
 	.link		= zpl_link,
 	.unlink		= zpl_unlink,
 	.symlink	= zpl_symlink,
 	.mkdir		= zpl_mkdir,
 	.rmdir		= zpl_rmdir,
 	.mknod		= zpl_mknod,
 #if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
 	.rename		= zpl_rename2,
 #elif defined(HAVE_IOPS_RENAME_IDMAP)
 	.rename		= zpl_rename2,
 #else
 	.rename		= zpl_rename,
 #endif
 #ifdef HAVE_TMPFILE
 	.tmpfile	= zpl_tmpfile,
 #endif
 	.setattr	= zpl_setattr,
 	.getattr	= zpl_getattr,
 #ifdef HAVE_GENERIC_SETXATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= zpl_xattr_list,
 #if defined(CONFIG_FS_POSIX_ACL)
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
 #if defined(HAVE_GET_INODE_ACL)
 	.get_inode_acl	= zpl_get_acl,
 #else
 	.get_acl	= zpl_get_acl,
 #endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
 };
 
 const struct inode_operations zpl_symlink_inode_operations = {
 #ifdef HAVE_GENERIC_READLINK
 	.readlink	= generic_readlink,
 #endif
 #if defined(HAVE_GET_LINK_DELAYED) || defined(HAVE_GET_LINK_COOKIE)
 	.get_link	= zpl_get_link,
 #elif defined(HAVE_FOLLOW_LINK_COOKIE) || defined(HAVE_FOLLOW_LINK_NAMEIDATA)
 	.follow_link	= zpl_follow_link,
 #endif
 #if defined(HAVE_PUT_LINK_COOKIE) || defined(HAVE_PUT_LINK_NAMEIDATA)
 	.put_link	= zpl_put_link,
 #endif
 	.setattr	= zpl_setattr,
 	.getattr	= zpl_getattr,
 #ifdef HAVE_GENERIC_SETXATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= zpl_xattr_list,
 };
 
 const struct inode_operations zpl_special_inode_operations = {
 	.setattr	= zpl_setattr,
 	.getattr	= zpl_getattr,
 #ifdef HAVE_GENERIC_SETXATTR
 	.setxattr	= generic_setxattr,
 	.getxattr	= generic_getxattr,
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= zpl_xattr_list,
 #if defined(CONFIG_FS_POSIX_ACL)
 #if defined(HAVE_SET_ACL)
 	.set_acl	= zpl_set_acl,
 #endif /* HAVE_SET_ACL */
 #if defined(HAVE_GET_INODE_ACL)
 	.get_inode_acl	= zpl_get_acl,
 #else
 	.get_acl	= zpl_get_acl,
 #endif /* HAVE_GET_INODE_ACL */
 #endif /* CONFIG_FS_POSIX_ACL */
 };