diff --git a/sys/contrib/openzfs/config/kernel-blk-queue.m4 b/sys/contrib/openzfs/config/kernel-blk-queue.m4
index 2f0b386e6637..a064140f337a 100644
--- a/sys/contrib/openzfs/config/kernel-blk-queue.m4
+++ b/sys/contrib/openzfs/config/kernel-blk-queue.m4
@@ -1,433 +1,461 @@
 dnl #
 dnl # 2.6.39 API change,
 dnl # blk_start_plug() and blk_finish_plug()
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG], [
 	ZFS_LINUX_TEST_SRC([blk_plug], [
 		#include <linux/blkdev.h>
 	],[
 		struct blk_plug plug __attribute__ ((unused));
 
 		blk_start_plug(&plug);
 		blk_finish_plug(&plug);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PLUG], [
 	AC_MSG_CHECKING([whether struct blk_plug is available])
 	ZFS_LINUX_TEST_RESULT([blk_plug], [
 		AC_MSG_RESULT(yes)
 	],[
 		ZFS_LINUX_TEST_ERROR([blk_plug])
 	])
 ])
 
 dnl #
 dnl # 2.6.32 - 4.11: statically allocated bdi in request_queue
 dnl # 4.12: dynamically allocated bdi in request_queue
+dnl # 6.11: bdi no longer available through request_queue, so get it from
+dnl #       the gendisk attached to the queue
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI], [
 	ZFS_LINUX_TEST_SRC([blk_queue_bdi], [
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue q;
 		struct backing_dev_info bdi;
 		q.backing_dev_info = &bdi;
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
 	AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
 	ZFS_LINUX_TEST_RESULT([blk_queue_bdi], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
 		    [blk queue backing_dev_info is dynamic])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISK_BDI], [
+	ZFS_LINUX_TEST_SRC([blk_queue_disk_bdi], [
+		#include <linux/blkdev.h>
+		#include <linux/backing-dev.h>
+	], [
+		struct request_queue q;
+		struct gendisk disk;
+		struct backing_dev_info bdi __attribute__ ((unused));
+		q.disk = &disk;
+		q.disk->bdi = &bdi;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISK_BDI], [
+	AC_MSG_CHECKING([whether backing_dev_info is available through queue gendisk])
+	ZFS_LINUX_TEST_RESULT([blk_queue_disk_bdi], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_BLK_QUEUE_DISK_BDI, 1,
+		    [backing_dev_info is available through queue gendisk])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
 dnl #
 dnl # 5.9: added blk_queue_update_readahead(),
 dnl # 5.15: renamed to disk_update_readahead()
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD], [
 	ZFS_LINUX_TEST_SRC([blk_queue_update_readahead], [
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue q;
 		blk_queue_update_readahead(&q);
 	])
 
 	ZFS_LINUX_TEST_SRC([disk_update_readahead], [
 		#include <linux/blkdev.h>
 	],[
 		struct gendisk disk;
 		disk_update_readahead(&disk);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD], [
 	AC_MSG_CHECKING([whether blk_queue_update_readahead() exists])
 	ZFS_LINUX_TEST_RESULT([blk_queue_update_readahead], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_UPDATE_READAHEAD, 1,
 		    [blk_queue_update_readahead() exists])
 	],[
 		AC_MSG_RESULT(no)
 
 		AC_MSG_CHECKING([whether disk_update_readahead() exists])
 		ZFS_LINUX_TEST_RESULT([disk_update_readahead], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_DISK_UPDATE_READAHEAD, 1,
 			    [disk_update_readahead() exists])
 		],[
 			AC_MSG_RESULT(no)
 		])
 	])
 ])
 
 dnl #
 dnl # 5.19: bdev_max_discard_sectors() available
 dnl # 2.6.32: blk_queue_discard() available
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD], [
 	ZFS_LINUX_TEST_SRC([bdev_max_discard_sectors], [
 		#include <linux/blkdev.h>
 	],[
 		struct block_device *bdev __attribute__ ((unused)) = NULL;
 		unsigned int error __attribute__ ((unused));
 
 		error = bdev_max_discard_sectors(bdev);
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_queue_discard], [
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue r;
 		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
 		memset(q, 0, sizeof(r));
 		value = blk_queue_discard(q);
 	],[-Wframe-larger-than=8192])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_DISCARD], [
 	AC_MSG_CHECKING([whether bdev_max_discard_sectors() is available])
 	ZFS_LINUX_TEST_RESULT([bdev_max_discard_sectors], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BDEV_MAX_DISCARD_SECTORS, 1,
 		    [bdev_max_discard_sectors() is available])
 	],[
 		AC_MSG_RESULT(no)
 
 		AC_MSG_CHECKING([whether blk_queue_discard() is available])
 		ZFS_LINUX_TEST_RESULT([blk_queue_discard], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_QUEUE_DISCARD, 1,
 			    [blk_queue_discard() is available])
 		],[
 			ZFS_LINUX_TEST_ERROR([blk_queue_discard])
 		])
 	])
 ])
 
 dnl #
 dnl # 5.19: bdev_max_secure_erase_sectors() available
 dnl # 4.8: blk_queue_secure_erase() available
 dnl # 2.6.36: blk_queue_secdiscard() available
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE], [
 	ZFS_LINUX_TEST_SRC([bdev_max_secure_erase_sectors], [
 		#include <linux/blkdev.h>
 	],[
 		struct block_device *bdev __attribute__ ((unused)) = NULL;
 		unsigned int error __attribute__ ((unused));
 
 		error = bdev_max_secure_erase_sectors(bdev);
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_queue_secure_erase], [
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue r;
 		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
 		memset(q, 0, sizeof(r));
 		value = blk_queue_secure_erase(q);
 	],[-Wframe-larger-than=8192])
 
 	ZFS_LINUX_TEST_SRC([blk_queue_secdiscard], [
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue r;
 		struct request_queue *q = &r;
 		int value __attribute__ ((unused));
 		memset(q, 0, sizeof(r));
 		value = blk_queue_secdiscard(q);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE], [
 	AC_MSG_CHECKING([whether bdev_max_secure_erase_sectors() is available])
 	ZFS_LINUX_TEST_RESULT([bdev_max_secure_erase_sectors], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS, 1,
 		    [bdev_max_secure_erase_sectors() is available])
 	],[
 		AC_MSG_RESULT(no)
 
 		AC_MSG_CHECKING([whether blk_queue_secure_erase() is available])
 		ZFS_LINUX_TEST_RESULT([blk_queue_secure_erase], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_QUEUE_SECURE_ERASE, 1,
 			    [blk_queue_secure_erase() is available])
 		],[
 			AC_MSG_RESULT(no)
 
 			AC_MSG_CHECKING([whether blk_queue_secdiscard() is available])
 			ZFS_LINUX_TEST_RESULT([blk_queue_secdiscard], [
 				AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_QUEUE_SECDISCARD, 1,
 				    [blk_queue_secdiscard() is available])
 			],[
 				ZFS_LINUX_TEST_ERROR([blk_queue_secure_erase])
 			])
 		])
 	])
 ])
 
 dnl #
 dnl # 4.16 API change,
 dnl # Introduction of blk_queue_flag_set and blk_queue_flag_clear
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET], [
 	ZFS_LINUX_TEST_SRC([blk_queue_flag_set], [
 		#include <linux/kernel.h>
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		blk_queue_flag_set(0, q);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET], [
 	AC_MSG_CHECKING([whether blk_queue_flag_set() exists])
 	ZFS_LINUX_TEST_RESULT([blk_queue_flag_set], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_SET, 1,
 		    [blk_queue_flag_set() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR], [
 	ZFS_LINUX_TEST_SRC([blk_queue_flag_clear], [
 		#include <linux/kernel.h>
 		#include <linux/blkdev.h>
 	],[
 		struct request_queue *q = NULL;
 		blk_queue_flag_clear(0, q);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR], [
 	AC_MSG_CHECKING([whether blk_queue_flag_clear() exists])
 	ZFS_LINUX_TEST_RESULT([blk_queue_flag_clear], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_FLAG_CLEAR, 1,
 		    [blk_queue_flag_clear() exists])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 
 dnl #
 dnl # 2.6.36 API change,
 dnl # Added blk_queue_flush() interface, while the previous interface
 dnl # was available to all the new one is GPL-only.  Thus in addition to
 dnl # detecting if this function is available we determine if it is
 dnl # GPL-only.  If the GPL-only interface is there we implement our own
 dnl # compatibility function, otherwise we use the function.  The hope
 dnl # is that long term this function will be opened up.
 dnl #
 dnl # 4.7 API change,
 dnl # Replace blk_queue_flush with blk_queue_write_cache
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH], [
 	ZFS_LINUX_TEST_SRC([blk_queue_flush], [
 		#include <linux/blkdev.h>
 	], [
 		struct request_queue *q __attribute__ ((unused)) = NULL;
 		(void) blk_queue_flush(q, REQ_FLUSH);
 	], [], [ZFS_META_LICENSE])
 
 	ZFS_LINUX_TEST_SRC([blk_queue_write_cache], [
 		#include <linux/kernel.h>
 		#include <linux/blkdev.h>
 	], [
 		struct request_queue *q __attribute__ ((unused)) = NULL;
 		blk_queue_write_cache(q, true, true);
 	], [], [ZFS_META_LICENSE])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_FLUSH], [
 	AC_MSG_CHECKING([whether blk_queue_flush() is available])
 	ZFS_LINUX_TEST_RESULT([blk_queue_flush], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_FLUSH, 1,
 		    [blk_queue_flush() is available])
 
 		AC_MSG_CHECKING([whether blk_queue_flush() is GPL-only])
 		ZFS_LINUX_TEST_RESULT([blk_queue_flush_license], [
 			AC_MSG_RESULT(no)
 		],[
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY, 1,
 			    [blk_queue_flush() is GPL-only])
 		])
 	],[
 		AC_MSG_RESULT(no)
 	])
 
 	dnl #
 	dnl # 4.7 API change
 	dnl # Replace blk_queue_flush with blk_queue_write_cache
 	dnl #
 	AC_MSG_CHECKING([whether blk_queue_write_cache() exists])
 	ZFS_LINUX_TEST_RESULT([blk_queue_write_cache], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE, 1,
 		    [blk_queue_write_cache() exists])
 
 		AC_MSG_CHECKING([whether blk_queue_write_cache() is GPL-only])
 		ZFS_LINUX_TEST_RESULT([blk_queue_write_cache_license], [
 			AC_MSG_RESULT(no)
 		],[
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY, 1,
 			    [blk_queue_write_cache() is GPL-only])
 		])
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 
 dnl #
 dnl # 2.6.34 API change
 dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS], [
 	ZFS_LINUX_TEST_SRC([blk_queue_max_hw_sectors], [
 		#include <linux/blkdev.h>
 	], [
 		struct request_queue *q __attribute__ ((unused)) = NULL;
 		(void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 	], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
 	AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available])
 	ZFS_LINUX_TEST_RESULT([blk_queue_max_hw_sectors], [
 		AC_MSG_RESULT(yes)
 	],[
 		AC_MSG_RESULT(no)
 	])
 ])
 
 dnl #
 dnl # 2.6.34 API change
 dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments()
 dnl # and blk_queue_max_phys_segments().
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS], [
 	ZFS_LINUX_TEST_SRC([blk_queue_max_segments], [
 		#include <linux/blkdev.h>
 	], [
 		struct request_queue *q __attribute__ ((unused)) = NULL;
 		(void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
 	], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
 	AC_MSG_CHECKING([whether blk_queue_max_segments() is available])
 	ZFS_LINUX_TEST_RESULT([blk_queue_max_segments], [
 		AC_MSG_RESULT(yes)
 	], [
 		AC_MSG_RESULT(no)
 	])
 ])
 
 dnl #
 dnl # See if kernel supports block multi-queue and blk_status_t.
 dnl # blk_status_t represents the new status codes introduced in the 4.13
 dnl # kernel patch:
 dnl #
 dnl #  block: introduce new block status code type
 dnl #
 dnl # We do not currently support the "old" block multi-queue interfaces from
 dnl # prior kernels.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
 	ZFS_LINUX_TEST_SRC([blk_mq], [
 		#include <linux/blk-mq.h>
 	], [
 		struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0};
 		(void) blk_mq_alloc_tag_set(&tag_set);
 		return BLK_STS_OK;
 	], [])
 	ZFS_LINUX_TEST_SRC([blk_mq_rq_hctx], [
 		#include <linux/blk-mq.h>
 		#include <linux/blkdev.h>
 	], [
 		struct request rq = {0};
 		struct blk_mq_hw_ctx *hctx = NULL;
 		rq.mq_hctx = hctx;
 	], [])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
 	AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available])
 	ZFS_LINUX_TEST_RESULT([blk_mq], [
 		AC_MSG_RESULT(yes)
 		AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
 		AC_MSG_CHECKING([whether block multiqueue hardware context is cached in struct request])
 		ZFS_LINUX_TEST_RESULT([blk_mq_rq_hctx], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE(HAVE_BLK_MQ_RQ_HCTX, 1, [block multiqueue hardware context is cached in struct request])
 		], [
 			AC_MSG_RESULT(no)
 		])
 	], [
 		AC_MSG_RESULT(no)
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
+	ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISK_BDI
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_UPDATE_READAHEAD
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_DISCARD
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_SECURE_ERASE
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_SET
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLAG_CLEAR
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
 	ZFS_AC_KERNEL_SRC_BLK_MQ
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
 	ZFS_AC_KERNEL_BLK_QUEUE_PLUG
 	ZFS_AC_KERNEL_BLK_QUEUE_BDI
+	ZFS_AC_KERNEL_BLK_QUEUE_DISK_BDI
 	ZFS_AC_KERNEL_BLK_QUEUE_UPDATE_READAHEAD
 	ZFS_AC_KERNEL_BLK_QUEUE_DISCARD
 	ZFS_AC_KERNEL_BLK_QUEUE_SECURE_ERASE
 	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_SET
 	ZFS_AC_KERNEL_BLK_QUEUE_FLAG_CLEAR
 	ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
 	ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
 	ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
 	ZFS_AC_KERNEL_BLK_MQ
 ])
diff --git a/sys/contrib/openzfs/config/kernel-make-request-fn.m4 b/sys/contrib/openzfs/config/kernel-make-request-fn.m4
index 9813ad2fb3f3..4c54bdd6d4a2 100644
--- a/sys/contrib/openzfs/config/kernel-make-request-fn.m4
+++ b/sys/contrib/openzfs/config/kernel-make-request-fn.m4
@@ -1,213 +1,234 @@
 dnl #
 dnl # Check for make_request_fn interface.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN], [
 	ZFS_LINUX_TEST_SRC([make_request_fn_void], [
 		#include <linux/blkdev.h>
 		static void make_request(struct request_queue *q,
 		    struct bio *bio) { return; }
 	],[
 		blk_queue_make_request(NULL, &make_request);
 	])
 
 	ZFS_LINUX_TEST_SRC([make_request_fn_blk_qc_t], [
 		#include <linux/blkdev.h>
 		static blk_qc_t make_request(struct request_queue *q,
 		    struct bio *bio) { return (BLK_QC_T_NONE); }
 	],[
 		blk_queue_make_request(NULL, &make_request);
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn], [
 		#include <linux/blkdev.h>
 		static blk_qc_t make_request(struct request_queue *q,
 		    struct bio *bio) { return (BLK_QC_T_NONE); }
 	],[
 		struct request_queue *q __attribute__ ((unused));
 		q = blk_alloc_queue(make_request, NUMA_NO_NODE);
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_alloc_queue_request_fn_rh], [
 		#include <linux/blkdev.h>
 		static blk_qc_t make_request(struct request_queue *q,
 		    struct bio *bio) { return (BLK_QC_T_NONE); }
 	],[
 		struct request_queue *q __attribute__ ((unused));
 		q = blk_alloc_queue_rh(make_request, NUMA_NO_NODE);
 	])
 
 	ZFS_LINUX_TEST_SRC([block_device_operations_submit_bio], [
 		#include <linux/blkdev.h>
 	],[
 		struct block_device_operations o;
 		o.submit_bio = NULL;
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_alloc_disk], [
 		#include <linux/blkdev.h>
 	],[
 		struct gendisk *disk  __attribute__ ((unused));
 		disk = blk_alloc_disk(NUMA_NO_NODE);
 	])
 
 	ZFS_LINUX_TEST_SRC([blk_alloc_disk_2arg], [
 		#include <linux/blkdev.h>
 	],[
 		struct queue_limits *lim = NULL;
 		struct gendisk *disk  __attribute__ ((unused));
 		disk = blk_alloc_disk(lim, NUMA_NO_NODE);
 	])
 
+	ZFS_LINUX_TEST_SRC([blkdev_queue_limits_features], [
+		#include <linux/blkdev.h>
+	],[
+		struct queue_limits *lim = NULL;
+		lim->features = 0;
+	])
+
 	ZFS_LINUX_TEST_SRC([blk_cleanup_disk], [
 		#include <linux/blkdev.h>
 	],[
 		struct gendisk *disk  __attribute__ ((unused));
 		blk_cleanup_disk(disk);
 	])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_MAKE_REQUEST_FN], [
 	dnl # Checked as part of the blk_alloc_queue_request_fn test
 	dnl #
 	dnl # Linux 5.9 API Change
 	dnl # make_request_fn was moved into block_device_operations->submit_bio
 	dnl #
 	AC_MSG_CHECKING([whether submit_bio is member of struct block_device_operations])
 	ZFS_LINUX_TEST_RESULT([block_device_operations_submit_bio], [
 		AC_MSG_RESULT(yes)
 
 		AC_DEFINE(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS, 1,
 		    [submit_bio is member of struct block_device_operations])
 
 		dnl #
 		dnl # Linux 5.14 API Change:
 		dnl # blk_alloc_queue() + alloc_disk() combo replaced by
 		dnl # a single call to blk_alloc_disk().
 		dnl #
 		AC_MSG_CHECKING([whether blk_alloc_disk() exists])
 		ZFS_LINUX_TEST_RESULT([blk_alloc_disk], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE([HAVE_BLK_ALLOC_DISK], 1, [blk_alloc_disk() exists])
 
 			dnl #
 			dnl # 5.20 API change,
 			dnl # Removed blk_cleanup_disk(), put_disk() should be used.
 			dnl #
 			AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
 			ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
 				    [blk_cleanup_disk() exists])
 			], [
 				AC_MSG_RESULT(no)
 			])
 		], [
 			AC_MSG_RESULT(no)
 		])
 
 		dnl #
 		dnl # Linux 6.9 API Change:
 		dnl # blk_alloc_queue() takes a nullable queue_limits arg.
 		dnl #
 		AC_MSG_CHECKING([whether blk_alloc_disk() exists and takes 2 args])
 		ZFS_LINUX_TEST_RESULT([blk_alloc_disk_2arg], [
 			AC_MSG_RESULT(yes)
 			AC_DEFINE([HAVE_BLK_ALLOC_DISK_2ARG], 1, [blk_alloc_disk() exists and takes 2 args])
 
+			dnl #
+			dnl # Linux 6.11 API change:
+			dnl # struct queue_limits gains a 'features' field,
+			dnl # used to set flushing options
+			dnl #
+			AC_MSG_CHECKING([whether struct queue_limits has a features field])
+			ZFS_LINUX_TEST_RESULT([blkdev_queue_limits_features], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE([HAVE_BLKDEV_QUEUE_LIMITS_FEATURES], 1,
+				    [struct queue_limits has a features field])
+			], [
+				AC_MSG_RESULT(no)
+			])
+
 			dnl #
 			dnl # 5.20 API change,
 			dnl # Removed blk_cleanup_disk(), put_disk() should be used.
 			dnl #
 			AC_MSG_CHECKING([whether blk_cleanup_disk() exists])
 			ZFS_LINUX_TEST_RESULT([blk_cleanup_disk], [
 				AC_MSG_RESULT(yes)
 				AC_DEFINE([HAVE_BLK_CLEANUP_DISK], 1,
 				    [blk_cleanup_disk() exists])
 			], [
 				AC_MSG_RESULT(no)
 			])
 		], [
 			AC_MSG_RESULT(no)
 		])
 	],[
 		AC_MSG_RESULT(no)
 
 		dnl # Checked as part of the blk_alloc_queue_request_fn test
 		dnl #
 		dnl # Linux 5.7 API Change
 		dnl # blk_alloc_queue() expects request function.
 		dnl #
 		AC_MSG_CHECKING([whether blk_alloc_queue() expects request function])
 		ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn], [
 			AC_MSG_RESULT(yes)
 
 			dnl # This is currently always the case.
 			AC_MSG_CHECKING([whether make_request_fn() returns blk_qc_t])
 			AC_MSG_RESULT(yes)
 
 			AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN, 1,
 			    [blk_alloc_queue() expects request function])
 			AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
 			    [make_request_fn() return type])
 			AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
 			    [Noting that make_request_fn() returns blk_qc_t])
 		],[
 			dnl #
 			dnl # CentOS Stream 4.18.0-257 API Change
 			dnl # The Linux 5.7 blk_alloc_queue() change was back-
 			dnl # ported and the symbol renamed blk_alloc_queue_rh().
 			dnl # As of this kernel version they're not providing
 			dnl # any compatibility code in the kernel for this.
 			dnl #
 			ZFS_LINUX_TEST_RESULT([blk_alloc_queue_request_fn_rh], [
 				AC_MSG_RESULT(yes)
 
 				dnl # This is currently always the case.
 				AC_MSG_CHECKING([whether make_request_fn_rh() returns blk_qc_t])
 				AC_MSG_RESULT(yes)
 
 				AC_DEFINE(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH, 1,
 				    [blk_alloc_queue_rh() expects request function])
 				AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
 				    [make_request_fn() return type])
 				AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
 				    [Noting that make_request_fn() returns blk_qc_t])
 			],[
 				AC_MSG_RESULT(no)
 
 				dnl #
 				dnl # Linux 3.2 API Change
 				dnl # make_request_fn returns void.
 				dnl #
 				AC_MSG_CHECKING(
 				    [whether make_request_fn() returns void])
 				ZFS_LINUX_TEST_RESULT([make_request_fn_void], [
 					AC_MSG_RESULT(yes)
 					AC_DEFINE(MAKE_REQUEST_FN_RET, void,
 					    [make_request_fn() return type])
 					AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_VOID, 1,
 					    [Noting that make_request_fn() returns void])
 				],[
 					AC_MSG_RESULT(no)
 
 					dnl #
 					dnl # Linux 4.4 API Change
 					dnl # make_request_fn returns blk_qc_t.
 					dnl #
 					AC_MSG_CHECKING(
 					    [whether make_request_fn() returns blk_qc_t])
 					ZFS_LINUX_TEST_RESULT([make_request_fn_blk_qc_t], [
 						AC_MSG_RESULT(yes)
 						AC_DEFINE(MAKE_REQUEST_FN_RET, blk_qc_t,
 						    [make_request_fn() return type])
 						AC_DEFINE(HAVE_MAKE_REQUEST_FN_RET_QC, 1,
 						    [Noting that make_request_fn() ]
 						    [returns blk_qc_t])
 					],[
 						ZFS_LINUX_TEST_ERROR([make_request_fn])
 					])
 				])
 			])
 		])
 	])
 ])
diff --git a/sys/contrib/openzfs/config/kernel-mm-page-size.m4 b/sys/contrib/openzfs/config/kernel-mm-page-size.m4
deleted file mode 100644
index d5ebd926986a..000000000000
--- a/sys/contrib/openzfs/config/kernel-mm-page-size.m4
+++ /dev/null
@@ -1,17 +0,0 @@
-AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
-	ZFS_LINUX_TEST_SRC([page_size], [
-		#include <linux/mm.h>
-	],[
-		unsigned long s;
-		s = page_size(NULL);
-	])
-])
-AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
-	AC_MSG_CHECKING([whether page_size() is available])
-	ZFS_LINUX_TEST_RESULT([page_size], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
diff --git a/sys/contrib/openzfs/config/kernel-mm-pagemap.m4 b/sys/contrib/openzfs/config/kernel-mm-pagemap.m4
new file mode 100644
index 000000000000..466b6fa07d9a
--- /dev/null
+++ b/sys/contrib/openzfs/config/kernel-mm-pagemap.m4
@@ -0,0 +1,36 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE], [
+	ZFS_LINUX_TEST_SRC([page_size], [
+		#include <linux/mm.h>
+	],[
+		unsigned long s;
+		s = page_size(NULL);
+	])
+])
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_SIZE], [
+	AC_MSG_CHECKING([whether page_size() is available])
+	ZFS_LINUX_TEST_RESULT([page_size], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_MM_PAGE_SIZE, 1, [page_size() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
+
+
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING], [
+	ZFS_LINUX_TEST_SRC([page_mapping], [
+		#include <linux/pagemap.h>
+	],[
+		/* Only probes that page_mapping() is declared; no unused locals. */
+		(void) page_mapping(NULL);
+	])
+])
+AC_DEFUN([ZFS_AC_KERNEL_MM_PAGE_MAPPING], [
+	AC_MSG_CHECKING([whether page_mapping() is available])
+	ZFS_LINUX_TEST_RESULT([page_mapping], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_MM_PAGE_MAPPING, 1, [page_mapping() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
diff --git a/sys/contrib/openzfs/config/kernel-register_sysctl_table.m4 b/sys/contrib/openzfs/config/kernel-register_sysctl_table.m4
index a5e934f56d29..12ffe9d95142 100644
--- a/sys/contrib/openzfs/config/kernel-register_sysctl_table.m4
+++ b/sys/contrib/openzfs/config/kernel-register_sysctl_table.m4
@@ -1,27 +1,86 @@
 dnl #
 dnl # Linux 6.5 removes register_sysctl_table
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE], [
 	ZFS_LINUX_TEST_SRC([has_register_sysctl_table], [
 		#include <linux/sysctl.h>
 
 		static struct ctl_table dummy_table[] = {
 			{}
 		};
 
     ],[
 		struct ctl_table_header *h
 			__attribute((unused)) = register_sysctl_table(dummy_table);
     ])
 ])
 
 AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE], [
 	AC_MSG_CHECKING([whether register_sysctl_table exists])
 	ZFS_LINUX_TEST_RESULT([has_register_sysctl_table], [
 		AC_MSG_RESULT([yes])
 		AC_DEFINE(HAVE_REGISTER_SYSCTL_TABLE, 1,
 			[register_sysctl_table exists])
 	],[
 		AC_MSG_RESULT([no])
 	])
 ])
+
+dnl #
+dnl # Linux 6.11 register_sysctl() enforces that sysctl tables no longer
+dnl # supply a sentinel end-of-table element. 6.6 introduces
+dnl # register_sysctl_sz() to enable callers to choose, so we use it if
+dnl # available for backward compatibility.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ], [
+	ZFS_LINUX_TEST_SRC([has_register_sysctl_sz], [
+		#include <linux/sysctl.h>
+	],[
+		struct ctl_table test_table[] __attribute__((unused)) = {0};
+		register_sysctl_sz("", test_table, 0);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ], [
+	AC_MSG_CHECKING([whether register_sysctl_sz exists])
+	ZFS_LINUX_TEST_RESULT([has_register_sysctl_sz], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_REGISTER_SYSCTL_SZ, 1,
+			[register_sysctl_sz exists])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
+
+dnl #
+dnl # Linux 6.11 makes the ctl_table arg of proc_handler const
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST], [
+	ZFS_LINUX_TEST_SRC([has_proc_handler_ctl_table_const], [
+		#include <linux/sysctl.h>
+
+		static int test_handler(
+		    const struct ctl_table *ctl __attribute((unused)),
+		    int write __attribute((unused)),
+		    void *buffer __attribute((unused)),
+		    size_t *lenp __attribute((unused)),
+		    loff_t *ppos __attribute((unused)))
+		{
+			return (0);
+		}
+	], [
+		proc_handler *ph __attribute((unused)) =
+		    &test_handler;
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST], [
+	AC_MSG_CHECKING([whether proc_handler ctl_table arg is const])
+	ZFS_LINUX_TEST_RESULT([has_proc_handler_ctl_table_const], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_PROC_HANDLER_CTL_TABLE_CONST, 1,
+		    [proc_handler ctl_table arg is const])
+	], [
+		AC_MSG_RESULT([no])
+	])
+])
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
index f282ccd8b9d7..4d471358d242 100644
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -1,1050 +1,1056 @@
 dnl #
 dnl # Default ZFS kernel configuration
 dnl #
 AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
 	AM_COND_IF([BUILD_LINUX], [
 		dnl # Setup the kernel build environment.
 		ZFS_AC_KERNEL
 		ZFS_AC_QAT
 
 		dnl # Sanity checks for module building and CONFIG_* defines
 		ZFS_AC_KERNEL_CONFIG_DEFINED
 		ZFS_AC_MODULE_SYMVERS
 
 		dnl # Sequential ZFS_LINUX_TRY_COMPILE tests
 		ZFS_AC_KERNEL_FPU_HEADER
 		ZFS_AC_KERNEL_OBJTOOL_HEADER
 		ZFS_AC_KERNEL_WAIT_QUEUE_ENTRY_T
 		ZFS_AC_KERNEL_MISC_MINOR
 		ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
 
 		dnl # Parallel ZFS_LINUX_TEST_SRC / ZFS_LINUX_TEST_RESULT tests
 		ZFS_AC_KERNEL_TEST_SRC
 		ZFS_AC_KERNEL_TEST_RESULT
 
 		AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
 			KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ"
 		])
 
 		AC_SUBST(KERNEL_MAKE)
 	])
 ])
 
 dnl #
 dnl # Generate and compile all of the kernel API test cases to determine
 dnl # which interfaces are available.  By invoking the kernel build system
 dnl # only once the compilation can be done in parallel significantly
 dnl # speeding up the process.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_TYPES
 	ZFS_AC_KERNEL_SRC_OBJTOOL
 	ZFS_AC_KERNEL_SRC_GLOBAL_PAGE_STATE
 	ZFS_AC_KERNEL_SRC_ACCESS_OK_TYPE
 	ZFS_AC_KERNEL_SRC_PDE_DATA
 	ZFS_AC_KERNEL_SRC_FALLOCATE
 	ZFS_AC_KERNEL_SRC_FADVISE
 	ZFS_AC_KERNEL_SRC_GENERIC_FADVISE
 	ZFS_AC_KERNEL_SRC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
 	ZFS_AC_KERNEL_SRC_RWSEM
 	ZFS_AC_KERNEL_SRC_SCHED
 	ZFS_AC_KERNEL_SRC_USLEEP_RANGE
 	ZFS_AC_KERNEL_SRC_KMEM_CACHE
 	ZFS_AC_KERNEL_SRC_KVMALLOC
 	ZFS_AC_KERNEL_SRC_VMALLOC_PAGE_KERNEL
 	ZFS_AC_KERNEL_SRC_WAIT
 	ZFS_AC_KERNEL_SRC_INODE_TIMES
 	ZFS_AC_KERNEL_SRC_INODE_LOCK
 	ZFS_AC_KERNEL_SRC_GROUP_INFO_GID
 	ZFS_AC_KERNEL_SRC_RW
 	ZFS_AC_KERNEL_SRC_TIMER_SETUP
 	ZFS_AC_KERNEL_SRC_SUPER_USER_NS
 	ZFS_AC_KERNEL_SRC_PROC_OPERATIONS
 	ZFS_AC_KERNEL_SRC_BLOCK_DEVICE_OPERATIONS
 	ZFS_AC_KERNEL_SRC_BIO
 	ZFS_AC_KERNEL_SRC_BLKDEV
 	ZFS_AC_KERNEL_SRC_BLK_QUEUE
 	ZFS_AC_KERNEL_SRC_GENHD_FLAGS
 	ZFS_AC_KERNEL_SRC_REVALIDATE_DISK
 	ZFS_AC_KERNEL_SRC_GET_DISK_RO
 	ZFS_AC_KERNEL_SRC_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_SRC_DISCARD_GRANULARITY
 	ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_SRC_XATTR
 	ZFS_AC_KERNEL_SRC_ACL
 	ZFS_AC_KERNEL_SRC_INODE_SETATTR
 	ZFS_AC_KERNEL_SRC_INODE_GETATTR
 	ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION
 	ZFS_AC_KERNEL_SRC_SHOW_OPTIONS
 	ZFS_AC_KERNEL_SRC_FILE_INODE
 	ZFS_AC_KERNEL_SRC_FILE_DENTRY
 	ZFS_AC_KERNEL_SRC_FSYNC
 	ZFS_AC_KERNEL_SRC_AIO_FSYNC
 	ZFS_AC_KERNEL_SRC_EVICT_INODE
 	ZFS_AC_KERNEL_SRC_DIRTY_INODE
 	ZFS_AC_KERNEL_SRC_SHRINKER
 	ZFS_AC_KERNEL_SRC_MKDIR
 	ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS
 	ZFS_AC_KERNEL_SRC_CREATE
 	ZFS_AC_KERNEL_SRC_PERMISSION
 	ZFS_AC_KERNEL_SRC_GET_LINK
 	ZFS_AC_KERNEL_SRC_PUT_LINK
 	ZFS_AC_KERNEL_SRC_TMPFILE
 	ZFS_AC_KERNEL_SRC_AUTOMOUNT
 	ZFS_AC_KERNEL_SRC_ENCODE_FH_WITH_INODE
 	ZFS_AC_KERNEL_SRC_COMMIT_METADATA
 	ZFS_AC_KERNEL_SRC_CLEAR_INODE
 	ZFS_AC_KERNEL_SRC_SETATTR_PREPARE
 	ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED
 	ZFS_AC_KERNEL_SRC_DENTRY
 	ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U
 	ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SRC_SECURITY_INODE
 	ZFS_AC_KERNEL_SRC_FST_MOUNT
 	ZFS_AC_KERNEL_SRC_BDI
 	ZFS_AC_KERNEL_SRC_SET_NLINK
 	ZFS_AC_KERNEL_SRC_SGET
 	ZFS_AC_KERNEL_SRC_LSEEK_EXECUTE
 	ZFS_AC_KERNEL_SRC_VFS_FILEMAP_DIRTY_FOLIO
 	ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_SRC_VFS_GETATTR
 	ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_SRC_VFS_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO
 	ZFS_AC_KERNEL_SRC_VFS_READPAGES
 	ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_SRC_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_SRC_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_SRC_VFS_IOV_ITER
 	ZFS_AC_KERNEL_SRC_VFS_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_GENERIC_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_SPLICE_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_REMAP_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_CLONE_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_DEDUPE_FILE_RANGE
 	ZFS_AC_KERNEL_SRC_VFS_FILE_OPERATIONS_EXTEND
 	ZFS_AC_KERNEL_SRC_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_SRC_KMAP_LOCAL_PAGE
 	ZFS_AC_KERNEL_SRC_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_SRC_MAKE_REQUEST_FN
 	ZFS_AC_KERNEL_SRC_GENERIC_IO_ACCT
 	ZFS_AC_KERNEL_SRC_FPU
 	ZFS_AC_KERNEL_SRC_FMODE_T
 	ZFS_AC_KERNEL_SRC_KUIDGID_T
 	ZFS_AC_KERNEL_SRC_KUID_HELPERS
 	ZFS_AC_KERNEL_SRC_RENAME
 	ZFS_AC_KERNEL_SRC_CURRENT_TIME
 	ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL
 	ZFS_AC_KERNEL_SRC_KTIME
 	ZFS_AC_KERNEL_SRC_TOTALRAM_PAGES_FUNC
 	ZFS_AC_KERNEL_SRC_TOTALHIGH_PAGES
 	ZFS_AC_KERNEL_SRC_KSTRTOUL
 	ZFS_AC_KERNEL_SRC_PERCPU
 	ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
 	ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR
 	ZFS_AC_KERNEL_SRC_MKNOD
 	ZFS_AC_KERNEL_SRC_SYMLINK
 	ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
 	ZFS_AC_KERNEL_SRC_SIGNAL_STOP
 	ZFS_AC_KERNEL_SRC_SIGINFO
 	ZFS_AC_KERNEL_SRC_SYSFS
 	ZFS_AC_KERNEL_SRC_SET_SPECIAL_STATE
 	ZFS_AC_KERNEL_SRC_STANDALONE_LINUX_STDARG
 	ZFS_AC_KERNEL_SRC_STRLCPY
 	ZFS_AC_KERNEL_SRC_STRSCPY
 	ZFS_AC_KERNEL_SRC_PAGEMAP_FOLIO_WAIT_BIT
 	ZFS_AC_KERNEL_SRC_ADD_DISK
 	ZFS_AC_KERNEL_SRC_KTHREAD
 	ZFS_AC_KERNEL_SRC_ZERO_PAGE
 	ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
 	ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
 	ZFS_AC_KERNEL_SRC_IDMAP_MNT_API
 	ZFS_AC_KERNEL_SRC_IDMAP_NO_USERNS
 	ZFS_AC_KERNEL_SRC_IATTR_VFSID
 	ZFS_AC_KERNEL_SRC_FILEMAP
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
 	ZFS_AC_KERNEL_SRC_RECLAIMED
 	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ
+	ZFS_AC_KERNEL_SRC_PROC_HANDLER_CTL_TABLE_CONST
 	ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
 	ZFS_AC_KERNEL_SRC_SYNC_BDEV
 	ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
+	ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
 			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
 			;;
 		riscv*)
 			ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
 			;;
 	esac
 
 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
 	AC_MSG_RESULT([done])
 ])
 
 dnl #
 dnl # Check results of kernel interface tests.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_TYPES
 	ZFS_AC_KERNEL_ACCESS_OK_TYPE
 	ZFS_AC_KERNEL_GLOBAL_PAGE_STATE
 	ZFS_AC_KERNEL_OBJTOOL
 	ZFS_AC_KERNEL_PDE_DATA
 	ZFS_AC_KERNEL_FALLOCATE
 	ZFS_AC_KERNEL_FADVISE
 	ZFS_AC_KERNEL_GENERIC_FADVISE
 	ZFS_AC_KERNEL_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
 	ZFS_AC_KERNEL_RWSEM
 	ZFS_AC_KERNEL_SCHED
 	ZFS_AC_KERNEL_USLEEP_RANGE
 	ZFS_AC_KERNEL_KMEM_CACHE
 	ZFS_AC_KERNEL_KVMALLOC
 	ZFS_AC_KERNEL_VMALLOC_PAGE_KERNEL
 	ZFS_AC_KERNEL_WAIT
 	ZFS_AC_KERNEL_INODE_TIMES
 	ZFS_AC_KERNEL_INODE_LOCK
 	ZFS_AC_KERNEL_GROUP_INFO_GID
 	ZFS_AC_KERNEL_RW
 	ZFS_AC_KERNEL_TIMER_SETUP
 	ZFS_AC_KERNEL_SUPER_USER_NS
 	ZFS_AC_KERNEL_PROC_OPERATIONS
 	ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS
 	ZFS_AC_KERNEL_BIO
 	ZFS_AC_KERNEL_BLKDEV
 	ZFS_AC_KERNEL_BLK_QUEUE
 	ZFS_AC_KERNEL_GENHD_FLAGS
 	ZFS_AC_KERNEL_REVALIDATE_DISK
 	ZFS_AC_KERNEL_GET_DISK_RO
 	ZFS_AC_KERNEL_GENERIC_READLINK_GLOBAL
 	ZFS_AC_KERNEL_DISCARD_GRANULARITY
 	ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE
 	ZFS_AC_KERNEL_XATTR
 	ZFS_AC_KERNEL_ACL
 	ZFS_AC_KERNEL_INODE_SETATTR
 	ZFS_AC_KERNEL_INODE_GETATTR
 	ZFS_AC_KERNEL_INODE_SET_FLAGS
 	ZFS_AC_KERNEL_INODE_SET_IVERSION
 	ZFS_AC_KERNEL_SHOW_OPTIONS
 	ZFS_AC_KERNEL_FILE_INODE
 	ZFS_AC_KERNEL_FILE_DENTRY
 	ZFS_AC_KERNEL_FSYNC
 	ZFS_AC_KERNEL_AIO_FSYNC
 	ZFS_AC_KERNEL_EVICT_INODE
 	ZFS_AC_KERNEL_DIRTY_INODE
 	ZFS_AC_KERNEL_SHRINKER
 	ZFS_AC_KERNEL_MKDIR
 	ZFS_AC_KERNEL_LOOKUP_FLAGS
 	ZFS_AC_KERNEL_CREATE
 	ZFS_AC_KERNEL_PERMISSION
 	ZFS_AC_KERNEL_GET_LINK
 	ZFS_AC_KERNEL_PUT_LINK
 	ZFS_AC_KERNEL_TMPFILE
 	ZFS_AC_KERNEL_AUTOMOUNT
 	ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE
 	ZFS_AC_KERNEL_COMMIT_METADATA
 	ZFS_AC_KERNEL_CLEAR_INODE
 	ZFS_AC_KERNEL_SETATTR_PREPARE
 	ZFS_AC_KERNEL_INSERT_INODE_LOCKED
 	ZFS_AC_KERNEL_DENTRY
 	ZFS_AC_KERNEL_DENTRY_ALIAS_D_U
 	ZFS_AC_KERNEL_TRUNCATE_SETSIZE
 	ZFS_AC_KERNEL_SECURITY_INODE
 	ZFS_AC_KERNEL_FST_MOUNT
 	ZFS_AC_KERNEL_BDI
 	ZFS_AC_KERNEL_SET_NLINK
 	ZFS_AC_KERNEL_SGET
 	ZFS_AC_KERNEL_LSEEK_EXECUTE
 	ZFS_AC_KERNEL_VFS_FILEMAP_DIRTY_FOLIO
 	ZFS_AC_KERNEL_VFS_READ_FOLIO
 	ZFS_AC_KERNEL_VFS_GETATTR
 	ZFS_AC_KERNEL_VFS_FSYNC_2ARGS
 	ZFS_AC_KERNEL_VFS_ITERATE
 	ZFS_AC_KERNEL_VFS_DIRECT_IO
 	ZFS_AC_KERNEL_VFS_READPAGES
 	ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS
 	ZFS_AC_KERNEL_VFS_RW_ITERATE
 	ZFS_AC_KERNEL_VFS_GENERIC_WRITE_CHECKS
 	ZFS_AC_KERNEL_VFS_IOV_ITER
 	ZFS_AC_KERNEL_VFS_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_GENERIC_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_SPLICE_COPY_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_REMAP_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_CLONE_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_DEDUPE_FILE_RANGE
 	ZFS_AC_KERNEL_VFS_FILE_OPERATIONS_EXTEND
 	ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
 	ZFS_AC_KERNEL_KMAP_LOCAL_PAGE
 	ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
 	ZFS_AC_KERNEL_MAKE_REQUEST_FN
 	ZFS_AC_KERNEL_GENERIC_IO_ACCT
 	ZFS_AC_KERNEL_FPU
 	ZFS_AC_KERNEL_FMODE_T
 	ZFS_AC_KERNEL_KUIDGID_T
 	ZFS_AC_KERNEL_KUID_HELPERS
 	ZFS_AC_KERNEL_RENAME
 	ZFS_AC_KERNEL_CURRENT_TIME
 	ZFS_AC_KERNEL_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_IN_COMPAT_SYSCALL
 	ZFS_AC_KERNEL_KTIME
 	ZFS_AC_KERNEL_TOTALRAM_PAGES_FUNC
 	ZFS_AC_KERNEL_TOTALHIGH_PAGES
 	ZFS_AC_KERNEL_KSTRTOUL
 	ZFS_AC_KERNEL_PERCPU
 	ZFS_AC_KERNEL_CPU_HOTPLUG
 	ZFS_AC_KERNEL_GENERIC_FILLATTR
 	ZFS_AC_KERNEL_MKNOD
 	ZFS_AC_KERNEL_SYMLINK
 	ZFS_AC_KERNEL_BIO_MAX_SEGS
 	ZFS_AC_KERNEL_SIGNAL_STOP
 	ZFS_AC_KERNEL_SIGINFO
 	ZFS_AC_KERNEL_SYSFS
 	ZFS_AC_KERNEL_SET_SPECIAL_STATE
 	ZFS_AC_KERNEL_STANDALONE_LINUX_STDARG
 	ZFS_AC_KERNEL_STRLCPY
 	ZFS_AC_KERNEL_STRSCPY
 	ZFS_AC_KERNEL_PAGEMAP_FOLIO_WAIT_BIT
 	ZFS_AC_KERNEL_ADD_DISK
 	ZFS_AC_KERNEL_KTHREAD
 	ZFS_AC_KERNEL_ZERO_PAGE
 	ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
 	ZFS_AC_KERNEL_USER_NS_COMMON_INUM
 	ZFS_AC_KERNEL_IDMAP_MNT_API
 	ZFS_AC_KERNEL_IDMAP_NO_USERNS
 	ZFS_AC_KERNEL_IATTR_VFSID
 	ZFS_AC_KERNEL_FILEMAP
 	ZFS_AC_KERNEL_WRITEPAGE_T
 	ZFS_AC_KERNEL_RECLAIMED
 	ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
+	ZFS_AC_KERNEL_REGISTER_SYSCTL_SZ
+	ZFS_AC_KERNEL_PROC_HANDLER_CTL_TABLE_CONST
 	ZFS_AC_KERNEL_COPY_SPLICE_READ
 	ZFS_AC_KERNEL_SYNC_BDEV
 	ZFS_AC_KERNEL_MM_PAGE_SIZE
+	ZFS_AC_KERNEL_MM_PAGE_MAPPING
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
 			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
 			;;
 		riscv*)
 			ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
 			;;
 	esac
 ])
 
 dnl #
 dnl # Detect name used for Module.symvers file in kernel
 dnl #
 AC_DEFUN([ZFS_AC_MODULE_SYMVERS], [
 	modpost=$LINUX/scripts/Makefile.modpost
 	AC_MSG_CHECKING([kernel file name for module symbols])
 	AS_IF([test "x$enable_linux_builtin" != xyes -a -f "$modpost"], [
 		AS_IF([grep -q Modules.symvers $modpost], [
 			LINUX_SYMBOLS=Modules.symvers
 		], [
 			LINUX_SYMBOLS=Module.symvers
 		])
 
 		AS_IF([test ! -f "$LINUX_OBJ/$LINUX_SYMBOLS"], [
 			AC_MSG_ERROR([
 	*** Please make sure the kernel devel package for your distribution
 	*** is installed.  If you are building with a custom kernel, make sure
 	*** the kernel is configured, built, and the '--with-linux=PATH'
 	*** configure option refers to the location of the kernel source.
 			])
 		])
 	], [
 		LINUX_SYMBOLS=NONE
 	])
 	AC_MSG_RESULT($LINUX_SYMBOLS)
 	AC_SUBST(LINUX_SYMBOLS)
 ])
 
 dnl #
 dnl # Detect the kernel to be built against
 dnl #
 dnl # Most modern Linux distributions have separate locations for bare
 dnl # source (source) and prebuilt (build) files. Additionally, there are
 dnl # `source` and `build` symlinks in `/lib/modules/$(KERNEL_VERSION)`
 dnl # pointing to them. The directory search order is now:
 dnl # 
 dnl # - `configure` command line values if both `--with-linux` and
 dnl #   `--with-linux-obj` were defined
 dnl # 
 dnl # - If only `--with-linux` was defined, `--with-linux-obj` is assumed
 dnl #   to have the same value as `--with-linux`
 dnl # 
 dnl # - If neither `--with-linux` nor `--with-linux-obj` were defined
 dnl #   autodetection is used:
 dnl # 
 dnl #   - `/lib/modules/$(uname -r)/{source,build}` respectively, if exist.
 dnl # 
 dnl #   - If only `/lib/modules/$(uname -r)/build` exists, it is assumed
 dnl #     to be both source and build directory.
 dnl # 
 dnl #   - The first directory in `/lib/modules` with the highest version
 dnl #     number according to `sort -V` which contains both `source` and
 dnl #     `build` symlinks/directories. If module directory contains only
 dnl #     `build` component, it is assumed to be both source and build
 dnl #     directory.
 dnl # 
 dnl #   - Last resort: the first directory matching `/usr/src/kernels/*`
 dnl #     and `/usr/src/linux-*` with the highest version number according
 dnl #     to `sort -V` is assumed to be both source and build directory.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL], [
 	AC_ARG_WITH([linux],
 		AS_HELP_STRING([--with-linux=PATH],
 		[Path to kernel source]),
 		[kernelsrc="$withval"])
 
 	AC_ARG_WITH(linux-obj,
 		AS_HELP_STRING([--with-linux-obj=PATH],
 		[Path to kernel build objects]),
 		[kernelbuild="$withval"])
 
 	AC_MSG_CHECKING([kernel source and build directories])
 	AS_IF([test -n "$kernelsrc" && test -z "$kernelbuild"], [
 		kernelbuild="$kernelsrc"
 	], [test -z "$kernelsrc"], [
 		AS_IF([test -e "/lib/modules/$(uname -r)/source" && \
 		       test -e "/lib/modules/$(uname -r)/build"], [
 			src="/lib/modules/$(uname -r)/source"
 			build="/lib/modules/$(uname -r)/build"
 		], [test -e "/lib/modules/$(uname -r)/build"], [
 			build="/lib/modules/$(uname -r)/build"
 			src="$build"
 		], [
 			src=
 
 			for d in $(ls -1d /lib/modules/* 2>/dev/null | sort -Vr); do
 				if test -e "$d/source" && test -e "$d/build"; then
 					src="$d/source"
 					build="$d/build"
 					break
 				fi
 
 				if test -e "$d/build"; then
 					src="$d/build"
 					build="$d/build"
 					break
 				fi
 			done
 
 			# the least reliable method
 			if test -z "$src"; then
 				src=$(ls -1d /usr/src/kernels/* /usr/src/linux-* \
 				      2>/dev/null | grep -v obj | sort -Vr | head -1)
 				build="$src"
 			fi
 		])
 
 		AS_IF([test -n "$src" && test -e "$src"], [
 			kernelsrc=$(readlink -e "$src")
 		], [
 			kernelsrc="[Not found]"
 		])
 		AS_IF([test -n "$build" && test -e "$build"], [
 			kernelbuild=$(readlink -e "$build")
 		], [
 			kernelbuild="[Not found]"
 		])
 	], [
 		AS_IF([test "$kernelsrc" = "NONE"], [
 			kernsrcver=NONE
 		])
 		withlinux=yes
 	])
 
 	AC_MSG_RESULT([done])
 	AC_MSG_CHECKING([kernel source directory])
 	AC_MSG_RESULT([$kernelsrc])
 	AC_MSG_CHECKING([kernel build directory])
 	AC_MSG_RESULT([$kernelbuild])
 	AS_IF([test ! -d "$kernelsrc" || test ! -d "$kernelbuild"], [
 		AC_MSG_ERROR([
 	*** Please make sure the kernel devel package for your distribution
 	*** is installed and then try again.  If that fails, you can specify the
 	*** location of the kernel source and build with the '--with-linux=PATH' and
 	*** '--with-linux-obj=PATH' options respectively.])
 	])
 
 	AC_MSG_CHECKING([kernel source version])
 	utsrelease1=$kernelbuild/include/linux/version.h
 	utsrelease2=$kernelbuild/include/linux/utsrelease.h
 	utsrelease3=$kernelbuild/include/generated/utsrelease.h
 	AS_IF([test -r $utsrelease1 && grep -qF UTS_RELEASE $utsrelease1], [
 		utsrelease=$utsrelease1
 	], [test -r $utsrelease2 && grep -qF UTS_RELEASE $utsrelease2], [
 		utsrelease=$utsrelease2
 	], [test -r $utsrelease3 && grep -qF UTS_RELEASE $utsrelease3], [
 		utsrelease=$utsrelease3
 	])
 
 	AS_IF([test -n "$utsrelease"], [
 		kernsrcver=$($AWK '/UTS_RELEASE/ { gsub(/"/, "", $[3]); print $[3] }' $utsrelease)
 		AS_IF([test -z "$kernsrcver"], [
 			AC_MSG_RESULT([Not found])
 			AC_MSG_ERROR([
 	*** Cannot determine kernel version.
 			])
 		])
 	], [
 		AC_MSG_RESULT([Not found])
 		if test "x$enable_linux_builtin" != xyes; then
 			AC_MSG_ERROR([
 	*** Cannot find UTS_RELEASE definition.
 			])
 		else
 			AC_MSG_ERROR([
 	*** Cannot find UTS_RELEASE definition.
 	*** Please run 'make prepare' inside the kernel source tree.])
 		fi
 	])
 
 	AC_MSG_RESULT([$kernsrcver])
 
 	AS_VERSION_COMPARE([$kernsrcver], [$ZFS_META_KVER_MIN], [
 		 AC_MSG_ERROR([
 	*** Cannot build against kernel version $kernsrcver.
 	*** The minimum supported kernel version is $ZFS_META_KVER_MIN.
 		])
 	])
 
 	LINUX=${kernelsrc}
 	LINUX_OBJ=${kernelbuild}
 	LINUX_VERSION=${kernsrcver}
 
 	AC_SUBST(LINUX)
 	AC_SUBST(LINUX_OBJ)
 	AC_SUBST(LINUX_VERSION)
 ])
 
 dnl #
 dnl # Detect the QAT module to be built against, QAT provides hardware
 dnl # acceleration for data compression:
 dnl #
 dnl # https://01.org/intel-quickassist-technology
 dnl #
 dnl # 1) Download and install QAT driver from the above link
 dnl # 2) Start QAT driver in your system:
 dnl # 	 service qat_service start
 dnl # 3) Enable QAT in ZFS, e.g.:
 dnl # 	 ./configure --with-qat=<qat-driver-path>/QAT1.6
 dnl # 	 make
 dnl # 4) Set GZIP compression in ZFS dataset:
 dnl # 	 zfs set compression = gzip <dataset>
 dnl #
 dnl # Then the data written to this ZFS pool is compressed by QAT accelerator
 dnl # automatically, and de-compressed by QAT when read from the pool.
 dnl #
 dnl # 1) Get QAT hardware statistics with:
 dnl #	 cat /proc/icp_dh895xcc_dev/qat
 dnl # 2) To disable QAT:
 dnl # 	 insmod zfs.ko zfs_qat_disable=1
 dnl #
 AC_DEFUN([ZFS_AC_QAT], [
 	AC_ARG_WITH([qat],
 		AS_HELP_STRING([--with-qat=PATH],
 		[Path to qat source]),
 		AS_IF([test "$withval" = "yes"],
 			AC_MSG_ERROR([--with-qat=PATH requires a PATH]),
 			[qatsrc="$withval"]))
 
 	AC_ARG_WITH([qat-obj],
 		AS_HELP_STRING([--with-qat-obj=PATH],
 		[Path to qat build objects]),
 		[qatbuild="$withval"])
 
 	AS_IF([test ! -z "${qatsrc}"], [
 		AC_MSG_CHECKING([qat source directory])
 		AC_MSG_RESULT([$qatsrc])
 		QAT_SRC="${qatsrc}/quickassist"
 		AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [
 			AC_MSG_ERROR([
 	*** Please make sure the qat driver package is installed
 	*** and specify the location of the qat source with the
 	*** '--with-qat=PATH' option then try again. Failed to
 	*** find cpa.h in:
 	${QAT_SRC}/include])
 		])
 	])
 
 	AS_IF([test ! -z "${qatsrc}"], [
 		AC_MSG_CHECKING([qat build directory])
 		AS_IF([test -z "$qatbuild"], [
 			qatbuild="${qatsrc}/build"
 		])
 
 		AC_MSG_RESULT([$qatbuild])
 		QAT_OBJ=${qatbuild}
 		AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko" && ! test -e "$QAT_OBJ/qat_api.ko"], [
 			AC_MSG_ERROR([
 	*** Please make sure the qat driver is installed then try again.
 	*** Failed to find icp_qa_al.ko or qat_api.ko in:
 	$QAT_OBJ])
 		])
 
 		AC_SUBST(QAT_SRC)
 		AC_SUBST(QAT_OBJ)
 
 		AC_DEFINE(HAVE_QAT, 1,
 		[qat is enabled and existed])
 	])
 
 	dnl #
 	dnl # Detect the name used for the QAT Module.symvers file.
 	dnl #
 	AS_IF([test ! -z "${qatsrc}"], [
 		AC_MSG_CHECKING([qat file for module symbols])
 		QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers
 
 		AS_IF([test -r $QAT_SYMBOLS], [
 			AC_MSG_RESULT([$QAT_SYMBOLS])
 			AC_SUBST(QAT_SYMBOLS)
 		],[
 			AC_MSG_ERROR([
 	*** Please make sure the qat driver is installed then try again.
 	*** Failed to find Module.symvers in:
 	$QAT_SYMBOLS
 			])
 		])
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_CONFTEST_H
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFTEST_H], [
 test -d build/$2 || mkdir -p build/$2
 cat - <<_ACEOF >build/$2/$2.h
 $1
 _ACEOF
 ])
 
 dnl #
 dnl # ZFS_LINUX_CONFTEST_C
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFTEST_C], [
 test -d build/$2 || mkdir -p build/$2
 cat confdefs.h - <<_ACEOF >build/$2/$2.c
 $1
 _ACEOF
 ])
 
 dnl #
 dnl # ZFS_LINUX_CONFTEST_MAKEFILE
 dnl #
 dnl # $1 - test case name
 dnl # $2 - add to top-level Makefile
 dnl # $3 - additional build flags
 dnl #
 AC_DEFUN([ZFS_LINUX_CONFTEST_MAKEFILE], [
 	test -d build || mkdir -p build
 	test -d build/$1 || mkdir -p build/$1
 
 	file=build/$1/Makefile
 
 	dnl # Example command line to manually build source.
 	cat - <<_ACEOF >$file
 # Example command line to manually build source
 # make modules -C $LINUX_OBJ $ARCH_UM M=$PWD/build/$1
 
 ccflags-y := -Werror $FRAME_LARGER_THAN
 _ACEOF
 
 	dnl # Additional custom CFLAGS as requested.
 	m4_ifval($3, [echo "ccflags-y += $3" >>$file], [])
 
 	dnl # Test case source
 	echo "obj-m := $1.o" >>$file
 
 	AS_IF([test "x$2" = "xyes"], [echo "obj-m += $1/" >>build/Makefile], [])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_PROGRAM(C)([PROLOGUE], [BODY])
 dnl #
 m4_define([ZFS_LINUX_TEST_PROGRAM], [
 #include <linux/module.h>
 $1
 
 int
 main (void)
 {
 $2
 	;
 	return 0;
 }
 
 MODULE_DESCRIPTION("conftest");
 MODULE_AUTHOR(ZFS_META_AUTHOR);
 MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE);
 MODULE_LICENSE($3);
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_REMOVE
 dnl #
 dnl # Removes the specified test source and results.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_REMOVE], [
 	test -d build/$1 && rm -Rf build/$1
 	test -f build/Makefile && sed '/$1/d' build/Makefile
 ])
 
 dnl #
 dnl # ZFS_LINUX_COMPILE
 dnl #
 dnl # $1 - build dir
 dnl # $2 - test command
 dnl # $3 - pass command
 dnl # $4 - fail command
 dnl # $5 - set KBUILD_MODPOST_NOFINAL='yes'
 dnl # $6 - set KBUILD_MODPOST_WARN='yes'
 dnl #
 dnl # Used internally by ZFS_LINUX_TEST_{COMPILE,MODPOST}
 dnl #
 AC_DEFUN([ZFS_LINUX_COMPILE], [
 	AC_ARG_VAR([KERNEL_CC], [C compiler for
 		building kernel modules])
 	AC_ARG_VAR([KERNEL_LD], [Linker for
 		building kernel modules])
 	AC_ARG_VAR([KERNEL_LLVM], [Binary option to
 		build kernel modules with LLVM/CLANG toolchain])
 	AC_TRY_COMMAND([
 	    KBUILD_MODPOST_NOFINAL="$5" KBUILD_MODPOST_WARN="$6"
 	    make modules -k -j$TEST_JOBS ${KERNEL_CC:+CC=$KERNEL_CC}
 	    ${KERNEL_LD:+LD=$KERNEL_LD} ${KERNEL_LLVM:+LLVM=$KERNEL_LLVM}
 	    CONFIG_MODULES=y CFLAGS_MODULE=-DCONFIG_MODULES
 	    -C $LINUX_OBJ $ARCH_UM M=$PWD/$1 >$1/build.log 2>&1])
 	AS_IF([AC_TRY_COMMAND([$2])], [$3], [$4])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_COMPILE
 dnl #
 dnl # Perform a full compile excluding the final modpost phase.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_COMPILE], [
 	ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [
 		mv $2/Makefile $2/Makefile.compile.$1
 		mv $2/build.log $2/build.log.$1
 	],[
 	        AC_MSG_ERROR([
         *** Unable to compile test source to determine kernel interfaces.])
 	], [yes], [])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_MODPOST
 dnl #
 dnl # Perform a full compile including the modpost phase.  This may
 dnl # be an incremental build if the objects have already been built.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_MODPOST], [
 	ZFS_LINUX_COMPILE([$2], [test -f $2/build.log], [
 		mv $2/Makefile $2/Makefile.modpost.$1
 		cat $2/build.log >>build/build.log.$1
 	],[
 	        AC_MSG_ERROR([
         *** Unable to modpost test source to determine kernel interfaces.])
 	], [], [yes])
 ])
 
 dnl #
 dnl # Perform the compilation of the test cases in two phases.
 dnl #
 dnl # Phase 1) attempt to build the object files for all of the tests
 dnl #          defined by the ZFS_LINUX_TEST_SRC macro.  But do not
 dnl #          perform the final modpost stage.
 dnl #
 dnl # Phase 2) disable all tests which failed the initial compilation,
 dnl #          then invoke the final modpost step for the remaining tests.
 dnl #
 dnl # This allows us efficiently build the test cases in parallel while
 dnl # remaining resilient to build failures which are expected when
 dnl # detecting the available kernel interfaces.
 dnl #
 dnl # The maximum allowed parallelism can be controlled by setting the
 dnl # TEST_JOBS environment variable.  Otherwise, it default to $(nproc).
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_COMPILE_ALL], [
 	dnl # Phase 1 - Compilation only, final linking is skipped.
 	ZFS_LINUX_TEST_COMPILE([$1], [build])
 
 	dnl #
 	dnl # Phase 2 - When building external modules disable test cases
 	dnl # which failed to compile and invoke modpost to verify the
 	dnl # final linking.
 	dnl #
 	dnl # Test names suffixed with '_license' call modpost independently
 	dnl # to ensure that a single incompatibility does not result in the
 	dnl # modpost phase exiting early.  This check is not performed on
 	dnl # every symbol since the majority are compatible and doing so
 	dnl # would significantly slow down this phase.
 	dnl #
 	dnl # When configuring for builtin (--enable-linux-builtin)
 	dnl # fake the linking step artificially create the expected .ko
 	dnl # files for tests which did compile.  This is required for
 	dnl # kernels which do not have loadable module support or have
 	dnl # not yet been built.
 	dnl #
 	AS_IF([test "x$enable_linux_builtin" = "xno"], [
 		for dir in $(awk '/^obj-m/ { print [$]3 }' \
 		    build/Makefile.compile.$1); do
 			name=${dir%/}
 			AS_IF([test -f build/$name/$name.o], [
 				AS_IF([test "${name##*_}" = "license"], [
 					ZFS_LINUX_TEST_MODPOST([$1],
 					    [build/$name])
 					echo "obj-n += $dir" >>build/Makefile
 				], [
 					echo "obj-m += $dir" >>build/Makefile
 				])
 			], [
 				echo "obj-n += $dir" >>build/Makefile
 			])
 		done
 
 		ZFS_LINUX_TEST_MODPOST([$1], [build])
 	], [
 		for dir in $(awk '/^obj-m/ { print [$]3 }' \
 		    build/Makefile.compile.$1); do
 			name=${dir%/}
 			AS_IF([test -f build/$name/$name.o], [
 				touch build/$name/$name.ko
 			])
 		done
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_SRC
 dnl #
 dnl # $1 - name
 dnl # $2 - global
 dnl # $3 - source
 dnl # $4 - extra cflags
 dnl # $5 - check license-compatibility
 dnl #
 dnl # Check if the test source is buildable at all and then if it is
 dnl # license compatible.
 dnl #
 dnl # N.B because all of the test cases are compiled in parallel they
 dnl # must never depend on the results of previous tests.  Each test
 dnl # needs to be entirely independent.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_SRC], [
 	ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM([[$2]], [[$3]],
 	    [["Dual BSD/GPL"]])], [$1])
 	ZFS_LINUX_CONFTEST_MAKEFILE([$1], [yes], [$4])
 
 	AS_IF([ test -n "$5" ], [
 		ZFS_LINUX_CONFTEST_C([ZFS_LINUX_TEST_PROGRAM(
 		    [[$2]], [[$3]], [[$5]])], [$1_license])
 		ZFS_LINUX_CONFTEST_MAKEFILE([$1_license], [yes], [$4])
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_RESULT
 dnl #
 dnl # $1 - name of a test source (ZFS_LINUX_TEST_SRC)
 dnl # $2 - run on success (valid .ko generated)
 dnl # $3 - run on failure (unable to compile)
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_RESULT], [
 	AS_IF([test -d build/$1], [
 		AS_IF([test -f build/$1/$1.ko], [$2], [$3])
 	], [
 		AC_MSG_ERROR([
 	*** No matching source for the "$1" test, check that
 	*** both the test source and result macros refer to the same name.
 		])
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_ERROR
 dnl #
 dnl # Generic error message which can be used when none of the expected
 dnl # kernel interfaces were detected.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_ERROR], [
 	AC_MSG_ERROR([
 	*** None of the expected "$1" interfaces were detected.
 	*** This may be because your kernel version is newer than what is
 	*** supported, or you are using a patched custom kernel with
 	*** incompatible modifications.
 	***
 	*** ZFS Version: $ZFS_META_ALIAS
 	*** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TEST_RESULT_SYMBOL
 dnl #
 dnl # Like ZFS_LINUX_TEST_RESULT except ZFS_CHECK_SYMBOL_EXPORT is called to
 dnl # verify symbol exports, unless --enable-linux-builtin was provided to
 dnl # configure.
 dnl #
 AC_DEFUN([ZFS_LINUX_TEST_RESULT_SYMBOL], [
 	AS_IF([ ! test -f build/$1/$1.ko], [
 		$5
 	], [
 		AS_IF([test "x$enable_linux_builtin" != "xyes"], [
 			ZFS_CHECK_SYMBOL_EXPORT([$2], [$3], [$4], [$5])
 		], [
 			$4
 		])
 	])
 ])
 
 dnl #
 dnl # ZFS_LINUX_COMPILE_IFELSE
 dnl #
 AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [
 	ZFS_LINUX_TEST_REMOVE([conftest])
 
 	m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1], [conftest])])
 	m4_ifvaln([$5], [ZFS_LINUX_CONFTEST_H([$5], [conftest])],
 	    [ZFS_LINUX_CONFTEST_H([], [conftest])])
 
 	ZFS_LINUX_CONFTEST_MAKEFILE([conftest], [no],
 	    [m4_ifvaln([$5], [-I$PWD/build/conftest], [])])
 	ZFS_LINUX_COMPILE([build/conftest], [$2], [$3], [$4], [], [])
 ])
 
 dnl #
 dnl # ZFS_LINUX_TRY_COMPILE
 dnl #
 dnl # $1 - global
 dnl # $2 - source
 dnl # $3 - run on success (valid .ko generated)
 dnl # $4 - run on failure (unable to compile)
 dnl #
 dnl # When configuring as builtin (--enable-linux-builtin) for kernels
 dnl # without loadable module support (CONFIG_MODULES=n) only the object
 dnl # file is created.  See ZFS_LINUX_TEST_COMPILE_ALL for details.
 dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE], [
 	AS_IF([test "x$enable_linux_builtin" = "xyes"], [
 		ZFS_LINUX_COMPILE_IFELSE(
 		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.o], [$3], [$4])
 	], [
 		ZFS_LINUX_COMPILE_IFELSE(
 		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.ko], [$3], [$4])
 	])
 ])
 
 dnl #
 dnl # ZFS_CHECK_SYMBOL_EXPORT
 dnl #
 dnl # Check if a symbol is exported on not by consulting the symbols
 dnl # file, or optionally the source code.
 dnl #
 AC_DEFUN([ZFS_CHECK_SYMBOL_EXPORT], [
 	grep -q -E '[[[:space:]]]$1[[[:space:]]]' \
 		$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
 	rc=$?
 	if test $rc -ne 0; then
 		export=0
 		for file in $2; do
 			grep -q -E "EXPORT_SYMBOL.*($1)" \
 				"$LINUX/$file" 2>/dev/null
 			rc=$?
 			if test $rc -eq 0; then
 				export=1
 				break;
 			fi
 		done
 		if test $export -eq 0; then :
 			$4
 		else :
 			$3
 		fi
 	else :
 		$3
 	fi
 ])
 
 dnl #
 dnl # ZFS_LINUX_TRY_COMPILE_SYMBOL
 dnl #
 dnl # Like ZFS_LINUX_TRY_COMPILER except ZFS_CHECK_SYMBOL_EXPORT is called
 dnl # to verify symbol exports, unless --enable-linux-builtin was provided
 dnl # to configure.
 dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [
 	ZFS_LINUX_TRY_COMPILE([$1], [$2], [rc=0], [rc=1])
 	if test $rc -ne 0; then :
 		$6
 	else
 		if test "x$enable_linux_builtin" != xyes; then
 			ZFS_CHECK_SYMBOL_EXPORT([$3], [$4], [rc=0], [rc=1])
 		fi
 		if test $rc -ne 0; then :
 			$6
 		else :
 			$5
 		fi
 	fi
 ])
 
 dnl #
 dnl # ZFS_LINUX_TRY_COMPILE_HEADER
 dnl # like ZFS_LINUX_TRY_COMPILE, except the contents conftest.h are
 dnl # provided via the fifth parameter
 dnl #
 AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER], [
 	AS_IF([test "x$enable_linux_builtin" = "xyes"], [
 		ZFS_LINUX_COMPILE_IFELSE(
 		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.o], [$3], [$4], [$5])
 	], [
 		ZFS_LINUX_COMPILE_IFELSE(
 		    [ZFS_LINUX_TEST_PROGRAM([[$1]], [[$2]],
 		    [[ZFS_META_LICENSE]])],
 		    [test -f build/conftest/conftest.ko], [$3], [$4], [$5])
 	])
 ])
 
 dnl #
 dnl # AS_VERSION_COMPARE_LE
 dnl # like AS_VERSION_COMPARE_LE, but runs $3 if (and only if) $1 <= $2
 dnl # AS_VERSION_COMPARE_LE (version-1, version-2, [action-if-less-or-equal], [action-if-greater])
 dnl #
 AC_DEFUN([AS_VERSION_COMPARE_LE], [
 	AS_VERSION_COMPARE([$1], [$2], [$3], [$3], [$4])
 ])
 
 dnl #
 dnl # ZFS_LINUX_REQUIRE_API
 dnl # like ZFS_LINUX_TEST_ERROR, except only fails if the kernel is
 dnl # at least some specified version.
 dnl #
 AC_DEFUN([ZFS_LINUX_REQUIRE_API], [
 	AS_VERSION_COMPARE_LE([$2], [$kernsrcver], [
 		AC_MSG_ERROR([
 		*** None of the expected "$1" interfaces were detected. This
 		*** interface is expected for kernels version "$2" and above.
 		*** This may be because your kernel version is newer than what is
 		*** supported, or you are using a patched custom kernel with
 		*** incompatible modifications.  Newer kernels may have incompatible
 		*** APIs.
 		***
 		*** ZFS Version: $ZFS_META_ALIAS
 		*** Compatible Kernels: $ZFS_META_KVER_MIN - $ZFS_META_KVER_MAX
 		])
 	], [
 		AC_MSG_RESULT(no)
 	])
 ])
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
index 658f546213de..c2e818b4d4ee 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/blkdev_compat.h
@@ -1,810 +1,819 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
  */
 
 #ifndef _ZFS_BLKDEV_H
 #define	_ZFS_BLKDEV_H
 
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/hdreg.h>
 #include <linux/major.h>
 #include <linux/msdos_fs.h>	/* for SECTOR_* */
 #include <linux/bio.h>
 
 #ifdef HAVE_BLK_MQ
 #include <linux/blk-mq.h>
 #endif
 
 #ifndef HAVE_BLK_QUEUE_FLAG_SET
 static inline void
 blk_queue_flag_set(unsigned int flag, struct request_queue *q)
 {
 	queue_flag_set(flag, q);
 }
 #endif
 
 #ifndef HAVE_BLK_QUEUE_FLAG_CLEAR
 static inline void
 blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
 {
 	queue_flag_clear(flag, q);
 }
 #endif
 
 /*
+ * 6.11 API
+ * Setting the flush flags directly is no longer possible; flush flags are set
+ * on the queue_limits structure and passed to blk_alloc_disk(). In this case
+ * we remove this function entirely.
+ *
  * 4.7 API,
  * The blk_queue_write_cache() interface has replaced blk_queue_flush()
  * interface.  However, the new interface is GPL-only thus we implement
  * our own trivial wrapper when the GPL-only version is detected.
  *
  * 2.6.36 - 4.6 API,
  * The blk_queue_flush() interface has replaced blk_queue_ordered()
  * interface.  However, while the old interface was available to all the
  * new one is GPL-only.   Thus if the GPL-only version is detected we
  * implement our own trivial helper.
  */
+#if !defined(HAVE_BLK_ALLOC_DISK_2ARG) || \
+	!defined(HAVE_BLKDEV_QUEUE_LIMITS_FEATURES)
 static inline void
-blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
+blk_queue_set_write_cache(struct request_queue *q, bool on)
 {
 #if defined(HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY)
-	if (wc)
+	if (on) {
 		blk_queue_flag_set(QUEUE_FLAG_WC, q);
-	else
-		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
-	if (fua)
 		blk_queue_flag_set(QUEUE_FLAG_FUA, q);
-	else
+	} else {
+		blk_queue_flag_clear(QUEUE_FLAG_WC, q);
 		blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
+	}
 #elif defined(HAVE_BLK_QUEUE_WRITE_CACHE)
-	blk_queue_write_cache(q, wc, fua);
+	blk_queue_write_cache(q, on, on);
 #elif defined(HAVE_BLK_QUEUE_FLUSH_GPL_ONLY)
-	if (wc)
-		q->flush_flags |= REQ_FLUSH;
-	if (fua)
-		q->flush_flags |= REQ_FUA;
+	if (on)
+		q->flush_flags |= REQ_FLUSH | REQ_FUA;
+	else
+		q->flush_flags &= ~(REQ_FLUSH | REQ_FUA);
 #elif defined(HAVE_BLK_QUEUE_FLUSH)
-	blk_queue_flush(q, (wc ? REQ_FLUSH : 0) | (fua ? REQ_FUA : 0));
+	blk_queue_flush(q, on ? (REQ_FLUSH | REQ_FUA) : 0);
 #else
 #error "Unsupported kernel"
 #endif
 }
+#endif /* !HAVE_BLK_ALLOC_DISK_2ARG || !HAVE_BLKDEV_QUEUE_LIMITS_FEATURES */
 
 /*
  * Detect if a device has a write cache. Used to set the intial value for the
  * vdev nowritecache flag.
  *
  * 4.10: QUEUE_FLAG_WC added. Initialised by the driver, but can be changed
  *       later by the operator. If not set, kernel will return flush requests
  *       immediately without doing anything.
  * 6.6: QUEUE_FLAG_HW_WC added. Initialised by the driver, can't be changed.
  *      Only controls if the operator is allowed to change _WC. Initial version
  *      buggy; aliased to QUEUE_FLAG_FUA, so unuseable.
  * 6.6.10, 6.7: QUEUE_FLAG_HW_WC fixed.
  *
  * Older than 4.10 we just assume write cache, and let the normal flush fail
  * detection apply.
  */
 static inline boolean_t
 zfs_bdev_has_write_cache(struct block_device *bdev)
 {
 #if defined(QUEUE_FLAG_HW_WC) && QUEUE_FLAG_HW_WC != QUEUE_FLAG_FUA
 	return (test_bit(QUEUE_FLAG_HW_WC, &bdev_get_queue(bdev)->queue_flags));
 #elif defined(QUEUE_FLAG_WC)
 	return (test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags));
 #else
 	return (B_TRUE);
 #endif
 }
 
 static inline void
 blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
 {
 #if !defined(HAVE_BLK_QUEUE_UPDATE_READAHEAD) && \
 	!defined(HAVE_DISK_UPDATE_READAHEAD)
-#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
+#if defined(HAVE_BLK_QUEUE_BDI_DYNAMIC)
 	q->backing_dev_info->ra_pages = ra_pages;
+#elif defined(HAVE_BLK_QUEUE_DISK_BDI)
+	q->disk->bdi->ra_pages = ra_pages;
 #else
 	q->backing_dev_info.ra_pages = ra_pages;
 #endif
 #endif
 }
 
 #ifdef HAVE_BIO_BVEC_ITER
 #define	BIO_BI_SECTOR(bio)	(bio)->bi_iter.bi_sector
 #define	BIO_BI_SIZE(bio)	(bio)->bi_iter.bi_size
 #define	BIO_BI_IDX(bio)		(bio)->bi_iter.bi_idx
 #define	BIO_BI_SKIP(bio)	(bio)->bi_iter.bi_bvec_done
 #define	bio_for_each_segment4(bv, bvp, b, i)	\
 	bio_for_each_segment((bv), (b), (i))
 typedef struct bvec_iter bvec_iterator_t;
 #else
 #define	BIO_BI_SECTOR(bio)	(bio)->bi_sector
 #define	BIO_BI_SIZE(bio)	(bio)->bi_size
 #define	BIO_BI_IDX(bio)		(bio)->bi_idx
 #define	BIO_BI_SKIP(bio)	(0)
 #define	bio_for_each_segment4(bv, bvp, b, i)	\
 	bio_for_each_segment((bvp), (b), (i))
 typedef int bvec_iterator_t;
 #endif
 
 static inline void
 bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev,
     bool transport, bool driver)
 {
 #ifdef CONFIG_BUG
 	/*
 	 * Disable FAILFAST for loopback devices because of the
 	 * following incorrect BUG_ON() in loop_make_request().
 	 * This support is also disabled for md devices because the
 	 * test suite layers md devices on top of loopback devices.
 	 * This may be removed when the loopback driver is fixed.
 	 *
 	 *   BUG_ON(!lo || (rw != READ && rw != WRITE));
 	 */
 	if ((MAJOR(bdev->bd_dev) == LOOP_MAJOR) ||
 	    (MAJOR(bdev->bd_dev) == MD_MAJOR))
 		return;
 
 #ifdef BLOCK_EXT_MAJOR
 	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
 		return;
 #endif /* BLOCK_EXT_MAJOR */
 #endif /* CONFIG_BUG */
 
 	if (dev)
 		*flags |= REQ_FAILFAST_DEV;
 	if (transport)
 		*flags |= REQ_FAILFAST_TRANSPORT;
 	if (driver)
 		*flags |= REQ_FAILFAST_DRIVER;
 }
 
 /*
  * Maximum disk label length, it may be undefined for some kernels.
  */
 #if !defined(DISK_NAME_LEN)
 #define	DISK_NAME_LEN	32
 #endif /* DISK_NAME_LEN */
 
 #ifdef HAVE_BIO_BI_STATUS
 static inline int
 bi_status_to_errno(blk_status_t status)
 {
 	switch (status)	{
 	case BLK_STS_OK:
 		return (0);
 	case BLK_STS_NOTSUPP:
 		return (EOPNOTSUPP);
 	case BLK_STS_TIMEOUT:
 		return (ETIMEDOUT);
 	case BLK_STS_NOSPC:
 		return (ENOSPC);
 	case BLK_STS_TRANSPORT:
 		return (ENOLINK);
 	case BLK_STS_TARGET:
 		return (EREMOTEIO);
 #ifdef HAVE_BLK_STS_RESV_CONFLICT
 	case BLK_STS_RESV_CONFLICT:
 #else
 	case BLK_STS_NEXUS:
 #endif
 		return (EBADE);
 	case BLK_STS_MEDIUM:
 		return (ENODATA);
 	case BLK_STS_PROTECTION:
 		return (EILSEQ);
 	case BLK_STS_RESOURCE:
 		return (ENOMEM);
 	case BLK_STS_AGAIN:
 		return (EAGAIN);
 	case BLK_STS_IOERR:
 		return (EIO);
 	default:
 		return (EIO);
 	}
 }
 
 static inline blk_status_t
 errno_to_bi_status(int error)
 {
 	switch (error) {
 	case 0:
 		return (BLK_STS_OK);
 	case EOPNOTSUPP:
 		return (BLK_STS_NOTSUPP);
 	case ETIMEDOUT:
 		return (BLK_STS_TIMEOUT);
 	case ENOSPC:
 		return (BLK_STS_NOSPC);
 	case ENOLINK:
 		return (BLK_STS_TRANSPORT);
 	case EREMOTEIO:
 		return (BLK_STS_TARGET);
 	case EBADE:
 #ifdef HAVE_BLK_STS_RESV_CONFLICT
 		return (BLK_STS_RESV_CONFLICT);
 #else
 		return (BLK_STS_NEXUS);
 #endif
 	case ENODATA:
 		return (BLK_STS_MEDIUM);
 	case EILSEQ:
 		return (BLK_STS_PROTECTION);
 	case ENOMEM:
 		return (BLK_STS_RESOURCE);
 	case EAGAIN:
 		return (BLK_STS_AGAIN);
 	case EIO:
 		return (BLK_STS_IOERR);
 	default:
 		return (BLK_STS_IOERR);
 	}
 }
 #endif /* HAVE_BIO_BI_STATUS */
 
 /*
  * 4.3 API change
  * The bio_endio() prototype changed slightly.  These are helper
  * macro's to ensure the prototype and invocation are handled.
  */
 #ifdef HAVE_1ARG_BIO_END_IO_T
 #ifdef HAVE_BIO_BI_STATUS
 #define	BIO_END_IO_ERROR(bio)		bi_status_to_errno(bio->bi_status)
 #define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
 #define	BIO_END_IO(bio, error)		bio_set_bi_status(bio, error)
 static inline void
 bio_set_bi_status(struct bio *bio, int error)
 {
 	ASSERT3S(error, <=, 0);
 	bio->bi_status = errno_to_bi_status(-error);
 	bio_endio(bio);
 }
 #else
 #define	BIO_END_IO_ERROR(bio)		(-(bio->bi_error))
 #define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x)
 #define	BIO_END_IO(bio, error)		bio_set_bi_error(bio, error)
 static inline void
 bio_set_bi_error(struct bio *bio, int error)
 {
 	ASSERT3S(error, <=, 0);
 	bio->bi_error = error;
 	bio_endio(bio);
 }
 #endif /* HAVE_BIO_BI_STATUS */
 
 #else
 #define	BIO_END_IO_PROTO(fn, x, z)	static void fn(struct bio *x, int z)
 #define	BIO_END_IO(bio, error)		bio_endio(bio, error);
 #endif /* HAVE_1ARG_BIO_END_IO_T */
 
 /*
  * 5.15 MACRO,
  *   GD_DEAD
  *
  * 2.6.36 - 5.14 MACRO,
  *   GENHD_FL_UP
  *
  * Check the disk status and return B_TRUE if alive
  * otherwise B_FALSE
  */
 static inline boolean_t
 zfs_check_disk_status(struct block_device *bdev)
 {
 #if defined(GENHD_FL_UP)
 	return (!!(bdev->bd_disk->flags & GENHD_FL_UP));
 #elif defined(GD_DEAD)
 	return (!test_bit(GD_DEAD, &bdev->bd_disk->state));
 #else
 /*
  * This is encountered if neither GENHD_FL_UP nor GD_DEAD is available in
  * the kernel - likely due to an MACRO change that needs to be chased down.
  */
 #error "Unsupported kernel: no usable disk status check"
 #endif
 }
 
 /*
  * 4.1 API,
  * 3.10.0 CentOS 7.x API,
  *   blkdev_reread_part()
  *
  * For older kernels trigger a re-reading of the partition table by calling
  * check_disk_change() which calls flush_disk() to invalidate the device.
  *
  * For newer kernels (as of 5.10), bdev_check_media_change is used, in favor of
  * check_disk_change(), with the modification that invalidation is no longer
  * forced.
  */
 #ifdef HAVE_CHECK_DISK_CHANGE
 #define	zfs_check_media_change(bdev)	check_disk_change(bdev)
 #ifdef HAVE_BLKDEV_REREAD_PART
 #define	vdev_bdev_reread_part(bdev)	blkdev_reread_part(bdev)
 #else
 #define	vdev_bdev_reread_part(bdev)	check_disk_change(bdev)
 #endif /* HAVE_BLKDEV_REREAD_PART */
 #else
 #ifdef HAVE_BDEV_CHECK_MEDIA_CHANGE
 static inline int
 zfs_check_media_change(struct block_device *bdev)
 {
 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 	struct gendisk *gd = bdev->bd_disk;
 	const struct block_device_operations *bdo = gd->fops;
 #endif
 
 	if (!bdev_check_media_change(bdev))
 		return (0);
 
 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 	/*
 	 * Force revalidation, to mimic the old behavior of
 	 * check_disk_change()
 	 */
 	if (bdo->revalidate_disk)
 		bdo->revalidate_disk(gd);
 #endif
 
 	return (0);
 }
 #define	vdev_bdev_reread_part(bdev)	zfs_check_media_change(bdev)
 #elif defined(HAVE_DISK_CHECK_MEDIA_CHANGE)
 #define	vdev_bdev_reread_part(bdev)	disk_check_media_change(bdev->bd_disk)
 #define	zfs_check_media_change(bdev)	disk_check_media_change(bdev->bd_disk)
 #else
 /*
  * This is encountered if check_disk_change() and bdev_check_media_change()
  * are not available in the kernel - likely due to an API change that needs
  * to be chased down.
  */
 #error "Unsupported kernel: no usable disk change check"
 #endif /* HAVE_BDEV_CHECK_MEDIA_CHANGE */
 #endif /* HAVE_CHECK_DISK_CHANGE */
 
 /*
  * 2.6.27 API change
  * The function was exported for use, prior to this it existed but the
  * symbol was not exported.
  *
  * 4.4.0-6.21 API change for Ubuntu
  * lookup_bdev() gained a second argument, FMODE_*, to check inode permissions.
  *
  * 5.11 API change
  * Changed to take a dev_t argument which is set on success and return a
  * non-zero error code on failure.
  */
 static inline int
 vdev_lookup_bdev(const char *path, dev_t *dev)
 {
 #if defined(HAVE_DEVT_LOOKUP_BDEV)
 	return (lookup_bdev(path, dev));
 #elif defined(HAVE_1ARG_LOOKUP_BDEV)
 	struct block_device *bdev = lookup_bdev(path);
 	if (IS_ERR(bdev))
 		return (PTR_ERR(bdev));
 
 	*dev = bdev->bd_dev;
 	bdput(bdev);
 
 	return (0);
 #elif defined(HAVE_MODE_LOOKUP_BDEV)
 	struct block_device *bdev = lookup_bdev(path, FMODE_READ);
 	if (IS_ERR(bdev))
 		return (PTR_ERR(bdev));
 
 	*dev = bdev->bd_dev;
 	bdput(bdev);
 
 	return (0);
 #else
 #error "Unsupported kernel"
 #endif
 }
 
 #if defined(HAVE_BLK_MODE_T)
 #define	blk_mode_is_open_write(flag)	((flag) & BLK_OPEN_WRITE)
 #else
 #define	blk_mode_is_open_write(flag)	((flag) & FMODE_WRITE)
 #endif
 
 /*
  * Kernels without bio_set_op_attrs use bi_rw for the bio flags.
  */
 #if !defined(HAVE_BIO_SET_OP_ATTRS)
 static inline void
 bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
 {
 #if defined(HAVE_BIO_BI_OPF)
 	bio->bi_opf = rw | flags;
 #else
 	bio->bi_rw |= rw | flags;
 #endif /* HAVE_BIO_BI_OPF */
 }
 #endif
 
 /*
  * bio_set_flush - Set the appropriate flags in a bio to guarantee
  * data are on non-volatile media on completion.
  *
  * 2.6.37 - 4.8 API,
  *   Introduce WRITE_FLUSH, WRITE_FUA, and WRITE_FLUSH_FUA flags as a
  *   replacement for WRITE_BARRIER to allow expressing richer semantics
  *   to the block layer.  It's up to the block layer to implement the
  *   semantics correctly. Use the WRITE_FLUSH_FUA flag combination.
  *
  * 4.8 - 4.9 API,
  *   REQ_FLUSH was renamed to REQ_PREFLUSH.  For consistency with previous
  *   OpenZFS releases, prefer the WRITE_FLUSH_FUA flag set if it's available.
  *
  * 4.10 API,
  *   The read/write flags and their modifiers, including WRITE_FLUSH,
  *   WRITE_FUA and WRITE_FLUSH_FUA were removed from fs.h in
  *   torvalds/linux@70fd7614 and replaced by direct flag modification
  *   of the REQ_ flags in bio->bi_opf.  Use REQ_PREFLUSH.
  */
 static inline void
 bio_set_flush(struct bio *bio)
 {
 #if defined(HAVE_REQ_PREFLUSH)	/* >= 4.10 */
 	bio_set_op_attrs(bio, 0, REQ_PREFLUSH | REQ_OP_WRITE);
 #elif defined(WRITE_FLUSH_FUA)	/* >= 2.6.37 and <= 4.9 */
 	bio_set_op_attrs(bio, 0, WRITE_FLUSH_FUA);
 #else
 #error	"Allowing the build will cause bio_set_flush requests to be ignored."
 #endif
 }
 
 /*
  * 4.8 API,
  *   REQ_OP_FLUSH
  *
  * 4.8-rc0 - 4.8-rc1,
  *   REQ_PREFLUSH
  *
  * 2.6.36 - 4.7 API,
  *   REQ_FLUSH
  *
  * in all cases but may have a performance impact for some kernels.  It
  * has the advantage of minimizing kernel specific changes in the zvol code.
  *
  */
 static inline boolean_t
 bio_is_flush(struct bio *bio)
 {
 #if defined(HAVE_REQ_OP_FLUSH) && defined(HAVE_BIO_BI_OPF)
 	return ((bio_op(bio) == REQ_OP_FLUSH) || (bio->bi_opf & REQ_PREFLUSH));
 #elif defined(HAVE_REQ_PREFLUSH) && defined(HAVE_BIO_BI_OPF)
 	return (bio->bi_opf & REQ_PREFLUSH);
 #elif defined(HAVE_REQ_PREFLUSH) && !defined(HAVE_BIO_BI_OPF)
 	return (bio->bi_rw & REQ_PREFLUSH);
 #elif defined(HAVE_REQ_FLUSH)
 	return (bio->bi_rw & REQ_FLUSH);
 #else
 #error	"Unsupported kernel"
 #endif
 }
 
 /*
  * 4.8 API,
  *   REQ_FUA flag moved to bio->bi_opf
  *
  * 2.6.x - 4.7 API,
  *   REQ_FUA
  */
 static inline boolean_t
 bio_is_fua(struct bio *bio)
 {
 #if defined(HAVE_BIO_BI_OPF)
 	return (bio->bi_opf & REQ_FUA);
 #elif defined(REQ_FUA)
 	return (bio->bi_rw & REQ_FUA);
 #else
 #error	"Allowing the build will cause fua requests to be ignored."
 #endif
 }
 
 /*
  * 4.8 API,
  *   REQ_OP_DISCARD
  *
  * 2.6.36 - 4.7 API,
  *   REQ_DISCARD
  *
  * In all cases the normal I/O path is used for discards.  The only
  * difference is how the kernel tags individual I/Os as discards.
  */
 static inline boolean_t
 bio_is_discard(struct bio *bio)
 {
 #if defined(HAVE_REQ_OP_DISCARD)
 	return (bio_op(bio) == REQ_OP_DISCARD);
 #elif defined(HAVE_REQ_DISCARD)
 	return (bio->bi_rw & REQ_DISCARD);
 #else
 #error "Unsupported kernel"
 #endif
 }
 
 /*
  * 4.8 API,
  *   REQ_OP_SECURE_ERASE
  *
  * 2.6.36 - 4.7 API,
  *   REQ_SECURE
  */
 static inline boolean_t
 bio_is_secure_erase(struct bio *bio)
 {
 #if defined(HAVE_REQ_OP_SECURE_ERASE)
 	return (bio_op(bio) == REQ_OP_SECURE_ERASE);
 #elif defined(REQ_SECURE)
 	return (bio->bi_rw & REQ_SECURE);
 #else
 	return (0);
 #endif
 }
 
 /*
  * 2.6.33 API change
  * Discard granularity and alignment restrictions may now be set.  For
  * older kernels which do not support this it is safe to skip it.
  */
 static inline void
 blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
 {
 	q->limits.discard_granularity = dg;
 }
 
 /*
  * 5.19 API,
  *   bdev_max_discard_sectors()
  *
  * 2.6.32 API,
  *   blk_queue_discard()
  */
 static inline boolean_t
 bdev_discard_supported(struct block_device *bdev)
 {
 #if defined(HAVE_BDEV_MAX_DISCARD_SECTORS)
 	return (bdev_max_discard_sectors(bdev) > 0 &&
 	    bdev_discard_granularity(bdev) > 0);
 #elif defined(HAVE_BLK_QUEUE_DISCARD)
 	return (blk_queue_discard(bdev_get_queue(bdev)) > 0 &&
 	    bdev_get_queue(bdev)->limits.discard_granularity > 0);
 #else
 #error "Unsupported kernel"
 #endif
 }
 
 /*
  * 5.19 API,
  *   bdev_max_secure_erase_sectors()
  *
  * 4.8 API,
  *   blk_queue_secure_erase()
  *
  * 2.6.36 - 4.7 API,
  *   blk_queue_secdiscard()
  */
 static inline boolean_t
 bdev_secure_discard_supported(struct block_device *bdev)
 {
 #if defined(HAVE_BDEV_MAX_SECURE_ERASE_SECTORS)
 	return (!!bdev_max_secure_erase_sectors(bdev));
 #elif defined(HAVE_BLK_QUEUE_SECURE_ERASE)
 	return (!!blk_queue_secure_erase(bdev_get_queue(bdev)));
 #elif defined(HAVE_BLK_QUEUE_SECDISCARD)
 	return (!!blk_queue_secdiscard(bdev_get_queue(bdev)));
 #else
 #error "Unsupported kernel"
 #endif
 }
 
 /*
  * A common holder for vdev_bdev_open() is used to relax the exclusive open
  * semantics slightly.  Internal vdev disk callers may pass VDEV_HOLDER to
  * allow them to open the device multiple times.  Other kernel callers and
  * user space processes which don't pass this value will get EBUSY.  This is
  * currently required for the correct operation of hot spares.
  */
 #define	VDEV_HOLDER			((void *)0x2401de7)
 
 static inline unsigned long
 blk_generic_start_io_acct(struct request_queue *q __attribute__((unused)),
     struct gendisk *disk __attribute__((unused)),
     int rw __attribute__((unused)), struct bio *bio)
 {
 #if defined(HAVE_BDEV_IO_ACCT_63)
 	return (bdev_start_io_acct(bio->bi_bdev, bio_op(bio),
 	    jiffies));
 #elif defined(HAVE_BDEV_IO_ACCT_OLD)
 	return (bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
 	    bio_op(bio), jiffies));
 #elif defined(HAVE_DISK_IO_ACCT)
 	return (disk_start_io_acct(disk, bio_sectors(bio), bio_op(bio)));
 #elif defined(HAVE_BIO_IO_ACCT)
 	return (bio_start_io_acct(bio));
 #elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
 	unsigned long start_time = jiffies;
 	generic_start_io_acct(rw, bio_sectors(bio), &disk->part0);
 	return (start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
 	unsigned long start_time = jiffies;
 	generic_start_io_acct(q, rw, bio_sectors(bio), &disk->part0);
 	return (start_time);
 #else
 	/* Unsupported */
 	return (0);
 #endif
 }
 
 static inline void
 blk_generic_end_io_acct(struct request_queue *q __attribute__((unused)),
     struct gendisk *disk __attribute__((unused)),
     int rw __attribute__((unused)), struct bio *bio, unsigned long start_time)
 {
 #if defined(HAVE_BDEV_IO_ACCT_63)
 	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), bio_sectors(bio),
 	    start_time);
 #elif defined(HAVE_BDEV_IO_ACCT_OLD)
 	bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
 #elif defined(HAVE_DISK_IO_ACCT)
 	disk_end_io_acct(disk, bio_op(bio), start_time);
 #elif defined(HAVE_BIO_IO_ACCT)
 	bio_end_io_acct(bio, start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
 	generic_end_io_acct(rw, &disk->part0, start_time);
 #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
 	generic_end_io_acct(q, rw, &disk->part0, start_time);
 #endif
 }
 
 #ifndef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
 static inline struct request_queue *
 blk_generic_alloc_queue(make_request_fn make_request, int node_id)
 {
 #if defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN)
 	return (blk_alloc_queue(make_request, node_id));
 #elif defined(HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH)
 	return (blk_alloc_queue_rh(make_request, node_id));
 #else
 	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);
 	if (q != NULL)
 		blk_queue_make_request(q, make_request);
 
 	return (q);
 #endif
 }
 #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
 
 /*
  * All the io_*() helper functions below can operate on a bio, or a rq, but
  * not both.  The older submit_bio() codepath will pass a bio, and the
  * newer blk-mq codepath will pass a rq.
  */
 static inline int
 io_data_dir(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL) {
 		if (op_is_write(req_op(rq))) {
 			return (WRITE);
 		} else {
 			return (READ);
 		}
 	}
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_data_dir(bio));
 }
 
 static inline int
 io_is_flush(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (req_op(rq) == REQ_OP_FLUSH);
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_is_flush(bio));
 }
 
 static inline int
 io_is_discard(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (req_op(rq) == REQ_OP_DISCARD);
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_is_discard(bio));
 }
 
 static inline int
 io_is_secure_erase(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (req_op(rq) == REQ_OP_SECURE_ERASE);
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_is_secure_erase(bio));
 }
 
 static inline int
 io_is_fua(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (rq->cmd_flags & REQ_FUA);
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_is_fua(bio));
 }
 
 
 static inline uint64_t
 io_offset(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (blk_rq_pos(rq) << 9);
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (BIO_BI_SECTOR(bio) << 9);
 }
 
 static inline uint64_t
 io_size(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (blk_rq_bytes(rq));
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (BIO_BI_SIZE(bio));
 }
 
 static inline int
 io_has_data(struct bio *bio, struct request *rq)
 {
 #ifdef HAVE_BLK_MQ
 	if (rq != NULL)
 		return (bio_has_data(rq->bio));
 #else
 	ASSERT3P(rq, ==, NULL);
 #endif
 	return (bio_has_data(bio));
 }
 #endif /* _ZFS_BLKDEV_H */
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/mm_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/mm_compat.h
index 40056c68d6dd..817f6df422de 100644
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/mm_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/mm_compat.h
@@ -1,36 +1,43 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2023, 2024, Klara Inc.
+ * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
  */
 
 #ifndef _ZFS_MM_COMPAT_H
 #define	_ZFS_MM_COMPAT_H
 
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 
 /* 5.4 introduced page_size(). Older kernels can use a trivial macro instead */
 #ifndef HAVE_MM_PAGE_SIZE
 #define	page_size(p) ((unsigned long)(PAGE_SIZE << compound_order(p)))
 #endif
 
+/* 6.11 removed page_mapping(). A simple wrapper around folio_mapping() works */
+#ifndef HAVE_MM_PAGE_MAPPING
+#define	page_mapping(p) folio_mapping(page_folio(p))
+#endif
+
 #endif /* _ZFS_MM_COMPAT_H */
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
index f0f929d3ce90..2c0cdd9febf5 100644
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-proc.c
@@ -1,766 +1,807 @@
 /*
  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
  *  Copyright (C) 2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
  *  UCRL-CODE-235197
  *
  *  This file is part of the SPL, Solaris Porting Layer.
  *
  *  The SPL is free software; you can redistribute it and/or modify it
  *  under the terms of the GNU General Public License as published by the
  *  Free Software Foundation; either version 2 of the License, or (at your
  *  option) any later version.
  *
  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  *  for more details.
  *
  *  You should have received a copy of the GNU General Public License along
  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  *
  *  Solaris Porting Layer (SPL) Proc Implementation.
  */
+/*
+ * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
+ */
 
 #include <sys/systeminfo.h>
 #include <sys/kstat.h>
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/vmem.h>
 #include <sys/taskq.h>
 #include <sys/proc.h>
 #include <linux/ctype.h>
 #include <linux/kmod.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/version.h>
 #include "zfs_gitrev.h"
 
 #if defined(CONSTIFY_PLUGIN) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
 typedef struct ctl_table __no_const spl_ctl_table;
 #else
 typedef struct ctl_table spl_ctl_table;
 #endif
 
+#ifdef HAVE_PROC_HANDLER_CTL_TABLE_CONST
+#define	CONST_CTL_TABLE		const struct ctl_table
+#else
+#define	CONST_CTL_TABLE		struct ctl_table
+#endif
+
 static unsigned long table_min = 0;
 static unsigned long table_max = ~0;
 
 static struct ctl_table_header *spl_header = NULL;
 #ifndef HAVE_REGISTER_SYSCTL_TABLE
 static struct ctl_table_header *spl_kmem = NULL;
 static struct ctl_table_header *spl_kstat = NULL;
 #endif
 static struct proc_dir_entry *proc_spl = NULL;
 static struct proc_dir_entry *proc_spl_kmem = NULL;
 static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
 static struct proc_dir_entry *proc_spl_taskq_all = NULL;
 static struct proc_dir_entry *proc_spl_taskq = NULL;
 struct proc_dir_entry *proc_spl_kstat = NULL;
 
 #ifdef DEBUG_KMEM
 static int
-proc_domemused(struct ctl_table *table, int write,
+proc_domemused(CONST_CTL_TABLE *table, int write,
     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int rc = 0;
 	unsigned long val;
 	spl_ctl_table dummy = *table;
 
 	dummy.data = &val;
 	dummy.proc_handler = &proc_dointvec;
 	dummy.extra1 = &table_min;
 	dummy.extra2 = &table_max;
 
 	if (write) {
 		*ppos += *lenp;
 	} else {
 #ifdef HAVE_ATOMIC64_T
 		val = atomic64_read((atomic64_t *)table->data);
 #else
 		val = atomic_read((atomic_t *)table->data);
 #endif /* HAVE_ATOMIC64_T */
 		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
 	}
 
 	return (rc);
 }
 #endif /* DEBUG_KMEM */
 
 static int
-proc_doslab(struct ctl_table *table, int write,
+proc_doslab(CONST_CTL_TABLE *table, int write,
     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int rc = 0;
 	unsigned long val = 0, mask;
 	spl_ctl_table dummy = *table;
 	spl_kmem_cache_t *skc = NULL;
 
 	dummy.data = &val;
 	dummy.proc_handler = &proc_dointvec;
 	dummy.extra1 = &table_min;
 	dummy.extra2 = &table_max;
 
 	if (write) {
 		*ppos += *lenp;
 	} else {
 		down_read(&spl_kmem_cache_sem);
 		mask = (unsigned long)table->data;
 
 		list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
 
 			/* Only use slabs of the correct kmem/vmem type */
 			if (!(skc->skc_flags & mask))
 				continue;
 
 			/* Sum the specified field for selected slabs */
 			switch (mask & (KMC_TOTAL | KMC_ALLOC | KMC_MAX)) {
 			case KMC_TOTAL:
 				val += skc->skc_slab_size * skc->skc_slab_total;
 				break;
 			case KMC_ALLOC:
 				val += skc->skc_obj_size * skc->skc_obj_alloc;
 				break;
 			case KMC_MAX:
 				val += skc->skc_obj_size * skc->skc_obj_max;
 				break;
 			}
 		}
 
 		up_read(&spl_kmem_cache_sem);
 		rc = proc_doulongvec_minmax(&dummy, write, buffer, lenp, ppos);
 	}
 
 	return (rc);
 }
 
 static int
-proc_dohostid(struct ctl_table *table, int write,
+proc_dohostid(CONST_CTL_TABLE *table, int write,
     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	char *end, str[32];
 	unsigned long hid;
 	spl_ctl_table dummy = *table;
 
 	dummy.data = str;
 	dummy.maxlen = sizeof (str) - 1;
 
 	if (!write)
 		snprintf(str, sizeof (str), "%lx",
 		    (unsigned long) zone_get_hostid(NULL));
 
 	/* always returns 0 */
 	proc_dostring(&dummy, write, buffer, lenp, ppos);
 
 	if (write) {
 		/*
 		 * We can't use proc_doulongvec_minmax() in the write
 		 * case here because hostid, while a hex value, has no
 		 * leading 0x, which confuses the helper function.
 		 */
 
 		hid = simple_strtoul(str, &end, 16);
 		if (str == end)
 			return (-EINVAL);
 		spl_hostid = hid;
 	}
 
 	return (0);
 }
 
 static void
 taskq_seq_show_headers(struct seq_file *f)
 {
 	seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
 	    "taskq", "act", "nthr", "spwn", "maxt", "pri",
 	    "mina", "maxa", "cura", "flags");
 }
 
 /* indices into the lheads array below */
 #define	LHEAD_PEND	0
 #define	LHEAD_PRIO	1
 #define	LHEAD_DELAY	2
 #define	LHEAD_WAIT	3
 #define	LHEAD_ACTIVE	4
 #define	LHEAD_SIZE	5
 
 static unsigned int spl_max_show_tasks = 512;
 /* CSTYLED */
 module_param(spl_max_show_tasks, uint, 0644);
 MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
 
 /*
  * Print one taskq into the seq_file: a summary row followed by the entries
  * on its active, pend, prio, delay and wait lists.
  *
  *	IN:	f	- seq_file to print into.
  *		p	- the taskq_t to show.
  *		allflag	- B_TRUE prints the summary row even when every list
  *			  is empty; B_FALSE prints nothing for such a taskq.
  *
  *	RETURN:	always 0.
  *
  * tq_lock is held for the whole dump.  The wait queue lock is taken nested
  * inside it and is dropped early when the wait list is empty.
  */
 static int
 taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
 {
 	taskq_t *tq = p;
 	taskq_thread_t *tqt = NULL;
 	spl_wait_queue_entry_t *wq;
 	struct task_struct *tsk;
 	taskq_ent_t *tqe;
 	char name[100];
 	struct list_head *lheads[LHEAD_SIZE], *lh;
 	static char *list_names[LHEAD_SIZE] =
 	    {"pend", "prio", "delay", "wait", "active" };
 	int i, j, have_lheads = 0;
 	unsigned long wflags, flags;
 
 	spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
 	spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
 
 	/* get the various lists and check whether they're empty */
 	lheads[LHEAD_PEND] = &tq->tq_pend_list;
 	lheads[LHEAD_PRIO] = &tq->tq_prio_list;
 	lheads[LHEAD_DELAY] = &tq->tq_delay_list;
 #ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
 	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
 #else
 	lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
 #endif
 	lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
 
 	/*
 	 * Replace each empty list with NULL so the code below can test
 	 * lheads[i] directly, and count how many lists have entries.
 	 */
 	for (i = 0; i < LHEAD_SIZE; ++i) {
 		if (list_empty(lheads[i]))
 			lheads[i] = NULL;
 		else
 			++have_lheads;
 	}
 
 	/* early return in non-"all" mode if lists are all empty */
 	if (!allflag && !have_lheads) {
 		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
 		spin_unlock_irqrestore(&tq->tq_lock, flags);
 		return (0);
 	}
 
 	/*
 	 * unlock the waitq quickly: with an empty wait list there is nothing
 	 * left to read under this lock.  Otherwise it stays held until the
 	 * wait list has been printed.
 	 */
 	if (!lheads[LHEAD_WAIT])
 		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
 
 	/* show the base taskq contents */
 	snprintf(name, sizeof (name), "%s/%d", tq->tq_name, tq->tq_instance);
 	seq_printf(f, "%-25s ", name);
 	seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
 	    tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
 	    tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
 	    tq->tq_nalloc, tq->tq_flags);
 
 	/* show the active list, two "[pid]func(arg)" entries per line */
 	if (lheads[LHEAD_ACTIVE]) {
 		j = 0;
 		list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
 			if (j == 0)
 				seq_printf(f, "\t%s:",
 				    list_names[LHEAD_ACTIVE]);
 			else if (j == 2) {
 				seq_printf(f, "\n\t       ");
 				j = 0;
 			}
 			seq_printf(f, " [%d]%pf(%ps)",
 			    tqt->tqt_thread->pid,
 			    tqt->tqt_task->tqent_func,
 			    tqt->tqt_task->tqent_arg);
 			++j;
 		}
 		seq_printf(f, "\n");
 	}
 
 	/*
 	 * Walk the remaining lists (LHEAD_PEND through LHEAD_WAIT); the
 	 * active list was handled above.
 	 */
 	for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
 		if (lheads[i]) {
 			j = 0;
 			list_for_each(lh, lheads[i]) {
 				/* a limit of 0 means "no limit" */
 				if (spl_max_show_tasks != 0 &&
 				    j >= spl_max_show_tasks) {
 					seq_printf(f, "\n\t(truncated)");
 					break;
 				}
 				/* show the wait waitq list */
 				if (i == LHEAD_WAIT) {
 #ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
 					wq = list_entry(lh,
 					    spl_wait_queue_entry_t, entry);
 #else
 					wq = list_entry(lh,
 					    spl_wait_queue_entry_t, task_list);
 #endif
 					/* eight pids per output line */
 					if (j == 0)
 						seq_printf(f, "\t%s:",
 						    list_names[i]);
 					else if (j % 8 == 0)
 						seq_printf(f, "\n\t     ");
 
 					tsk = wq->private;
 					seq_printf(f, " %d", tsk->pid);
 				/* pend, prio and delay lists */
 				} else {
 					tqe = list_entry(lh, taskq_ent_t,
 					    tqent_list);
 					/* two func(arg) entries per line */
 					if (j == 0)
 						seq_printf(f, "\t%s:",
 						    list_names[i]);
 					else if (j % 2 == 0)
 						seq_printf(f, "\n\t     ");
 
 					seq_printf(f, " %pf(%ps)",
 					    tqe->tqent_func,
 					    tqe->tqent_arg);
 				}
 				++j;
 			}
 			seq_printf(f, "\n");
 		}
 	/* the waitq lock is still held only if the wait list was non-empty */
 	if (lheads[LHEAD_WAIT])
 		spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
 	spin_unlock_irqrestore(&tq->tq_lock, flags);
 
 	return (0);
 }
 
 static int
 taskq_all_seq_show(struct seq_file *f, void *p)
 {
 	return (taskq_seq_show_impl(f, p, B_TRUE));
 }
 
 static int
 taskq_seq_show(struct seq_file *f, void *p)
 {
 	return (taskq_seq_show_impl(f, p, B_FALSE));
 }
 
 /*
  * seq_file .start callback for the taskq listings.
  *
  * Takes tq_list_sem for reading (released by taskq_seq_stop()), prints the
  * column headers when iteration begins at position 0, and returns the
  * taskq at index *pos on tq_list.
  *
  * Returns NULL when *pos is at or past the end of the list.  That includes
  * an empty list, so the list head itself is never handed to the show
  * callback as if it were embedded in a taskq_t.
  */
 static void *
 taskq_seq_start(struct seq_file *f, loff_t *pos)
 {
 	struct list_head *p;
 	loff_t n = *pos;
 
 	down_read(&tq_list_sem);
 	if (!n)
 		taskq_seq_show_headers(f);
 
 	p = tq_list.next;
 	/* empty list: first "entry" is the head, there is nothing to show */
 	if (p == &tq_list)
 		return (NULL);
 
 	while (n--) {
 		p = p->next;
 		if (p == &tq_list)
 			return (NULL);
 	}
 
 	return (list_entry(p, taskq_t, tq_taskqs));
 }
 
 /*
  * seq_file .next callback for the taskq listings: bump the position and
  * return the taskq following p on tq_list, or NULL once the walk wraps
  * back around to the list head.
  */
 static void *
 taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
 {
 	struct list_head *succ = ((taskq_t *)p)->tq_taskqs.next;

 	++*pos;
 	if (succ == &tq_list)
 		return (NULL);

 	return (list_entry(succ, taskq_t, tq_taskqs));
 }
 
 /*
  * Write the two heading rows of the slab listing: a row naming the column
  * groups (cache, slab, object, emergency) and a row naming each column.
  */
 static void
 slab_seq_show_headers(struct seq_file *f)
 {
 	static const char group_row[] =
 	    "--------------------- cache ----------"
 	    "---------------------------------------------  "
 	    "----- slab ------  "
 	    "---- object -----  "
 	    "--- emergency ---\n";
 	static const char column_row[] =
 	    "name                                  "
 	    "  flags      size     alloc slabsize  objsize  "
 	    "total alloc   max  "
 	    "total alloc   max  "
 	    "dlock alloc   max\n";

 	seq_printf(f, "%s", group_row);
 	seq_printf(f, "%s", column_row);
 }
 
 /*
  * seq_file .show callback for the slab listing: print one row for the
  * spl_kmem_cache_t passed in p.
  *
  * Caches flagged KMC_SLAB get a reduced row in which only the flags, the
  * allocated byte count, the object size and the allocated object count are
  * filled in; every other column is shown as "-".  All other caches get the
  * full set of slab, object and emergency counters.
  *
  * skc_lock is held while the row is formatted.  Always returns 0.
  */
 static int
 slab_seq_show(struct seq_file *f, void *p)
 {
 	spl_kmem_cache_t *skc = p;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 
 	if (skc->skc_flags & KMC_SLAB) {
 		/*
 		 * This cache is backed by a generic Linux kmem cache which
 		 * has its own accounting. For these caches we only track
 		 * the number of active allocated objects that exist within
 		 * the underlying Linux slabs. For the overall statistics of
 		 * the underlying Linux cache please refer to /proc/slabinfo.
 		 */
 		spin_lock(&skc->skc_lock);
 		uint64_t objs_allocated =
 		    percpu_counter_sum(&skc->skc_linux_alloc);
 		seq_printf(f, "%-36s  ", skc->skc_name);
 		/* "alloc" bytes is derived: object size * allocated objects */
 		seq_printf(f, "0x%05lx %9s %9lu %8s %8u  "
 		    "%5s %5s %5s  %5s %5lu %5s  %5s %5s %5s\n",
 		    (long unsigned)skc->skc_flags,
 		    "-",
 		    (long unsigned)(skc->skc_obj_size * objs_allocated),
 		    "-",
 		    (unsigned)skc->skc_obj_size,
 		    "-", "-", "-", "-",
 		    (long unsigned)objs_allocated,
 		    "-", "-", "-", "-");
 		spin_unlock(&skc->skc_lock);
 		return (0);
 	}
 
 	spin_lock(&skc->skc_lock);
 	seq_printf(f, "%-36s  ", skc->skc_name);
 	/*
 	 * Columns, in order: flags, size (slab size * total slabs), alloc
 	 * (object size * allocated objects), slabsize, objsize, then the
 	 * total/alloc/max triples for slabs and objects, and the
 	 * dlock/alloc/max emergency counters.
 	 */
 	seq_printf(f, "0x%05lx %9lu %9lu %8u %8u  "
 	    "%5lu %5lu %5lu  %5lu %5lu %5lu  %5lu %5lu %5lu\n",
 	    (long unsigned)skc->skc_flags,
 	    (long unsigned)(skc->skc_slab_size * skc->skc_slab_total),
 	    (long unsigned)(skc->skc_obj_size * skc->skc_obj_alloc),
 	    (unsigned)skc->skc_slab_size,
 	    (unsigned)skc->skc_obj_size,
 	    (long unsigned)skc->skc_slab_total,
 	    (long unsigned)skc->skc_slab_alloc,
 	    (long unsigned)skc->skc_slab_max,
 	    (long unsigned)skc->skc_obj_total,
 	    (long unsigned)skc->skc_obj_alloc,
 	    (long unsigned)skc->skc_obj_max,
 	    (long unsigned)skc->skc_obj_deadlock,
 	    (long unsigned)skc->skc_obj_emergency,
 	    (long unsigned)skc->skc_obj_emergency_max);
 	spin_unlock(&skc->skc_lock);
 	return (0);
 }
 
 /*
  * seq_file .start callback for the slab listing.
  *
  * Takes spl_kmem_cache_sem for reading (released by slab_seq_stop()),
  * prints the column headers when iteration begins at position 0, and
  * returns the cache at index *pos on spl_kmem_cache_list.
  *
  * Returns NULL when *pos is at or past the end of the list.  That includes
  * an empty list, so the list head itself is never handed to the show
  * callback as if it were embedded in a spl_kmem_cache_t.
  */
 static void *
 slab_seq_start(struct seq_file *f, loff_t *pos)
 {
 	struct list_head *p;
 	loff_t n = *pos;
 
 	down_read(&spl_kmem_cache_sem);
 	if (!n)
 		slab_seq_show_headers(f);
 
 	p = spl_kmem_cache_list.next;
 	/* empty list: first "entry" is the head, there is nothing to show */
 	if (p == &spl_kmem_cache_list)
 		return (NULL);
 
 	while (n--) {
 		p = p->next;
 		if (p == &spl_kmem_cache_list)
 			return (NULL);
 	}
 
 	return (list_entry(p, spl_kmem_cache_t, skc_list));
 }
 
 /*
  * seq_file .next callback for the slab listing: bump the position and
  * return the cache following p on spl_kmem_cache_list, or NULL once the
  * walk wraps back around to the list head.
  */
 static void *
 slab_seq_next(struct seq_file *f, void *p, loff_t *pos)
 {
 	struct list_head *succ = ((spl_kmem_cache_t *)p)->skc_list.next;

 	++*pos;
 	if (succ == &spl_kmem_cache_list)
 		return (NULL);

 	return (list_entry(succ, spl_kmem_cache_t, skc_list));
 }
 
 /*
  * seq_file .stop callback for the slab listing: drop the read hold on
  * spl_kmem_cache_sem taken by slab_seq_start().
  */
 static void
 slab_seq_stop(struct seq_file *f, void *v)
 {
 	up_read(&spl_kmem_cache_sem);
 }
 
 /* seq_file iterator for the slab listing, opened by proc_slab_open(). */
 static const struct seq_operations slab_seq_ops = {
 	.start = slab_seq_start,
 	.show  = slab_seq_show,
 	.next  = slab_seq_next,
 	.stop  = slab_seq_stop,
 };
 
 /*
  * Open handler for the slab proc file: attach the slab_seq_ops iterator
  * to the file and return seq_open()'s result.
  */
 static int
 proc_slab_open(struct inode *inode, struct file *filp)
 {
 	return (seq_open(filp, &slab_seq_ops));
 }
 
 /*
  * File operations for the slab proc file.  Only open is specific to this
  * file; read, seek and release use the generic seq_file helpers.  The
  * member names differ depending on HAVE_PROC_OPS_STRUCT.
  */
 static const kstat_proc_op_t proc_slab_operations = {
 #ifdef HAVE_PROC_OPS_STRUCT
 	.proc_open	= proc_slab_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= seq_release,
 #else
 	.open		= proc_slab_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 #endif
 };
 
 /*
  * seq_file .stop callback for the taskq listings: drop the read hold on
  * tq_list_sem taken by taskq_seq_start().
  */
 static void
 taskq_seq_stop(struct seq_file *f, void *v)
 {
 	up_read(&tq_list_sem);
 }
 
 /*
  * seq_file iterator that shows every taskq; it differs from taskq_seq_ops
  * only in its .show callback.
  */
 static const struct seq_operations taskq_all_seq_ops = {
 	.start	= taskq_seq_start,
 	.show	= taskq_all_seq_show,
 	.next	= taskq_seq_next,
 	.stop	= taskq_seq_stop,
 };
 
 /*
  * seq_file iterator that skips taskqs whose lists are all empty; it
  * differs from taskq_all_seq_ops only in its .show callback.
  */
 static const struct seq_operations taskq_seq_ops = {
 	.start	= taskq_seq_start,
 	.show	= taskq_seq_show,
 	.next	= taskq_seq_next,
 	.stop	= taskq_seq_stop,
 };
 
 /*
  * Open handler for the "taskq-all" proc file: attach the
  * taskq_all_seq_ops iterator and return seq_open()'s result.
  */
 static int
 proc_taskq_all_open(struct inode *inode, struct file *filp)
 {
 	return (seq_open(filp, &taskq_all_seq_ops));
 }
 
 /*
  * Open handler for the "taskq" proc file: attach the taskq_seq_ops
  * iterator and return seq_open()'s result.
  */
 static int
 proc_taskq_open(struct inode *inode, struct file *filp)
 {
 	return (seq_open(filp, &taskq_seq_ops));
 }
 
 /*
  * File operations for the "taskq-all" proc file.  Only open is specific
  * to this file; read, seek and release use the generic seq_file helpers.
  * The member names differ depending on HAVE_PROC_OPS_STRUCT.
  */
 static const kstat_proc_op_t proc_taskq_all_operations = {
 #ifdef HAVE_PROC_OPS_STRUCT
 	.proc_open	= proc_taskq_all_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= seq_release,
 #else
 	.open		= proc_taskq_all_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 #endif
 };
 
 /*
  * File operations for the "taskq" proc file.  Only open is specific to
  * this file; read, seek and release use the generic seq_file helpers.
  * The member names differ depending on HAVE_PROC_OPS_STRUCT.
  */
 static const kstat_proc_op_t proc_taskq_operations = {
 #ifdef HAVE_PROC_OPS_STRUCT
 	.proc_open	= proc_taskq_open,
 	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= seq_release,
 #else
 	.open		= proc_taskq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 #endif
 };
 
 /*
  * sysctl entries for the "kmem" directory under kernel/spl.  Every entry
  * is read-only (mode 0444).  The array ends with an empty sentinel
  * element.
  */
 static struct ctl_table spl_kmem_table[] = {
 #ifdef DEBUG_KMEM
 	/* allocation accounting, only present in DEBUG_KMEM builds */
 	{
 		.procname	= "kmem_used",
 		.data		= &kmem_alloc_used,
 #ifdef HAVE_ATOMIC64_T
 		.maxlen		= sizeof (atomic64_t),
 #else
 		.maxlen		= sizeof (atomic_t),
 #endif /* HAVE_ATOMIC64_T */
 		.mode		= 0444,
 		.proc_handler	= &proc_domemused,
 	},
 	{
 		.procname	= "kmem_max",
 		.data		= &kmem_alloc_max,
 		.maxlen		= sizeof (unsigned long),
 		.extra1		= &table_min,
 		.extra2		= &table_max,
 		.mode		= 0444,
 		.proc_handler	= &proc_doulongvec_minmax,
 	},
 #endif /* DEBUG_KMEM */
 	/*
 	 * For the slab_kvmem_* entries .data is not an address: it carries
 	 * a KMC_* flag mask cast to a pointer.  Presumably proc_doslab
 	 * decodes the mask to pick the statistic -- confirm against
 	 * proc_doslab.
 	 */
 	{
 		.procname	= "slab_kvmem_total",
 		.data		= (void *)(KMC_KVMEM | KMC_TOTAL),
 		.maxlen		= sizeof (unsigned long),
 		.extra1		= &table_min,
 		.extra2		= &table_max,
 		.mode		= 0444,
 		.proc_handler	= &proc_doslab,
 	},
 	{
 		.procname	= "slab_kvmem_alloc",
 		.data		= (void *)(KMC_KVMEM | KMC_ALLOC),
 		.maxlen		= sizeof (unsigned long),
 		.extra1		= &table_min,
 		.extra2		= &table_max,
 		.mode		= 0444,
 		.proc_handler	= &proc_doslab,
 	},
 	{
 		.procname	= "slab_kvmem_max",
 		.data		= (void *)(KMC_KVMEM | KMC_MAX),
 		.maxlen		= sizeof (unsigned long),
 		.extra1		= &table_min,
 		.extra2		= &table_max,
 		.mode		= 0444,
 		.proc_handler	= &proc_doslab,
 	},
 	{},
 };
 
 /*
  * sysctl table for the "kstat" directory under kernel/spl.  It holds no
  * entries of its own, only the terminating sentinel element.
  */
 static struct ctl_table spl_kstat_table[] = {
 	{},
 };
 
 /*
  * sysctl entries registered at kernel/spl.  The "kmem" and "kstat" child
  * directories are listed here only when HAVE_REGISTER_SYSCTL_TABLE is
  * defined; otherwise spl_proc_init() registers them as separate tables.
  * The array ends with an empty sentinel element.
  */
 static struct ctl_table spl_table[] = {
 	/*
 	 * NB No .strategy entries have been provided since
 	 * sysctl(8) prefers to go via /proc for portability.
 	 */
 	/* read-only string holding ZFS_META_GITREV */
 	{
 		.procname	= "gitrev",
 		.data		= (char *)ZFS_META_GITREV,
 		.maxlen		= sizeof (ZFS_META_GITREV),
 		.mode		= 0444,
 		.proc_handler	= &proc_dostring,
 	},
 	/* writable (0644) view of spl_hostid, handled by proc_dohostid */
 	{
 		.procname	= "hostid",
 		.data		= &spl_hostid,
 		.maxlen		= sizeof (unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &proc_dohostid,
 	},
 #ifdef HAVE_REGISTER_SYSCTL_TABLE
 	{
 		.procname	= "kmem",
 		.mode		= 0555,
 		.child		= spl_kmem_table,
 	},
 	{
 		.procname	= "kstat",
 		.mode		= 0555,
 		.child		= spl_kstat_table,
 	},
 #endif
 	{},
 };
 
 #ifdef HAVE_REGISTER_SYSCTL_TABLE
 /*
  * Directory tables used only with register_sysctl_table(): spl_root
  * describes "kernel", whose child spl_dir describes "spl", whose child is
  * spl_table.  spl_proc_init() registers spl_root.
  */
 static struct ctl_table spl_dir[] = {
 	{
 		.procname	= "spl",
 		.mode		= 0555,
 		.child		= spl_table,
 	},
 	{}
 };
 
 static struct ctl_table spl_root[] = {
 	{
 		.procname	= "kernel",
 		.mode		= 0555,
 		.child		= spl_dir,
 	},
 	{}
 };
 #endif
 
 /*
  * Tear down everything spl_proc_init() may have created.  Called from
  * spl_proc_init() on failure and from spl_proc_fini().
  *
  * Proc entries are removed by name, children before the "spl" directory
  * itself.  Each sysctl header is unregistered only if it is non-NULL and
  * is then cleared.
  */
 static void spl_proc_cleanup(void)
 {
 	remove_proc_entry("kstat", proc_spl);
 	remove_proc_entry("slab", proc_spl_kmem);
 	remove_proc_entry("kmem", proc_spl);
 	remove_proc_entry("taskq-all", proc_spl);
 	remove_proc_entry("taskq", proc_spl);
 	remove_proc_entry("spl", NULL);
 
 	/*
 	 * The kstat and kmem tables are registered separately only when
 	 * register_sysctl_table() is unavailable; drop them before the
 	 * kernel/spl table below.
 	 */
 #ifndef HAVE_REGISTER_SYSCTL_TABLE
 	if (spl_kstat) {
 		unregister_sysctl_table(spl_kstat);
 		spl_kstat = NULL;
 	}
 	if (spl_kmem) {
 		unregister_sysctl_table(spl_kmem);
 		spl_kmem = NULL;
 	}
 #endif
 	if (spl_header) {
 		unregister_sysctl_table(spl_header);
 		spl_header = NULL;
 	}
 }
 
+#ifndef HAVE_REGISTER_SYSCTL_TABLE
+
+/*
+ * Traditionally, struct ctl_table arrays have been terminated by an "empty"
+ * sentinel element (specifically, one with .procname == NULL).
+ *
+ * Linux 6.6 began migrating away from this, adding register_sysctl_sz() so
+ * that callers could provide the size directly, and redefining
+ * register_sysctl() to just call register_sysctl_sz() with the array size. It
+ * retained support for the terminating element so that existing callers would
+ * continue to work.
+ *
+ * Linux 6.11 removed support for the terminating element, instead interpreting
+ * it as a real malformed element, and rejecting it.
+ *
+ * In order to continue to support older kernels, we retain the terminating
+ * sentinel element for our sysctl tables, but instead detect availability of
+ * register_sysctl_sz(). If it exists, we pass it the array size minus one,
+ * so the kernel never tries to process the terminator. For pre-6.6 kernels
+ * that don't have register_sysctl_sz(), we just use register_sysctl(), which
+ * can handle the terminating element as it always has.
+ */
+#ifdef HAVE_REGISTER_SYSCTL_SZ
+#define	spl_proc_register_sysctl(p, t)	\
+	register_sysctl_sz(p, t, ARRAY_SIZE(t)-1)
+#else
+#define	spl_proc_register_sysctl(p, t)	\
+	register_sysctl(p, t)
+#endif
+#endif
+
 /*
  * Register the SPL sysctl tables and create the /proc/spl hierarchy:
  * the "spl" directory with its "taskq-all" and "taskq" files, the "kmem"
  * directory with its "slab" file, and the "kstat" directory.
  *
  * Returns 0 on success.  Any failure yields -EUNATCH; if the first sysctl
  * registration fails the function returns immediately, otherwise
  * spl_proc_cleanup() is run to undo whatever was already set up.
  */
 int
 spl_proc_init(void)
 {
 	int rc = 0;
 
 #ifdef HAVE_REGISTER_SYSCTL_TABLE
 	/* one call registers the whole kernel/spl tree via spl_root */
 	spl_header = register_sysctl_table(spl_root);
 	if (spl_header == NULL)
 		return (-EUNATCH);
 #else
 	/* each directory is registered by path as its own table */
-	spl_header = register_sysctl("kernel/spl", spl_table);
+	spl_header = spl_proc_register_sysctl("kernel/spl", spl_table);
 	if (spl_header == NULL)
 		return (-EUNATCH);
 
-	spl_kmem = register_sysctl("kernel/spl/kmem", spl_kmem_table);
+	spl_kmem = spl_proc_register_sysctl("kernel/spl/kmem", spl_kmem_table);
 	if (spl_kmem == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
-	spl_kstat = register_sysctl("kernel/spl/kstat", spl_kstat_table);
+	spl_kstat = spl_proc_register_sysctl("kernel/spl/kstat",
+	    spl_kstat_table);
 	if (spl_kstat == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 #endif
 
 	proc_spl = proc_mkdir("spl", NULL);
 	if (proc_spl == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 
 	proc_spl_taskq_all = proc_create_data("taskq-all", 0444, proc_spl,
 	    &proc_taskq_all_operations, NULL);
 	if (proc_spl_taskq_all == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 
 	proc_spl_taskq = proc_create_data("taskq", 0444, proc_spl,
 	    &proc_taskq_operations, NULL);
 	if (proc_spl_taskq == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 
 	proc_spl_kmem = proc_mkdir("kmem", proc_spl);
 	if (proc_spl_kmem == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 
 	proc_spl_kmem_slab = proc_create_data("slab", 0444, proc_spl_kmem,
 	    &proc_slab_operations, NULL);
 	if (proc_spl_kmem_slab == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 
 	proc_spl_kstat = proc_mkdir("kstat", proc_spl);
 	if (proc_spl_kstat == NULL) {
 		rc = -EUNATCH;
 		goto out;
 	}
 out:
 	/* reached on success too; cleanup runs only when rc is non-zero */
 	if (rc)
 		spl_proc_cleanup();
 
 	return (rc);
 }
 
 /*
  * Module teardown counterpart of spl_proc_init(): remove the proc entries
  * and unregister the sysctl tables via spl_proc_cleanup().
  */
 void
 spl_proc_fini(void)
 {
 	spl_proc_cleanup();
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
index 1cecad9f7755..9803c7fecb5c 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@@ -1,4255 +1,4268 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2010 Robert Milkowski */
 
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/vfs.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/taskq.h>
 #include <sys/uio.h>
 #include <sys/vmsystm.h>
 #include <sys/atomic.h>
 #include <sys/pathname.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/policy.h>
 #include <sys/sunddi.h>
 #include <sys/sid.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_quota.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_rlock.h>
 #include <sys/cred.h>
 #include <sys/zpl.h>
 #include <sys/zil.h>
 #include <sys/sa_impl.h>
+#include <linux/mm_compat.h>
 
 /*
  * Programming rules.
  *
  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
  * properly lock its in-core state, create a DMU transaction, do the work,
  * record this work in the intent log (ZIL), commit the DMU transaction,
  * and wait for the intent log to commit if it is a synchronous operation.
  * Moreover, the vnode ops must work in both normal and log replay context.
  * The ordering of events is important to avoid deadlocks and references
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1) A check must be made in each zfs thread for a mounted file system.
  *	This is done avoiding races using zfs_enter(zfsvfs).
  *      A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
  *      must be checked with zfs_verify_zp(zp).  Both of these macros
  *      can return EIO from the calling function.
  *
  *  (2) zrele() should always be the last thing except for zil_commit() (if
  *	necessary) and zfs_exit(). This is for 3 reasons: First, if it's the
  *	last reference, the vnode/znode can be freed, so the zp may point to
  *	freed memory.  Second, the last reference will call zfs_zinactive(),
  *	which may induce a lot of work -- pushing cached pages (which acquires
  *	range locks) and syncing out cached atime changes.  Third,
  *	zfs_zinactive() may require a new tx, which could deadlock the system
  *	if you were already holding one. This deadlock occurs because the tx
  *	currently being operated on prevents a txg from syncing, which
  *	prevents the new tx from progressing, resulting in a deadlock.  If you
  *	must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
  *	is a synonym for zrele().
  *
  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
  *	as they can span dmu_tx_assign() calls.
  *
  *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
  *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
  *
  *      Note, in particular, that if a lock is sometimes acquired before
  *      the tx assigns, and sometimes after (e.g. z_lock), then failing
  *      to use a non-blocking assign can deadlock the system.  The scenario:
  *
  *	Thread A has grabbed a lock before calling dmu_tx_assign().
  *	Thread B is in an already-assigned tx, and blocks for this lock.
  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
  *	forever, because the previous txg can't quiesce until B's tx commits.
  *
  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
  *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
  *	to indicate that this operation has already called dmu_tx_wait().
  *	This will ensure that we don't retry forever, waiting a short bit
  *	each time.
  *
  *  (5)	If the operation succeeded, generate the intent log entry for it
  *	before dropping locks.  This ensures that the ordering of events
  *	in the intent log matches the order in which they actually occurred.
  *	During ZIL replay the zfs_log_* functions will update the sequence
  *	number to indicate the zil transaction has replayed.
  *
  *  (6)	At the end of each vnode op, the DMU tx must always commit,
  *	regardless of whether there were any errors.
  *
  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
  *	to ensure that synchronous semantics are provided when necessary.
  *
  * In general, this is how things should be ordered in each vnode op:
  *
  *	zfs_enter(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lock(&dl, ...)	// lock directory entry (may igrab())
  *	rw_enter(...);			// grab any other locks you need
  *	tx = dmu_tx_create(...);	// get DMU tx
  *	dmu_tx_hold_*();		// hold each object you might modify
  *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
  *	if (error) {
  *		rw_exit(...);		// drop locks
  *		zfs_dirent_unlock(dl);	// unlock directory entry
  *		zrele(...);		// release held znodes
  *		if (error == ERESTART) {
  *			waited = B_TRUE;
  *			dmu_tx_wait(tx);
  *			dmu_tx_abort(tx);
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
  *		zfs_exit(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
  *	if (error == 0)
  *		zfs_log_*(...);		// on success, make ZIL entry
  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
  *	rw_exit(...);			// drop locks
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	zrele(...);			// release held znodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
  *	zfs_exit(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 /*
  * Open-time checks and bookkeeping for an inode.
  *
  *	IN:	ip	- inode being opened.
  *		mode	- open mode, tested with blk_mode_is_open_write().
  *		flag	- open flags (O_APPEND and O_SYNC are examined).
  *		cr	- credentials of caller (unused).
  *
  *	RETURN:	0 on success, the zfs_enter_verify_zp() error if entry
  *		fails, or EPERM when a ZFS_APPENDONLY file is opened for
  *		writing without O_APPEND.
  */
 int
 zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
 {
 	(void) cr;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	znode_t *zp = ITOZ(ip);
 	int error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);

 	if (error != 0)
 		return (error);

 	/* An append-only file may be opened for write only with O_APPEND. */
 	if (blk_mode_is_open_write(mode) &&
 	    (zp->z_pflags & ZFS_APPENDONLY) &&
 	    !(flag & O_APPEND)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}

 	/*
 	 * Count synchronous opens on the znode.  When the count goes from
 	 * zero to one, earlier async transactions have to be converted to
 	 * sync so that ordering stays correct.
 	 */
 	if ((flag & O_SYNC) && atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
 		zil_async_to_sync(zfsvfs->z_log, zp->z_id);

 	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
 /*
  * Close-time bookkeeping for an inode.
  *
  *	IN:	ip	- inode being closed.
  *		flag	- the file's open flags (only O_SYNC is examined).
  *		cr	- credentials of caller (unused).
  *
  *	RETURN:	0 on success, or the zfs_enter_verify_zp() error.
  */
 int
 zfs_close(struct inode *ip, int flag, cred_t *cr)
 {
 	(void) cr;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	znode_t *zp = ITOZ(ip);
 	int error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);

 	if (error == 0) {
 		/* Undo the synchronous-open count taken in zfs_open(). */
 		if (flag & O_SYNC)
 			atomic_dec_32(&zp->z_sync_cnt);

 		zfs_exit(zfsvfs, FTAG);
 	}

 	return (error);
 }
 
 #if defined(_KERNEL)
 
 static int zfs_fillpage(struct inode *ip, struct page *pp);
 
 /*
  * When a file is memory mapped, we must keep the IO data synchronized
  * between the DMU cache and the memory mapped pages.  Update all mapped
  * pages with the contents of the corresponding dmu buffer.
  *
  *	IN:	zp	- znode whose page cache is refreshed.
  *		start	- byte offset in the file where the range begins.
  *		len	- length of the range in bytes.
  *		os	- objset the data is read from.
  *
  * Pages that are not present in the page cache are skipped.  A page whose
  * dmu_read() fails is marked with an error and no longer up to date.
  */
 void
 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 {
 	struct address_space *mp = ZTOI(zp)->i_mapping;
 	/* offset of the range within its first page */
 	int64_t off = start & (PAGE_SIZE - 1);
 
 	/* walk the range one page at a time, starting page aligned */
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
 		uint64_t nbytes = MIN(PAGE_SIZE - off, len);
 
 		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);
 
 			void *pb = kmap(pp);
 			int error = dmu_read(os, zp->z_id, start + off,
 			    nbytes, pb + off, DMU_READ_PREFETCH);
 			kunmap(pp);
 
 			if (error) {
 				SetPageError(pp);
 				ClearPageUptodate(pp);
 			} else {
 				ClearPageError(pp);
 				SetPageUptodate(pp);
 
 				if (mapping_writably_mapped(mp))
 					flush_dcache_page(pp);
 
 				mark_page_accessed(pp);
 			}
 
 			unlock_page(pp);
 			put_page(pp);
 		}
 
 		len -= nbytes;
 		/* only the first page can start mid-page */
 		off = 0;
 	}
 }
 
 /*
  * When a file is memory mapped, we must keep the I/O data synchronized
  * between the DMU cache and the memory mapped pages.  Preferentially read
  * from memory mapped pages, otherwise fallback to reading through the dmu.
  *
  *	IN:	zp	- znode of the file to read.
  *		nbytes	- number of bytes to read.
  *		uio	- destination; reading starts at uio->uio_loffset.
  *
  *	RETURN:	0 on success, otherwise the first error returned by
  *		zfs_fillpage(), zfs_uiomove() or dmu_read_uio_dbuf().
  */
 int
 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	struct inode *ip = ZTOI(zp);
 	struct address_space *mp = ip->i_mapping;
 	int64_t start = uio->uio_loffset;
 	/* offset of the range within its first page */
 	int64_t off = start & (PAGE_SIZE - 1);
 	int len = nbytes;
 	int error = 0;
 
 	/* walk the range one page at a time, starting page aligned */
 	for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
 		uint64_t bytes = MIN(PAGE_SIZE - off, len);
 
 		struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
 		if (pp) {
 			/*
 			 * If filemap_fault() retries there exists a window
 			 * where the page will be unlocked and not up to date.
 			 * In this case we must try and fill the page.
 			 */
 			if (unlikely(!PageUptodate(pp))) {
 				error = zfs_fillpage(ip, pp);
 				if (error) {
 					unlock_page(pp);
 					put_page(pp);
 					return (error);
 				}
 			}
 
 			ASSERT(PageUptodate(pp) || PageDirty(pp));
 
 			/*
 			 * The page lock is dropped before the copy; the
 			 * reference from find_lock_page() is kept until
 			 * put_page() below.
 			 */
 			unlock_page(pp);
 
 			void *pb = kmap(pp);
 			error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
 			kunmap(pp);
 
 			if (mapping_writably_mapped(mp))
 				flush_dcache_page(pp);
 
 			mark_page_accessed(pp);
 			put_page(pp);
 		} else {
 			/* page not cached: read this piece through the dmu */
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, bytes);
 		}
 
 		len -= bytes;
 		/* only the first page can start mid-page */
 		off = 0;
 
 		if (error)
 			break;
 	}
 
 	return (error);
 }
 #endif /* _KERNEL */
 
 /*
  * Initialized to DMU_MAX_DELETEBLKCNT.  Not referenced in the part of the
  * file reviewed here; presumably a block-count threshold used by the
  * delete path -- TODO confirm against its users.
  */
 static unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
 
 /*
  * Write a kernel buffer to a file through zfs_write().
  *
  *	IN:	zp	- znode of file to be written to
  *		data	- bytes to write
  *		len	- number of bytes to write
  *		pos	- offset to start writing at
  *
  *	OUT:	residp	- if non-NULL, receives the number of bytes that
  *			  were not written
  *
  *	RETURN:	0 if success
  *		positive error code if failure.  EIO is	returned
  *		for a short write when residp isn't provided.
  *
  * Timestamps:
  *	zp - ctime|mtime updated if byte count > 0
  */
 int
 zfs_write_simple(znode_t *zp, const void *data, size_t len,
     loff_t pos, size_t *residp)
 {
 	struct iovec iov = {
 		.iov_base = (void *)data,
 		.iov_len = len,
 	};
 	zfs_uio_t uio;
 	fstrans_cookie_t cookie;
 	int error;

 	/* Describe the buffer as a single-segment, kernel-space uio. */
 	zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);

 	cookie = spl_fstrans_mark();
 	error = zfs_write(zp, &uio, 0, kcred);
 	spl_fstrans_unmark(cookie);

 	if (error != 0)
 		return (error);

 	/* Caller wants the residual count: a short write is not an error. */
 	if (residp != NULL) {
 		*residp = zfs_uio_resid(&uio);
 		return (0);
 	}

 	/* No way to report a short write other than failing it. */
 	if (zfs_uio_resid(&uio) != 0)
 		return (SET_ERROR(EIO));

 	return (0);
 }
 
 /*
  * Taskq callback dispatched by zfs_zrele_async(): drop the inode reference
  * passed in arg by calling iput() on it.
  */
 static void
 zfs_rele_async_task(void *arg)
 {
 	iput(arg);
 }
 
 /*
  * Release a reference on zp's inode without ever performing the final
  * iput() in the calling context.
  *
  * If the reference count is greater than one it is simply decremented.
  * If this is the last reference, the iput() is handed to the pool's zrele
  * taskq instead; the dispatch uses TQ_SLEEP and is VERIFY'd to succeed.
  */
 void
 zfs_zrele_async(znode_t *zp)
 {
 	struct inode *ip = ZTOI(zp);
 	objset_t *os = ITOZSB(ip)->z_os;
 
 	ASSERT(atomic_read(&ip->i_count) > 0);
 	ASSERT(os != NULL);
 
 	/*
 	 * If decrementing the count would put us at 0, we can't do it inline
 	 * here, because that would be synchronous. Instead, dispatch an iput
 	 * to run later.
 	 *
 	 * For more information on the dangers of a synchronous iput, see the
 	 * header comment of this file.
 	 */
 	/* adds -1 unless the count is exactly 1; false means it was 1 */
 	if (!atomic_add_unless(&ip->i_count, -1, 1)) {
 		VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
 		    zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
 	}
 }
 
 
 /*
  * Lookup an entry in a directory, or an extended attribute directory.
  * If it exists, return a held inode reference for it.
  *
  *	IN:	zdp	- znode of directory to search.
  *		nm	- name of entry to lookup.
  *		flags	- LOOKUP_XATTR set if looking for an attribute.
  *		cr	- credentials of caller.
  *		direntflags - directory lookup flags
  *		realpnp - returned pathname.
  *
  *	OUT:	zpp	- znode of located entry, NULL if not found.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	NA
  *
  * NOTE: *zpp is reset to NULL only after zfs_enter_verify_zp() succeeds.
  * The fast-path error returns and a failed zfs_enter_verify_zp() leave
  * *zpp untouched.
  */
 int
 zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
     int *direntflags, pathname_t *realpnp)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zdp);
 	int error = 0;
 
 	/*
 	 * Fast path lookup, however we must skip DNLC lookup
 	 * for case folding or normalizing lookups because the
 	 * DNLC code only stores the passed in name.  This means
 	 * creating 'a' and removing 'A' on a case insensitive
 	 * file system would work, but DNLC still thinks 'a'
 	 * exists and won't let you create it again on the next
 	 * pass through fast path.
 	 */
 	/* this whole section runs before zfs_enter_verify_zp() */
 	if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
 
 		if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
 			return (SET_ERROR(ENOTDIR));
 		} else if (zdp->z_sa_hdl == NULL) {
 			return (SET_ERROR(EIO));
 		}
 
 		/* an empty name or "." resolves to the directory itself */
 		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
 			error = zfs_fastaccesschk_execute(zdp, cr);
 			if (!error) {
 				*zpp = zdp;
 				zhold(*zpp);
 				return (0);
 			}
 			return (error);
 		}
 	}
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
 		return (error);
 
 	*zpp = NULL;
 
 	if (flags & LOOKUP_XATTR) {
 		/*
 		 * We don't allow recursive attributes..
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
 			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 
 		/*
 		 * Do we have permission to get into attribute directory?
 		 */
 
 		/* on denial, drop the hold just obtained and report no znode */
 		if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
 		    B_TRUE, cr, zfs_init_idmap))) {
 			zrele(*zpp);
 			*zpp = NULL;
 		}
 
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOTDIR));
 	}
 
 	/*
 	 * Check accessibility of directory.
 	 */
 
 	if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
 	    zfs_init_idmap))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/* when z_utf8 is set, names failing u8_validate() get EILSEQ */
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
 	if ((error == 0) && (*zpp))
 		zfs_znode_update_vfs(*zpp);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Attempt to create a new entry in a directory.  If the entry
  * already exists, truncate the file if permissible, else return
  * an error.  Return the znode of the created or truncated file.
  *
  * An empty name refers to dzp itself, which is then handled like an
  * already existing entry.
  *
  *	IN:	dzp	- znode of directory to put new file entry in.
  *		name	- name of new file entry.
  *		vap	- attributes of new file.
  *		excl	- flag indicating exclusive or non-exclusive mode.
  *		mode	- mode to open file with.
  *		cr	- credentials of caller.
  *		flag	- file flag.
  *		vsecp	- ACL to be set
  *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- znode of created or truncated entry.  Reset to NULL
  *			  at every (re)try and only set on success.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dzp - ctime|mtime updated if new entry created
  *	 zp - ctime|mtime always, atime if new
  */
 int
 zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
     int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
     zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	objset_t	*os;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid;
 	gid_t		gid;
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 	/* have_acl: acl_ids has been built and must be freed before exit. */
 	boolean_t	have_acl = B_FALSE;
 	/* waited: a previous tx assignment returned ERESTART. */
 	boolean_t	waited = B_FALSE;
 	boolean_t	skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure file system is at proper version
 	 */
 
 	gid = crgetgid(cr);
 	uid = crgetuid(cr);
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* From here on every return path has to call zfs_exit(). */
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	/* When z_utf8 is set, reject names that u8_validate() refuses. */
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	/*
 	 * Retry point.  We come back here after dmu_tx_assign() returned
 	 * ERESTART; the dirent lock has been dropped by then, so the lookup
 	 * is repeated.  acl_ids (when have_acl is set) survives the retry.
 	 */
 top:
 	*zpp = NULL;
 	if (*name == '\0') {
 		/*
 		 * Null component name refers to the directory itself.
 		 */
 		zhold(dzp);
 		zp = dzp;
 		dl = NULL;
 		error = 0;
 	} else {
 		/* possible igrab(zp) */
 		int zflg = 0;
 
 		if (flag & FIGNORECASE)
 			zflg |= ZCILOOK;
 
 		error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 		    NULL, NULL);
 		if (error) {
 			/*
 			 * Drop ACL ids left over from an earlier pass.  A
 			 * failed lookup of ".." is reported as EISDIR.
 			 */
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			if (strcmp(name, "..") == 0)
 				error = SET_ERROR(EISDIR);
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	if (zp == NULL) {
 		/* No entry with this name: create a new object. */
 		uint64_t txtype;
 		uint64_t projid = ZFS_DEFAULT_PROJID;
 
 		/*
 		 * Create a new file object and update the directory
 		 * to reference it.
 		 */
 		if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr,
 		    mnt_ns))) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			goto out;
 		}
 
 		/*
 		 * We only support the creation of regular files in
 		 * extended attribute directories.
 		 */
 
 		if ((dzp->z_pflags & ZFS_XATTR) && !S_ISREG(vap->va_mode)) {
 			if (have_acl)
 				zfs_acl_ids_free(&acl_ids);
 			error = SET_ERROR(EINVAL);
 			goto out;
 		}
 
 		/* Build the ACL ids only once; a retry reuses them. */
 		if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
 		    cr, vsecp, &acl_ids, mnt_ns)) != 0)
 			goto out;
 		have_acl = B_TRUE;
 
 		/* Only regular files and directories take dzp's project id. */
 		if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
 			projid = zfs_inherit_projid(dzp);
 		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
 			zfs_acl_ids_free(&acl_ids);
 			error = SET_ERROR(EDQUOT);
 			goto out;
 		}
 
 		/* Register all holds on the tx before assigning it. */
 		tx = dmu_tx_create(os);
 
 		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 		    ZFS_SA_BASE_ATTR_SIZE);
 
 		fuid_dirtied = zfsvfs->z_fuid_dirty;
 		if (fuid_dirtied)
 			zfs_fuid_txhold(zfsvfs, tx);
 		dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 		dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 		if (!zfsvfs->z_use_sa &&
 		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 
 		error = dmu_tx_assign(tx,
 		    (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 		if (error) {
 			/*
 			 * The dirent lock is released before waiting.  On
 			 * ERESTART we wait, abort this tx and start over
 			 * (the next attempt adds TXG_NOTHROTTLE); any other
 			 * error is returned to the caller.
 			 */
 			zfs_dirent_unlock(dl);
 			if (error == ERESTART) {
 				waited = B_TRUE;
 				dmu_tx_wait(tx);
 				dmu_tx_abort(tx);
 				goto top;
 			}
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_abort(tx);
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 		zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 		error = zfs_link_create(dl, zp, tx, ZNEW);
 		if (error != 0) {
 			/*
 			 * Since, we failed to add the directory entry for it,
 			 * delete the newly created dnode.
 			 */
 			zfs_znode_delete(zp, tx);
 			remove_inode_hash(ZTOI(zp));
 			zfs_acl_ids_free(&acl_ids);
 			dmu_tx_commit(tx);
 			goto out;
 		}
 
 		if (fuid_dirtied)
 			zfs_fuid_sync(zfsvfs, tx);
 
 		/* Log the create, then release the ACL ids and commit. */
 		txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 		if (flag & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_create(zilog, tx, txtype, dzp, zp, name,
 		    vsecp, acl_ids.z_fuidp, vap);
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_commit(tx);
 	} else {
 		int aflags = (flag & O_APPEND) ? V_APPEND : 0;
 
 		/* ACL ids from an earlier pass are not needed on this path. */
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
 
 		/*
 		 * A directory entry already exists for this name.
 		 */
 		/*
 		 * Can't truncate an existing file if in exclusive mode.
 		 */
 		if (excl) {
 			error = SET_ERROR(EEXIST);
 			goto out;
 		}
 		/*
 		 * Can't open a directory for writing.
 		 */
 		if (S_ISDIR(ZTOI(zp)->i_mode)) {
 			error = SET_ERROR(EISDIR);
 			goto out;
 		}
 		/*
 		 * Verify requested access to file.
 		 */
 		if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr,
 		    mnt_ns))) {
 			goto out;
 		}
 
 		mutex_enter(&dzp->z_lock);
 		dzp->z_seq++;
 		mutex_exit(&dzp->z_lock);
 
 		/*
 		 * Truncate regular files if requested.
 		 */
 		if (S_ISREG(ZTOI(zp)->i_mode) &&
 		    (vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
 			/* we can't hold any locks when calling zfs_freesp() */
 			if (dl) {
 				zfs_dirent_unlock(dl);
 				dl = NULL;
 			}
 			error = zfs_freesp(zp, 0, 0, mode, TRUE);
 		}
 	}
 out:
 
 	/*
 	 * dl is NULL for the empty-name case and after it was dropped for
 	 * zfs_freesp() above.
 	 */
 	if (dl)
 		zfs_dirent_unlock(dl);
 
 	if (error) {
 		/* Failure: give back the reference on zp, if we hold one. */
 		if (zp)
 			zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 		*zpp = zp;
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Create a new znode that is not entered into any directory.  The znode
  * is marked unlinked and added to the unlinked set in the same transaction
  * that creates it.
  *
  *	IN:	dip	- inode of the directory; its znode is what is passed
  *			  to zfs_zaccess(), zfs_acl_ids_create(),
  *			  zfs_inherit_projid() and zfs_mknode().
  *		vap	- attributes of new file.
  *		excl	- unused.
  *		mode	- unused.
  *		cr	- credentials of caller.
  *		flag	- unused.
  *		vsecp	- ACL to be set
  *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	ipp	- inode of the created file.  Reset to NULL at every
  *			  (re)try and only set on success.
  *
  *	RETURN:	0 on success, error code on failure.
  */
 int
 zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
     int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp,
     zidmap_t *mnt_ns)
 {
 	(void) excl, (void) mode, (void) flag;
 	znode_t		*zp = NULL, *dzp = ITOZ(dip);
 	zfsvfs_t	*zfsvfs = ITOZSB(dip);
 	objset_t	*os;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid;
 	gid_t		gid;
 	zfs_acl_ids_t   acl_ids;
 	uint64_t	projid = ZFS_DEFAULT_PROJID;
 	boolean_t	fuid_dirtied;
 	/* have_acl: acl_ids has been built and must be freed before exit. */
 	boolean_t	have_acl = B_FALSE;
 	/* waited: a previous tx assignment returned ERESTART. */
 	boolean_t	waited = B_FALSE;
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure file system is at proper version
 	 */
 
 	gid = crgetgid(cr);
 	uid = crgetuid(cr);
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	/* From here on every return path has to call zfs_exit(). */
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	os = zfsvfs->z_os;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	/*
 	 * Retry point after dmu_tx_assign() returned ERESTART.  acl_ids
 	 * (when have_acl is set) survives the retry.
 	 */
 top:
 	*ipp = NULL;
 
 	/*
 	 * Check that the caller may add a file to dzp before creating
 	 * the new object.
 	 */
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
 		if (have_acl)
 			zfs_acl_ids_free(&acl_ids);
 		goto out;
 	}
 
 	/* Build the ACL ids only once; a retry reuses them. */
 	if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
 	    cr, vsecp, &acl_ids, mnt_ns)) != 0)
 		goto out;
 	have_acl = B_TRUE;
 
 	/* Only regular files and directories take dzp's project id. */
 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
 		projid = zfs_inherit_projid(dzp);
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
 		zfs_acl_ids_free(&acl_ids);
 		error = SET_ERROR(EDQUOT);
 		goto out;
 	}
 
 	/* Register all holds on the tx before assigning it. */
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 	/* The new znode goes straight onto the unlinked set (see below). */
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	if (!zfsvfs->z_use_sa &&
 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 		    0, acl_ids.z_aclp->z_acl_bytes);
 	}
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		/*
 		 * On ERESTART wait, abort this tx and start over (the next
 		 * attempt adds TXG_NOTHROTTLE); any other error is returned.
 		 */
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	/* Add to unlinked set */
 	zp->z_unlinked = B_TRUE;
 	zfs_unlinked_add(zp, tx);
 	zfs_acl_ids_free(&acl_ids);
 	dmu_tx_commit(tx);
 out:
 
 	if (error) {
 		/* zp is still NULL when we fail before zfs_mknode(). */
 		if (zp)
 			zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 		*ipp = ZTOI(zp);
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Remove an entry from a directory.
  *
  * Directories are refused with EPERM; they have to be removed with
  * zfs_rmdir().
  *
  *	IN:	dzp	- znode of directory to remove entry from.
  *		name	- name of entry to remove.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	dzp - ctime|mtime
  *	 zp - ctime (if nlink > 0)
  */
 
 /*
  * Zero value written over SA_ZPL_XATTR by zfs_remove() when it detaches
  * the xattr directory from a znode for which z_is_sa is not set.
  */
 static uint64_t null_xattr = 0;
 
 int
 zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 {
 	znode_t		*zp;
 	znode_t		*xzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	uint64_t	acl_obj, xattr_obj;
 	uint64_t	xattr_obj_unlinked = 0;
 	uint64_t	obj = 0;
 	uint64_t	links;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	boolean_t	may_delete_now, delete_now = FALSE;
 	boolean_t	unlinked, toobig = FALSE;
 	uint64_t	txtype;
 	pathname_t	*realnmp = NULL;
 	pathname_t	realnm;
 	int		error;
 	int		zflg = ZEXISTS;
 	boolean_t	waited = B_FALSE;
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* From here on every return path has to call zfs_exit(). */
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * realnm is only allocated, and handed to zfs_dirent_lock(), for
 	 * case-insensitive requests.  It is freed on every exit path.
 	 */
 	if (flags & FIGNORECASE) {
 		zflg |= ZCILOOK;
 		pn_alloc(&realnm);
 		realnmp = &realnm;
 	}
 
 	/* Retry point after dmu_tx_assign() returned ERESTART. */
 top:
 	xattr_obj = 0;
 	xzp = NULL;
 	/*
 	 * Attempt to lock directory; fail if entry doesn't exist.
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, realnmp))) {
 		if (realnmp)
 			pn_free(realnmp);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
 		goto out;
 	}
 
 	/*
 	 * Need to use rmdir for removing directories.
 	 */
 	if (S_ISDIR(ZTOI(zp)->i_mode)) {
 		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
 	/*
 	 * First look, under z_lock, at whether the inode has exactly one
 	 * reference and no cached data.  This only decides which holds
 	 * the tx needs; the condition is checked again once the entry has
 	 * been removed.
 	 */
 	mutex_enter(&zp->z_lock);
 	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
 	    !zn_has_cached_data(zp, 0, LLONG_MAX);
 	mutex_exit(&zp->z_lock);
 
 	/*
 	 * We may delete the znode now, or we may put it in the unlinked set;
 	 * it depends on whether we're the last link, and on whether there are
 	 * other holds on the inode.  So we dmu_tx_hold() the right things to
 	 * allow for either case.
 	 */
 	obj = zp->z_id;
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	if (may_delete_now) {
 		toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
 		/* if the file is too big, only hold_free a token amount */
 		dmu_tx_hold_free(tx, zp->z_id, 0,
 		    (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
 	}
 
 	/* are there any extended attributes? */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 	    &xattr_obj, sizeof (xattr_obj));
 	if (error == 0 && xattr_obj) {
 		/* Hold the xattr directory znode until the end of the call. */
 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
 		ASSERT0(error);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 
 	/* An external ACL object is only freed if we may delete now. */
 	mutex_enter(&zp->z_lock);
 	if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
 		dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 	mutex_exit(&zp->z_lock);
 
 	/* charge as an update -- would be nice not to charge at all */
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	/*
 	 * Mark this transaction as typically resulting in a net free of space
 	 */
 	dmu_tx_mark_netfree(tx);
 
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		/*
 		 * Drop the dirent lock and both znode references.  On
 		 * ERESTART wait and start over from top (the next attempt
 		 * adds TXG_NOTHROTTLE); otherwise return the error.
 		 */
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(zp);
 			if (xzp)
 				zrele(xzp);
 			goto top;
 		}
 		if (realnmp)
 			pn_free(realnmp);
 		dmu_tx_abort(tx);
 		zrele(zp);
 		if (xzp)
 			zrele(xzp);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * Remove the directory entry.
 	 */
 	error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
 
 	if (error) {
 		dmu_tx_commit(tx);
 		goto out;
 	}
 
 	if (unlinked) {
 		/*
 		 * Hold z_lock so that we can make sure that the ACL obj
 		 * hasn't changed.  Could have been deleted due to
 		 * zfs_sa_upgrade().
 		 *
 		 * Note that z_lock stays held past this block; it is
 		 * released in one of the two branches that follow.
 		 */
 		mutex_enter(&zp->z_lock);
 		(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
 		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
 		    xattr_obj == xattr_obj_unlinked &&
 		    zfs_external_acl(zp) == acl_obj;
 		VERIFY_IMPLY(xattr_obj_unlinked, xzp);
 	}
 
 	if (delete_now) {
 		if (xattr_obj_unlinked) {
 			/*
 			 * Detach the xattr directory: mark it unlinked with
 			 * a link count of 0, put it on the unlinked set and
 			 * clear the reference to it in zp.
 			 */
 			ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
 			mutex_enter(&xzp->z_lock);
 			xzp->z_unlinked = B_TRUE;
 			clear_nlink(ZTOI(xzp));
 			links = 0;
 			error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
 			    &links, sizeof (links), tx);
 			ASSERT3U(error,  ==,  0);
 			mutex_exit(&xzp->z_lock);
 			zfs_unlinked_add(xzp, tx);
 
 			if (zp->z_is_sa)
 				error = sa_remove(zp->z_sa_hdl,
 				    SA_ZPL_XATTR(zfsvfs), tx);
 			else
 				error = sa_update(zp->z_sa_hdl,
 				    SA_ZPL_XATTR(zfsvfs), &null_xattr,
 				    sizeof (uint64_t), tx);
 			ASSERT0(error);
 		}
 		/*
 		 * Add to the unlinked set because a new reference could be
 		 * taken concurrently resulting in a deferred destruction.
 		 */
 		zfs_unlinked_add(zp, tx);
 		mutex_exit(&zp->z_lock);
 	} else if (unlinked) {
 		mutex_exit(&zp->z_lock);
 		zfs_unlinked_add(zp, tx);
 	}
 
 	txtype = TX_REMOVE;
 	if (flags & FIGNORECASE)
 		txtype |= TX_CI;
 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
 
 	dmu_tx_commit(tx);
 	/*
 	 * Common exit.  Also reached with an error before a tx was created
 	 * (access or type check) and after zfs_link_destroy() failed; in
 	 * those cases delete_now is still FALSE.
 	 */
 out:
 	if (realnmp)
 		pn_free(realnmp);
 
 	zfs_dirent_unlock(dl);
 	zfs_znode_update_vfs(dzp);
 	zfs_znode_update_vfs(zp);
 
 	/* Release synchronously only when we decided to delete right away. */
 	if (delete_now)
 		zrele(zp);
 	else
 		zfs_zrele_async(zp);
 
 	if (xzp) {
 		zfs_znode_update_vfs(xzp);
 		zfs_zrele_async(xzp);
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Create a new directory and insert it into dzp using the name
  * provided.  Return a pointer to the inserted directory.
  *
  * Directories cannot be created inside an extended attribute directory
  * (dzp with ZFS_XATTR set); that request fails with EINVAL.
  *
  *	IN:	dzp	- znode of directory to add subdir to.
  *		dirname	- name of new directory.
  *		vap	- attributes of new directory.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *		vsecp	- ACL to be set
  *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- znode of created directory.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	dzp - ctime|mtime updated
  *	zpp - ctime|mtime|atime updated
  */
 int
 zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
     cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	uint64_t	txtype;
 	dmu_tx_t	*tx;
 	int		error;
 	/* ZNEW is passed to zfs_dirent_lock(); see the comment above top. */
 	int		zf = ZNEW;
 	uid_t		uid;
 	gid_t		gid = crgetgid(cr);
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 	/* waited: a previous tx assignment returned ERESTART. */
 	boolean_t	waited = B_FALSE;
 
 	ASSERT(S_ISDIR(vap->va_mode));
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure file system is at proper version
 	 */
 
 	uid = crgetuid(cr);
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	if (dirname == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* From here on every return path has to call zfs_exit(). */
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/* When z_utf8 is set, reject names that u8_validate() refuses. */
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
 
 	if (vap->va_mask & ATTR_XVATTR) {
 		if ((error = secpolicy_xvattr((xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_mode)) != 0) {
 			zfs_exit(zfsvfs, FTAG);
 			return (error);
 		}
 	}
 
 	/*
 	 * The ACL ids are built once, before the retry label, so every
 	 * exit path below has to free them.
 	 */
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
 	    vsecp, &acl_ids, mnt_ns)) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/*
 	 * First make sure the new directory doesn't exist.
 	 *
 	 * Existence is checked first to make sure we don't return
 	 * EACCES instead of EEXIST which can cause some applications
 	 * to fail.
 	 */
 top:
 	*zpp = NULL;
 
 	if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
 	    NULL, NULL))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
 	    mnt_ns))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 
 	/*
 	 * Add a new entry to the directory.
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		/*
 		 * The dirent lock is released before waiting.  On ERESTART
 		 * wait, abort this tx and start over from top (the next
 		 * attempt adds TXG_NOTHROTTLE); otherwise return the error.
 		 */
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * Create new node.
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	/*
 	 * Now put new name in parent dir.
 	 */
 	error = zfs_link_create(dl, zp, tx, ZNEW);
 	if (error != 0) {
 		/* The entry could not be added: delete the new znode again. */
 		zfs_znode_delete(zp, tx);
 		remove_inode_hash(ZTOI(zp));
 		goto out;
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	*zpp = zp;
 
 	txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
 	if (flags & FIGNORECASE)
 		txtype |= TX_CI;
 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
 	    acl_ids.z_fuidp, vap);
 
 	/*
 	 * Reached on success (fall through) and when zfs_link_create()
 	 * failed; the tx is committed in both cases.
 	 */
 out:
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	if (error != 0) {
 		/* Give back the reference on the znode we just deleted. */
 		zrele(zp);
 	} else {
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Remove a directory subdir entry.  If the current working
  * directory is the same as the subdir to be removed, the
  * remove will fail.
  *
  *	IN:	dzp	- znode of directory to remove from.
  *		name	- name of directory to be removed.
  *		cwd	- znode of current working directory.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *		ENOTDIR if the entry is not a directory, EINVAL if it
  *		is the current working directory.
  *
  * Timestamps:
  *	dzp - ctime|mtime updated
  */
 int
 zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
     int flags)
 {
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	int		zflg = ZEXISTS;
 	/* waited: a previous tx assignment returned ERESTART. */
 	boolean_t	waited = B_FALSE;
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* From here on every return path has to call zfs_exit(). */
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 	/* Retry point after dmu_tx_assign() returned ERESTART. */
 top:
 	zp = NULL;
 
 	/*
 	 * Attempt to lock directory; fail if entry doesn't exist.
 	 */
 	if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
 	    NULL, NULL))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
 		goto out;
 	}
 
 	if (!S_ISDIR(ZTOI(zp)->i_mode)) {
 		error = SET_ERROR(ENOTDIR);
 		goto out;
 	}
 
 	/* Refuse to remove the caller's current working directory. */
 	if (zp == cwd) {
 		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	/*
 	 * Grab a lock on the directory to make sure that no one is
 	 * trying to add (or lookup) entries while we are removing it.
 	 */
 	rw_enter(&zp->z_name_lock, RW_WRITER);
 
 	/*
 	 * Grab a lock on the parent pointer to make sure we play well
 	 * with the treewalk and directory rename code.
 	 */
 	rw_enter(&zp->z_parent_lock, RW_WRITER);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		/*
 		 * Release the locks in the reverse order they were taken,
 		 * then either wait and retry from top (ERESTART; the next
 		 * attempt adds TXG_NOTHROTTLE) or return the error.
 		 */
 		rw_exit(&zp->z_parent_lock);
 		rw_exit(&zp->z_name_lock);
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(zp);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zrele(zp);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
 
 	/* Only a successful removal is logged; the tx commits either way. */
 	if (error == 0) {
 		uint64_t txtype = TX_RMDIR;
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT,
 		    B_FALSE);
 	}
 
 	dmu_tx_commit(tx);
 
 	rw_exit(&zp->z_parent_lock);
 	rw_exit(&zp->z_name_lock);
 	/*
 	 * Common exit.  The early checks jump here before z_name_lock and
 	 * z_parent_lock are taken, so only the dirent lock is dropped below.
 	 */
 out:
 	zfs_dirent_unlock(dl);
 
 	zfs_znode_update_vfs(dzp);
 	zfs_znode_update_vfs(zp);
 	zrele(zp);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Read directory entries from the given directory cursor position and emit
  * name and position for each entry.
  *
  *	IN:	ip	- inode of directory to read.
  *		ctx	- directory entry context; ctx->pos is read as the
  *			  starting position and updated after each entry
  *			  that was emitted.
  *		cr	- credentials of caller (unused).
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - atime updated
  *
  * Note that the low 4 bits of the cookie returned by zap are always zero.
  * This allows us to use the low range for "special" directory entries:
  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
  * we use the offset 2 for the '.zfs' directory.
  */
 int
 zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 {
 	(void) cr;
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	objset_t	*os;
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	int		error;
 	uint8_t		prefetch;
 	uint8_t		type;
 	int		done = 0;
 	uint64_t	parent;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
 
 	/* The parent object number is what gets reported for "..". */
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0)
 		goto out;
 
 	/*
 	 * Quit if directory has been removed (posix).  error is 0 here,
 	 * so the caller sees success with no entries emitted.
 	 */
 	if (zp->z_unlinked)
 		goto out;
 
 	error = 0;
 	os = zfsvfs->z_os;
 	offset = ctx->pos;
 	prefetch = zp->z_zn_prefetch;
 
 	/*
 	 * Initialize the iterator cursor.
 	 */
 	if (offset <= 3) {
 		/*
 		 * Start iteration from the beginning of the directory.
 		 */
 		zap_cursor_init(&zc, os, zp->z_id);
 	} else {
 		/*
 		 * The offset is a serialized cursor.
 		 */
 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
 	}
 
 	/*
 	 * Transform to file-system independent format
 	 */
 	while (!done) {
 		uint64_t objnum;
 		/*
 		 * Special case `.', `..', and `.zfs'.
 		 */
 		if (offset == 0) {
 			(void) strcpy(zap.za_name, ".");
 			zap.za_normalization_conflict = 0;
 			objnum = zp->z_id;
 			type = DT_DIR;
 		} else if (offset == 1) {
 			(void) strcpy(zap.za_name, "..");
 			zap.za_normalization_conflict = 0;
 			objnum = parent;
 			type = DT_DIR;
 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
 			zap.za_normalization_conflict = 0;
 			objnum = ZFSCTL_INO_ROOT;
 			type = DT_DIR;
 		} else {
 			/*
 			 * Grab next entry.  ENOENT marks the end of the
 			 * directory and just ends the loop; any other
 			 * error is returned.
 			 */
 			if ((error = zap_cursor_retrieve(&zc, &zap))) {
 				if (error == ENOENT)
 					break;
 				else
 					goto update;
 			}
 
 			/*
 			 * Allow multiple entries provided the first entry is
 			 * the object id.  Non-zpl consumers may safely make
 			 * use of the additional space.
 			 *
 			 * XXX: This should be a feature flag for compatibility
 			 */
 			if (zap.za_integer_length != 8 ||
 			    zap.za_num_integers == 0) {
 				cmn_err(CE_WARN, "zap_readdir: bad directory "
 				    "entry, obj = %lld, offset = %lld, "
 				    "length = %d, num = %lld\n",
 				    (u_longlong_t)zp->z_id,
 				    (u_longlong_t)offset,
 				    zap.za_integer_length,
 				    (u_longlong_t)zap.za_num_integers);
 				error = SET_ERROR(ENXIO);
 				goto update;
 			}
 
 			/* Object number and type both come from the value. */
 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 		}
 
 		/*
 		 * Stop as soon as zpl_dir_emit() reports failure; ctx->pos
 		 * is then left pointing at the entry that was not emitted.
 		 */
 		done = !zpl_dir_emit(ctx, zap.za_name, strlen(zap.za_name),
 		    objnum, type);
 		if (done)
 			break;
 
 		if (prefetch)
 			dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
 
 		/*
 		 * Move to the next entry, fill in the previous offset.
 		 * Offsets of the special entries simply count up; once we
 		 * are reading from the zap the offset is the serialized
 		 * cursor.
 		 */
 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
 			zap_cursor_advance(&zc);
 			offset = zap_cursor_serialize(&zc);
 		} else {
 			offset += 1;
 		}
 		ctx->pos = offset;
 	}
 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
 
 update:
 	zap_cursor_fini(&zc);
 	/* Running off the end of the directory is not an error. */
 	if (error == ENOENT)
 		error = 0;
 out:
 	zfs_exit(zfsvfs, FTAG);
 
 	return (error);
 }
 
 /*
  * Fill in the kstat structure for an inode.  The generic fillattr helper
  * supplies the bulk of the fields from the inode; afterwards the link
  * count, block size, block count and, for the root of a snapshot, the
  * inode number are adjusted from the znode and filesystem state.
  *
  *	IN:	user_ns		- passed through to zpl_generic_fillattr().
  *		request_mask	- passed through to zpl_generic_fillattr()
  *				  (HAVE_GENERIC_FILLATTR_IDMAP_REQMASK only).
  *		ip		- inode of file.
  *
  *	OUT:	sp	- kstat values.
  *
  *	RETURN:	0 on success, or the error from zfs_enter_verify_zp().
  */
 int
 #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
 zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
     struct kstat *sp)
 #else
 zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
 #endif
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint32_t blk_bytes;
 	u_longlong_t blk_count;
 	int rc;
 
 	rc = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	mutex_enter(&zp->z_lock);
 
 #ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
 	zpl_generic_fillattr(user_ns, request_mask, ip, sp);
 #else
 	zpl_generic_fillattr(user_ns, ip, sp);
 #endif
 
 	/*
 	 * The root inode gets one extra link while the '.zfs' directory is
 	 * visible, as long as that stays below ZFS_LINK_MAX.
 	 */
 	if (zp->z_id == zfsvfs->z_root && zfs_show_ctldir(zp) &&
 	    sp->nlink < ZFS_LINK_MAX) {
 		sp->nlink++;
 	}
 
 	sa_object_size(zp->z_sa_hdl, &blk_bytes, &blk_count);
 	sp->blocks = blk_count;
 
 	/*
 	 * With no block size set on the znode yet, report the largest
 	 * block size of the filesystem to suggest maximal I/O transfers.
 	 */
 	if (unlikely(zp->z_blksz == 0))
 		sp->blksize = zfsvfs->z_max_blksz;
 	else
 		sp->blksize = blk_bytes;
 
 	mutex_exit(&zp->z_lock);
 
 	/*
 	 * The root dentry of a snapshot must keep the same inode number
 	 * before and after the snapshot is mounted, otherwise an NFS
 	 * client notices the change.
 	 */
 	if (zfsvfs->z_issnap && ip->i_sb->s_root->d_inode == ip) {
 		sp->ino = ZFSCTL_INO_SNAPDIRS -
 		    dmu_objset_id(zfsvfs->z_os);
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 
 	return (0);
 }
 
 /*
  * When a file's user/group/project is changed, we need to handle not
  * only the main object that is assigned to the file directly, but also
  * the objects the file uses via its hidden xattr directory.
  *
  * The xattr directory may contain many EA entries, so it may be
  * impossible to change all of them in the same transaction that changes
  * the main object's user/group/project attributes.  Instead each entry
  * is changed in its own independent transaction, one by one.  This is
  * not an ideal solution, but we have no better idea yet.
  *
  *	IN:	dzp	- znode of the directory whose entries are walked;
  *			  its uid, gid and project id are the values the
  *			  entries are brought in line with.
  *
  *	RETURN:	0 on success (reaching the end of the directory counts as
  *		success), error code on failure.
  */
 static int
 zfs_setattr_dir(znode_t *dzp)
 {
 	struct inode	*dxip = ZTOI(dzp);
 	struct inode	*xip = NULL;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	objset_t	*os = zfsvfs->z_os;
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	zfs_dirlock_t	*dl;
 	znode_t		*zp = NULL;
 	dmu_tx_t	*tx = NULL;
 	uint64_t	uid, gid;
 	sa_bulk_attr_t	bulk[4];
 	int		count;
 	int		err;
 
 	zap_cursor_init(&zc, os, dzp->z_id);
 	while ((err = zap_cursor_retrieve(&zc, &zap)) == 0) {
 		/* count is the number of attributes queued in bulk[]. */
 		count = 0;
 		if (zap.za_integer_length != 8 || zap.za_num_integers != 1) {
 			err = ENXIO;
 			break;
 		}
 
 		/* An entry that is gone by now (ENOENT) is simply skipped. */
 		err = zfs_dirent_lock(&dl, dzp, (char *)zap.za_name, &zp,
 		    ZEXISTS, NULL, NULL);
 		if (err == ENOENT)
 			goto next;
 		if (err)
 			break;
 
 		/* Nothing to do when the entry already matches dzp. */
 		xip = ZTOI(zp);
 		if (KUID_TO_SUID(xip->i_uid) == KUID_TO_SUID(dxip->i_uid) &&
 		    KGID_TO_SGID(xip->i_gid) == KGID_TO_SGID(dxip->i_gid) &&
 		    zp->z_projid == dzp->z_projid)
 			goto next;
 
 		/* One transaction per entry. */
 		tx = dmu_tx_create(os);
 		if (!(zp->z_pflags & ZFS_PROJID))
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		else
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 
 		/* On failure tx stays set and is aborted after the loop. */
 		err = dmu_tx_assign(tx, TXG_WAIT);
 		if (err)
 			break;
 
 		mutex_enter(&dzp->z_lock);
 
 		if (KUID_TO_SUID(xip->i_uid) != KUID_TO_SUID(dxip->i_uid)) {
 			xip->i_uid = dxip->i_uid;
 			uid = zfs_uid_read(dxip);
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 			    &uid, sizeof (uid));
 		}
 
 		if (KGID_TO_SGID(xip->i_gid) != KGID_TO_SGID(dxip->i_gid)) {
 			xip->i_gid = dxip->i_gid;
 			gid = zfs_gid_read(dxip);
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
 			    &gid, sizeof (gid));
 		}
 
-		if (zp->z_projid != dzp->z_projid) {
+
+		uint64_t projid = dzp->z_projid;
+		if (zp->z_projid != projid) {
 			if (!(zp->z_pflags & ZFS_PROJID)) {
-				zp->z_pflags |= ZFS_PROJID;
-				SA_ADD_BULK_ATTR(bulk, count,
-				    SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags,
-				    sizeof (zp->z_pflags));
+				err = sa_add_projid(zp->z_sa_hdl, tx, projid);
+				if (unlikely(err == EEXIST)) {
+					err = 0;
+				} else if (err != 0) {
+					goto sa_add_projid_err;
+				} else {
+					projid = ZFS_INVALID_PROJID;
+				}
 			}
 
-			zp->z_projid = dzp->z_projid;
-			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PROJID(zfsvfs),
-			    NULL, &zp->z_projid, sizeof (zp->z_projid));
+			if (projid != ZFS_INVALID_PROJID) {
+				zp->z_projid = projid;
+				SA_ADD_BULK_ATTR(bulk, count,
+				    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
+				    sizeof (zp->z_projid));
+			}
 		}
 
+sa_add_projid_err:
 		mutex_exit(&dzp->z_lock);
 
 		if (likely(count > 0)) {
 			err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 			dmu_tx_commit(tx);
+		} else if (projid == ZFS_INVALID_PROJID) {
+			dmu_tx_commit(tx);
 		} else {
 			dmu_tx_abort(tx);
 		}
 		/* The tx has been committed or aborted; forget it. */
 		tx = NULL;
 		if (err != 0 && err != ENOENT)
 			break;
 
 next:
 		/* Drop the reference and dirent lock taken for this entry. */
 		if (zp) {
 			zrele(zp);
 			zp = NULL;
 			zfs_dirent_unlock(dl);
 		}
 		zap_cursor_advance(&zc);
 	}
 
 	/* Clean up whatever a break out of the loop left behind. */
 	if (tx)
 		dmu_tx_abort(tx);
 	if (zp) {
 		zrele(zp);
 		zfs_dirent_unlock(dl);
 	}
 	zap_cursor_fini(&zc);
 
 	return (err == ENOENT ? 0 : err);
 }
 
 /*
  * Set the file attributes to the values contained in the
  * vattr structure.
  *
  *	IN:	zp	- znode of file to be modified.
  *		vap	- new attribute values.
  *			  If ATTR_XVATTR set, then optional attrs are being set
  *		flags	- ATTR_UTIME set if non-default time values provided.
  *			- ATTR_NOACLCHECK (CIFS context only).
  *		cr	- credentials of caller.
  *		mnt_ns	- user namespace of the mount
  *
  *	RETURN:	0 if success (including the no-op case of an empty va_mask)
  *		error code if failure
  *
  * Timestamps:
  *	ip - ctime updated, mtime updated if size changed.
  *
  * Cleanup labels, from innermost to outermost:
  *	out	- a transaction exists; commit or abort it and drop attrzp.
  *	out2	- no transaction to finish; call zil_commit() when os_sync
  *		  is ZFS_SYNC_ALWAYS.
  *	out3	- free the temporary xvattr and SA bulk arrays, then
  *		  zfs_exit().
  */
 int
 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
 {
 	struct inode	*ip;
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	objset_t	*os;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	vattr_t		oldva;
 	xvattr_t	*tmpxvattr;
 	uint_t		mask = vap->va_mask;
 	uint_t		saved_mask = 0;
 	int		trim_mask = 0;
 	uint64_t	new_mode;
 	uint64_t	new_kuid = 0, new_kgid = 0, new_uid, new_gid;
 	uint64_t	xattr_obj;
 	uint64_t	mtime[2], ctime[2], atime[2];
 	uint64_t	projid = ZFS_INVALID_PROJID;
 	znode_t		*attrzp;
 	int		need_policy = FALSE;
 	int		err, err2 = 0;
 	zfs_fuid_info_t *fuidp = NULL;
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t	*xoap;
 	zfs_acl_t	*aclp;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	boolean_t	fuid_dirtied = B_FALSE;
 	boolean_t	handle_eadir = B_FALSE;
 	sa_bulk_attr_t	*bulk, *xattr_bulk;
 	int		count = 0, xattr_count = 0, bulks = 8;
 
 	/* Nothing requested, nothing to do. */
 	if (mask == 0)
 		return (0);
 
 	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (err);
 	ip = ZTOI(zp);
 	os = zfsvfs->z_os;
 
 	/*
 	 * If this is a xvattr_t, then get a pointer to the structure of
 	 * optional attributes.  If this is NULL, then we have a vattr_t.
 	 */
 	xoap = xva_getxoptattr(xvap);
 	if (xoap != NULL && (mask & ATTR_XVATTR)) {
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
 				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
 				zfs_exit(zfsvfs, FTAG);
 				return (SET_ERROR(EINVAL));
 			}
 
 			/*
 			 * An unchanged project ID is treated as "not
 			 * requested" by resetting projid to the invalid
 			 * sentinel; a real change needs a policy check.
 			 */
 			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
 				projid = ZFS_INVALID_PROJID;
 			else
 				need_policy = TRUE;
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
 		    (xoap->xoa_projinherit !=
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
 			zfs_exit(zfsvfs, FTAG);
 			return (SET_ERROR(ENOTSUP));
 		}
 	}
 
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * Make sure that if we have ephemeral uid/gid or xvattr specified
 	 * that file system is at proper version level
 	 */
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & ATTR_XVATTR))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Temporary allocations start here; from this point on every exit
 	 * path must go through out3 so that they are freed.
 	 */
 	tmpxvattr = kmem_alloc(sizeof (xvattr_t), KM_SLEEP);
 	xva_init(tmpxvattr);
 
 	bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
 	xattr_bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * bulks, KM_SLEEP);
 
 	/*
 	 * Immutable files can only alter immutable bit and atime
 	 */
 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
 	    ((mask & (ATTR_SIZE|ATTR_UID|ATTR_GID|ATTR_MTIME|ATTR_MODE)) ||
 	    ((mask & ATTR_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
 		err = SET_ERROR(EPERM);
 		goto out3;
 	}
 
 	if ((mask & ATTR_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
 		err = SET_ERROR(EPERM);
 		goto out3;
 	}
 
 	/*
 	 * Verify timestamps doesn't overflow 32 bits.
 	 * ZFS can handle large timestamps, but 32bit syscalls can't
 	 * handle times greater than 2039.  This check should be removed
 	 * once large timestamps are fully supported.
 	 */
 	if (mask & (ATTR_ATIME | ATTR_MTIME)) {
 		if (((mask & ATTR_ATIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_atime)) ||
 		    ((mask & ATTR_MTIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_mtime))) {
 			err = SET_ERROR(EOVERFLOW);
 			goto out3;
 		}
 	}
 
 top:
 	/* Re-entered from the error path below when err == ERESTART. */
 	attrzp = NULL;
 	aclp = NULL;
 
 	/* Can this be moved to before the top label? */
 	if (zfs_is_readonly(zfsvfs)) {
 		err = SET_ERROR(EROFS);
 		goto out3;
 	}
 
 	/*
 	 * First validate permissions
 	 */
 
 	if (mask & ATTR_SIZE) {
 		err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr,
 		    mnt_ns);
 		if (err)
 			goto out3;
 
 		/*
 		 * XXX - Note, we are not providing any open
 		 * mode flags here (like FNDELAY), so we may
 		 * block if there are locks present... this
 		 * should be addressed in openat().
 		 */
 		/* XXX - would it be OK to generate a log record here? */
 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
 		if (err)
 			goto out3;
 	}
 
 	if (mask & (ATTR_ATIME|ATTR_MTIME) ||
 	    ((mask & ATTR_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
 		/* A non-zero zfs_zaccess() result defers to the policy check. */
 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
 		    skipaclchk, cr, mnt_ns);
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID)) {
 		int	idmask = (mask & (ATTR_UID|ATTR_GID));
 		int	take_owner;
 		int	take_group;
 		uid_t	uid;
 		gid_t	gid;
 
 		/*
 		 * NOTE: even if a new mode is being set,
 		 * we may clear S_ISUID/S_ISGID bits.
 		 */
 
 		if (!(mask & ATTR_MODE))
 			vap->va_mode = zp->z_mode;
 
 		/*
 		 * Take ownership or chgrp to group we are a member of
 		 */
 
 		uid = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ip),
 		    vap->va_uid);
 		gid = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ip),
 		    vap->va_gid);
 		take_owner = (mask & ATTR_UID) && (uid == crgetuid(cr));
 		take_group = (mask & ATTR_GID) &&
 		    zfs_groupmember(zfsvfs, gid, cr);
 
 		/*
 		 * If both ATTR_UID and ATTR_GID are set then take_owner and
 		 * take_group must both be set in order to allow taking
 		 * ownership.
 		 *
 		 * Otherwise, send the check through secpolicy_vnode_setattr()
 		 *
 		 */
 
 		if (((idmask == (ATTR_UID|ATTR_GID)) &&
 		    take_owner && take_group) ||
 		    ((idmask == ATTR_UID) && take_owner) ||
 		    ((idmask == ATTR_GID) && take_group)) {
 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
 			    skipaclchk, cr, mnt_ns) == 0) {
 				/*
 				 * Remove setuid/setgid for non-privileged users
 				 */
 				(void) secpolicy_setid_clear(vap, cr);
 				trim_mask = (mask & (ATTR_UID|ATTR_GID));
 			} else {
 				need_policy =  TRUE;
 			}
 		} else {
 			need_policy =  TRUE;
 		}
 	}
 
 	mutex_enter(&zp->z_lock);
 	oldva.va_mode = zp->z_mode;
 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
 	if (mask & ATTR_XVATTR) {
 		/*
 		 * Update xvattr mask to include only those attributes
 		 * that are actually changing.
 		 *
 		 * the bits will be restored prior to actually setting
 		 * the attributes so the caller thinks they were set.
 		 */
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			if (xoap->xoa_appendonly !=
 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
 				XVA_SET_REQ(tmpxvattr, XAT_APPENDONLY);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
 			if (xoap->xoa_projinherit !=
 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
 				XVA_SET_REQ(tmpxvattr, XAT_PROJINHERIT);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			if (xoap->xoa_nounlink !=
 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
 				XVA_SET_REQ(tmpxvattr, XAT_NOUNLINK);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			if (xoap->xoa_immutable !=
 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
 				XVA_SET_REQ(tmpxvattr, XAT_IMMUTABLE);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			if (xoap->xoa_nodump !=
 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NODUMP);
 				XVA_SET_REQ(tmpxvattr, XAT_NODUMP);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			if (xoap->xoa_av_modified !=
 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
 				XVA_SET_REQ(tmpxvattr, XAT_AV_MODIFIED);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 			if ((!S_ISREG(ip->i_mode) &&
 			    xoap->xoa_av_quarantined) ||
 			    xoap->xoa_av_quarantined !=
 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
 				XVA_SET_REQ(tmpxvattr, XAT_AV_QUARANTINED);
 			}
 		}
 
 		/* Setting XAT_REPARSE through this path is always refused. */
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 			mutex_exit(&zp->z_lock);
 			err = SET_ERROR(EPERM);
 			goto out3;
 		}
 
 		if (need_policy == FALSE &&
 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
 			need_policy = TRUE;
 		}
 	}
 
 	mutex_exit(&zp->z_lock);
 
 	if (mask & ATTR_MODE) {
 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
 		    mnt_ns) == 0) {
 			err = secpolicy_setid_setsticky_clear(ip, vap,
 			    &oldva, cr, mnt_ns, zfs_i_user_ns(ip));
 			if (err)
 				goto out3;
 			trim_mask |= ATTR_MODE;
 		} else {
 			need_policy = TRUE;
 		}
 	}
 
 	if (need_policy) {
 		/*
 		 * If trim_mask is set then take ownership
 		 * has been granted or write_acl is present and user
 		 * has the ability to modify mode.  In that case remove
 		 * UID|GID and or MODE from mask so that
 		 * secpolicy_vnode_setattr() doesn't revoke it.
 		 */
 
 		if (trim_mask) {
 			saved_mask = vap->va_mask;
 			vap->va_mask &= ~trim_mask;
 		}
 		err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
 		    zfs_zaccess_unix, zp);
 		if (err)
 			goto out3;
 
 		if (trim_mask)
 			vap->va_mask |= saved_mask;
 	}
 
 	/*
 	 * secpolicy_vnode_setattr, or take ownership may have
 	 * changed va_mask
 	 */
 	mask = vap->va_mask;
 
 	/*
 	 * Ownership and project ID changes are mirrored onto the extended
 	 * attribute directory (attrzp), if the file has one, and are
 	 * subject to quota checks before any transaction is created.
 	 */
 	if ((mask & (ATTR_UID | ATTR_GID)) || projid != ZFS_INVALID_PROJID) {
 		handle_eadir = B_TRUE;
 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 		    &xattr_obj, sizeof (xattr_obj));
 
 		if (err == 0 && xattr_obj) {
 			err = zfs_zget(ZTOZSB(zp), xattr_obj, &attrzp);
 			if (err)
 				goto out2;
 		}
 		if (mask & ATTR_UID) {
 			new_kuid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
 			if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
 			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
 			    new_kuid)) {
 				if (attrzp)
 					zrele(attrzp);
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (mask & ATTR_GID) {
 			new_kgid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
 			if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
 			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 			    new_kgid)) {
 				if (attrzp)
 					zrele(attrzp);
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (projid != ZFS_INVALID_PROJID &&
 		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
 			if (attrzp)
 				zrele(attrzp);
 			/* Use SET_ERROR() like the user/group checks above. */
 			err = SET_ERROR(EDQUOT);
 			goto out2;
 		}
 	}
 	tx = dmu_tx_create(os);
 
 	if (mask & ATTR_MODE) {
 		uint64_t pmode = zp->z_mode;
 		uint64_t acl_obj;
 		/* Keep the file type bits; take everything else from vap. */
 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
 
 		if (ZTOZSB(zp)->z_acl_mode == ZFS_ACL_RESTRICTED &&
 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
 			err = SET_ERROR(EPERM);
 			goto out;
 		}
 
 		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
 			goto out;
 
 		mutex_enter(&zp->z_lock);
 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
 			/*
 			 * Are we upgrading ACL from old V0 format
 			 * to V1 format?
 			 */
 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
 			    zfs_znode_acl_version(zp) ==
 			    ZFS_ACL_VERSION_INITIAL) {
 				dmu_tx_hold_free(tx, acl_obj, 0,
 				    DMU_OBJECT_END);
 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 				    0, aclp->z_acl_bytes);
 			} else {
 				dmu_tx_hold_write(tx, acl_obj, 0,
 				    aclp->z_acl_bytes);
 			}
 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, aclp->z_acl_bytes);
 		}
 		mutex_exit(&zp->z_lock);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 	} else {
 		if (((mask & ATTR_XVATTR) &&
 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
 		    (projid != ZFS_INVALID_PROJID &&
 		    !(zp->z_pflags & ZFS_PROJID)))
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		else
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	}
 
 	if (attrzp) {
 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
 	}
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err)
 		goto out;
 
 	count = 0;
 	/*
 	 * Set each attribute requested.
 	 * We group settings according to the locks they need to acquire.
 	 *
 	 * Note: you cannot set ctime directly, although it will be
 	 * updated as a side-effect of calling this function.
 	 */
 
 	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
 		/*
 		 * For the existed object that is upgraded from old system,
 		 * its on-disk layout has no slot for the project ID attribute.
 		 * But quota accounting logic needs to access related slots by
 		 * offset directly. So we need to adjust old objects' layout
 		 * to make the project ID to some unified and fixed offset.
 		 */
 		if (attrzp)
 			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
 		if (err == 0)
 			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
 
 		/*
 		 * On success the project ID has been written already, so
 		 * clear projid to skip the bulk update of it further down.
 		 */
 		if (unlikely(err == EEXIST))
 			err = 0;
 		else if (err != 0)
 			goto out;
 		else
 			projid = ZFS_INVALID_PROJID;
 	}
 
 	/*
 	 * Lock order: z_acl_lock (only when ownership or mode changes)
 	 * before z_lock, first on zp and then on attrzp.
 	 */
 	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 		mutex_enter(&zp->z_acl_lock);
 	mutex_enter(&zp->z_lock);
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 
 	if (attrzp) {
 		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 			mutex_enter(&attrzp->z_acl_lock);
 		mutex_enter(&attrzp->z_lock);
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
 		    sizeof (attrzp->z_pflags));
 		if (projid != ZFS_INVALID_PROJID) {
 			attrzp->z_projid = projid;
 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
 			    sizeof (attrzp->z_projid));
 		}
 	}
 
 	if (mask & (ATTR_UID|ATTR_GID)) {
 
 		if (mask & ATTR_UID) {
 			ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
 			new_uid = zfs_uid_read(ZTOI(zp));
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 			    &new_uid, sizeof (new_uid));
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
 				    sizeof (new_uid));
 				ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
 			}
 		}
 
 		if (mask & ATTR_GID) {
 			ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
 			new_gid = zfs_gid_read(ZTOI(zp));
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
 			    NULL, &new_gid, sizeof (new_gid));
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
 				    sizeof (new_gid));
 				ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
 			}
 		}
 		if (!(mask & ATTR_MODE)) {
 			/*
 			 * The bulk entry records the address of new_mode;
 			 * the value is filled in right after.
 			 */
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
 			    NULL, &new_mode, sizeof (new_mode));
 			new_mode = zp->z_mode;
 		}
 		err = zfs_acl_chown_setattr(zp);
 		ASSERT(err == 0);
 		if (attrzp) {
 			err = zfs_acl_chown_setattr(attrzp);
 			ASSERT(err == 0);
 		}
 	}
 
 	if (mask & ATTR_MODE) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 		    &new_mode, sizeof (new_mode));
 		zp->z_mode = ZTOI(zp)->i_mode = new_mode;
 		ASSERT3P(aclp, !=, NULL);
 		err = zfs_aclset_common(zp, aclp, cr, tx);
 		ASSERT0(err);
 		if (zp->z_acl_cached)
 			zfs_acl_free(zp->z_acl_cached);
 		/* The znode now owns aclp; do not free it at out. */
 		zp->z_acl_cached = aclp;
 		aclp = NULL;
 	}
 
 	if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
 		zp->z_atime_dirty = B_FALSE;
 		inode_timespec_t tmp_atime = zpl_inode_get_atime(ip);
 		ZFS_TIME_ENCODE(&tmp_atime, atime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 		    &atime, sizeof (atime));
 	}
 
 	if (mask & (ATTR_MTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 		zpl_inode_set_mtime_to_ts(ZTOI(zp),
 		    zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    mtime, sizeof (mtime));
 	}
 
 	if (mask & (ATTR_CTIME | ATTR_SIZE)) {
 		ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
 		zpl_inode_set_ctime_to_ts(ZTOI(zp),
 		    zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    ctime, sizeof (ctime));
 	}
 
 	if (projid != ZFS_INVALID_PROJID) {
 		zp->z_projid = projid;
 		SA_ADD_BULK_ATTR(bulk, count,
 		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
 		    sizeof (zp->z_projid));
 	}
 
 	/*
 	 * NOTE(review): ctime[] is only encoded above when ATTR_CTIME or
 	 * ATTR_SIZE is in mask, yet it is written to the xattr directory
 	 * for any non-zero mask.  Confirm that every caller reaching this
 	 * point with attrzp set supplies one of those bits.
 	 */
 	if (attrzp && mask) {
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
 		    sizeof (ctime));
 	}
 
 	/*
 	 * Do this after setting timestamps to prevent timestamp
 	 * update from toggling bit
 	 */
 
 	if (xoap && (mask & ATTR_XVATTR)) {
 
 		/*
 		 * restore trimmed off masks
 		 * so that return masks can be set for caller.
 		 */
 
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_APPENDONLY)) {
 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_NOUNLINK)) {
 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_IMMUTABLE)) {
 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_NODUMP)) {
 			XVA_SET_REQ(xvap, XAT_NODUMP);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_MODIFIED)) {
 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_AV_QUARANTINED)) {
 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
 		}
 		if (XVA_ISSET_REQ(tmpxvattr, XAT_PROJINHERIT)) {
 			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
 			ASSERT(S_ISREG(ip->i_mode));
 
 		zfs_xvattr_set(zp, xvap, tx);
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	if (mask != 0)
 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
 
 	mutex_exit(&zp->z_lock);
 	if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 		mutex_exit(&zp->z_acl_lock);
 
 	if (attrzp) {
 		if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE))
 			mutex_exit(&attrzp->z_acl_lock);
 		mutex_exit(&attrzp->z_lock);
 	}
 out:
 	/* Flush the queued updates for the xattr directory first. */
 	if (err == 0 && xattr_count > 0) {
 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
 		    xattr_count, tx);
 		ASSERT(err2 == 0);
 	}
 
 	if (aclp)
 		zfs_acl_free(aclp);
 
 	if (fuidp) {
 		zfs_fuid_info_free(fuidp);
 		fuidp = NULL;
 	}
 
 	if (err) {
 		dmu_tx_abort(tx);
 		if (attrzp)
 			zrele(attrzp);
 		if (err == ERESTART)
 			goto top;
 	} else {
 		if (count > 0)
 			err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		dmu_tx_commit(tx);
 		if (attrzp) {
 			if (err2 == 0 && handle_eadir)
 				err = zfs_setattr_dir(attrzp);
 			zrele(attrzp);
 		}
 		zfs_znode_update_vfs(zp);
 	}
 
 out2:
 	if (os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 out3:
 	kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
 	kmem_free(tmpxvattr, sizeof (xvattr_t));
 	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
 /*
  * One entry in the singly linked list of parent-lock holds that
  * zfs_rename_lock() builds and zfs_rename_unlock() tears down.
  */
 typedef struct zfs_zlock {
 	krwlock_t	*zl_rwlock;	/* lock we acquired */
 	znode_t		*zl_znode;	/* znode we held, or NULL */
 	struct zfs_zlock *zl_next;	/* next in list */
 } zfs_zlock_t;
 
 /*
  * Tear down the list built by zfs_rename_lock(): for every entry,
  * asynchronously release the held znode (if any), drop the rwlock and
  * free the entry.  On return *zlpp is NULL.
  */
 static void
 zfs_rename_unlock(zfs_zlock_t **zlpp)
 {
 	for (zfs_zlock_t *node = *zlpp; node != NULL; node = *zlpp) {
 		znode_t *held = node->zl_znode;
 
 		if (held != NULL)
 			zfs_zrele_async(held);
 		rw_exit(node->zl_rwlock);
 
 		/* Unlink the entry from the list before freeing it. */
 		*zlpp = node->zl_next;
 		kmem_free(node, sizeof (*node));
 	}
 }
 
 /*
  * Walk from the target directory towards the root by following parent
  * ids, taking a z_parent_lock at every step so that concurrent renames
  * cannot change the path underneath us.  Each lock taken is pushed onto
  * *zlpp; the caller releases them with zfs_rename_unlock(), also when
  * an error is returned.
  *
  * Returns EINVAL when the target turns out to be szp itself or one of
  * its descendants, the zfs_zget() error if a parent cannot be
  * obtained, and 0 otherwise.
  * XXX - z_parent_lock can overlap with map or grow locks
  */
 static int
 zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
 {
 	zfs_zlock_t	*node;
 	znode_t		*cur = tdzp;
 	uint64_t	root_obj = ZTOZSB(cur)->z_root;
 	uint64_t	check_obj = cur->z_id;
 	krwlock_t	*lock = &szp->z_parent_lock;
 	krw_t		mode = RW_WRITER;
 
 	/*
 	 * The first iteration takes szp's parent lock as writer and tests
 	 * the target directory's own id.  Every later iteration takes the
 	 * current znode's parent lock as reader and tests its parent's id.
 	 */
 	do {
 		if (!rw_tryenter(lock, mode)) {
 			/*
 			 * Somebody else is renaming along this path.  As a
 			 * writer we hold no parent locks yet, so blocking is
 			 * safe; the same goes for a reader whose object id
 			 * does not exceed szp's.
 			 */
 			if (mode != RW_READER || cur->z_id <= szp->z_id) {
 				/* Wait until the other thread lets go. */
 				rw_enter(lock, mode);
 			} else {
 				/* Release everything and start over. */
 				zfs_rename_unlock(&node);
 				*zlpp = NULL;
 				cur = tdzp;
 				check_obj = cur->z_id;
 				lock = &szp->z_parent_lock;
 				mode = RW_WRITER;
 				continue;
 			}
 		}
 
 		/* Record the lock just taken at the head of the list. */
 		node = kmem_alloc(sizeof (*node), KM_SLEEP);
 		node->zl_rwlock = lock;
 		node->zl_znode = NULL;
 		node->zl_next = *zlpp;
 		*zlpp = node;
 
 		/* Reached szp: the target lives underneath the source. */
 		if (check_obj == szp->z_id)
 			return (SET_ERROR(EINVAL));
 
 		/* Reached the root of the file system. */
 		if (check_obj == root_obj)
 			return (0);
 
 		/* Past the first iteration, step up to the parent znode. */
 		if (mode == RW_READER) {
 			int error = zfs_zget(ZTOZSB(cur), check_obj, &cur);
 			if (error)
 				return (error);
 			node->zl_znode = cur;
 		}
 		(void) sa_lookup(cur->z_sa_hdl, SA_ZPL_PARENT(ZTOZSB(cur)),
 		    &check_obj, sizeof (check_obj));
 		lock = &cur->z_parent_lock;
 		mode = RW_READER;
 
 	} while (cur->z_id != sdzp->z_id);
 
 	return (0);
 }
 
 /*
  * Move an entry from the provided source directory to the target
  * directory.  Change the entry name as indicated.
  *
  *	IN:	sdzp	- Source directory containing the "old entry".
  *		snm	- Old entry name.
  *		tdzp	- Target directory to contain the "new entry".
  *		tnm	- New entry name.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *		rflags  - RENAME_* flags
  *		wo_vap  - attributes for RENAME_WHITEOUT (must be a char 0:0).
  *		mnt_ns	- user namespace of the mount
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	sdzp,tdzp - ctime|mtime updated
  */
 int
 zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
     cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
 {
 	znode_t		*szp, *tzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(sdzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*sdl, *tdl;
 	dmu_tx_t	*tx;
 	zfs_zlock_t	*zl;
 	int		cmp, serr, terr;
 	int		error = 0;
 	int		zflg = 0;
 	boolean_t	waited = B_FALSE;
 	/* Needed for whiteout inode creation. */
 	boolean_t	fuid_dirtied;
 	zfs_acl_ids_t	acl_ids;
 	boolean_t	have_acl = B_FALSE;
 	znode_t		*wzp = NULL;
 
 
 	if (snm == NULL || tnm == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return (SET_ERROR(EINVAL));
 
 	/* Already checked by Linux VFS, but just to make sure. */
 	if (rflags & RENAME_EXCHANGE &&
 	    (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the
 	 * right kind of vattr_t for the whiteout file. These are set
 	 * internally by ZFS so should never be incorrect.
 	 */
 	VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
 	VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
 	VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	if ((error = zfs_verify_zp(tdzp)) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
 	 * super blocks.
 	 */
 	if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
 	    zfsctl_is_node(ZTOI(tdzp))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 top:
 	szp = NULL;
 	tzp = NULL;
 	zl = NULL;
 
 	/*
 	 * This is to prevent the creation of links into attribute space
 	 * by renaming a linked file into/outof an attribute directory.
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Lock source and target directory entries.  To prevent deadlock,
 	 * a lock ordering must be defined.  We lock the directory with
 	 * the smallest object id first, or if it's a tie, the one with
 	 * the lexically first name.
 	 */
 	if (sdzp->z_id < tdzp->z_id) {
 		cmp = -1;
 	} else if (sdzp->z_id > tdzp->z_id) {
 		cmp = 1;
 	} else {
 		/*
 		 * First compare the two name arguments without
 		 * considering any case folding.
 		 */
 		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
 
 		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
 		ASSERT(error == 0 || !zfsvfs->z_utf8);
 		if (cmp == 0) {
 			/*
 			 * POSIX: "If the old argument and the new argument
 			 * both refer to links to the same existing file,
 			 * the rename() function shall return successfully
 			 * and perform no other action."
 			 */
 			zfs_exit(zfsvfs, FTAG);
 			return (0);
 		}
 		/*
 		 * If the file system is case-folding, then we may
 		 * have some more checking to do.  A case-folding file
 		 * system is either supporting mixed case sensitivity
 		 * access or is completely case-insensitive.  Note
 		 * that the file system is always case preserving.
 		 *
 		 * In mixed sensitivity mode case sensitive behavior
 		 * is the default.  FIGNORECASE must be used to
 		 * explicitly request case insensitive behavior.
 		 *
 		 * If the source and target names provided differ only
 		 * by case (e.g., a request to rename 'tim' to 'Tim'),
 		 * we will treat this as a special case in the
 		 * case-insensitive mode: as long as the source name
 		 * is an exact match, we will allow this to proceed as
 		 * a name-change request.
 		 */
 		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
 		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
 		    flags & FIGNORECASE)) &&
 		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
 		    &error) == 0) {
 			/*
 			 * case preserving rename request, require exact
 			 * name matches
 			 */
 			zflg |= ZCIEXACT;
 			zflg &= ~ZCILOOK;
 		}
 	}
 
 	/*
 	 * If the source and destination directories are the same, we should
 	 * grab the z_name_lock of that directory only once.
 	 */
 	if (sdzp == tdzp) {
 		zflg |= ZHAVELOCK;
 		rw_enter(&sdzp->z_name_lock, RW_READER);
 	}
 
 	if (cmp < 0) {
 		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
 		    ZEXISTS | zflg, NULL, NULL);
 		terr = zfs_dirent_lock(&tdl,
 		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
 	} else {
 		terr = zfs_dirent_lock(&tdl,
 		    tdzp, tnm, &tzp, zflg, NULL, NULL);
 		serr = zfs_dirent_lock(&sdl,
 		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
 		    NULL, NULL);
 	}
 
 	if (serr) {
 		/*
 		 * Source entry invalid or not there.
 		 */
 		if (!terr) {
 			zfs_dirent_unlock(tdl);
 			if (tzp)
 				zrele(tzp);
 		}
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (strcmp(snm, "..") == 0)
 			serr = EINVAL;
 		zfs_exit(zfsvfs, FTAG);
 		return (serr);
 	}
 	if (terr) {
 		zfs_dirent_unlock(sdl);
 		zrele(szp);
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (strcmp(tnm, "..") == 0)
 			terr = EINVAL;
 		zfs_exit(zfsvfs, FTAG);
 		return (terr);
 	}
 
 	/*
 	 * If we are using project inheritance, means if the directory has
 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
 	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
 	 * such case, we only allow renames into our tree when the project
 	 * IDs are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	/*
 	 * Must have write access at the source to remove the old entry
 	 * and write access at the target to create the new entry.
 	 * Note that if target and source are the same, this can be
 	 * done in a single check.
 	 */
 	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns)))
 		goto out;
 
 	if (S_ISDIR(ZTOI(szp)->i_mode)) {
 		/*
 		 * Check to make sure rename is valid.
 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
 		 */
 		if ((error = zfs_rename_lock(szp, tdzp, sdzp, &zl)))
 			goto out;
 	}
 
 	/*
 	 * Does target exist?
 	 */
 	if (tzp) {
 		if (rflags & RENAME_NOREPLACE) {
 			error = SET_ERROR(EEXIST);
 			goto out;
 		}
 		/*
 		 * Source and target must be the same type (unless exchanging).
 		 */
 		if (!(rflags & RENAME_EXCHANGE)) {
 			boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
 			boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
 
 			if (s_is_dir != t_is_dir) {
 				error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
 				goto out;
 			}
 		}
 		/*
 		 * POSIX dictates that when the source and target
 		 * entries refer to the same file object, rename
 		 * must do nothing and exit without error.
 		 */
 		if (szp->z_id == tzp->z_id) {
 			error = 0;
 			goto out;
 		}
 	} else if (rflags & RENAME_EXCHANGE) {
 		/* Target must exist for RENAME_EXCHANGE. */
 		error = SET_ERROR(ENOENT);
 		goto out;
 	}
 
 	/* Set up inode creation for RENAME_WHITEOUT. */
 	if (rflags & RENAME_WHITEOUT) {
 		/*
 		 * Whiteout files are not regular files or directories, so to
 		 * match zfs_create() we do not inherit the project id.
 		 */
 		uint64_t wo_projid = ZFS_DEFAULT_PROJID;
 
 		error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns);
 		if (error)
 			goto out;
 
 		if (!have_acl) {
 			error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
 			    &acl_ids, mnt_ns);
 			if (error)
 				goto out;
 			have_acl = B_TRUE;
 		}
 
 		if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
 			error = SET_ERROR(EDQUOT);
 			goto out;
 		}
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, sdzp->z_id,
 	    (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
 	if (sdzp != tdzp) {
 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tdzp);
 	}
 	if (tzp) {
 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tzp);
 	}
 	if (rflags & RENAME_WHITEOUT) {
 		dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 		    ZFS_SA_BASE_ATTR_SIZE);
 
 		dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
 		dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
 		if (!zfsvfs->z_use_sa &&
 		    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, acl_ids.z_aclp->z_acl_bytes);
 		}
 	}
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		if (zl != NULL)
 			zfs_rename_unlock(&zl);
 		zfs_dirent_unlock(sdl);
 		zfs_dirent_unlock(tdl);
 
 		if (sdzp == tdzp)
 			rw_exit(&sdzp->z_name_lock);
 
 		if (error == ERESTART) {
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			zrele(szp);
 			if (tzp)
 				zrele(tzp);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zrele(szp);
 		if (tzp)
 			zrele(tzp);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * Unlink the source.
 	 */
 	szp->z_pflags |= ZFS_AV_MODIFIED;
 	if (tdzp->z_pflags & ZFS_PROJINHERIT)
 		szp->z_pflags |= ZFS_PROJINHERIT;
 
 	error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
 	    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
 	VERIFY0(error);
 
 	error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
 	if (error)
 		goto commit;
 
 	/*
 	 * Unlink the target.
 	 */
 	if (tzp) {
 		int tzflg = zflg;
 
 		if (rflags & RENAME_EXCHANGE) {
 			/* This inode will be re-linked soon. */
 			tzflg |= ZRENAMING;
 
 			tzp->z_pflags |= ZFS_AV_MODIFIED;
 			if (sdzp->z_pflags & ZFS_PROJINHERIT)
 				tzp->z_pflags |= ZFS_PROJINHERIT;
 
 			error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
 			    (void *)&tzp->z_pflags, sizeof (uint64_t), tx);
 			ASSERT0(error);
 		}
 		error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
 		if (error)
 			goto commit_link_szp;
 	}
 
 	/*
 	 * Create the new target links:
 	 *   * We always link the target.
 	 *   * RENAME_EXCHANGE: Link the old target to the source.
 	 *   * RENAME_WHITEOUT: Create a whiteout inode in-place of the source.
 	 */
 	error = zfs_link_create(tdl, szp, tx, ZRENAMING);
 	if (error) {
 		/*
 		 * If we have removed the existing target, a subsequent call to
 		 * zfs_link_create() to add back the same entry, but with a new
 		 * dnode (szp), should not fail.
 		 */
 		ASSERT3P(tzp, ==, NULL);
 		goto commit_link_tzp;
 	}
 
 	switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
 	case RENAME_EXCHANGE:
 		error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
 		/*
 		 * The same argument as zfs_link_create() failing for
 		 * szp applies here, since the source directory must
 		 * have had an entry we are replacing.
 		 */
 		ASSERT0(error);
 		if (error)
 			goto commit_unlink_td_szp;
 		break;
 	case RENAME_WHITEOUT:
 		zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
 		error = zfs_link_create(sdl, wzp, tx, ZNEW);
 		if (error) {
 			zfs_znode_delete(wzp, tx);
 			remove_inode_hash(ZTOI(wzp));
 			goto commit_unlink_td_szp;
 		}
 		break;
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
 	case RENAME_EXCHANGE:
 		zfs_log_rename_exchange(zilog, tx,
 		    (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
 		    tdzp, tdl->dl_name, szp);
 		break;
 	case RENAME_WHITEOUT:
 		zfs_log_rename_whiteout(zilog, tx,
 		    (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
 		    tdzp, tdl->dl_name, szp, wzp);
 		break;
 	default:
 		ASSERT0(rflags & ~RENAME_NOREPLACE);
 		zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0),
 		    sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);
 		break;
 	}
 
 commit:
 	dmu_tx_commit(tx);
 out:
 	if (have_acl)
 		zfs_acl_ids_free(&acl_ids);
 
 	zfs_znode_update_vfs(sdzp);
 	if (sdzp == tdzp)
 		rw_exit(&sdzp->z_name_lock);
 
 	if (sdzp != tdzp)
 		zfs_znode_update_vfs(tdzp);
 
 	zfs_znode_update_vfs(szp);
 	zrele(szp);
 	if (wzp) {
 		zfs_znode_update_vfs(wzp);
 		zrele(wzp);
 	}
 	if (tzp) {
 		zfs_znode_update_vfs(tzp);
 		zrele(tzp);
 	}
 
 	if (zl != NULL)
 		zfs_rename_unlock(&zl);
 
 	zfs_dirent_unlock(sdl);
 	zfs_dirent_unlock(tdl);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 
 	/*
 	 * Clean-up path for broken link state.
 	 *
 	 * At this point we are in a (very) bad state, so we need to do our
 	 * best to correct the state. In particular, all of the nlinks are
 	 * wrong because we were destroying and creating links with ZRENAMING.
 	 *
 	 * In some form, all of these operations have to resolve the state:
 	 *
 	 *  * link_destroy() *must* succeed. Fortunately, this is very likely
 	 *    since we only just created it.
 	 *
 	 *  * link_create()s are allowed to fail (though they shouldn't because
 	 *    we only just unlinked them and are putting the entries back
 	 *    during clean-up). But if they fail, we can just forcefully drop
 	 *    the nlink value to (at the very least) avoid broken nlink values
 	 *    -- though in the case of non-empty directories we will have to
 	 *    panic (otherwise we'd have a leaked directory with a broken ..).
 	 */
 commit_unlink_td_szp:
 	VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL));
 commit_link_tzp:
 	if (tzp) {
 		if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
 			VERIFY0(zfs_drop_nlink(tzp, tx, NULL));
 	}
 commit_link_szp:
 	if (zfs_link_create(sdl, szp, tx, ZRENAMING))
 		VERIFY0(zfs_drop_nlink(szp, tx, NULL));
 	goto commit;
 }
 
 /*
  * Insert the indicated symbolic reference entry into the directory.
  *
  *	IN:	dzp	- Directory to contain new symbolic link.
  *		name	- Name of directory entry in dzp.
  *		vap	- Attributes of new entry.
  *		link	- Name for new symlink entry.
  *		cr	- credentials of caller.
  *		flags	- case flags
  *		mnt_ns	- user namespace of the mount
  *
  *	OUT:	zpp	- Znode for new symbolic link.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dzp - ctime|mtime updated
  */
 int
 zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
     znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
 {
 	znode_t		*zp;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
 	zilog_t		*zilog;
 	/* Length of the link target as reported by strlen(). */
 	uint64_t	len = strlen(link);
 	int		error;
 	int		zflg = ZNEW;
 	zfs_acl_ids_t	acl_ids;
 	boolean_t	fuid_dirtied;
 	uint64_t	txtype = TX_SYMLINK;
 	/* Set once we have gone through dmu_tx_wait() at least one time. */
 	boolean_t	waited = B_FALSE;
 
 	ASSERT(S_ISLNK(vap->va_mode));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	/* When z_utf8 is set, names that fail u8_validate() are rejected. */
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zflg |= ZCILOOK;
 
 	/* Link targets longer than MAXPATHLEN are refused. */
 	if (len > MAXPATHLEN) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	/*
 	 * acl_ids is created once, before the "top" retry label, so it is
 	 * reused across retries.  Every exit path from here on releases it
 	 * with zfs_acl_ids_free().
 	 */
 	if ((error = zfs_acl_ids_create(dzp, 0,
 	    vap, cr, NULL, &acl_ids, mnt_ns)) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 top:
 	*zpp = NULL;
 
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/* The caller needs ACE_ADD_FILE access on the parent directory. */
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
 		zfs_acl_ids_free(&acl_ids);
 		zfs_dirent_unlock(dl);
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EDQUOT));
 	}
 	/*
 	 * Build the transaction: holds for the new object's data (at least
 	 * one byte), the directory ZAP entry, the new SA (ACL bytes + base
 	 * attributes + link text) and the parent directory's SA.
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE + len);
 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 	/* Extra hold taken only when SA is not in use and the ACL is large. */
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	/* After one wait, the retry is assigned with TXG_NOTHROTTLE set. */
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			/*
 			 * The dirent lock was dropped above; wait, discard
 			 * this tx and start over.  acl_ids is kept for the
 			 * retry.
 			 */
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * Create a new object for the symlink.
 	 * for version 4 ZPL datasets the symlink will be an SA attribute
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	/*
 	 * Store the link text under z_lock.
 	 * NOTE(review): the sa_update() result assigned to error here is
 	 * overwritten by zfs_link_create() below without being checked.
 	 */
 	mutex_enter(&zp->z_lock);
 	if (zp->z_is_sa)
 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
 		    link, len, tx);
 	else
 		zfs_sa_symlink(zp, link, len, tx);
 	mutex_exit(&zp->z_lock);
 
 	/* The size attribute of the new znode is the link length. */
 	zp->z_size = len;
 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 	    &zp->z_size, sizeof (zp->z_size), tx);
 	/*
 	 * Insert the new object into the directory.
 	 */
 	error = zfs_link_create(dl, zp, tx, ZNEW);
 	if (error != 0) {
 		/* Undo the create inside this same transaction. */
 		zfs_znode_delete(zp, tx);
 		remove_inode_hash(ZTOI(zp));
 	} else {
 		if (flags & FIGNORECASE)
 			txtype |= TX_CI;
 		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 
 		zfs_znode_update_vfs(dzp);
 		zfs_znode_update_vfs(zp);
 	}
 
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (error == 0) {
 		/* Success: hand the new znode back to the caller. */
 		*zpp = zp;
 
 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 			zil_commit(zilog, 0);
 	} else {
 		/* Failure: release the znode; *zpp stays NULL. */
 		zrele(zp);
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Return, in the buffer contained in the provided uio structure,
  * the symbolic path referred to by ip.
  *
  *	IN:	ip	- inode of symbolic link
  *		uio	- structure to contain the link path.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - atime updated
  */
 int
 zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
 {
 	(void) cr;
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	znode_t		*zp = ITOZ(ip);
 	int		rc;
 
 	rc = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	/* z_lock is held while the link target is copied into the uio. */
 	mutex_enter(&zp->z_lock);
 	if (!zp->z_is_sa) {
 		rc = zfs_sa_readlink(zp, uio);
 	} else {
 		rc = sa_lookup_uio(zp->z_sa_hdl,
 		    SA_ZPL_SYMLINK(zfsvfs), uio);
 	}
 	mutex_exit(&zp->z_lock);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (rc);
 }
 
 /*
  * Insert a new entry into directory tdzp referencing szp.
  *
  *	IN:	tdzp	- Directory to contain new entry.
  *		szp	- znode of new entry.
  *		name	- name of new entry.
  *		cr	- credentials of caller.
  *		flags	- case flags.
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	tdzp - ctime|mtime updated
  *	 szp - ctime updated
  */
 int
 zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
     int flags)
 {
 	struct inode *sip = ZTOI(szp);
 	znode_t		*tzp;
 	zfsvfs_t	*zfsvfs = ZTOZSB(tdzp);
 	zilog_t		*zilog;
 	zfs_dirlock_t	*dl;
 	dmu_tx_t	*tx;
 	int		error;
 	int		zf = ZNEW;
 	uint64_t	parent;
 	uid_t		owner;
 	/* Set once we have gone through dmu_tx_wait() at least one time. */
 	boolean_t	waited = B_FALSE;
 	boolean_t	is_tmpfile = 0;
 	uint64_t	txg;
 #ifdef HAVE_TMPFILE
 	/* A linkable inode with a zero link count is treated as a tmpfile. */
 	is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
 #endif
 	ASSERT(S_ISDIR(ZTOI(tdzp)->i_mode));
 
 	if (name == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
 		return (error);
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * POSIX dictates that we return EPERM here.
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (S_ISDIR(sip->i_mode)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* Only tdzp was verified on entry; verify the source znode too. */
 	if ((error = zfs_verify_zp(szp)) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * With project inheritance, i.e. when the target directory has
 	 * ZFS_PROJINHERIT set, its descendant directories inherit not only
 	 * the project ID but also the ZFS_PROJINHERIT flag.  In that case
 	 * we only allow hard link creation in our tree when the project IDs
 	 * are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	/*
 	 * We check i_sb because snapshots and the ctldir must have different
 	 * super blocks.
 	 */
 	if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EXDEV));
 	}
 
 	/* Prevent links to .zfs/shares files */
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* When z_utf8 is set, names that fail u8_validate() are rejected. */
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EILSEQ));
 	}
 	if (flags & FIGNORECASE)
 		zf |= ZCILOOK;
 
 	/*
 	 * We do not support links between attributes and non-attributes
 	 * because of the potential security risk of creating links
 	 * into "normal" file space in order to circumvent restrictions
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * A caller that is not the mapped owner of the source must pass
 	 * secpolicy_basic_link().
 	 */
 	owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
 	    cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* The caller needs ACE_ADD_FILE access on the target directory. */
 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr,
 	    zfs_init_idmap))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 top:
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
 	if (error) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	/*
 	 * Build the transaction: holds for the source SA and the target
 	 * directory ZAP entry, plus the unlinked-set ZAP when the source is
 	 * a tmpfile (its entry is removed from there below).
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
 	if (is_tmpfile)
 		dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	zfs_sa_upgrade_txholds(tx, szp);
 	zfs_sa_upgrade_txholds(tx, tdzp);
 	/* After one wait, the retry is assigned with TXG_NOTHROTTLE set. */
 	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
 	if (error) {
 		zfs_dirent_unlock(dl);
 		if (error == ERESTART) {
 			/* Wait, discard this tx and retry from the lock. */
 			waited = B_TRUE;
 			dmu_tx_wait(tx);
 			dmu_tx_abort(tx);
 			goto top;
 		}
 		dmu_tx_abort(tx);
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 	/* unmark z_unlinked so zfs_link_create will not reject */
 	if (is_tmpfile)
 		szp->z_unlinked = B_FALSE;
 	error = zfs_link_create(dl, szp, tx, 0);
 
 	if (error == 0) {
 		uint64_t txtype = TX_LINK;
 		/*
 		 * tmpfile is created to be in z_unlinkedobj, so remove it.
 		 * Also, we don't log in ZIL, because all previous file
 		 * operation on the tmpfile are ignored by ZIL. Instead we
 		 * always wait for txg to sync to make sure all previous
 		 * operation are sync safe.
 		 */
 		if (is_tmpfile) {
 			VERIFY(zap_remove_int(zfsvfs->z_os,
 			    zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
 		} else {
 			if (flags & FIGNORECASE)
 				txtype |= TX_CI;
 			zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
 		}
 	} else if (is_tmpfile) {
 		/* restore z_unlinked since linking failed */
 		szp->z_unlinked = B_TRUE;
 	}
 	/* Capture the txg before commit; it is waited on below. */
 	txg = dmu_tx_get_txg(tx);
 	dmu_tx_commit(tx);
 
 	zfs_dirent_unlock(dl);
 
 	if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	/*
 	 * NOTE(review): for a tmpfile this wait runs whether or not
 	 * zfs_link_create() succeeded; error is not consulted here.
 	 */
 	if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
 
 	/* Both inodes are refreshed on success and on failure alike. */
 	zfs_znode_update_vfs(tdzp);
 	zfs_znode_update_vfs(szp);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Commit callback registered by zfs_putpage() for for_sync callers:
  * clears the page error flag and ends writeback on the page.
  */
 static void
 zfs_putpage_sync_commit_cb(void *arg)
 {
 	struct page *page = (struct page *)arg;
 
 	ClearPageError(page);
 	end_page_writeback(page);
 }
 
 /*
  * Commit callback registered by zfs_putpage() for !for_sync callers:
  * same as the sync variant, and additionally drops the znode's
  * z_async_writes_cnt that zfs_putpage() raised.
  */
 static void
 zfs_putpage_async_commit_cb(void *arg)
 {
 	struct page *page = (struct page *)arg;
 	/* The owning znode is looked up before writeback is ended. */
 	znode_t *owner = ITOZ(page->mapping->host);
 
 	ClearPageError(page);
 	end_page_writeback(page);
 	atomic_dec_32(&owner->z_async_writes_cnt);
 }
 
 /*
  * Push a page out to disk, once the page is on stable storage the
  * registered commit callback will be run as notification of completion.
  *
  *	IN:	ip	 - page mapped for inode.
  *		pp	 - page to push (page is locked)
  *		wbc	 - writeback control data
  *		for_sync - does the caller intend to wait synchronously for the
  *			   page writeback to complete?
  *
  *	RETURN:	0 if success
  *		error code if failure
  *
  * Timestamps:
  *	ip - ctime|mtime updated
  */
 int
 zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
     boolean_t for_sync)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	loff_t		offset;
 	loff_t		pgoff;
 	unsigned int	pglen;
 	dmu_tx_t	*tx;
 	caddr_t		va;
 	int		err = 0;
 	uint64_t	mtime[2], ctime[2];
 	inode_timespec_t tmp_ts;
 	/* Three slots: mtime, ctime and flags are added below. */
 	sa_bulk_attr_t	bulk[3];
 	int		cnt = 0;
 	struct address_space *mapping;
 
 	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (err);
 
 	ASSERT(PageLocked(pp));
 
 	pgoff = page_offset(pp);	/* Page byte-offset in file */
 	offset = i_size_read(ip);	/* File length in bytes */
 	pglen = MIN(PAGE_SIZE,		/* Page length in bytes */
 	    P2ROUNDUP(offset, PAGE_SIZE)-pgoff);
 
 	/* Page is beyond end of file */
 	if (pgoff >= offset) {
 		unlock_page(pp);
 		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 	/* Truncate page length to end of file */
 	if (pgoff + pglen > offset)
 		pglen = offset - pgoff;
 
 #if 0
 	/*
 	 * FIXME: Allow mmap writes past its quota.  The correct fix
 	 * is to register a page_mkwrite() handler to count the page
 	 * against its quota when it is about to be dirtied.
 	 */
 	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT,
 	    KUID_TO_SUID(ip->i_uid)) ||
 	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 	    KGID_TO_SGID(ip->i_gid)) ||
 	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
 	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 	    zp->z_projid))) {
 		err = EDQUOT;
 	}
 #endif
 
 	/*
 	 * The ordering here is critical and must adhere to the following
 	 * rules in order to avoid deadlocking in either zfs_read() or
 	 * zfs_free_range() due to a lock inversion.
 	 *
 	 * 1) The page must be unlocked prior to acquiring the range lock.
 	 *    This is critical because zfs_read() calls find_lock_page()
 	 *    which may block on the page lock while holding the range lock.
 	 *
 	 * 2) Before setting or clearing write back on a page the range lock
 	 *    must be held in order to prevent a lock inversion with the
 	 *    zfs_free_range() function.
 	 *
 	 * This presents a problem because upon entering this function the
 	 * page lock is already held.  To safely acquire the range lock the
 	 * page lock must be dropped.  This creates a window where another
 	 * process could truncate, invalidate, dirty, or write out the page.
 	 *
 	 * Therefore, after successfully reacquiring the range and page locks
 	 * the current page state is checked.  In the common case everything
 	 * will be as is expected and it can be written out.  However, if
 	 * the page state has changed it must be handled accordingly.
 	 */
 	/* Remember the mapping so a change can be detected after relocking. */
 	mapping = pp->mapping;
 	redirty_page_for_writepage(wbc, pp);
 	unlock_page(pp);
 
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
 	    pgoff, pglen, RL_WRITER);
 	lock_page(pp);
 
 	/* Page mapping changed or it was no longer dirty, we're done */
 	if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 	/* Another process started writeback; block on it if required */
 	if (PageWriteback(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 
 		if (wbc->sync_mode != WB_SYNC_NONE) {
 			/*
 			 * Speed up any non-sync page writebacks since
 			 * they may take several seconds to complete.
 			 * Refer to the comment in zpl_fsync() (when
 			 * HAVE_FSYNC_RANGE is defined) for details.
 			 */
 			if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
 				zil_commit(zfsvfs->z_log, zp->z_id);
 			}
 
 			/* Still under writeback after the commit: wait. */
 			if (PageWriteback(pp))
 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
 				folio_wait_bit(page_folio(pp), PG_writeback);
 #else
 				wait_on_page_bit(pp, PG_writeback);
 #endif
 		}
 
 		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 	/* Clear the dirty flag now that the required locks are held */
 	if (!clear_page_dirty_for_io(pp)) {
 		unlock_page(pp);
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 	/*
 	 * Counterpart for redirty_page_for_writepage() above.  This page
 	 * was in fact not skipped and should not be counted as if it were.
 	 */
 	wbc->pages_skipped--;
 	/* Undone by the async commit callback, or below if the tx fails. */
 	if (!for_sync)
 		atomic_inc_32(&zp->z_async_writes_cnt);
 	set_page_writeback(pp);
 	unlock_page(pp);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
 		/*
 		 * Could not get a transaction: mark the page dirty again,
 		 * end writeback, and undo the async-writes count taken above.
 		 */
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
 #else
 		__set_page_dirty_nobuffers(pp);
 #endif
 		ClearPageError(pp);
 		end_page_writeback(pp);
 		if (!for_sync)
 			atomic_dec_32(&zp->z_async_writes_cnt);
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (err);
 	}
 
 	/* Copy pglen bytes of the mapped page into the object at pgoff. */
 	va = kmap(pp);
 	ASSERT3U(pglen, <=, PAGE_SIZE);
 	dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
 	kunmap(pp);
 
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 
 	/* Preserve the mtime and ctime provided by the inode */
 	tmp_ts = zpl_inode_get_mtime(ip);
 	ZFS_TIME_ENCODE(&tmp_ts, mtime);
 	tmp_ts = zpl_inode_get_ctime(ip);
 	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_seq++;
 
 	/* This result is what the function returns on the path below. */
 	err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 
 	boolean_t commit = B_FALSE;
 	if (wbc->sync_mode != WB_SYNC_NONE) {
 		/*
 		 * Note that this is rarely called under writepages(), because
 		 * writepages() normally handles the entire commit for
 		 * performance reasons.
 		 */
 		commit = B_TRUE;
 	} else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
 		/*
 		 * If the caller does not intend to wait synchronously
 		 * for this page writeback to complete and there are active
 		 * synchronous calls on this file, do a commit so that
 		 * the latter don't accidentally end up waiting for
 		 * our writeback to complete. Refer to the comment in
 		 * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
 		 */
 		commit = B_TRUE;
 	}
 
 	/*
 	 * Log the write and register the commit callback for this page;
 	 * the callback variant depends on for_sync.
 	 */
 	zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
 	    for_sync ? zfs_putpage_sync_commit_cb :
 	    zfs_putpage_async_commit_cb, pp);
 
 	dmu_tx_commit(tx);
 
 	/* The range lock is dropped before any zil_commit(). */
 	zfs_rangelock_exit(lr);
 
 	if (commit)
 		zil_commit(zfsvfs->z_log, zp->z_id);
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (err);
 }
 
 /*
  * Update the system attributes when the inode has been dirtied.  For the
  * moment we only update the mode, atime, mtime, and ctime.
  */
 int
 zfs_dirty_inode(struct inode *ip, int flags)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	dmu_tx_t	*tx;
 	uint64_t	mode, atime[2], mtime[2], ctime[2];
 	inode_timespec_t tmp_ts;
 	sa_bulk_attr_t	bulk[4];
 	int		error = 0;
 	int		cnt = 0;
 
 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
 		return (0);
 
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
 
 #ifdef I_DIRTY_TIME
 	/*
 	 * This is the lazytime semantic introduced in Linux 4.0
 	 * This flag will only be called from update_time when lazytime is set.
 	 * (Note, I_DIRTY_SYNC will also set if not lazytime)
 	 * Fortunately mtime and ctime are managed within ZFS itself, so we
 	 * only need to dirty atime.
 	 */
 	if (flags == I_DIRTY_TIME) {
 		zp->z_atime_dirty = B_TRUE;
 		goto out;
 	}
 #endif
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		goto out;
 	}
 
 	mutex_enter(&zp->z_lock);
 	zp->z_atime_dirty = B_FALSE;
 
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 
 	/* Preserve the mode, mtime and ctime provided by the inode */
 	tmp_ts = zpl_inode_get_atime(ip);
 	ZFS_TIME_ENCODE(&tmp_ts, atime);
 	tmp_ts = zpl_inode_get_mtime(ip);
 	ZFS_TIME_ENCODE(&tmp_ts, mtime);
 	tmp_ts = zpl_inode_get_ctime(ip);
 	ZFS_TIME_ENCODE(&tmp_ts, ctime);
 	mode = ip->i_mode;
 
 	zp->z_mode = mode;
 
 	error = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
 	mutex_exit(&zp->z_lock);
 
 	dmu_tx_commit(tx);
 out:
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 void
 zfs_inactive(struct inode *ip)
 {
 	znode_t	*zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	uint64_t atime[2];
 	int error;
 	int need_unlock = 0;
 
 	/* Only read lock if we haven't already write locked, e.g. rollback */
 	if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
 		need_unlock = 1;
 		rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
 	}
 	if (zp->z_sa_hdl == NULL) {
 		if (need_unlock)
 			rw_exit(&zfsvfs->z_teardown_inactive_lock);
 		return;
 	}
 
 	if (zp->z_atime_dirty && zp->z_unlinked == B_FALSE) {
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
 			inode_timespec_t tmp_atime;
 			tmp_atime = zpl_inode_get_atime(ip);
 			ZFS_TIME_ENCODE(&tmp_atime, atime);
 			mutex_enter(&zp->z_lock);
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
 			    (void *)&atime, sizeof (atime), tx);
 			zp->z_atime_dirty = B_FALSE;
 			mutex_exit(&zp->z_lock);
 			dmu_tx_commit(tx);
 		}
 	}
 
 	zfs_zinactive(zp);
 	if (need_unlock)
 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
 }
 
 /*
  * Fill pages with data from the disk.
  */
 static int
 zfs_fillpage(struct inode *ip, struct page *pp)
 {
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	loff_t i_size = i_size_read(ip);
 	u_offset_t io_off = page_offset(pp);
 	size_t nbytes = PAGE_SIZE;
 	void *kaddr;
 	int rc;
 
 	ASSERT3U(io_off, <, i_size);
 
 	/* Do not read past the end of the file. */
 	if (io_off + nbytes > i_size)
 		nbytes = i_size - io_off;
 
 	kaddr = kmap(pp);
 	rc = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off, nbytes, kaddr,
 	    DMU_READ_PREFETCH);
 	/* Zero the rest of the page after a short read request. */
 	if (nbytes != PAGE_SIZE)
 		memset((char *)kaddr + nbytes, 0, PAGE_SIZE - nbytes);
 	kunmap(pp);
 
 	if (rc == 0) {
 		ClearPageError(pp);
 		SetPageUptodate(pp);
 		return (0);
 	}
 
 	/* convert checksum errors into IO errors */
 	if (rc == ECKSUM)
 		rc = SET_ERROR(EIO);
 
 	SetPageError(pp);
 	ClearPageUptodate(pp);
 	return (rc);
 }
 
 /*
  * Uses zfs_fillpage to read data from the file and fill the page.
  *
  *	IN:	ip	 - inode of file to get data from.
  *		pp	 - page to read
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	ip - atime updated
  */
 int
 zfs_getpage(struct inode *ip, struct page *pp)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	int rc;
 
 	rc = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	rc = zfs_fillpage(ip, pp);
 	if (rc == 0) {
 		/* Only successful fills are counted in the read kstats. */
 		dataset_kstats_update_read_kstats(&zfsvfs->z_kstat,
 		    PAGE_SIZE);
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 
 	return (rc);
 }
 
 /*
  * Check ZFS specific permissions to memory map a section of a file.
  *
  *	IN:	ip	- inode of the file to mmap
  *		off	- file offset
  *		addrp	- start address in memory region
  *		len	- length of memory region
  *		vm_flags- address flags
  *
  *	RETURN:	0 if success
  *		error code if failure
  */
 int
 zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
     unsigned long vm_flags)
 {
 	(void) addrp;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
 	znode_t *zp = ITOZ(ip);
 	int denied = 0;	/* errno to report once zfs_exit() has run */
 	int error;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error != 0)
 		return (error);
 
 	if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
 	    (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
 		/* No shared writable mapping of a write-restricted file. */
 		denied = EPERM;
 	} else if ((vm_flags & (VM_READ | VM_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
 		/* Quarantined files may not be mapped for read or exec. */
 		denied = EACCES;
 	} else if (off < 0 || len > MAXOFFSET_T - off) {
 		/* The requested range must fit below MAXOFFSET_T. */
 		denied = ENXIO;
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (denied != 0 ? SET_ERROR(denied) : 0);
 }
 
 /*
  * Free or allocate space in a file.  Currently, this function only
  * supports the `F_FREESP' command.  However, this command is somewhat
  * misnamed, as its functionality includes the ability to allocate as
  * well as free space.
  *
  *	IN:	zp	- znode of file to free data in.
  *		cmd	- action to take (only F_FREESP supported).
  *		bfp	- section of file to free/alloc.
  *		flag	- current file open mode flags.
  *		offset	- current file offset.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	zp - ctime|mtime updated
  */
 int
 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
     offset_t offset, cred_t *cr)
 {
 	(void) offset;
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	uint64_t	start, nbytes;
 	int		rejected = 0;	/* errno for the argument checks */
 	int		error;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Argument checks, in order: only F_FREESP is supported; callers
 	 * might not be able to detect properly that we are read-only, so
 	 * that is checked explicitly; a negative length is invalid.
 	 */
 	if (cmd != F_FREESP)
 		rejected = EINVAL;
 	else if (zfs_is_readonly(zfsvfs))
 		rejected = EROFS;
 	else if (bfp->l_len < 0)
 		rejected = EINVAL;
 
 	if (rejected != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(rejected));
 	}
 
 	/*
 	 * Permissions aren't checked on Solaris because on this OS
 	 * zfs_space() can only be called with an opened file handle.
 	 * On Linux we can get here through truncate_range() which
 	 * operates directly on inodes, so we need to check access rights.
 	 */
 	error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
 	    zfs_init_idmap);
 	if (error == 0) {
 		start = bfp->l_start;
 		nbytes = bfp->l_len; /* 0 means from start to end of file */
 
 		error = zfs_freesp(zp, start, nbytes, flag, TRUE);
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Fill in a short file identifier for the inode: the object number and
  * the low 32 bits of the generation, each stored least significant byte
  * first.  If the caller's fid is too short, fid_len is set to the
  * required length and ENOSPC is returned.
  */
 int
 zfs_fid(struct inode *ip, fid_t *fidp)
 {
 	znode_t		*zp = ITOZ(ip);
 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
 	zfid_short_t	*zfid = (zfid_short_t *)fidp;
 	uint64_t	object = zp->z_id;
 	uint64_t	gen64;
 	uint32_t	gen;
 	int		i, error;
 
 	error = zfs_enter(zfsvfs, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (fidp->fid_len < SHORT_FID_LEN) {
 		fidp->fid_len = SHORT_FID_LEN;
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(ENOSPC));
 	}
 
 	error = zfs_verify_zp(zp);
 	if (error == 0) {
 		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 		    &gen64, sizeof (uint64_t));
 	}
 	if (error != 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	zfid->zf_len = SHORT_FID_LEN;
 
 	/* Emit the object number one byte at a time, low byte first. */
 	for (i = 0; i < sizeof (zfid->zf_object); i++) {
 		zfid->zf_object[i] = (uint8_t)object;
 		object >>= 8;
 	}
 
 	/* Must have a non-zero generation number to distinguish from .zfs */
 	gen = (uint32_t)gen64;
 	if (gen == 0)
 		gen = 1;
 	for (i = 0; i < sizeof (zfid->zf_gen); i++) {
 		zfid->zf_gen[i] = (uint8_t)gen;
 		gen >>= 8;
 	}
 
 	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
 #if defined(_KERNEL)
 /* Symbols exported for use outside this module (kernel builds only). */
 EXPORT_SYMBOL(zfs_open);
 EXPORT_SYMBOL(zfs_close);
 EXPORT_SYMBOL(zfs_lookup);
 EXPORT_SYMBOL(zfs_create);
 EXPORT_SYMBOL(zfs_tmpfile);
 EXPORT_SYMBOL(zfs_remove);
 EXPORT_SYMBOL(zfs_mkdir);
 EXPORT_SYMBOL(zfs_rmdir);
 EXPORT_SYMBOL(zfs_readdir);
 EXPORT_SYMBOL(zfs_getattr_fast);
 EXPORT_SYMBOL(zfs_setattr);
 EXPORT_SYMBOL(zfs_rename);
 EXPORT_SYMBOL(zfs_symlink);
 EXPORT_SYMBOL(zfs_readlink);
 EXPORT_SYMBOL(zfs_link);
 EXPORT_SYMBOL(zfs_inactive);
 EXPORT_SYMBOL(zfs_space);
 EXPORT_SYMBOL(zfs_fid);
 EXPORT_SYMBOL(zfs_getpage);
 EXPORT_SYMBOL(zfs_putpage);
 EXPORT_SYMBOL(zfs_dirty_inode);
 EXPORT_SYMBOL(zfs_map);
 
 /* zfs_delete_blocks is exposed as a ulong module parameter, mode 0644. */
 /* CSTYLED */
 module_param(zfs_delete_blocks, ulong, 0644);
 MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
 #endif
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
index 83f80f62aee7..2beec6436bff 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -1,1926 +1,1971 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
+ * Copyright (c) 2024, Rob Norris <robn@despairlabs.com>
  * Copyright (c) 2024, Klara, Inc.
  */
 
 #include <sys/dataset_kstats.h>
 #include <sys/dbuf.h>
 #include <sys/dmu_traverse.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dir.h>
 #include <sys/zap.h>
 #include <sys/zfeature.h>
 #include <sys/zil_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/zio.h>
 #include <sys/zfs_rlock.h>
 #include <sys/spa_impl.h>
 #include <sys/zvol.h>
 #include <sys/zvol_impl.h>
 #include <cityhash.h>
 
 #include <linux/blkdev_compat.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/workqueue.h>
 
 #ifdef HAVE_BLK_MQ
 #include <linux/blk-mq.h>
 #endif
 
 /* Forward declaration: zvol_mq_queue_rq() calls this before its definition */
 static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
     struct request *rq, boolean_t force_sync);
 
 /* Major number compared against MAJOR(dev) in zvol_os_is_zvol() */
 static unsigned int zvol_major = ZVOL_MAJOR;
 /* Non-zero makes zvol_request_impl() process every request synchronously */
 static unsigned int zvol_request_sync = 0;
 static unsigned int zvol_prefetch_bytes = (128 * 1024);
 /* Multiplied by volblocksize to derive the max discard sectors limit */
 static unsigned long zvol_max_discard_blocks = 16384;
 
 /*
  * Switch taskq at multiple of 512 MB offset. This can be set to a lower value
  * to utilize more threads for small files but may affect prefetch hits.
  */
 #define	ZVOL_TASKQ_OFFSET_SHIFT 29
 
 #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
 /* How long zvol_open() keeps retrying for spa_namespace_lock, in ms */
 static unsigned int zvol_open_timeout_ms = 1000;
 #endif
 
 static unsigned int zvol_threads = 0;
 #ifdef HAVE_BLK_MQ
 static unsigned int zvol_blk_mq_threads = 0;
 /* Used as nr_hw_queues when the blk-mq tag set is initialized */
 static unsigned int zvol_blk_mq_actual_threads;
 static boolean_t zvol_use_blk_mq = B_FALSE;
 
 /*
  * The maximum number of volblocksize blocks to process per thread.  Typically,
  * write heavy workloads perform better with higher values here, and read
  * heavy workloads perform better with lower values, but that's not a hard
  * and fast rule.  It's basically a knob to tune between "less overhead with
  * less parallelism" and "more overhead, but more parallelism".
  *
  * '8' was chosen as a reasonable, balanced, default based off of sequential
  * read and write tests to a zvol in an NVMe pool (with 16 CPUs).
  */
 static unsigned int zvol_blk_mq_blocks_per_thread = 8;
 #endif
 
 static unsigned int zvol_num_taskqs = 0;
 
 #ifndef	BLKDEV_DEFAULT_RQ
 /* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
 #define	BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
 #endif
 
 /*
  * Finalize our BIO or request.
  *
  * With HAVE_BLK_MQ, a non-NULL 'bio' is completed through BIO_END_IO();
  * otherwise 'rq' is completed through blk_mq_end_request(), with 'error'
  * translated by errno_to_bi_status().  Without HAVE_BLK_MQ only the bio
  * path exists.  The 'zv' argument is not used by either expansion.
  */
 #ifdef	HAVE_BLK_MQ
 #define	END_IO(zv, bio, rq, error)  do { \
 	if (bio) { \
 		BIO_END_IO(bio, error); \
 	} else { \
 		blk_mq_end_request(rq, errno_to_bi_status(error)); \
 	} \
 } while (0)
 #else
 #define	END_IO(zv, bio, rq, error)	BIO_END_IO(bio, error)
 #endif
 
 #ifdef HAVE_BLK_MQ
 static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
 static unsigned int zvol_actual_blk_mq_queue_depth;
 #endif
 
 /*
  * Linux-specific zvol state, reached from a zvol_state_t through zv_zso.
  */
 struct zvol_state_os {
 	struct gendisk		*zvo_disk;	/* generic disk */
 	struct request_queue	*zvo_queue;	/* request queue */
 	dev_t			zvo_dev;	/* device id */
 
 #ifdef HAVE_BLK_MQ
 	/* Filled in by zvol_blk_mq_alloc_tag_set() */
 	struct blk_mq_tag_set tag_set;
 #endif
 
 	/* Set from the global 'zvol_use_blk_mq' at zvol load */
 	boolean_t use_blk_mq;
 };
 
 /*
  * Set of taskqs used to process zvol requests.  zvol_request_impl() picks
  * an entry of tqs_taskq with an index computed modulo tqs_cnt.
  */
 typedef struct zv_taskq {
 	uint_t tqs_cnt;
 	taskq_t **tqs_taskq;
 } zv_taskq_t;
 static zv_taskq_t zvol_taskqs;
 static struct ida zvol_ida;
 
 /*
  * One I/O to process against a zvol.  Either 'bio' or 'rq' is set, matching
  * the contract documented on zvol_request_impl().
  */
 typedef struct zv_request_stack {
 	zvol_state_t	*zv;
 	struct bio	*bio;
 	struct request *rq;
 } zv_request_t;
 
 /* Pairs a request with a work_struct; not referenced in the code shown here */
 typedef struct zv_work {
 	struct request  *rq;
 	struct work_struct work;
 } zv_work_t;
 
 /*
  * Heap-allocated wrapper used to hand a zv_request_t to a taskq: the
  * request is stored by value next to the taskq entry used for dispatch.
  */
 typedef struct zv_request_task {
 	zv_request_t zvr;
 	taskq_ent_t	ent;
 } zv_request_task_t;
 
 /*
  * Allocate a taskq work item holding a by-value copy of 'zvr' and an
  * initialized taskq entry.  Release it with zv_request_task_free().
  */
 static zv_request_task_t *
 zv_request_task_create(zv_request_t zvr)
 {
 	zv_request_task_t *req_task =
 	    kmem_alloc(sizeof (zv_request_task_t), KM_SLEEP);
 
 	taskq_init_ent(&req_task->ent);
 	req_task->zvr = zvr;
 
 	return (req_task);
 }
 
 /* Release a work item obtained from zv_request_task_create(). */
 static void
 zv_request_task_free(zv_request_task_t *task)
 {
 	kmem_free(task, sizeof (zv_request_task_t));
 }
 
 #ifdef HAVE_BLK_MQ
 
 /*
  * blk-mq queue_rq handler, invoked for each new multiqueue request (a
  * request contains one or more BIOs).
  *
  * Passthrough (non-filesystem) requests are failed with BLK_STS_IOERR.
  * Everything else is handed to zvol_request_impl() and BLK_STS_OK is
  * returned to acknowledge receipt of the request.
  */
 static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
     const struct blk_mq_queue_data *bd)
 {
 	struct request *req = bd->rq;
 	zvol_state_t *zv = req->q->queuedata;
 
 	/* Tell the kernel that processing of this request has begun */
 	blk_mq_start_request(req);
 
 	if (!blk_rq_is_passthrough(req)) {
 		zvol_request_impl(zv, NULL, req, 0);
 		return (BLK_STS_OK);
 	}
 
 	/* Not a filesystem request: reject it */
 	blk_mq_end_request(req, BLK_STS_IOERR);
 	return (BLK_STS_IOERR);
 }
 
 /* blk-mq operations for zvols; only the queue_rq handler is provided */
 static struct blk_mq_ops zvol_blk_mq_queue_ops = {
 	.queue_rq = zvol_mq_queue_rq,
 };
 
 /*
  * Populate this zvol's blk-mq tag set and hand it to
  * blk_mq_alloc_tag_set(), whose return value is passed back unchanged.
  */
 static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv)
 {
 	struct blk_mq_tag_set *set = &zv->zv_zso->tag_set;
 
 	memset(set, 0, sizeof (*set));
 
 	set->ops = &zvol_blk_mq_queue_ops;
 	set->nr_hw_queues = zvol_blk_mq_actual_threads;
 	set->queue_depth = zvol_actual_blk_mq_queue_depth;
 	set->numa_node = NUMA_NO_NODE;
 	set->cmd_size = 0;
 	set->driver_data = zv;
 
 	/*
 	 * BLK_MQ_F_BLOCKING is required because zvol_request_impl() makes
 	 * blocking calls.
 	 */
 	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 
 	return (blk_mq_alloc_tag_set(set));
 }
 #endif /* HAVE_BLK_MQ */
 
 /*
  * Report whether 'path' names a ZVOL.  Returns B_TRUE when
  * vdev_lookup_bdev() resolves the path and the resulting device has major
  * number zvol_major; returns B_FALSE otherwise, including when the lookup
  * fails.
  */
 boolean_t
 zvol_os_is_zvol(const char *path)
 {
 	dev_t devid = 0;
 	boolean_t is_zvol = B_FALSE;
 
 	if (vdev_lookup_bdev(path, &devid) == 0 &&
 	    MAJOR(devid) == zvol_major)
 		is_zvol = B_TRUE;
 
 	return (is_zvol);
 }
 
 /*
  * Perform the write described by 'zvr' and complete the bio/request.
  *
  * zv_suspend_lock is released here on every path (zvol_request_impl()
  * acquires it before calling or dispatching this function).  The I/O is
  * completed through END_IO() with the negated error, or 0 on success.
  */
 static void
 zvol_write(zv_request_t *zvr)
 {
 	struct bio *bio = zvr->bio;
 	struct request *rq = zvr->rq;
 	int error = 0;
 	zfs_uio_t uio;
 	zvol_state_t *zv = zvr->zv;
 	struct request_queue *q;
 	struct gendisk *disk;
 	unsigned long start_time = 0;
 	boolean_t acct = B_FALSE;
 
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
 	ASSERT3P(zv->zv_zilog, !=, NULL);
 
 	q = zv->zv_zso->zvo_queue;
 	disk = zv->zv_zso->zvo_disk;
 
 	/* bio marked as FLUSH need to flush before write */
 	if (io_is_flush(bio, rq))
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
 	/* Some requests are just for flush and nothing else. */
 	if (io_size(bio, rq) == 0) {
 		rw_exit(&zv->zv_suspend_lock);
 		END_IO(zv, bio, rq, 0);
 		return;
 	}
 
 	zfs_uio_bvec_init(&uio, bio, rq);
 
 	/* Remembered so the number of bytes written can be computed below */
 	ssize_t start_resid = uio.uio_resid;
 
 	/*
 	 * With use_blk_mq, accounting is done by blk_mq_start_request()
 	 * and blk_mq_end_request(), so we can skip it here.
 	 */
 	if (bio) {
 		acct = blk_queue_io_stat(q);
 		if (acct) {
 			start_time = blk_generic_start_io_acct(q, disk, WRITE,
 			    bio);
 		}
 	}
 
 	/* Commit the ZIL before completing if FUA is set or sync=always */
 	boolean_t sync =
 	    io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
 
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
 	    uio.uio_loffset, uio.uio_resid, RL_WRITER);
 
 	uint64_t volsize = zv->zv_volsize;
 	/* Write in pieces of at most DMU_MAX_ACCESS / 2 bytes, one tx each */
 	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
 		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
 		uint64_t off = uio.uio_loffset;
 		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
 
 		if (bytes > volsize - off)	/* don't write past the end */
 			bytes = volsize - off;
 
 		dmu_tx_hold_write_by_dnode(tx, zv->zv_dn, off, bytes);
 
 		/* This will only fail for ENOSPC */
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 			break;
 		}
 		error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
 		if (error == 0) {
 			zvol_log_write(zv, tx, off, bytes, sync);
 		}
 		/* The tx is committed even when the write itself failed */
 		dmu_tx_commit(tx);
 
 		if (error)
 			break;
 	}
 	zfs_rangelock_exit(lr);
 
 	int64_t nwritten = start_resid - uio.uio_resid;
 	dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten);
 	task_io_account_write(nwritten);
 
 	if (sync)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
 	rw_exit(&zv->zv_suspend_lock);
 
 	if (bio && acct) {
 		blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
 	}
 
 	END_IO(zv, bio, rq, -error);
 }
 
 static void
 zvol_write_task(void *arg)
 {
 	zv_request_task_t *task = arg;
 	zvol_write(&task->zvr);
 	zv_request_task_free(task);
 }
 
 /*
  * Perform the discard / secure erase described by 'zvr' and complete the
  * bio/request.
  *
  * A range extending past the end of the volume fails with EIO.  Unless a
  * secure erase was requested, the range is shrunk inward to volblocksize
  * boundaries, and a range that becomes empty completes successfully
  * without freeing anything.  zv_suspend_lock is released here on every
  * path, and the I/O is completed through END_IO() with the negated error.
  */
 static void
 zvol_discard(zv_request_t *zvr)
 {
 	struct bio *bio = zvr->bio;
 	struct request *rq = zvr->rq;
 	zvol_state_t *zv = zvr->zv;
 	uint64_t start = io_offset(bio, rq);
 	uint64_t size = io_size(bio, rq);
 	uint64_t end = start + size;
 	boolean_t sync;
 	int error = 0;
 	dmu_tx_t *tx;
 	struct request_queue *q = zv->zv_zso->zvo_queue;
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 	unsigned long start_time = 0;
 	boolean_t acct = B_FALSE;
 
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
 	ASSERT3P(zv->zv_zilog, !=, NULL);
 
 	/* Accounting is only done here for bios; discards count as WRITE */
 	if (bio) {
 		acct = blk_queue_io_stat(q);
 		if (acct) {
 			start_time = blk_generic_start_io_acct(q, disk, WRITE,
 			    bio);
 		}
 	}
 
 	sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
 
 	if (end > zv->zv_volsize) {
 		error = SET_ERROR(EIO);
 		goto unlock;
 	}
 
 	/*
 	 * Align the request to volume block boundaries when a secure erase is
 	 * not required.  This will prevent dnode_free_range() from zeroing out
 	 * the unaligned parts which is slow (read-modify-write) and useless
 	 * since we are not freeing any space by doing so.
 	 */
 	if (!io_is_secure_erase(bio, rq)) {
 		start = P2ROUNDUP(start, zv->zv_volblocksize);
 		end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t);
 		size = end - start;
 	}
 
 	/* Nothing left to free after alignment; finish with error == 0 */
 	if (start >= end)
 		goto unlock;
 
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
 	    start, size, RL_WRITER);
 
 	tx = dmu_tx_create(zv->zv_objset);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error != 0) {
 		dmu_tx_abort(tx);
 	} else {
 		/* Log the truncate in this tx, then free the range itself */
 		zvol_log_truncate(zv, tx, start, size);
 		dmu_tx_commit(tx);
 		error = dmu_free_long_range(zv->zv_objset,
 		    ZVOL_OBJ, start, size);
 	}
 	zfs_rangelock_exit(lr);
 
 	if (error == 0 && sync)
 		zil_commit(zv->zv_zilog, ZVOL_OBJ);
 
 unlock:
 	rw_exit(&zv->zv_suspend_lock);
 
 	if (bio && acct) {
 		blk_generic_end_io_acct(q, disk, WRITE, bio,
 		    start_time);
 	}
 
 	END_IO(zv, bio, rq, -error);
 }
 
 static void
 zvol_discard_task(void *arg)
 {
 	zv_request_task_t *task = arg;
 	zvol_discard(&task->zvr);
 	zv_request_task_free(task);
 }
 
 /*
  * Perform the read described by 'zvr' and complete the bio/request.
  *
  * zv_suspend_lock is released here (zvol_request_impl() acquires it before
  * calling or dispatching this function).  The I/O is completed through
  * END_IO() with the negated error; ECKSUM is reported as EIO.
  */
 static void
 zvol_read(zv_request_t *zvr)
 {
 	struct bio *bio = zvr->bio;
 	struct request *rq = zvr->rq;
 	int error = 0;
 	zfs_uio_t uio;
 	boolean_t acct = B_FALSE;
 	zvol_state_t *zv = zvr->zv;
 	struct request_queue *q;
 	struct gendisk *disk;
 	unsigned long start_time = 0;
 
 	ASSERT3P(zv, !=, NULL);
 	ASSERT3U(zv->zv_open_count, >, 0);
 
 	zfs_uio_bvec_init(&uio, bio, rq);
 
 	q = zv->zv_zso->zvo_queue;
 	disk = zv->zv_zso->zvo_disk;
 
 	/* Remembered so the number of bytes read can be computed below */
 	ssize_t start_resid = uio.uio_resid;
 
 	/*
 	 * When blk-mq is being used, accounting is done by
 	 * blk_mq_start_request() and blk_mq_end_request().
 	 */
 	if (bio) {
 		acct = blk_queue_io_stat(q);
 		if (acct)
 			start_time = blk_generic_start_io_acct(q, disk, READ,
 			    bio);
 	}
 
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
 	    uio.uio_loffset, uio.uio_resid, RL_READER);
 
 	uint64_t volsize = zv->zv_volsize;
 
 	/* Read in pieces of at most DMU_MAX_ACCESS / 2 bytes */
 	while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
 		uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
 
 		/* don't read past the end */
 		if (bytes > volsize - uio.uio_loffset)
 			bytes = volsize - uio.uio_loffset;
 
 		error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
 				error = SET_ERROR(EIO);
 			break;
 		}
 	}
 	zfs_rangelock_exit(lr);
 
 	int64_t nread = start_resid - uio.uio_resid;
 	dataset_kstats_update_read_kstats(&zv->zv_kstat, nread);
 	task_io_account_read(nread);
 
 	rw_exit(&zv->zv_suspend_lock);
 
 	if (bio && acct) {
 		blk_generic_end_io_acct(q, disk, READ, bio, start_time);
 	}
 
 	END_IO(zv, bio, rq, -error);
 }
 
 static void
 zvol_read_task(void *arg)
 {
 	zv_request_task_t *task = arg;
 	zvol_read(&task->zvr);
 	zv_request_task_free(task);
 }
 
 
 /*
  * Process a BIO or request
  *
  * Either 'bio' or 'rq' should be set depending on if we are processing a
  * bio or a request (both should not be set).
  *
  * force_sync:	Set to 0 to defer processing to a background taskq
  *			Set to 1 to process data synchronously
  *
  * Requests rejected here are completed immediately through END_IO():
  * ENXIO when the zvol is being removed, EIO for a data access past the end
  * of the volume, EROFS for a write to a read-only zvol.  Zero-length reads
  * complete successfully without further work.  Everything else is passed
  * to zvol_read(), zvol_write() or zvol_discard(), either directly or via a
  * taskq, with zv_suspend_lock held as reader; those functions release it.
  */
 static void
 zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
     boolean_t force_sync)
 {
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 	uint64_t offset = io_offset(bio, rq);
 	uint64_t size = io_size(bio, rq);
 	int rw = io_data_dir(bio, rq);
 
 	if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {
 		END_IO(zv, bio, rq, -SET_ERROR(ENXIO));
 		goto out;
 	}
 
 	/* The module parameter or per-zvol threading setting may force sync */
 	if (zvol_request_sync || zv->zv_threading == B_FALSE)
 		force_sync = 1;
 
 	zv_request_t zvr = {
 		.zv = zv,
 		.bio = bio,
 		.rq = rq,
 	};
 
 	if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) {
 		printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n",
 		    zv->zv_zso->zvo_disk->disk_name,
 		    (long long unsigned)offset,
 		    (long unsigned)size);
 
 		END_IO(zv, bio, rq, -SET_ERROR(EIO));
 		goto out;
 	}
 
 	zv_request_task_t *task;
 	zv_taskq_t *ztqs = &zvol_taskqs;
 	uint_t blk_mq_hw_queue = 0;
 	uint_t tq_idx;
 	uint_t taskq_hash;
 #ifdef HAVE_BLK_MQ
 	if (rq)
 #ifdef HAVE_BLK_MQ_RQ_HCTX
 		blk_mq_hw_queue = rq->mq_hctx->queue_num;
 #else
 		blk_mq_hw_queue =
 		    rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
 #endif
 #endif
 	/*
 	 * Choose a taskq by hashing the zvol pointer, the offset shifted
 	 * right by ZVOL_TASKQ_OFFSET_SHIFT, and the blk-mq hardware queue
 	 * number (0 for bios).
 	 */
 	taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
 	    blk_mq_hw_queue, 0);
 	tq_idx = taskq_hash % ztqs->tqs_cnt;
 
 	if (rw == WRITE) {
 		if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
 			END_IO(zv, bio, rq, -SET_ERROR(EROFS));
 			goto out;
 		}
 
 		/*
 		 * Prevents the zvol from being suspended, or the ZIL being
 		 * concurrently opened.  Will be released after the i/o
 		 * completes.
 		 */
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/*
 		 * Open a ZIL if this is the first time we have written to this
 		 * zvol. We protect zv->zv_zilog with zv_suspend_lock rather
 		 * than zv_state_lock so that we don't need to acquire an
 		 * additional lock in this path.
 		 */
 		if (zv->zv_zilog == NULL) {
 			/* Retake the lock as writer, then recheck zv_zilog */
 			rw_exit(&zv->zv_suspend_lock);
 			rw_enter(&zv->zv_suspend_lock, RW_WRITER);
 			if (zv->zv_zilog == NULL) {
 				zv->zv_zilog = zil_open(zv->zv_objset,
 				    zvol_get_data, &zv->zv_kstat.dk_zil_sums);
 				zv->zv_flags |= ZVOL_WRITTEN_TO;
 				/* replay / destroy done in zvol_create_minor */
 				VERIFY0((zv->zv_zilog->zl_header->zh_flags &
 				    ZIL_REPLAY_NEEDED));
 			}
 			rw_downgrade(&zv->zv_suspend_lock);
 		}
 
 		/*
 		 * We don't want this thread to be blocked waiting for i/o to
 		 * complete, so we instead wait from a taskq callback. The
 		 * i/o may be a ZIL write (via zil_commit()), or a read of an
 		 * indirect block, or a read of a data block (if this is a
 		 * partial-block write).  We will indicate that the i/o is
 		 * complete by calling END_IO() from the taskq callback.
 		 *
 		 * This design allows the calling thread to continue and
 		 * initiate more concurrent operations by calling
 		 * zvol_request() again. There are typically only a small
 		 * number of threads available to call zvol_request() (e.g.
 		 * one per iSCSI target), so keeping the latency of
 		 * zvol_request() low is important for performance.
 		 *
 		 * The zvol_request_sync module parameter allows this
 		 * behavior to be altered, for performance evaluation
 		 * purposes.  If the callback blocks, setting
 		 * zvol_request_sync=1 will result in much worse performance.
 		 *
 		 * We can have up to zvol_threads concurrent i/o's being
 		 * processed for all zvols on the system.  This is typically
 		 * a vast improvement over the zvol_request_sync=1 behavior
 		 * of one i/o at a time per zvol.  However, an even better
 		 * design would be for zvol_request() to initiate the zio
 		 * directly, and then be notified by the zio_done callback,
 		 * which would call END_IO().  Unfortunately, the DMU/ZIL
 		 * interfaces lack this functionality (they block waiting for
 		 * the i/o to complete).
 		 */
 		if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) {
 			if (force_sync) {
 				zvol_discard(&zvr);
 			} else {
 				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 				    zvol_discard_task, task, 0, &task->ent);
 			}
 		} else {
 			if (force_sync) {
 				zvol_write(&zvr);
 			} else {
 				task = zv_request_task_create(zvr);
 				taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 				    zvol_write_task, task, 0, &task->ent);
 			}
 		}
 	} else {
 		/*
 		 * The SCST driver, and possibly others, may issue READ I/Os
 		 * with a length of zero bytes.  These empty I/Os contain no
 		 * data and require no additional handling.
 		 */
 		if (size == 0) {
 			END_IO(zv, bio, rq, 0);
 			goto out;
 		}
 
 		/* Released by zvol_read() once the I/O has been performed */
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		/* See comment in WRITE case above. */
 		if (force_sync) {
 			zvol_read(&zvr);
 		} else {
 			task = zv_request_task_create(zvr);
 			taskq_dispatch_ent(ztqs->tqs_taskq[tq_idx],
 			    zvol_read_task, task, 0, &task->ent);
 		}
 	}
 
 out:
 	spl_fstrans_unmark(cookie);
 }
 
 /*
  * Bio entry point.  Name, parameters and return type are chosen by the
  * preprocessor: zvol_submit_bio(bio) when submit_bio is a member of
  * block_device_operations (returning void or blk_qc_t), otherwise
  * zvol_request(q, bio).  All variants find the zvol through the queue's
  * queuedata and pass the bio to zvol_request_impl() with force_sync = 0.
  */
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
 #ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
 static void
 zvol_submit_bio(struct bio *bio)
 #else
 static blk_qc_t
 zvol_submit_bio(struct bio *bio)
 #endif
 #else
 static MAKE_REQUEST_FN_RET
 zvol_request(struct request_queue *q, struct bio *bio)
 #endif
 {
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
 	/* No queue argument in this variant; derive it from the bio's disk */
 #if defined(HAVE_BIO_BDEV_DISK)
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 #else
 	struct request_queue *q = bio->bi_disk->queue;
 #endif
 #endif
 	zvol_state_t *zv = q->queuedata;
 
 	zvol_request_impl(zv, bio, NULL, 0);
 	/* Only the variants with a non-void return type return a value */
 #if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
 	defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
 	!defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID)
 	return (BLK_QC_T_NONE);
 #endif
 }
 
 /*
  * block_device_operations open handler.
  *
  * Looks up the zvol from the disk's private_data under zvol_state_lock,
  * runs zvol_first_open() when zv_open_count is zero, and increments
  * zv_open_count on success.
  *
  * Returns 0 or a negative errno: -ENXIO if the zvol is gone or being
  * removed, -EROFS for a write open of a read-only zvol, -ERESTARTSYS if
  * spa_namespace_lock could not be taken, or the negated error from
  * zvol_first_open().
  */
 static int
 #ifdef HAVE_BLK_MODE_T
 zvol_open(struct gendisk *disk, blk_mode_t flag)
 #else
 zvol_open(struct block_device *bdev, fmode_t flag)
 #endif
 {
 	zvol_state_t *zv;
 	int error = 0;
 	boolean_t drop_suspend = B_FALSE;
 #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
 	hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms);
 	hrtime_t start = gethrtime();
 
 retry:
 #endif
 	rw_enter(&zvol_state_lock, RW_READER);
 	/*
 	 * Obtain a copy of private_data under the zvol_state_lock to make
 	 * sure that either the result of zvol free code path setting
 	 * disk->private_data to NULL is observed, or zvol_os_free()
 	 * is not called on this zv because of the positive zv_open_count.
 	 */
 #ifdef HAVE_BLK_MODE_T
 	zv = disk->private_data;
 #else
 	zv = bdev->bd_disk->private_data;
 #endif
 	if (zv == NULL) {
 		rw_exit(&zvol_state_lock);
 		return (-SET_ERROR(ENXIO));
 	}
 
 	mutex_enter(&zv->zv_state_lock);
 
 	if (unlikely(zv->zv_flags & ZVOL_REMOVING)) {
 		mutex_exit(&zv->zv_state_lock);
 		rw_exit(&zvol_state_lock);
 		return (-SET_ERROR(ENXIO));
 	}
 
 	/*
 	 * Make sure zvol is not suspended during first open
 	 * (hold zv_suspend_lock) and respect proper lock acquisition
 	 * ordering - zv_suspend_lock before zv_state_lock
 	 */
 	if (zv->zv_open_count == 0) {
 		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
 			mutex_exit(&zv->zv_state_lock);
 			rw_enter(&zv->zv_suspend_lock, RW_READER);
 			mutex_enter(&zv->zv_state_lock);
 			/* check to see if zv_suspend_lock is needed */
 			if (zv->zv_open_count != 0) {
 				rw_exit(&zv->zv_suspend_lock);
 			} else {
 				drop_suspend = B_TRUE;
 			}
 		} else {
 			drop_suspend = B_TRUE;
 		}
 	}
 	rw_exit(&zvol_state_lock);
 
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 
 	if (zv->zv_open_count == 0) {
 		boolean_t drop_namespace = B_FALSE;
 
 		ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 
 		/*
 		 * In all other call paths the spa_namespace_lock is taken
 		 * before the bdev->bd_mutex lock.  However, on open(2)
 		 * the __blkdev_get() function calls fops->open() with the
 		 * bdev->bd_mutex lock held.  This can result in a deadlock
 		 * when zvols from one pool are used as vdevs in another.
 		 *
 		 * To prevent a lock inversion deadlock we preemptively
 		 * take the spa_namespace_lock.  Normally the lock will not
 		 * be contended and this is safe because spa_open_common()
 		 * handles the case where the caller already holds the
 		 * spa_namespace_lock.
 		 *
 		 * When the lock cannot be acquired after multiple retries
 		 * this must be the vdev on zvol deadlock case and we have
 		 * no choice but to return an error.  For 5.12 and older
 		 * kernels returning -ERESTARTSYS will result in the
 		 * bdev->bd_mutex being dropped, then reacquired, and
 		 * fops->open() being called again.  This process can be
 		 * repeated safely until both locks are acquired.  For 5.13
 		 * and newer the -ERESTARTSYS retry logic was removed from
 		 * the kernel so the only option is to return the error for
 		 * the caller to handle it.
 		 */
 		if (!mutex_owned(&spa_namespace_lock)) {
 			if (!mutex_tryenter(&spa_namespace_lock)) {
 				/* Drop every lock held before retrying */
 				mutex_exit(&zv->zv_state_lock);
 				rw_exit(&zv->zv_suspend_lock);
 				drop_suspend = B_FALSE;
 
 #ifdef HAVE_BLKDEV_GET_ERESTARTSYS
 				schedule();
 				return (-SET_ERROR(ERESTARTSYS));
 #else
 				if ((gethrtime() - start) > timeout)
 					return (-SET_ERROR(ERESTARTSYS));
 
 				schedule_timeout_interruptible(
 					MSEC_TO_TICK(10));
 				goto retry;
 #endif
 			} else {
 				drop_namespace = B_TRUE;
 			}
 		}
 
 		error = -zvol_first_open(zv, !(blk_mode_is_open_write(flag)));
 
 		if (drop_namespace)
 			mutex_exit(&spa_namespace_lock);
 	}
 
 	if (error == 0) {
 		if ((blk_mode_is_open_write(flag)) &&
 		    (zv->zv_flags & ZVOL_RDONLY)) {
 			/* Undo the first open performed just above */
 			if (zv->zv_open_count == 0)
 				zvol_last_close(zv);
 
 			error = -SET_ERROR(EROFS);
 		} else {
 			zv->zv_open_count++;
 		}
 	}
 
 	mutex_exit(&zv->zv_state_lock);
 	if (drop_suspend)
 		rw_exit(&zv->zv_suspend_lock);
 
 	if (error == 0)
 #ifdef HAVE_BLK_MODE_T
 		disk_check_media_change(disk);
 #else
 		zfs_check_media_change(bdev);
 #endif
 
 	return (error);
 }
 
 /*
  * block_device_operations release handler: drop one open reference on the
  * zvol attached to 'disk' and call zvol_last_close() when the count
  * reaches zero.  The second argument, where the kernel passes one, is
  * ignored.
  */
 static void
 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG
 zvol_release(struct gendisk *disk)
 #else
 zvol_release(struct gendisk *disk, fmode_t unused)
 #endif
 {
 #if !defined(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG)
 	(void) unused;
 #endif
 	zvol_state_t *zv;
 	boolean_t drop_suspend = B_TRUE;
 
 	rw_enter(&zvol_state_lock, RW_READER);
 	zv = disk->private_data;
 
 	mutex_enter(&zv->zv_state_lock);
 	ASSERT3U(zv->zv_open_count, >, 0);
 	/*
 	 * make sure zvol is not suspended during last close
 	 * (hold zv_suspend_lock) and respect proper lock acquisition
 	 * ordering - zv_suspend_lock before zv_state_lock
 	 */
 	if (zv->zv_open_count == 1) {
 		if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
 			mutex_exit(&zv->zv_state_lock);
 			rw_enter(&zv->zv_suspend_lock, RW_READER);
 			mutex_enter(&zv->zv_state_lock);
 			/* check to see if zv_suspend_lock is needed */
 			if (zv->zv_open_count != 1) {
 				rw_exit(&zv->zv_suspend_lock);
 				drop_suspend = B_FALSE;
 			}
 		}
 	} else {
 		/* Not the last close, so zv_suspend_lock was never taken */
 		drop_suspend = B_FALSE;
 	}
 	rw_exit(&zvol_state_lock);
 
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 
 	zv->zv_open_count--;
 	if (zv->zv_open_count == 0) {
 		ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 		zvol_last_close(zv);
 	}
 
 	mutex_exit(&zv->zv_state_lock);
 
 	if (drop_suspend)
 		rw_exit(&zv->zv_suspend_lock);
 }
 
 /*
  * block_device_operations ioctl handler.
  *
  * BLKFLSBUF: sync and invalidate the block device, then (unless the zvol
  *            is read-only) wait for the pool to sync.
  * BLKZNAME:  copy zv_name (MAXNAMELEN bytes) to the user buffer at 'arg'.
  * Any other command yields -ENOTTY.
  *
  * Returns 0 on success or a negative errno.
  */
 static int
 zvol_ioctl(struct block_device *bdev, fmode_t mode,
     unsigned int cmd, unsigned long arg)
 {
 	zvol_state_t *zv = bdev->bd_disk->private_data;
 	int error = 0;
 
 	ASSERT3U(zv->zv_open_count, >, 0);
 
 	switch (cmd) {
 	case BLKFLSBUF:
 #ifdef HAVE_FSYNC_BDEV
 		fsync_bdev(bdev);
 #elif defined(HAVE_SYNC_BLOCKDEV)
 		sync_blockdev(bdev);
 #else
 #error "Neither fsync_bdev() nor sync_blockdev() found"
 #endif
 		invalidate_bdev(bdev);
 		rw_enter(&zv->zv_suspend_lock, RW_READER);
 
 		if (!(zv->zv_flags & ZVOL_RDONLY))
 			txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
 
 		rw_exit(&zv->zv_suspend_lock);
 		break;
 
 	case BLKZNAME:
 		mutex_enter(&zv->zv_state_lock);
 		/*
 		 * copy_to_user() returns the number of bytes it could NOT
 		 * copy, not an errno.  Report any shortfall as -EFAULT rather
 		 * than leaking a positive byte count to the caller.
 		 */
 		if (copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN) != 0)
 			error = -EFAULT;
 		mutex_exit(&zv->zv_state_lock);
 		break;
 
 	default:
 		error = -ENOTTY;
 		break;
 	}
 
 	return (SET_ERROR(error));
 }
 
 /*
  * compat_ioctl handler.  With CONFIG_COMPAT it forwards all arguments to
  * zvol_ioctl() unchanged; otherwise the name expands to NULL so the ops
  * tables below can reference it unconditionally.
  */
 #ifdef CONFIG_COMPAT
 static int
 zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
     unsigned cmd, unsigned long arg)
 {
 	return (zvol_ioctl(bdev, mode, cmd, arg));
 }
 #else
 #define	zvol_compat_ioctl	NULL
 #endif
 
 /*
  * check_events handler.  Returns DISK_EVENT_MEDIA_CHANGE if the zvol's
  * zv_changed flag was set, and clears the flag; returns 0 otherwise,
  * including when the disk no longer has a zvol attached.  'clearing' is
  * not consulted.
  */
 static unsigned int
 zvol_check_events(struct gendisk *disk, unsigned int clearing)
 {
 	unsigned int events = 0;
 	zvol_state_t *zv;
 
 	rw_enter(&zvol_state_lock, RW_READER);
 
 	zv = disk->private_data;
 	if (zv != NULL) {
 		mutex_enter(&zv->zv_state_lock);
 		if (zv->zv_changed)
 			events = DISK_EVENT_MEDIA_CHANGE;
 		zv->zv_changed = 0;
 		mutex_exit(&zv->zv_state_lock);
 	}
 
 	rw_exit(&zvol_state_lock);
 
 	return (events);
 }
 
 /*
  * Set the capacity of the zvol's disk to zv_volsize expressed in sectors
  * (zv_volsize >> SECTOR_BITS).  Does nothing when the disk has no zvol
  * attached.  Always returns 0.
  */
 static int
 zvol_revalidate_disk(struct gendisk *disk)
 {
 	zvol_state_t *zv;
 
 	rw_enter(&zvol_state_lock, RW_READER);
 
 	zv = disk->private_data;
 	if (zv == NULL) {
 		rw_exit(&zvol_state_lock);
 		return (0);
 	}
 
 	mutex_enter(&zv->zv_state_lock);
 	set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> SECTOR_BITS);
 	mutex_exit(&zv->zv_state_lock);
 
 	rw_exit(&zvol_state_lock);
 
 	return (0);
 }
 
 /*
  * Propagate a volume size change to the block layer, using whichever
  * revalidation interface the kernel provides.
  *
  * The 'volsize' argument is not read here: the capacity that gets applied
  * is derived from zv->zv_volsize inside zvol_revalidate_disk().  Always
  * returns 0.
  */
 int
 zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize)
 {
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 
 #if defined(HAVE_REVALIDATE_DISK_SIZE)
 	revalidate_disk_size(disk, zvol_revalidate_disk(disk) == 0);
 #elif defined(HAVE_REVALIDATE_DISK)
 	revalidate_disk(disk);
 #else
 	zvol_revalidate_disk(disk);
 #endif
 	return (0);
 }
 
 /*
  * Detach the zvol from its gendisk by clearing the disk's private_data
  * pointer.
  */
 void
 zvol_os_clear_private(zvol_state_t *zv)
 {
 	struct gendisk *disk = zv->zv_zso->zvo_disk;
 
 	/*
 	 * Cleared while holding zvol_state_lock as a writer
 	 * which will prevent zvol_open() from opening it.
 	 */
 	disk->private_data = NULL;
 }
 
 /*
  * Report a simple virtual geometry for legacy compatibility.  Devices of
  * up to 2048 sectors (the 1 MiB threshold) get a small head and sector
  * count so that very tiny devices are still representable; larger devices
  * get a standard head and sector count to keep the number of cylinders
  * reasonable.  Always returns 0.
  */
 static int
 zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	zvol_state_t *zv = bdev->bd_disk->private_data;
 	sector_t nsectors;
 
 	ASSERT3U(zv->zv_open_count, >, 0);
 
 	nsectors = get_capacity(zv->zv_zso->zvo_disk);
 
 	if (nsectors <= 2048) {
 		geo->heads = 2;
 		geo->sectors = 4;
 	} else {
 		geo->heads = 16;
 		geo->sectors = 63;
 	}
 
 	geo->cylinders = nsectors / (geo->heads * geo->sectors);
 	geo->start = 0;
 
 	return (0);
 }
 
 /*
  * Why have two separate block_device_operations structs?
  *
  * Normally we'd just have one, and assign 'submit_bio' as needed.  However,
  * it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we
  * can't just change submit_bio dynamically at runtime.  So just create two
  * separate structs to get around this.
  *
  * The two tables are identical except that zvol_ops also sets .submit_bio
  * when HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS is defined.
  */
 static const struct block_device_operations zvol_ops_blk_mq = {
 	.open			= zvol_open,
 	.release		= zvol_release,
 	.ioctl			= zvol_ioctl,
 	.compat_ioctl		= zvol_compat_ioctl,
 	.check_events		= zvol_check_events,
 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 	.revalidate_disk	= zvol_revalidate_disk,
 #endif
 	.getgeo			= zvol_getgeo,
 	.owner			= THIS_MODULE,
 };
 
 static const struct block_device_operations zvol_ops = {
 	.open			= zvol_open,
 	.release		= zvol_release,
 	.ioctl			= zvol_ioctl,
 	.compat_ioctl		= zvol_compat_ioctl,
 	.check_events		= zvol_check_events,
 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
 	.revalidate_disk	= zvol_revalidate_disk,
 #endif
 	.getgeo			= zvol_getgeo,
 	.owner			= THIS_MODULE,
 #ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
 	.submit_bio		= zvol_submit_bio,
 #endif
 };
 
+/*
+ * Since 6.9, Linux has been removing queue limit setters in favour of an
+ * initial queue_limits struct applied when the device is open. Since 6.11,
+ * queue_limits is being extended to allow more things to be applied when the
+ * device is open. Setters are also being removed for this.
+ *
+ * For OpenZFS, this means that depending on kernel version, some options may
+ * be set up before the device is open, and some applied to an open device
+ * (queue) after the fact.
+ *
+ * We manage this complexity by having our own limits struct,
+ * zvol_queue_limits_t, in which we carry any queue config that we're
+ * interested in setting. This structure is the same on all kernels.
+ *
+ * These limits are then applied to the queue at device open time by the most
+ * appropriate method for the kernel.
+ *
+ * zvol_queue_limits_convert() is used on 6.9+ (where the two-arg form of
+ * blk_alloc_disk() exists). This converts our limits struct to a proper Linux
+ * struct queue_limits, and passes it in. Any fields added in later kernels are
+ * (obviously) not set up here.
+ *
+ * zvol_queue_limits_apply() is called on all kernel versions after the queue
+ * is created, and applies any remaining config. Before 6.9 that will be
+ * everything, via setter methods. After 6.9 that will be whatever couldn't be
+ * put into struct queue_limits. (This implies that zvol_queue_limits_apply()
+ * will always be a no-op on the latest kernel we support).
+ */
 typedef struct zvol_queue_limits {
 	unsigned int	zql_max_hw_sectors;	/* from DMU_MAX_ACCESS */
 	unsigned short	zql_max_segments;	/* segments per request */
 	unsigned int	zql_max_segment_size;	/* bytes per segment */
 	unsigned int	zql_io_opt;		/* set to volblocksize */
+	unsigned int	zql_physical_block_size; /* set to volblocksize */
+	unsigned int	zql_max_discard_sectors; /* discard cap, sectors */
+	unsigned int	zql_discard_granularity; /* set to volblocksize */
 } zvol_queue_limits_t;
 
 /*
  * Fill in 'limits' with the queue configuration wanted for 'zv'.
  *
  * limits      output; every field is written.
  * zv          zvol whose volblocksize drives the block-size based limits.
  * use_blk_mq  B_TRUE to size requests for the blk-mq path.  Only honoured
  *             when HAVE_BLK_MQ is defined.
  */
 static void
 zvol_queue_limits_init(zvol_queue_limits_t *limits, zvol_state_t *zv,
     boolean_t use_blk_mq)
 {
 	limits->zql_max_hw_sectors = (DMU_MAX_ACCESS / 4) >> 9;
 
 	/*
 	 * Start from "max everything out".  This is the final answer for the
 	 * non-blk-mq path and for the special case
 	 * zvol_blk_mq_blocks_per_thread = 0.  Setting it unconditionally
 	 * also guarantees both fields are initialized when HAVE_BLK_MQ is
 	 * not defined and use_blk_mq is B_FALSE; previously the preprocessor
 	 * removed the 'else' branch in that configuration and the fields
 	 * were left untouched.
 	 */
 	limits->zql_max_segments = UINT16_MAX;
 	limits->zql_max_segment_size = UINT_MAX;
 
 #ifdef HAVE_BLK_MQ
 	if (use_blk_mq && zvol_blk_mq_blocks_per_thread != 0) {
 		/*
 		 * IO requests can be really big (1MB).  When an IO request
 		 * comes in, it is passed off to zvol_read() or zvol_write()
 		 * in a new thread, where it is chunked up into 'volblocksize'
 		 * sized pieces and processed.  So for example, if the request
 		 * is a 1MB write and your volblocksize is 128k, one zvol_write
 		 * thread will take that request and sequentially do eight
 		 * 128k IOs.  This is due to the fact that the thread needs to
 		 * lock each volblocksize sized block.  So you might be
 		 * wondering: "instead of passing the whole 1MB request to one
 		 * thread, why not pass eight individual 128k chunks to eight
 		 * threads and process the whole write in parallel?"  The
 		 * short answer is that there's a sweet spot number of chunks
 		 * that balances the greater parallelism with the added
 		 * overhead of more threads. The sweet spot can be different
 		 * depending on if you have a read or write heavy workload.
 		 * Writes typically want high chunk counts while reads
 		 * typically want lower ones.  On a test pool with 6 NVMe
 		 * drives in a 3x 2-disk mirror configuration, with
 		 * volblocksize=8k, the sweet spot for good sequential reads
 		 * and writes was at 8 chunks.
 		 */
 
 		/*
 		 * Below we tell the kernel how big we want our requests
 		 * to be.  You would think that blk_queue_io_opt() would be
 		 * used to do this since it is used to "set optimal request
 		 * size for the queue", but that doesn't seem to do
 		 * anything - the kernel still gives you huge requests
 		 * with tons of little PAGE_SIZE segments contained within it.
 		 *
 		 * Knowing that the kernel will just give you PAGE_SIZE segments
 		 * no matter what, you can say "ok, I want PAGE_SIZE byte
 		 * segments, and I want 'N' of them per request", where N is
 		 * the correct number of segments for the volblocksize and
 		 * number of chunks you want.
 		 */
 		unsigned int chunks;
 		chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX);
 
 		limits->zql_max_segment_size = PAGE_SIZE;
 		limits->zql_max_segments =
 		    (zv->zv_volblocksize * chunks) / PAGE_SIZE;
 	}
 #else
 	/* blk-mq support is compiled out; the flag has nothing to select */
 	(void) use_blk_mq;
 #endif
 
 	limits->zql_io_opt = zv->zv_volblocksize;
+
+	limits->zql_physical_block_size = zv->zv_volblocksize;
+	limits->zql_max_discard_sectors =
+	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9;
+	limits->zql_discard_granularity = zv->zv_volblocksize;
 }
 
 #ifdef HAVE_BLK_ALLOC_DISK_2ARG
 static void
 zvol_queue_limits_convert(zvol_queue_limits_t *limits,
     struct queue_limits *qlimits)
 {
 	memset(qlimits, 0, sizeof (struct queue_limits));
 	qlimits->max_hw_sectors = limits->zql_max_hw_sectors;
 	qlimits->max_segments = limits->zql_max_segments;
 	qlimits->max_segment_size = limits->zql_max_segment_size;
 	qlimits->io_opt = limits->zql_io_opt;
+	qlimits->physical_block_size = limits->zql_physical_block_size;
+	qlimits->max_discard_sectors = limits->zql_max_discard_sectors;
+	qlimits->discard_granularity = limits->zql_discard_granularity;
+#ifdef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES
+	qlimits->features =
+	    BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_IO_STAT;
+#endif
 }
-#else
+#endif
+
 static void
 zvol_queue_limits_apply(zvol_queue_limits_t *limits,
     struct request_queue *queue)
 {
+#ifndef HAVE_BLK_ALLOC_DISK_2ARG
 	blk_queue_max_hw_sectors(queue, limits->zql_max_hw_sectors);
 	blk_queue_max_segments(queue, limits->zql_max_segments);
 	blk_queue_max_segment_size(queue, limits->zql_max_segment_size);
 	blk_queue_io_opt(queue, limits->zql_io_opt);
-}
+	blk_queue_physical_block_size(queue, limits->zql_physical_block_size);
+	blk_queue_max_discard_sectors(queue, limits->zql_max_discard_sectors);
+	blk_queue_discard_granularity(queue, limits->zql_discard_granularity);
+#endif
+#ifndef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES
+	blk_queue_set_write_cache(queue, B_TRUE);
+	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, queue);
 #endif
+}
 
 static int
 zvol_alloc_non_blk_mq(struct zvol_state_os *zso, zvol_queue_limits_t *limits)
 {
 #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)
 #if defined(HAVE_BLK_ALLOC_DISK)
 	zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
 	if (zso->zvo_disk == NULL)
 		return (1);
 
 	zso->zvo_disk->minors = ZVOL_MINORS;
 	zso->zvo_queue = zso->zvo_disk->queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
+	/* Limits are applied once, after the #if ladder. */
 #elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
 	struct queue_limits qlimits;
 	zvol_queue_limits_convert(limits, &qlimits);
 	struct gendisk *disk = blk_alloc_disk(&qlimits, NUMA_NO_NODE);
 	if (IS_ERR(disk)) {
 		zso->zvo_disk = NULL;
 		return (1);
 	}
 
 	zso->zvo_disk = disk;
 	zso->zvo_disk->minors = ZVOL_MINORS;
 	zso->zvo_queue = zso->zvo_disk->queue;
+	/* zvo_queue is valid only from here on. */
+#ifndef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES
+	blk_queue_set_write_cache(zso->zvo_queue, B_TRUE);
+#endif
 #else
 	zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
 	if (zso->zvo_queue == NULL)
 		return (1);
 
 	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
 	if (zso->zvo_disk == NULL) {
 		blk_cleanup_queue(zso->zvo_queue);
 		return (1);
 	}
 
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif /* HAVE_BLK_ALLOC_DISK */
 #else
 	zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
 	if (zso->zvo_queue == NULL)
 		return (1);
 
 	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
 	if (zso->zvo_disk == NULL) {
 		blk_cleanup_queue(zso->zvo_queue);
 		return (1);
 	}
 
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
+
+	zvol_queue_limits_apply(limits, zso->zvo_queue);
+
 	return (0);
 
 }
 
 static int
 zvol_alloc_blk_mq(zvol_state_t *zv, zvol_queue_limits_t *limits)
 {
 #ifdef HAVE_BLK_MQ
 	struct zvol_state_os *zso = zv->zv_zso;
 
 	/* Allocate our blk-mq tag_set */
 	if (zvol_blk_mq_alloc_tag_set(zv) != 0)
 		return (1);
 
 #if defined(HAVE_BLK_ALLOC_DISK)
 	zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv);
 	if (zso->zvo_disk == NULL) {
 		blk_mq_free_tag_set(&zso->tag_set);
 		return (1);
 	}
 	zso->zvo_queue = zso->zvo_disk->queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
 	zso->zvo_disk->minors = ZVOL_MINORS;
 #elif defined(HAVE_BLK_ALLOC_DISK_2ARG)
 	struct queue_limits qlimits;
 	zvol_queue_limits_convert(limits, &qlimits);
 	struct gendisk *disk = blk_mq_alloc_disk(&zso->tag_set, &qlimits, zv);
 	if (IS_ERR(disk)) {
 		zso->zvo_disk = NULL;
 		blk_mq_free_tag_set(&zso->tag_set);
 		return (1);
 	}
 
 	zso->zvo_disk = disk;
 	zso->zvo_queue = zso->zvo_disk->queue;
 	zso->zvo_disk->minors = ZVOL_MINORS;
 #else
 	zso->zvo_disk = alloc_disk(ZVOL_MINORS);
 	if (zso->zvo_disk == NULL) {
-		blk_cleanup_queue(zso->zvo_queue);
+		/* No queue has been allocated yet; nothing to clean up. */
 		blk_mq_free_tag_set(&zso->tag_set);
 		return (1);
 	}
 	/* Allocate queue */
 	zso->zvo_queue = blk_mq_init_queue(&zso->tag_set);
 	if (IS_ERR(zso->zvo_queue)) {
 		blk_mq_free_tag_set(&zso->tag_set);
 		return (1);
 	}
 
 	/* Our queue is now created, assign it to our disk */
 	zso->zvo_disk->queue = zso->zvo_queue;
-	zvol_queue_limits_apply(limits, zso->zvo_queue);
-
 #endif
+
+	zvol_queue_limits_apply(limits, zso->zvo_queue);
 #endif
+
 	return (0);
 }
 
 /*
- * Allocate memory for a new zvol_state_t and setup the required
- * request queue and generic disk structures for the block device.
+ * Allocate a zvol_state_t plus its request queue and gendisk.  volblocksize
+ * is recorded first because the queue limits are derived from it.
  */
 static zvol_state_t *
-zvol_alloc(dev_t dev, const char *name)
+zvol_alloc(dev_t dev, const char *name, uint64_t volblocksize)
 {
 	zvol_state_t *zv;
 	struct zvol_state_os *zso;
 	uint64_t volmode;
 	int ret;
 
 	if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
 		return (NULL);
 
 	if (volmode == ZFS_VOLMODE_DEFAULT)
 		volmode = zvol_volmode;
 
 	if (volmode == ZFS_VOLMODE_NONE)
 		return (NULL);
 
 	zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
 	zso = kmem_zalloc(sizeof (struct zvol_state_os), KM_SLEEP);
 	zv->zv_zso = zso;
 	zv->zv_volmode = volmode;
+	zv->zv_volblocksize = volblocksize;
 
 	list_link_init(&zv->zv_next);
 	mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&zv->zv_removing_cv, NULL, CV_DEFAULT, NULL);
 
 #ifdef HAVE_BLK_MQ
 	zv->zv_zso->use_blk_mq = zvol_use_blk_mq;
 #endif
 
 	zvol_queue_limits_t limits;
 	zvol_queue_limits_init(&limits, zv, zv->zv_zso->use_blk_mq);
 
 	/*
 	 * The block layer has 3 interfaces for getting BIOs:
 	 *
 	 * 1. blk-mq request queues (new)
 	 * 2. submit_bio() (oldest)
 	 * 3. regular request queues (old).
 	 *
 	 * Each of those interfaces has two permutations:
 	 *
 	 * a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates
 	 *    both the disk and its queue (5.14 kernel or newer)
 	 *
 	 * b) We don't have blk_*alloc_disk(), and have to allocate the
 	 *    disk and the queue separately. (5.13 kernel or older)
 	 */
-	if (zv->zv_zso->use_blk_mq) {
+	if (zso->use_blk_mq)
 		ret = zvol_alloc_blk_mq(zv, &limits);
-		zso->zvo_disk->fops = &zvol_ops_blk_mq;
-	} else {
+	else
 		ret = zvol_alloc_non_blk_mq(zso, &limits);
-		zso->zvo_disk->fops = &zvol_ops;
-	}
 	if (ret != 0)
 		goto out_kmem;
+	/* zvo_disk is only valid once the allocation has succeeded. */
+	zso->zvo_disk->fops = zso->use_blk_mq ? &zvol_ops_blk_mq : &zvol_ops;
 
-	blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE);
-
 	/* Limit read-ahead to a single page to prevent over-prefetching. */
 	blk_queue_set_read_ahead(zso->zvo_queue, 1);
 
 	if (!zv->zv_zso->use_blk_mq) {
 		/* Disable write merging in favor of the ZIO pipeline. */
 		blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
 	}
 
-	/* Enable /proc/diskstats */
-	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue);
-
 	zso->zvo_queue->queuedata = zv;
 	zso->zvo_dev = dev;
 	zv->zv_open_count = 0;
 	strlcpy(zv->zv_name, name, sizeof (zv->zv_name));
 
 	zfs_rangelock_init(&zv->zv_rangelock, NULL, NULL);
 	rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
 
 	zso->zvo_disk->major = zvol_major;
 	zso->zvo_disk->events = DISK_EVENT_MEDIA_CHANGE;
 
 	/*
 	 * Setting ZFS_VOLMODE_DEV disables partitioning on ZVOL devices.
 	 * This is accomplished by limiting the number of minors for the
 	 * device to one and explicitly disabling partition scanning.
 	 */
 	if (volmode == ZFS_VOLMODE_DEV) {
 		zso->zvo_disk->minors = 1;
 		zso->zvo_disk->flags &= ~ZFS_GENHD_FL_EXT_DEVT;
 		zso->zvo_disk->flags |= ZFS_GENHD_FL_NO_PART;
 	}
 
 	zso->zvo_disk->first_minor = (dev & MINORMASK);
 	zso->zvo_disk->private_data = zv;
 	snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d",
 	    ZVOL_DEV_NAME, (dev & MINORMASK));
 
 	return (zv);
 
 out_kmem:
 	kmem_free(zso, sizeof (struct zvol_state_os));
 	kmem_free(zv, sizeof (zvol_state_t));
 	return (NULL);
 }
 
 /*
  * Cleanup then free a zvol_state_t which was created by zvol_alloc().
  * At this time, the structure is not opened by anyone, is taken off
  * the zvol_state_list, and has its private data set to NULL.
  * The zvol_state_lock is dropped.
  *
  * This function may take many milliseconds to complete (e.g. we've seen
  * it take over 256ms), due to the calls to "blk_cleanup_queue" and
  * "del_gendisk". Thus, consumers need to be careful to account for this
  * latency when calling this function.
  */
 void
 zvol_os_free(zvol_state_t *zv)
 {
 
 	ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
 	ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT0(zv->zv_open_count);
 	ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL);
 
 	rw_destroy(&zv->zv_suspend_lock);
 	zfs_rangelock_fini(&zv->zv_rangelock);
 
 	del_gendisk(zv->zv_zso->zvo_disk);
 #if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
 	(defined(HAVE_BLK_ALLOC_DISK) || defined(HAVE_BLK_ALLOC_DISK_2ARG))
 #if defined(HAVE_BLK_CLEANUP_DISK)
 	blk_cleanup_disk(zv->zv_zso->zvo_disk);
 #else
 	put_disk(zv->zv_zso->zvo_disk);
 #endif
 #else
 	blk_cleanup_queue(zv->zv_zso->zvo_queue);
 	put_disk(zv->zv_zso->zvo_disk);
 #endif
 
 #ifdef HAVE_BLK_MQ
 	if (zv->zv_zso->use_blk_mq)
 		blk_mq_free_tag_set(&zv->zv_zso->tag_set);
 #endif
 
 	ida_simple_remove(&zvol_ida,
 	    MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
 
 	cv_destroy(&zv->zv_removing_cv);
 	mutex_destroy(&zv->zv_state_lock);
 	dataset_kstats_destroy(&zv->zv_kstat);
 
 	kmem_free(zv->zv_zso, sizeof (struct zvol_state_os));
 	kmem_free(zv, sizeof (zvol_state_t));
 }
 
 void
 zvol_wait_close(zvol_state_t *zv)
 {
 }
 
 struct add_disk_work {
 	struct delayed_work work;
 	struct gendisk *disk;
 	int error;
 };
 
 static int
 __zvol_os_add_disk(struct gendisk *disk)
 {
 	int error = 0;
 #ifdef HAVE_ADD_DISK_RET
 	error = add_disk(disk);
 #else
 	add_disk(disk);
 #endif
 	return (error);
 }
 
 #if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
 static void
 zvol_os_add_disk_work(struct work_struct *work)
 {
 	struct add_disk_work *add_disk_work;
 	add_disk_work = container_of(work, struct add_disk_work, work.work);
 	add_disk_work->error = __zvol_os_add_disk(add_disk_work->disk);
 }
 #endif
 
 /*
  * SPECIAL CASE:
  *
  * This function basically calls add_disk() from a workqueue.   You may be
  * thinking: why not just call add_disk() directly?
  *
  * When you call add_disk(), the zvol appears to the world.  When this happens,
  * the kernel calls disk_scan_partitions() on the zvol, which behaves
  * differently on the 6.9+ kernels:
  *
  * - 6.8 and older kernels -
  * disk_scan_partitions()
  *	handle = bdev_open_by_dev(
  *		zvol_open()
  *	bdev_release(handle);
  *		zvol_release()
  *
  *
  * - 6.9+ kernels -
  * disk_scan_partitions()
  * 	file = bdev_file_open_by_dev()
  *		zvol_open()
  *	fput(file)
  *	< wait for return to userspace >
  *		zvol_release()
  *
  * The difference is that the bdev_release() from the 6.8 kernel is synchronous
  * while the fput() from the 6.9 kernel is async.  Or more specifically it's
  * async that has to wait until we return to userspace (since it adds the fput
  * into the caller's work queue with the TWA_RESUME flag set).  This is not the
  * behavior we want, since we want do things like create+destroy a zvol within
  * a single ZFS_IOC_CREATE ioctl, and the "create" part needs to release the
  * reference to the zvol while we're in the IOCTL, which can't wait until we
  * return to userspace.
  *
  * We can get around this since fput() has a special codepath for when it's
  * running in a kernel thread or interrupt.  In those cases, it just puts the
  * fput into the system workqueue, which we can force to run with
  * __flush_workqueue().  That is why we call add_disk() from a workqueue - so it
  * run from a kernel thread and "tricks" the fput() codepaths.
  *
  * Note that __flush_workqueue() is slowly getting deprecated.  This may be ok
  * though, since our IOCTL will spin on EBUSY waiting for the zvol release (via
  * fput) to happen, which it eventually, naturally, will from the system_wq
  * without us explicitly calling __flush_workqueue().
  */
 static int
 zvol_os_add_disk(struct gendisk *disk)
 {
 #if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)	/* 6.9+ kernel */
 	struct add_disk_work add_disk_work;
 
 	INIT_DELAYED_WORK(&add_disk_work.work, zvol_os_add_disk_work);
 	add_disk_work.disk = disk;
 	add_disk_work.error = 0;
 
 	/* Use *_delayed_work functions since they're not GPL'd */
 	schedule_delayed_work(&add_disk_work.work, 0);
 	flush_delayed_work(&add_disk_work.work);
 
 	__flush_workqueue(system_wq);
 	return (add_disk_work.error);
 #else	/* <= 6.8 kernel */
 	return (__zvol_os_add_disk(disk));
 #endif
 }
 
 /*
  * Create a block device minor node and setup the linkage between it
  * and the specified volume.  Once this function returns the block
  * device is live and ready for use.
  */
 int
 zvol_os_create_minor(const char *name)
 {
 	zvol_state_t *zv;
 	objset_t *os;
 	dmu_object_info_t *doi;
 	uint64_t volsize;
 	uint64_t len;
 	unsigned minor = 0;
 	int error = 0;
 	int idx;
 	uint64_t hash = zvol_name_hash(name);
 	uint64_t volthreading;
 	bool replayed_zil = B_FALSE;
 
 	if (zvol_inhibit_dev)
 		return (0);
 
 	idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
 	if (idx < 0)
 		return (SET_ERROR(-idx));
 	minor = idx << ZVOL_MINOR_BITS;
 	if (MINOR(minor) != minor) {
 		/* too many partitions can cause an overflow */
 		zfs_dbgmsg("zvol: create minor overflow: %s, minor %u/%u",
 		    name, minor, MINOR(minor));
 		ida_simple_remove(&zvol_ida, idx);
 		return (SET_ERROR(EINVAL));
 	}
 
 	zv = zvol_find_by_name_hash(name, hash, RW_NONE);
 	if (zv) {
 		ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 		mutex_exit(&zv->zv_state_lock);
 		ida_simple_remove(&zvol_ida, idx);
 		return (SET_ERROR(EEXIST));
 	}
 
 	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
 
 	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, B_TRUE, FTAG, &os);
 	if (error)
 		goto out_doi;
 
 	error = dmu_object_info(os, ZVOL_OBJ, doi);
 	if (error)
 		goto out_dmu_objset_disown;
 
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 	if (error)
 		goto out_dmu_objset_disown;
 
-	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
+	zv = zvol_alloc(MKDEV(zvol_major, minor), name,
+	    doi->doi_data_block_size);
 	if (zv == NULL) {
 		error = SET_ERROR(EAGAIN);
 		goto out_dmu_objset_disown;
 	}
 	zv->zv_hash = hash;
 
 	if (dmu_objset_is_snapshot(os))
 		zv->zv_flags |= ZVOL_RDONLY;
 
-	zv->zv_volblocksize = doi->doi_data_block_size;
 	zv->zv_volsize = volsize;
 	zv->zv_objset = os;
 
 	/* Default */
 	zv->zv_threading = B_TRUE;
 	if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL)
 	    == 0)
 		zv->zv_threading = volthreading;
 
 	set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9);
 
-
-
-	blk_queue_physical_block_size(zv->zv_zso->zvo_queue,
-	    zv->zv_volblocksize);
-	blk_queue_max_discard_sectors(zv->zv_zso->zvo_queue,
-	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
-	blk_queue_discard_granularity(zv->zv_zso->zvo_queue,
-	    zv->zv_volblocksize);
 #ifdef QUEUE_FLAG_DISCARD
 	blk_queue_flag_set(QUEUE_FLAG_DISCARD, zv->zv_zso->zvo_queue);
 #endif
 #ifdef QUEUE_FLAG_NONROT
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, zv->zv_zso->zvo_queue);
 #endif
 #ifdef QUEUE_FLAG_ADD_RANDOM
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zv->zv_zso->zvo_queue);
 #endif
 	/* This flag was introduced in kernel version 4.12. */
 #ifdef QUEUE_FLAG_SCSI_PASSTHROUGH
 	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, zv->zv_zso->zvo_queue);
 #endif
 
 	ASSERT3P(zv->zv_kstat.dk_kstats, ==, NULL);
 	error = dataset_kstats_create(&zv->zv_kstat, zv->zv_objset);
 	if (error)
 		goto out_dmu_objset_disown;
 	ASSERT3P(zv->zv_zilog, ==, NULL);
 	zv->zv_zilog = zil_open(os, zvol_get_data, &zv->zv_kstat.dk_zil_sums);
 	if (spa_writeable(dmu_objset_spa(os))) {
 		if (zil_replay_disable)
 			replayed_zil = zil_destroy(zv->zv_zilog, B_FALSE);
 		else
 			replayed_zil = zil_replay(os, zv, zvol_replay_vector);
 	}
 	if (replayed_zil)
 		zil_close(zv->zv_zilog);
 	zv->zv_zilog = NULL;
 
 	/*
 	 * When udev detects the addition of the device it will immediately
 	 * invoke blkid(8) to determine the type of content on the device.
 	 * Prefetching the blocks commonly scanned by blkid(8) will speed
 	 * up this process.
 	 */
 	len = MIN(zvol_prefetch_bytes, SPA_MAXBLOCKSIZE);
 	if (len > 0) {
 		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
 		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
 		    ZIO_PRIORITY_SYNC_READ);
 	}
 
 	zv->zv_objset = NULL;
 out_dmu_objset_disown:
 	dmu_objset_disown(os, B_TRUE, FTAG);
 out_doi:
 	kmem_free(doi, sizeof (dmu_object_info_t));
 
 	/*
 	 * Keep in mind that once add_disk() is called, the zvol is
 	 * announced to the world, and zvol_open()/zvol_release() can
 	 * be called at any time. Incidentally, add_disk() itself calls
 	 * zvol_open()->zvol_first_open() and zvol_release()->zvol_last_close()
 	 * directly as well.
 	 */
 	if (error == 0) {
 		rw_enter(&zvol_state_lock, RW_WRITER);
 		zvol_insert(zv);
 		rw_exit(&zvol_state_lock);
 		error = zvol_os_add_disk(zv->zv_zso->zvo_disk);
 	} else {
 		ida_simple_remove(&zvol_ida, idx);
 	}
 
 	return (error);
 }
 
 void
 zvol_os_rename_minor(zvol_state_t *zv, const char *newname)
 {
 	int readonly = get_disk_ro(zv->zv_zso->zvo_disk);
 
 	ASSERT(RW_LOCK_HELD(&zvol_state_lock));
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 
 	strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
 
 	/* move to new hashtable entry  */
 	zv->zv_hash = zvol_name_hash(newname);
 	hlist_del(&zv->zv_hlink);
 	hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
 
 	/*
 	 * The block device's read-only state is briefly changed causing
 	 * a KOBJ_CHANGE uevent to be issued.  This ensures udev detects
 	 * the name change and fixes the symlinks.  This does not change
 	 * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never
 	 * changes.  This would normally be done using kobject_uevent() but
 	 * that is a GPL-only symbol which is why we need this workaround.
 	 */
 	set_disk_ro(zv->zv_zso->zvo_disk, !readonly);
 	set_disk_ro(zv->zv_zso->zvo_disk, readonly);
 
 	dataset_kstats_rename(&zv->zv_kstat, newname);
 }
 
 void
 zvol_os_set_disk_ro(zvol_state_t *zv, int flags)
 {
 
 	set_disk_ro(zv->zv_zso->zvo_disk, flags);
 }
 
 void
 zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity)
 {
 
 	set_capacity(zv->zv_zso->zvo_disk, capacity);
 }
 
 int
 zvol_init(void)
 {
 	int error;
 
 	/*
 	 * zvol_threads is the module param the user passes in.
 	 *
 	 * zvol_actual_threads is what we use internally, since the user can
 	 * pass zvol_thread = 0 to mean "use all the CPUs" (the default).
 	 */
 	static unsigned int zvol_actual_threads;
 
 	if (zvol_threads == 0) {
 		/*
 		 * See dde9380a1 for why 32 was chosen here.  This should
 		 * probably be refined to be some multiple of the number
 		 * of CPUs.
 		 */
 		zvol_actual_threads = MAX(num_online_cpus(), 32);
 	} else {
 		zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
 	}
 
 	/*
 	 * Use atleast 32 zvol_threads but for many core system,
 	 * prefer 6 threads per taskq, but no more taskqs
 	 * than threads in them on large systems.
 	 *
 	 *                 taskq   total
 	 * cpus    taskqs  threads threads
 	 * ------- ------- ------- -------
 	 * 1       1       32       32
 	 * 2       1       32       32
 	 * 4       1       32       32
 	 * 8       2       16       32
 	 * 16      3       11       33
 	 * 32      5       7        35
 	 * 64      8       8        64
 	 * 128     11      12       132
 	 * 256     16      16       256
 	 */
 	zv_taskq_t *ztqs = &zvol_taskqs;
 	uint_t num_tqs = MIN(num_online_cpus(), zvol_num_taskqs);
 	if (num_tqs == 0) {
 		num_tqs = 1 + num_online_cpus() / 6;
 		while (num_tqs * num_tqs > zvol_actual_threads)
 			num_tqs--;
 	}
 	uint_t per_tq_thread = zvol_actual_threads / num_tqs;
 	if (per_tq_thread * num_tqs < zvol_actual_threads)
 		per_tq_thread++;
 	ztqs->tqs_cnt = num_tqs;
 	ztqs->tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *), KM_SLEEP);
 	error = register_blkdev(zvol_major, ZVOL_DRIVER);
 	if (error) {
 		kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt * sizeof (taskq_t *));
 		ztqs->tqs_taskq = NULL;
 		printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
 		return (error);
 	}
 
 #ifdef HAVE_BLK_MQ
 	if (zvol_blk_mq_queue_depth == 0) {
 		zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
 	} else {
 		zvol_actual_blk_mq_queue_depth =
 		    MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ);
 	}
 
 	if (zvol_blk_mq_threads == 0) {
 		zvol_blk_mq_actual_threads = num_online_cpus();
 	} else {
 		zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1),
 		    1024);
 	}
 #endif
 	for (uint_t i = 0; i < num_tqs; i++) {
 		char name[32];
 		(void) snprintf(name, sizeof (name), "%s_tq-%u",
 		    ZVOL_DRIVER, i);
 		ztqs->tqs_taskq[i] = taskq_create(name, per_tq_thread,
 		    maxclsyspri, per_tq_thread, INT_MAX,
 		    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 		if (ztqs->tqs_taskq[i] == NULL) {
 			for (int j = i - 1; j >= 0; j--)
 				taskq_destroy(ztqs->tqs_taskq[j]);
 			unregister_blkdev(zvol_major, ZVOL_DRIVER);
 			kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
 			    sizeof (taskq_t *));
 			ztqs->tqs_taskq = NULL;
 			return (-ENOMEM);
 		}
 	}
 
 	zvol_init_impl();
 	ida_init(&zvol_ida);
 	return (0);
 }
 
 void
 zvol_fini(void)
 {
 	zv_taskq_t *ztqs = &zvol_taskqs;
 	zvol_fini_impl();
 	unregister_blkdev(zvol_major, ZVOL_DRIVER);
 
 	if (ztqs->tqs_taskq == NULL) {
 		ASSERT3U(ztqs->tqs_cnt, ==, 0);
 	} else {
 		for (uint_t i = 0; i < ztqs->tqs_cnt; i++) {
 			ASSERT3P(ztqs->tqs_taskq[i], !=, NULL);
 			taskq_destroy(ztqs->tqs_taskq[i]);
 		}
 		kmem_free(ztqs->tqs_taskq, ztqs->tqs_cnt *
 		    sizeof (taskq_t *));
 		ztqs->tqs_taskq = NULL;
 	}
 
 	ida_destroy(&zvol_ida);
 }
 
 /* BEGIN CSTYLED */
 module_param(zvol_inhibit_dev, uint, 0644);
 MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
 
 module_param(zvol_major, uint, 0444);
 MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
 
 module_param(zvol_threads, uint, 0444);
 MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set"
-    "to 0 to use all active CPUs");
+    " to 0 to use all active CPUs");
 
 module_param(zvol_request_sync, uint, 0644);
 MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
 
 module_param(zvol_max_discard_blocks, ulong, 0444);
 MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
 
 module_param(zvol_num_taskqs, uint, 0444);
 MODULE_PARM_DESC(zvol_num_taskqs, "Number of zvol taskqs");
 
 module_param(zvol_prefetch_bytes, uint, 0644);
 MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
 
 module_param(zvol_volmode, uint, 0644);
 MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
 
 #ifdef HAVE_BLK_MQ
 module_param(zvol_blk_mq_queue_depth, uint, 0644);
 MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
 
 module_param(zvol_use_blk_mq, uint, 0644);
 MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
 
 module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
 MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
     "Process volblocksize blocks per thread");
 #endif
 
 #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
 module_param(zvol_open_timeout_ms, uint, 0644);
 MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
 #endif
 
 /* END CSTYLED */
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
index e204f43b3bcd..795e71b26b5a 100644
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/alloc_class/alloc_class.kshlib
@@ -1,68 +1,68 @@
 #
 # This file and its contents are supplied under the terms of the
 # Common Development and Distribution License ("CDDL"), version 1.0.
 # You may only use this file in accordance with the terms of version
 # 1.0 of the CDDL.
 #
 # A full copy of the text of the CDDL should have accompanied this
 # source.  A copy of the CDDL is also available via the Internet at
 # http://www.illumos.org/license/CDDL.
 #
 
 #
 # Copyright (c) 2017, Intel Corporation.
 # Copyright (c) 2018 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/alloc_class/alloc_class.cfg
 
 function disk_setup
 {
 	truncate -s $ZPOOL_DEVSIZE $ZPOOL_DISKS
 	truncate -s $CLASS_DEVSIZE $CLASS_DISKS
 }
 
 function disk_cleanup
 {
-	rm -f $ZPOOL_DEVSIZE $ZPOOL_DISKS 2> /dev/null
-	rm -f $CLASS_DEVSIZE $CLASS_DISKS 2> /dev/null
+	rm -f $ZPOOL_DISKS 2> /dev/null
+	rm -f $CLASS_DISKS 2> /dev/null
 }
 
 function cleanup
 {
 	if datasetexists $TESTPOOL ; then
 		zpool destroy -f $TESTPOOL 2> /dev/null
 	fi
 
 	disk_cleanup
 }
 
 #
 # Try zpool status/iostat for given pool
 #
 # $1 pool
 #
 function display_status
 {
 	typeset pool=$1
 
 	typeset -i ret=0
 	zpool status -xv $pool > /dev/null 2>&1
 	ret=$?
 
 	zpool iostat > /dev/null 2>&1
 	((ret |= $?))
 
 	typeset mntpnt=$(get_prop mountpoint $pool)
-	dd if=/dev/random of=$mntpnt/testfile.$$ &
+	dd if=/dev/urandom of=$mntpnt/testfile.$$ &
 	typeset pid=$!
 
 	zpool iostat -v 1 3 > /dev/null
 	((ret |= $?))
 
 	kill -9 $pid
 	wait $pid 2> /dev/null
 
 	return $ret
 }
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh
index a8a64e52491a..3632fc9a4df0 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_rlimit_fsize.ksh
@@ -1,64 +1,64 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or https://opensource.org/licenses/CDDL-1.0.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 
 . $STF_SUITE/include/libtest.shlib
 . $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib
 
 #
 # DESCRIPTION:
 #	When block cloning is used to implement copy_file_range(2), the
 #	RLIMIT_FSIZE limit must be respected.
 #
 # STRATEGY:
 #	1. Create a pool.
 #	2. ???
 #
 
 verify_runnable "global"
 
 VDIR=$TEST_BASE_DIR/disk-bclone
 VDEV="$VDIR/a"
 
 function cleanup
 {
 	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
 	rm -rf $VDIR
 }
 
 log_onexit cleanup
 
 log_assert "Test for RLIMIT_FSIZE handling with block cloning enabled"
 
 log_must rm -rf $VDIR
 log_must mkdir -p $VDIR
 log_must truncate -s 1G $VDEV
 
 log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV
 
-log_must dd if=/dev/random of=/$TESTPOOL/file1 bs=1 count=1000
+log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1 count=1000
 
 ulimit -f 2
 log_must clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file2 0 0 all
 ulimit -f 1
 log_mustnot clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file3 0 0 all
 
 log_pass "copy_file_range(2) respects RLIMIT_FSIZE"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh
index 041dadb1eadb..05f3ac708477 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fault/suspend_resume_single.ksh
@@ -1,102 +1,102 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or https://opensource.org/licenses/CDDL-1.0.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 
 #
 # Copyright (c) 2024, Klara Inc.
 #
 
 . $STF_SUITE/include/libtest.shlib
 
 set -x
 
 DATAFILE="$TMPDIR/datafile"
 
 function cleanup
 {
 	destroy_pool $TESTPOOL
 	unload_scsi_debug
-	rm -f $DATA_FILE
+	rm -f $DATAFILE
 }
 
 log_onexit cleanup
 
 log_assert "ensure single-disk pool resumes properly after suspend and clear"
 
 # create a file, and take a checksum, so we can compare later
-log_must dd if=/dev/random of=$DATAFILE bs=128K count=1
+log_must dd if=/dev/urandom of=$DATAFILE bs=128K count=1
 typeset sum1=$(cat $DATAFILE | md5sum)
 
 # make a debug device that we can "unplug"
 load_scsi_debug 100 1 1 1 '512b'
 sd=$(get_debug_device)
 
 # create a single-device pool
 log_must zpool create $TESTPOOL $sd
 log_must zpool sync
 
 # "pull" the disk
 log_must eval "echo offline > /sys/block/$sd/device/state"
 
 # copy data onto the pool. it'll appear to succeed, but only be in memory
 log_must cp $DATAFILE /$TESTPOOL/file
 
 # wait until sync starts, and the pool suspends
 log_note "waiting for pool to suspend"
 typeset -i tries=10
 until [[ $(cat /proc/spl/kstat/zfs/$TESTPOOL/state) == "SUSPENDED" ]] ; do
 	if ((tries-- == 0)); then
 		log_fail "pool didn't suspend"
 	fi
 	sleep 1
 done
 
 # return the disk
 log_must eval "echo running > /sys/block/$sd/device/state"
 
 # clear the error states, which should reopen the vdev, get the pool back
 # online, and replay the failed IO
 log_must zpool clear $TESTPOOL
 
 # wait a while for everything to sync out. if something is going to go wrong,
 # this is where it will happen
 log_note "giving pool time to settle and complete txg"
 sleep 7
 
 # if the pool suspended, then everything is bad
 if [[ $(cat /proc/spl/kstat/zfs/$TESTPOOL/state) == "SUSPENDED" ]] ; then
 	log_fail "pool suspended"
 fi
 
 # export the pool, to make sure it exports clean, and also to clear the file
 # out of the cache
 log_must zpool export $TESTPOOL
 
 # import the pool
 log_must zpool import $TESTPOOL
 
 # sum the file we wrote earlier
 typeset sum2=$(cat /$TESTPOOL/file | md5sum)
 
 # make sure the checksums match
 log_must test "$sum1" = "$sum2"
 
 log_pass "single-disk pool resumes properly after disk suspend and clear"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_001_pos.ksh
index 2ad37e06a5f1..2c365e37af23 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/upgrade/upgrade_projectquota_001_pos.ksh
@@ -1,128 +1,143 @@
 #!/bin/ksh -p
 #
 # CDDL HEADER START
 #
 # The contents of this file are subject to the terms of the
 # Common Development and Distribution License (the "License").
 # You may not use this file except in compliance with the License.
 #
 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 # or https://opensource.org/licenses/CDDL-1.0.
 # See the License for the specific language governing permissions
 # and limitations under the License.
 #
 # When distributing Covered Code, include this CDDL HEADER in each
 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 # If applicable, add the following below this CDDL HEADER, with the
 # fields enclosed by brackets "[]" replaced with your own identifying
 # information: Portions Copyright [yyyy] [name of copyright owner]
 #
 # CDDL HEADER END
 #
 
 #
 # Copyright (c) 2017 by Fan Yong. All rights reserved.
 #
 
 . $STF_SUITE/tests/functional/upgrade/upgrade_common.kshlib
 
 #
 # DESCRIPTION:
 #
 # Check whether zfs upgrade for project quota works or not.
 # The project quota is per dataset based feature, this test
 # will create multiple datasets and try different upgrade methods.
 #
 # STRATEGY:
 # 1. Create a pool with all features disabled
 # 2. Create a few dataset for testing
 # 3. Make sure automatic upgrade work
 # 4. Make sure manual upgrade work
 #
 
 verify_runnable "global"
 
 if ! lsattr -pd > /dev/null 2>&1; then
 	log_unsupported "Current lsattr does not support set/show project ID"
 fi
 
 log_assert "pool upgrade for projectquota should work"
 log_onexit cleanup_upgrade
 
 log_must zpool create -d -m $TESTDIR $TESTPOOL $TMPDEV
 
 log_must mkfiles $TESTDIR/tf $((RANDOM % 100 + 1))
 log_must zfs create $TESTPOOL/fs1
 log_must mkfiles $TESTDIR/fs1/tf $((RANDOM % 100 + 1))
 log_must zfs umount $TESTPOOL/fs1
 
 log_must zfs create $TESTPOOL/fs2
 log_must mkdir $TESTDIR/fs2/dir
 log_must mkfiles $TESTDIR/fs2/tf $((RANDOM % 100 + 1))
 
 log_must zfs create $TESTPOOL/fs3
 log_must mkdir $TESTDIR/fs3/dir
 log_must mkfiles $TESTDIR/fs3/tf $((RANDOM % 100 + 1))
+log_must set_xattr_stdin passwd $TESTDIR/fs3/dir < /etc/passwd	# presumably sets xattr "passwd" on dir (helper not shown)
 
 # Make sure project quota is disabled
 zfs projectspace -o used $TESTPOOL | grep -q "USED" &&
 	log_fail "project quota should be disabled initially"
 
 # set projectquota before upgrade will fail
 log_mustnot zfs set projectquota@100=100m $TESTDIR/fs3
 
 # set projectobjquota before upgrade will fail
 log_mustnot zfs set projectobjquota@100=1000 $TESTDIR/fs3
 
 # 'chattr -p' should fail before upgrade
 log_mustnot chattr -p 100 $TESTDIR/fs3/dir
 
 # 'chattr +P' should fail before upgrade
 log_mustnot chattr +P $TESTDIR/fs3/dir
 
 # Upgrade zpool to support all features
 log_must zpool upgrade $TESTPOOL
 
 # Double check project quota is disabled
 zfs projectspace -o used $TESTPOOL | grep -q "USED" &&
 	log_fail "project quota should be disabled after pool upgrade"
 
 # Mount dataset should trigger upgrade
 log_must zfs mount $TESTPOOL/fs1
 log_must sleep 3 # upgrade done in the background so let's wait for a while
 zfs projectspace -o used $TESTPOOL/fs1 | grep -q "USED" ||
 	log_fail "project quota should be enabled for $TESTPOOL/fs1"
 
 # Create file should trigger dataset upgrade
 log_must mkfile 1m $TESTDIR/fs2/dir/tf
 log_must sleep 3 # upgrade done in the background so let's wait for a while
 zfs projectspace -o used $TESTPOOL/fs2 | grep -q "USED" ||
 	log_fail "project quota should be enabled for $TESTPOOL/fs2"
 
 # "lsattr -p" should NOT trigger upgrade
 log_must lsattr -p -d $TESTDIR/fs3/dir
 zfs projectspace -o used $TESTPOOL/fs3 | grep -q "USED" &&
 	log_fail "project quota should not active for $TESTPOOL/fs3"
 
 # 'chattr -p' should trigger dataset upgrade
 log_must chattr -p 100 $TESTDIR/fs3/dir
 log_must sleep 5 # upgrade done in the background so let's wait for a while
 zfs projectspace -o used $TESTPOOL/fs3 | grep -q "USED" ||
 	log_fail "project quota should be enabled for $TESTPOOL/fs3"
+dirino=$(stat -c '%i' $TESTDIR/fs3/dir)	# inode number of the dir; handed to zdb below
+log_must zdb -ddddd $TESTPOOL/fs3 $dirino
+xattrdirino=$(zdb -ddddd $TESTPOOL/fs3 $dirino | grep -w "xattr" | awk '{print $2}')
+echo "xattrdirino: $xattrdirino"
+expectedcnt=1	# baseline: the directory itself
+echo "expectedcnt: $expectedcnt"
+if [[ -n $xattrdirino ]]; then	# zdb output for the dir had an "xattr" line
+	expectedcnt=$((expectedcnt + 1))	# count the object named on that line
+	echo "expectedcnt: $expectedcnt"
+	log_must zdb -ddddd $TESTPOOL/fs3 $xattrdirino
+	xattrinocnt=$(zdb -ddddd $TESTPOOL/fs3 $xattrdirino | grep -w "(type:" | wc -l)
+	echo "xattrinocnt: $xattrinocnt"
+	expectedcnt=$((expectedcnt + xattrinocnt))	# plus one per "(type:" line in its dump
+	echo "expectedcnt: $expectedcnt"
+fi
 cnt=$(get_prop projectobjused@100 $TESTPOOL/fs3)
-# if 'xattr=on', then 'cnt = 2'
-[[ $cnt -ne 1 ]] && [[ $cnt -ne 2 ]] &&
+[[ $cnt -ne $expectedcnt ]] &&
 	log_fail "projectquota accounting failed $cnt"
 
 # All in all, after having been through this, the dataset for testpool
 # still shouldn't be upgraded
 zfs projectspace -o used $TESTPOOL | grep -q "USED" &&
 	log_fail "project quota should be disabled for $TESTPOOL"
 
 # Manual upgrade root dataset
 # uses an ioctl which will wait for the upgrade to be done before returning
 log_must zfs set version=current $TESTPOOL
 zfs projectspace -o used $TESTPOOL | grep -q "USED" ||
 	log_fail "project quota should be enabled for $TESTPOOL"
 
 log_pass "Project Quota upgrade done"
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index 631c83fe8bab..2508de5421df 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1,1239 +1,1254 @@
 /*
  */
 
 /* zfs_config.h.  Generated from zfs_config.h.in by configure.  */
 /* zfs_config.h.in.  Generated from configure.ac by autoheader.  */
 
 /* Define to 1 if translation of program messages to the user's native
    language is requested. */
 /* #undef ENABLE_NLS */
 
 /* bio_end_io_t wants 1 arg */
 /* #undef HAVE_1ARG_BIO_END_IO_T */
 
 /* lookup_bdev() wants 1 arg */
 /* #undef HAVE_1ARG_LOOKUP_BDEV */
 
 /* submit_bio() wants 1 arg */
 /* #undef HAVE_1ARG_SUBMIT_BIO */
 
 /* bdi_setup_and_register() wants 2 args */
 /* #undef HAVE_2ARGS_BDI_SETUP_AND_REGISTER */
 
 /* vfs_getattr wants 2 args */
 /* #undef HAVE_2ARGS_VFS_GETATTR */
 
 /* zlib_deflate_workspacesize() wants 2 args */
 /* #undef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */
 
 /* bdi_setup_and_register() wants 3 args */
 /* #undef HAVE_3ARGS_BDI_SETUP_AND_REGISTER */
 
 /* vfs_getattr wants 3 args */
 /* #undef HAVE_3ARGS_VFS_GETATTR */
 
 /* vfs_getattr wants 4 args */
 /* #undef HAVE_4ARGS_VFS_GETATTR */
 
 /* kernel has access_ok with 'type' parameter */
 /* #undef HAVE_ACCESS_OK_TYPE */
 
 /* posix_acl has refcount_t */
 /* #undef HAVE_ACL_REFCOUNT */
 
 /* add_disk() returns int */
 /* #undef HAVE_ADD_DISK_RET */
 
 /* Define if host toolchain supports AES */
 #define HAVE_AES 1
 
 /* Define if you have [rt] */
 #define HAVE_AIO_H 1
 
 #ifdef __amd64__
 #ifndef RESCUE
 /* Define if host toolchain supports AVX */
 #define HAVE_AVX 1
 #endif
 
 /* Define if host toolchain supports AVX2 */
 #define HAVE_AVX2 1
 
 /* Define if host toolchain supports AVX512BW */
 #define HAVE_AVX512BW 1
 
 /* Define if host toolchain supports AVX512CD */
 #define HAVE_AVX512CD 1
 
 /* Define if host toolchain supports AVX512DQ */
 #define HAVE_AVX512DQ 1
 
 /* Define if host toolchain supports AVX512ER */
 #define HAVE_AVX512ER 1
 
 /* Define if host toolchain supports AVX512F */
 #define HAVE_AVX512F 1
 
 /* Define if host toolchain supports AVX512IFMA */
 #define HAVE_AVX512IFMA 1
 
 /* Define if host toolchain supports AVX512PF */
 #define HAVE_AVX512PF 1
 
 /* Define if host toolchain supports AVX512VBMI */
 #define HAVE_AVX512VBMI 1
 
 /* Define if host toolchain supports AVX512VL */
 #define HAVE_AVX512VL 1
 #endif
 
 /* backtrace() is available */
 /* #undef HAVE_BACKTRACE */
 
 /* bdevname() is available */
 /* #undef HAVE_BDEVNAME */
 
 /* bdev_check_media_change() exists */
 /* #undef HAVE_BDEV_CHECK_MEDIA_CHANGE */
 
 /* bdev_file_open_by_path() exists */
 /* #undef HAVE_BDEV_FILE_OPEN_BY_PATH */
 
 /* bdev_*_io_acct() available */
 /* #undef HAVE_BDEV_IO_ACCT_63 */
 
 /* bdev_*_io_acct() available */
 /* #undef HAVE_BDEV_IO_ACCT_OLD */
 
 /* bdev_kobj() exists */
 /* #undef HAVE_BDEV_KOBJ */
 
 /* bdev_max_discard_sectors() is available */
 /* #undef HAVE_BDEV_MAX_DISCARD_SECTORS */
 
 /* bdev_max_secure_erase_sectors() is available */
 /* #undef HAVE_BDEV_MAX_SECURE_ERASE_SECTORS */
 
 /* bdev_nr_bytes() is available */
 /* #undef HAVE_BDEV_NR_BYTES */
 
 /* bdev_open_by_path() exists */
 /* #undef HAVE_BDEV_OPEN_BY_PATH */
 
 /* bdev_release() exists */
 /* #undef HAVE_BDEV_RELEASE */
 
 /* block_device_operations->submit_bio() returns void */
 /* #undef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID */
 
 /* bdev_whole() is available */
 /* #undef HAVE_BDEV_WHOLE */
 
 /* bio_alloc() takes 4 arguments */
 /* #undef HAVE_BIO_ALLOC_4ARG */
 
 /* bio->bi_bdev->bd_disk exists */
 /* #undef HAVE_BIO_BDEV_DISK */
 
 /* bio->bi_opf is defined */
 /* #undef HAVE_BIO_BI_OPF */
 
 /* bio->bi_status exists */
 /* #undef HAVE_BIO_BI_STATUS */
 
 /* bio has bi_iter */
 /* #undef HAVE_BIO_BVEC_ITER */
 
 /* bio_*_io_acct() available */
 /* #undef HAVE_BIO_IO_ACCT */
 
 /* bio_max_segs() is implemented */
 /* #undef HAVE_BIO_MAX_SEGS */
 
 /* bio_set_dev() is available */
 /* #undef HAVE_BIO_SET_DEV */
 
 /* bio_set_dev() GPL-only */
 /* #undef HAVE_BIO_SET_DEV_GPL_ONLY */
 
 /* bio_set_dev() is a macro */
 /* #undef HAVE_BIO_SET_DEV_MACRO */
 
 /* bio_set_op_attrs is available */
 /* #undef HAVE_BIO_SET_OP_ATTRS */
 
 /* blkdev_get_by_path() exists and takes 4 args */
 /* #undef HAVE_BLKDEV_GET_BY_PATH_4ARG */
 
 /* blkdev_get_by_path() handles ERESTARTSYS */
 /* #undef HAVE_BLKDEV_GET_ERESTARTSYS */
 
 /* __blkdev_issue_discard(flags) is available */
 /* #undef HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_FLAGS */
 
 /* __blkdev_issue_discard() is available */
 /* #undef HAVE_BLKDEV_ISSUE_DISCARD_ASYNC_NOFLAGS */
 
 /* blkdev_issue_discard(flags) is available */
 /* #undef HAVE_BLKDEV_ISSUE_DISCARD_FLAGS */
 
 /* blkdev_issue_discard() is available */
 /* #undef HAVE_BLKDEV_ISSUE_DISCARD_NOFLAGS */
 
 /* blkdev_issue_secure_erase() is available */
 /* #undef HAVE_BLKDEV_ISSUE_SECURE_ERASE */
 
 /* blkdev_put() exists */
 /* #undef HAVE_BLKDEV_PUT */
 
 /* blkdev_put() accepts void* as arg 2 */
 /* #undef HAVE_BLKDEV_PUT_HOLDER */
 
+/* struct queue_limits has a features field */
+/* #undef HAVE_BLKDEV_QUEUE_LIMITS_FEATURES */
+
 /* blkdev_reread_part() exists */
 /* #undef HAVE_BLKDEV_REREAD_PART */
 
 /* blkg_tryget() is available */
 /* #undef HAVE_BLKG_TRYGET */
 
 /* blkg_tryget() GPL-only */
 /* #undef HAVE_BLKG_TRYGET_GPL_ONLY */
 
 /* blk_alloc_disk() exists */
 /* #undef HAVE_BLK_ALLOC_DISK */
 
 /* blk_alloc_disk() exists and takes 2 args */
 /* #undef HAVE_BLK_ALLOC_DISK_2ARG */
 
 /* blk_alloc_queue() expects request function */
 /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN */
 
 /* blk_alloc_queue_rh() expects request function */
 /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH */
 
 /* blk_cleanup_disk() exists */
 /* #undef HAVE_BLK_CLEANUP_DISK */
 
 /* blk_mode_t is defined */
 /* #undef HAVE_BLK_MODE_T */
 
 /* block multiqueue is available */
 /* #undef HAVE_BLK_MQ */
 
 /* block multiqueue hardware context is cached in struct request */
 /* #undef HAVE_BLK_MQ_RQ_HCTX */
 
 /* blk queue backing_dev_info is dynamic */
 /* #undef HAVE_BLK_QUEUE_BDI_DYNAMIC */
 
 /* blk_queue_discard() is available */
 /* #undef HAVE_BLK_QUEUE_DISCARD */
 
+/* backing_dev_info is available through queue gendisk */
+/* #undef HAVE_BLK_QUEUE_DISK_BDI */
+
 /* blk_queue_flag_clear() exists */
 /* #undef HAVE_BLK_QUEUE_FLAG_CLEAR */
 
 /* blk_queue_flag_set() exists */
 /* #undef HAVE_BLK_QUEUE_FLAG_SET */
 
 /* blk_queue_flush() is available */
 /* #undef HAVE_BLK_QUEUE_FLUSH */
 
 /* blk_queue_flush() is GPL-only */
 /* #undef HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */
 
 /* blk_queue_secdiscard() is available */
 /* #undef HAVE_BLK_QUEUE_SECDISCARD */
 
 /* blk_queue_secure_erase() is available */
 /* #undef HAVE_BLK_QUEUE_SECURE_ERASE */
 
 /* blk_queue_update_readahead() exists */
 /* #undef HAVE_BLK_QUEUE_UPDATE_READAHEAD */
 
 /* blk_queue_write_cache() exists */
 /* #undef HAVE_BLK_QUEUE_WRITE_CACHE */
 
 /* blk_queue_write_cache() is GPL-only */
 /* #undef HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY */
 
 /* BLK_STS_RESV_CONFLICT is defined */
 /* #undef HAVE_BLK_STS_RESV_CONFLICT */
 
 /* Define if release() in block_device_operations takes 1 arg */
 /* #undef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_1ARG */
 
 /* Define if revalidate_disk() in block_device_operations */
 /* #undef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK */
 
 /* Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the
    CoreFoundation framework. */
 /* #undef HAVE_CFLOCALECOPYCURRENT */
 
 /* Define to 1 if you have the Mac OS X function
    CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
 /* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */
 
 /* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
    the CoreFoundation framework. */
 /* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */
 
 /* check_disk_change() exists */
 /* #undef HAVE_CHECK_DISK_CHANGE */
 
 /* clear_inode() is available */
 /* #undef HAVE_CLEAR_INODE */
 
 /* dentry uses const struct dentry_operations */
 /* #undef HAVE_CONST_DENTRY_OPERATIONS */
 
 /* copy_from_iter() is available */
 /* #undef HAVE_COPY_FROM_ITER */
 
 /* copy_splice_read exists */
 /* #undef HAVE_COPY_SPLICE_READ */
 
 /* copy_to_iter() is available */
 /* #undef HAVE_COPY_TO_ITER */
 
 /* cpu_has_feature() is GPL-only */
 /* #undef HAVE_CPU_HAS_FEATURE_GPL_ONLY */
 
 /* yes */
 /* #undef HAVE_CPU_HOTPLUG */
 
 /* current_time() exists */
 /* #undef HAVE_CURRENT_TIME */
 
 /* Define if the GNU dcgettext() function is already present or preinstalled.
    */
 /* #undef HAVE_DCGETTEXT */
 
 /* DECLARE_EVENT_CLASS() is available */
 /* #undef HAVE_DECLARE_EVENT_CLASS */
 
 /* dentry aliases are in d_u member */
 /* #undef HAVE_DENTRY_D_U_ALIASES */
 
 /* dequeue_signal() takes 4 arguments */
 /* #undef HAVE_DEQUEUE_SIGNAL_4ARG */
 
 /* lookup_bdev() wants dev_t arg */
 /* #undef HAVE_DEVT_LOOKUP_BDEV */
 
 /* sops->dirty_inode() wants flags */
 /* #undef HAVE_DIRTY_INODE_WITH_FLAGS */
 
 /* disk_check_media_change() exists */
 /* #undef HAVE_DISK_CHECK_MEDIA_CHANGE */
 
 /* disk_*_io_acct() available */
 /* #undef HAVE_DISK_IO_ACCT */
 
 /* disk_update_readahead() exists */
 /* #undef HAVE_DISK_UPDATE_READAHEAD */
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #define HAVE_DLFCN_H 1
 
 /* d_make_root() is available */
 /* #undef HAVE_D_MAKE_ROOT */
 
 /* d_prune_aliases() is available */
 /* #undef HAVE_D_PRUNE_ALIASES */
 
 /* dops->d_revalidate() operation takes nameidata */
 /* #undef HAVE_D_REVALIDATE_NAMEIDATA */
 
 /* eops->encode_fh() wants child and parent inodes */
 /* #undef HAVE_ENCODE_FH_WITH_INODE */
 
 /* sops->evict_inode() exists */
 /* #undef HAVE_EVICT_INODE */
 
 /* Define to 1 if you have the 'execvpe' function. */
 #define HAVE_EXECVPE 1
 
 /* FALLOC_FL_ZERO_RANGE is defined */
 /* #undef HAVE_FALLOC_FL_ZERO_RANGE */
 
 /* fault_in_iov_iter_readable() is available */
 /* #undef HAVE_FAULT_IN_IOV_ITER_READABLE */
 
 /* filemap_range_has_page() is available */
 /* #undef HAVE_FILEMAP_RANGE_HAS_PAGE */
 
 /* fops->aio_fsync() exists */
 /* #undef HAVE_FILE_AIO_FSYNC */
 
 /* file_dentry() is available */
 /* #undef HAVE_FILE_DENTRY */
 
 /* fops->fadvise() exists */
 /* #undef HAVE_FILE_FADVISE */
 
 /* file_inode() is available */
 /* #undef HAVE_FILE_INODE */
 
 /* flush_dcache_page() is GPL-only */
 /* #undef HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY */
 
 /* iops->follow_link() cookie */
 /* #undef HAVE_FOLLOW_LINK_COOKIE */
 
 /* iops->follow_link() nameidata */
 /* #undef HAVE_FOLLOW_LINK_NAMEIDATA */
 
 /* Define if compiler supports -Wformat-overflow */
 /* #undef HAVE_FORMAT_OVERFLOW */
 
 /* fsync_bdev() is declared in include/blkdev.h */
 /* #undef HAVE_FSYNC_BDEV */
 
 /* fops->fsync() with range */
 /* #undef HAVE_FSYNC_RANGE */
 
 /* fops->fsync() without dentry */
 /* #undef HAVE_FSYNC_WITHOUT_DENTRY */
 
 /* yes */
 /* #undef HAVE_GENERIC_FADVISE */
 
 /* generic_fillattr requires struct mnt_idmap* */
 /* #undef HAVE_GENERIC_FILLATTR_IDMAP */
 
 /* generic_fillattr requires struct mnt_idmap* and u32 request_mask */
 /* #undef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK */
 
 /* generic_fillattr requires struct user_namespace* */
 /* #undef HAVE_GENERIC_FILLATTR_USERNS */
 
 /* generic_*_io_acct() 3 arg available */
 /* #undef HAVE_GENERIC_IO_ACCT_3ARG */
 
 /* generic_*_io_acct() 4 arg available */
 /* #undef HAVE_GENERIC_IO_ACCT_4ARG */
 
 /* generic_readlink is global */
 /* #undef HAVE_GENERIC_READLINK */
 
 /* generic_setxattr() exists */
 /* #undef HAVE_GENERIC_SETXATTR */
 
 /* generic_write_checks() takes kiocb */
 /* #undef HAVE_GENERIC_WRITE_CHECKS_KIOCB */
 
 /* Define if the GNU gettext() function is already present or preinstalled. */
 /* #undef HAVE_GETTEXT */
 
 /* Define to 1 if you have the 'gettid' function. */
 /* #undef HAVE_GETTID */
 
 /* iops->get_acl() exists */
 /* #undef HAVE_GET_ACL */
 
 /* iops->get_acl() takes rcu */
 /* #undef HAVE_GET_ACL_RCU */
 
 /* has iops->get_inode_acl() */
 /* #undef HAVE_GET_INODE_ACL */
 
 /* iops->get_link() cookie */
 /* #undef HAVE_GET_LINK_COOKIE */
 
 /* iops->get_link() delayed */
 /* #undef HAVE_GET_LINK_DELAYED */
 
 /* group_info->gid exists */
 /* #undef HAVE_GROUP_INFO_GID */
 
 /* has_capability() is available */
 /* #undef HAVE_HAS_CAPABILITY */
 
 /* iattr->ia_vfsuid and iattr->ia_vfsgid exist */
 /* #undef HAVE_IATTR_VFSID */
 
 /* Define if you have the iconv() function and it works. */
 #define HAVE_ICONV 1
 
 /* iops->getattr() takes struct mnt_idmap* */
 /* #undef HAVE_IDMAP_IOPS_GETATTR */
 
 /* iops->setattr() takes struct mnt_idmap* */
 /* #undef HAVE_IDMAP_IOPS_SETATTR */
 
 /* APIs for idmapped mount are present */
 /* #undef HAVE_IDMAP_MNT_API */
 
 /* mnt_idmap does not have user_namespace */
 /* #undef HAVE_IDMAP_NO_USERNS */
 
 /* Define if compiler supports -Wimplicit-fallthrough */
 /* #undef HAVE_IMPLICIT_FALLTHROUGH */
 
 /* Define if compiler supports -Winfinite-recursion */
 /* #undef HAVE_INFINITE_RECURSION */
 
 /* inode_get_atime() exists in linux/fs.h */
 /* #undef HAVE_INODE_GET_ATIME */
 
 /* inode_get_ctime() exists in linux/fs.h */
 /* #undef HAVE_INODE_GET_CTIME */
 
 /* inode_get_mtime() exists in linux/fs.h */
 /* #undef HAVE_INODE_GET_MTIME */
 
 /* yes */
 /* #undef HAVE_INODE_LOCK_SHARED */
 
 /* inode_owner_or_capable() exists */
 /* #undef HAVE_INODE_OWNER_OR_CAPABLE */
 
 /* inode_owner_or_capable() takes mnt_idmap */
 /* #undef HAVE_INODE_OWNER_OR_CAPABLE_IDMAP */
 
 /* inode_owner_or_capable() takes user_ns */
 /* #undef HAVE_INODE_OWNER_OR_CAPABLE_USERNS */
 
 /* inode_set_atime_to_ts() exists in linux/fs.h */
 /* #undef HAVE_INODE_SET_ATIME_TO_TS */
 
 /* inode_set_ctime_to_ts() exists in linux/fs.h */
 /* #undef HAVE_INODE_SET_CTIME_TO_TS */
 
 /* inode_set_flags() exists */
 /* #undef HAVE_INODE_SET_FLAGS */
 
 /* inode_set_iversion() exists */
 /* #undef HAVE_INODE_SET_IVERSION */
 
 /* inode_set_mtime_to_ts() exists in linux/fs.h */
 /* #undef HAVE_INODE_SET_MTIME_TO_TS */
 
 /* inode->i_*time's are timespec64 */
 /* #undef HAVE_INODE_TIMESPEC64_TIMES */
 
 /* timestamp_truncate() exists */
 /* #undef HAVE_INODE_TIMESTAMP_TRUNCATE */
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #define HAVE_INTTYPES_H 1
 
 /* in_compat_syscall() is available */
 /* #undef HAVE_IN_COMPAT_SYSCALL */
 
 /* iops->create() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_CREATE_IDMAP */
 
 /* iops->create() takes struct user_namespace* */
 /* #undef HAVE_IOPS_CREATE_USERNS */
 
 /* iops->mkdir() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_MKDIR_IDMAP */
 
 /* iops->mkdir() takes struct user_namespace* */
 /* #undef HAVE_IOPS_MKDIR_USERNS */
 
 /* iops->mknod() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_MKNOD_IDMAP */
 
 /* iops->mknod() takes struct user_namespace* */
 /* #undef HAVE_IOPS_MKNOD_USERNS */
 
 /* iops->permission() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_PERMISSION_IDMAP */
 
 /* iops->permission() takes struct user_namespace* */
 /* #undef HAVE_IOPS_PERMISSION_USERNS */
 
 /* iops->rename() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_RENAME_IDMAP */
 
 /* iops->rename() takes struct user_namespace* */
 /* #undef HAVE_IOPS_RENAME_USERNS */
 
 /* iops->setattr() exists */
 /* #undef HAVE_IOPS_SETATTR */
 
 /* iops->symlink() takes struct mnt_idmap* */
 /* #undef HAVE_IOPS_SYMLINK_IDMAP */
 
 /* iops->symlink() takes struct user_namespace* */
 /* #undef HAVE_IOPS_SYMLINK_USERNS */
 
 /* iov_iter_advance() is available */
 /* #undef HAVE_IOV_ITER_ADVANCE */
 
 /* iov_iter_count() is available */
 /* #undef HAVE_IOV_ITER_COUNT */
 
 /* iov_iter_fault_in_readable() is available */
 /* #undef HAVE_IOV_ITER_FAULT_IN_READABLE */
 
 /* iov_iter_revert() is available */
 /* #undef HAVE_IOV_ITER_REVERT */
 
 /* iov_iter_type() is available */
 /* #undef HAVE_IOV_ITER_TYPE */
 
 /* iov_iter types are available */
 /* #undef HAVE_IOV_ITER_TYPES */
 
 /* yes */
 /* #undef HAVE_IO_SCHEDULE_TIMEOUT */
 
 /* Define to 1 if you have the 'issetugid' function. */
 #define HAVE_ISSETUGID 1
 
 /* iter_iov() is available */
 /* #undef HAVE_ITER_IOV */
 
 /* kernel has kernel_fpu_* functions */
 /* #undef HAVE_KERNEL_FPU */
 
 /* kernel has asm/fpu/api.h */
 /* #undef HAVE_KERNEL_FPU_API_HEADER */
 
 /* kernel fpu internal */
 /* #undef HAVE_KERNEL_FPU_INTERNAL */
 
 /* kernel has asm/fpu/internal.h */
 /* #undef HAVE_KERNEL_FPU_INTERNAL_HEADER */
 
 /* uncached_acl_sentinel() exists */
 /* #undef HAVE_KERNEL_GET_ACL_HANDLE_CACHE */
 
 /* Define if compiler supports -Winfinite-recursion */
 /* #undef HAVE_KERNEL_INFINITE_RECURSION */
 
 /* kernel defines intptr_t */
 /* #undef HAVE_KERNEL_INTPTR_T */
 
 /* kernel has kernel_neon_* functions */
 /* #undef HAVE_KERNEL_NEON */
 
 /* kernel does stack verification */
 /* #undef HAVE_KERNEL_OBJTOOL */
 
 /* kernel has linux/objtool.h */
 /* #undef HAVE_KERNEL_OBJTOOL_HEADER */
 
 /* kernel_read() take loff_t pointer */
 /* #undef HAVE_KERNEL_READ_PPOS */
 
 /* strlcpy() exists */
 /* #undef HAVE_KERNEL_STRLCPY */
 
 /* strscpy() exists */
 /* #undef HAVE_KERNEL_STRSCPY */
 
 /* timer_list.function gets a timer_list */
 /* #undef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST */
 
 /* struct timer_list has a flags member */
 /* #undef HAVE_KERNEL_TIMER_LIST_FLAGS */
 
 /* timer_setup() is available */
 /* #undef HAVE_KERNEL_TIMER_SETUP */
 
 /* kernel_write() take loff_t pointer */
 /* #undef HAVE_KERNEL_WRITE_PPOS */
 
 /* kernel has kmap_local_page */
 /* #undef HAVE_KMAP_LOCAL_PAGE */
 
 /* kmem_cache_create_usercopy() exists */
 /* #undef HAVE_KMEM_CACHE_CREATE_USERCOPY */
 
 /* kstrtoul() exists */
 /* #undef HAVE_KSTRTOUL */
 
 /* ktime_get_coarse_real_ts64() exists */
 /* #undef HAVE_KTIME_GET_COARSE_REAL_TS64 */
 
 /* ktime_get_raw_ts64() exists */
 /* #undef HAVE_KTIME_GET_RAW_TS64 */
 
 /* kvmalloc exists */
 /* #undef HAVE_KVMALLOC */
 
 /* Define if you have [aio] */
 /* #undef HAVE_LIBAIO */
 
 /* Define if you have [blkid] */
 /* #undef HAVE_LIBBLKID */
 
 /* Define if you have [crypto] */
 #define HAVE_LIBCRYPTO 1
 
 /* Define if you have [tirpc] */
 /* #undef HAVE_LIBTIRPC */
 
 /* Define if you have [udev] */
 /* #undef HAVE_LIBUDEV */
 
 /* Define if you have [unwind] */
 /* #undef HAVE_LIBUNWIND */
 
 /* libunwind has unw_get_elf_filename */
 /* #undef HAVE_LIBUNWIND_ELF */
 
 /* Define if you have [uuid] */
 /* #undef HAVE_LIBUUID */
 
 /* linux/blk-cgroup.h exists */
 /* #undef HAVE_LINUX_BLK_CGROUP_HEADER */
 
 /* lseek_execute() is available */
 /* #undef HAVE_LSEEK_EXECUTE */
 
 /* makedev() is declared in sys/mkdev.h */
 /* #undef HAVE_MAKEDEV_IN_MKDEV */
 
 /* makedev() is declared in sys/sysmacros.h */
 /* #undef HAVE_MAKEDEV_IN_SYSMACROS */
 
 /* Noting that make_request_fn() returns blk_qc_t */
 /* #undef HAVE_MAKE_REQUEST_FN_RET_QC */
 
 /* Noting that make_request_fn() returns void */
 /* #undef HAVE_MAKE_REQUEST_FN_RET_VOID */
 
 /* iops->mkdir() takes umode_t */
 /* #undef HAVE_MKDIR_UMODE_T */
 
 /* Define to 1 if you have the 'mlockall' function. */
 #define HAVE_MLOCKALL 1
 
+/* page_mapping() is available */
+/* #undef HAVE_MM_PAGE_MAPPING */
+
 /* page_size() is available */
 /* #undef HAVE_MM_PAGE_SIZE */
 
 /* lookup_bdev() wants mode arg */
 /* #undef HAVE_MODE_LOOKUP_BDEV */
 
 /* Define if host toolchain supports MOVBE */
 #define HAVE_MOVBE 1
 
 /* new_sync_read()/new_sync_write() are available */
 /* #undef HAVE_NEW_SYNC_READ */
 
 /* folio_wait_bit() exists */
 /* #undef HAVE_PAGEMAP_FOLIO_WAIT_BIT */
 
 /* part_to_dev() exists */
 /* #undef HAVE_PART_TO_DEV */
 
 /* iops->getattr() takes a path */
 /* #undef HAVE_PATH_IOPS_GETATTR */
 
 /* Define if host toolchain supports PCLMULQDQ */
 #define HAVE_PCLMULQDQ 1
 
 /* percpu_counter_add_batch() is defined */
 /* #undef HAVE_PERCPU_COUNTER_ADD_BATCH */
 
 /* percpu_counter_init() wants gfp_t */
 /* #undef HAVE_PERCPU_COUNTER_INIT_WITH_GFP */
 
 /* posix_acl_chmod() exists */
 /* #undef HAVE_POSIX_ACL_CHMOD */
 
 /* posix_acl_from_xattr() needs user_ns */
 /* #undef HAVE_POSIX_ACL_FROM_XATTR_USERNS */
 
 /* posix_acl_release() is available */
 /* #undef HAVE_POSIX_ACL_RELEASE */
 
 /* posix_acl_release() is GPL-only */
 /* #undef HAVE_POSIX_ACL_RELEASE_GPL_ONLY */
 
 /* posix_acl_valid() wants user namespace */
 /* #undef HAVE_POSIX_ACL_VALID_WITH_NS */
 
+/* proc_handler ctl_table arg is const */
+/* #undef HAVE_PROC_HANDLER_CTL_TABLE_CONST */
+
 /* proc_ops structure exists */
 /* #undef HAVE_PROC_OPS_STRUCT */
 
 /* iops->put_link() cookie */
 /* #undef HAVE_PUT_LINK_COOKIE */
 
 /* iops->put_link() delayed */
 /* #undef HAVE_PUT_LINK_DELAYED */
 
 /* iops->put_link() nameidata */
 /* #undef HAVE_PUT_LINK_NAMEIDATA */
 
 /* If available, contains the Python version number currently in use. */
 #define HAVE_PYTHON "3.7"
 
 /* qat is enabled and existed */
 /* #undef HAVE_QAT */
 
 /* struct reclaim_state has reclaimed */
 /* #undef HAVE_RECLAIM_STATE_RECLAIMED */
 
 /* register_shrinker is vararg */
 /* #undef HAVE_REGISTER_SHRINKER_VARARG */
 
+/* register_sysctl_sz exists */
+/* #undef HAVE_REGISTER_SYSCTL_SZ */
+
 /* register_sysctl_table exists */
 /* #undef HAVE_REGISTER_SYSCTL_TABLE */
 
 /* iops->rename2() exists */
 /* #undef HAVE_RENAME2 */
 
 /* struct inode_operations_wrapper takes .rename2() */
 /* #undef HAVE_RENAME2_OPERATIONS_WRAPPER */
 
 /* iops->rename() wants flags */
 /* #undef HAVE_RENAME_WANTS_FLAGS */
 
 /* REQ_DISCARD is defined */
 /* #undef HAVE_REQ_DISCARD */
 
 /* REQ_FLUSH is defined */
 /* #undef HAVE_REQ_FLUSH */
 
 /* REQ_OP_DISCARD is defined */
 /* #undef HAVE_REQ_OP_DISCARD */
 
 /* REQ_OP_FLUSH is defined */
 /* #undef HAVE_REQ_OP_FLUSH */
 
 /* REQ_OP_SECURE_ERASE is defined */
 /* #undef HAVE_REQ_OP_SECURE_ERASE */
 
 /* REQ_PREFLUSH is defined */
 /* #undef HAVE_REQ_PREFLUSH */
 
 /* revalidate_disk() is available */
 /* #undef HAVE_REVALIDATE_DISK */
 
 /* revalidate_disk_size() is available */
 /* #undef HAVE_REVALIDATE_DISK_SIZE */
 
 /* struct rw_semaphore has member activity */
 /* #undef HAVE_RWSEM_ACTIVITY */
 
 /* struct rw_semaphore has atomic_long_t member count */
 /* #undef HAVE_RWSEM_ATOMIC_LONG_COUNT */
 
 /* linux/sched/signal.h exists */
 /* #undef HAVE_SCHED_SIGNAL_HEADER */
 
 /* Define to 1 if you have the <security/pam_modules.h> header file. */
 #define HAVE_SECURITY_PAM_MODULES_H 1
 
 /* setattr_prepare() accepts mnt_idmap */
 /* #undef HAVE_SETATTR_PREPARE_IDMAP */
 
 /* setattr_prepare() is available, doesn't accept user_namespace */
 /* #undef HAVE_SETATTR_PREPARE_NO_USERNS */
 
 /* setattr_prepare() accepts user_namespace */
 /* #undef HAVE_SETATTR_PREPARE_USERNS */
 
 /* iops->set_acl() exists, takes 3 args */
 /* #undef HAVE_SET_ACL */
 
 /* iops->set_acl() takes 4 args, arg1 is struct mnt_idmap * */
 /* #undef HAVE_SET_ACL_IDMAP_DENTRY */
 
 /* iops->set_acl() takes 4 args */
 /* #undef HAVE_SET_ACL_USERNS */
 
 /* iops->set_acl() takes 4 args, arg2 is struct dentry * */
 /* #undef HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
 
 /* set_cached_acl() is usable */
 /* #undef HAVE_SET_CACHED_ACL_USABLE */
 
 /* set_special_state() exists */
 /* #undef HAVE_SET_SPECIAL_STATE */
 
 /* shrinker_register exists */
 /* #undef HAVE_SHRINKER_REGISTER */
 
 /* struct shrink_control exists */
 /* #undef HAVE_SHRINK_CONTROL_STRUCT */
 
 /* kernel_siginfo_t exists */
 /* #undef HAVE_SIGINFO */
 
 /* signal_stop() exists */
 /* #undef HAVE_SIGNAL_STOP */
 
 /* new shrinker callback wants 2 args */
 /* #undef HAVE_SINGLE_SHRINKER_CALLBACK */
 
 /* cs->count_objects exists */
 /* #undef HAVE_SPLIT_SHRINKER_CALLBACK */
 
 #if defined(__amd64__) || defined(__i386__)
 /* Define if host toolchain supports SSE */
 #define HAVE_SSE 1
 
 /* Define if host toolchain supports SSE2 */
 #define HAVE_SSE2 1
 
 /* Define if host toolchain supports SSE3 */
 #define HAVE_SSE3 1
 
 /* Define if host toolchain supports SSE4.1 */
 #define HAVE_SSE4_1 1
 
 /* Define if host toolchain supports SSE4.2 */
 #define HAVE_SSE4_2 1
 
 /* Define if host toolchain supports SSSE3 */
 #define HAVE_SSSE3 1
 #endif
 
 /* STACK_FRAME_NON_STANDARD is defined */
 /* #undef HAVE_STACK_FRAME_NON_STANDARD */
 
 /* standalone <linux/stdarg.h> exists */
 /* #undef HAVE_STANDALONE_LINUX_STDARG */
 
 /* Define to 1 if you have the <stdint.h> header file. */
 #define HAVE_STDINT_H 1
 
 /* Define to 1 if you have the <stdio.h> header file. */
 #define HAVE_STDIO_H 1
 
 /* Define to 1 if you have the <stdlib.h> header file. */
 #define HAVE_STDLIB_H 1
 
 /* Define to 1 if you have the <strings.h> header file. */
 #define HAVE_STRINGS_H 1
 
 /* Define to 1 if you have the <string.h> header file. */
 #define HAVE_STRING_H 1
 
 /* Define to 1 if you have the 'strlcat' function. */
 #define HAVE_STRLCAT 1
 
 /* Define to 1 if you have the 'strlcpy' function. */
 #define HAVE_STRLCPY 1
 
 /* submit_bio is member of struct block_device_operations */
 /* #undef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
 
 /* have super_block s_shrink */
 /* #undef HAVE_SUPER_BLOCK_S_SHRINK */
 
 /* have super_block s_shrink pointer */
 /* #undef HAVE_SUPER_BLOCK_S_SHRINK_PTR */
 
 /* super_setup_bdi_name() exits */
 /* #undef HAVE_SUPER_SETUP_BDI_NAME */
 
 /* super_block->s_user_ns exists */
 /* #undef HAVE_SUPER_USER_NS */
 
 /* sync_blockdev() is declared in include/blkdev.h */
 /* #undef HAVE_SYNC_BLOCKDEV */
 
 /* struct kobj_type has default_groups */
 /* #undef HAVE_SYSFS_DEFAULT_GROUPS */
 
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #define HAVE_SYS_STAT_H 1
 
 /* Define to 1 if you have the <sys/types.h> header file. */
 #define HAVE_SYS_TYPES_H 1
 
 /* i_op->tmpfile() exists */
 /* #undef HAVE_TMPFILE */
 
 /* i_op->tmpfile() uses old dentry signature */
 /* #undef HAVE_TMPFILE_DENTRY */
 
 /* i_op->tmpfile() has mnt_idmap */
 /* #undef HAVE_TMPFILE_IDMAP */
 
 /* i_op->tmpfile() has userns */
 /* #undef HAVE_TMPFILE_USERNS */
 
 /* totalhigh_pages() exists */
 /* #undef HAVE_TOTALHIGH_PAGES */
 
 /* kernel has totalram_pages() */
 /* #undef HAVE_TOTALRAM_PAGES_FUNC */
 
 /* Define to 1 if you have the 'udev_device_get_is_initialized' function. */
 /* #undef HAVE_UDEV_DEVICE_GET_IS_INITIALIZED */
 
 /* kernel has __kernel_fpu_* functions */
 /* #undef HAVE_UNDERSCORE_KERNEL_FPU */
 
 /* Define to 1 if you have the <unistd.h> header file. */
 #define HAVE_UNISTD_H 1
 
 /* iops->getattr() takes struct user_namespace* */
 /* #undef HAVE_USERNS_IOPS_GETATTR */
 
 /* iops->setattr() takes struct user_namespace* */
 /* #undef HAVE_USERNS_IOPS_SETATTR */
 
 /* user_namespace->ns.inum exists */
 /* #undef HAVE_USER_NS_COMMON_INUM */
 
 /* iops->getattr() takes a vfsmount */
 /* #undef HAVE_VFSMOUNT_IOPS_GETATTR */
 
 /* fops->clone_file_range() is available */
 /* #undef HAVE_VFS_CLONE_FILE_RANGE */
 
 /* fops->copy_file_range() is available */
 /* #undef HAVE_VFS_COPY_FILE_RANGE */
 
 /* fops->dedupe_file_range() is available */
 /* #undef HAVE_VFS_DEDUPE_FILE_RANGE */
 
 /* aops->direct_IO() uses iovec */
 /* #undef HAVE_VFS_DIRECT_IO_IOVEC */
 
 /* aops->direct_IO() uses iov_iter without rw */
 /* #undef HAVE_VFS_DIRECT_IO_ITER */
 
 /* aops->direct_IO() uses iov_iter with offset */
 /* #undef HAVE_VFS_DIRECT_IO_ITER_OFFSET */
 
 /* aops->direct_IO() uses iov_iter with rw and offset */
 /* #undef HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET */
 
 /* filemap_dirty_folio exists */
 /* #undef HAVE_VFS_FILEMAP_DIRTY_FOLIO */
 
 /* file_operations_extend takes .copy_file_range() and .clone_file_range() */
 /* #undef HAVE_VFS_FILE_OPERATIONS_EXTEND */
 
 /* generic_copy_file_range() is available */
 /* #undef HAVE_VFS_GENERIC_COPY_FILE_RANGE */
 
 /* All required iov_iter interfaces are available */
 /* #undef HAVE_VFS_IOV_ITER */
 
 /* fops->iterate() is available */
 /* #undef HAVE_VFS_ITERATE */
 
 /* fops->iterate_shared() is available */
 /* #undef HAVE_VFS_ITERATE_SHARED */
 
 /* fops->readdir() is available */
 /* #undef HAVE_VFS_READDIR */
 
 /* address_space_operations->readpages exists */
 /* #undef HAVE_VFS_READPAGES */
 
 /* read_folio exists */
 /* #undef HAVE_VFS_READ_FOLIO */
 
 /* fops->remap_file_range() is available */
 /* #undef HAVE_VFS_REMAP_FILE_RANGE */
 
 /* fops->read/write_iter() are available */
 /* #undef HAVE_VFS_RW_ITERATE */
 
 /* __set_page_dirty_nobuffers exists */
 /* #undef HAVE_VFS_SET_PAGE_DIRTY_NOBUFFERS */
 
 /* splice_copy_file_range() is available */
 /* #undef HAVE_VFS_SPLICE_COPY_FILE_RANGE */
 
 /* __vmalloc page flags exists */
 /* #undef HAVE_VMALLOC_PAGE_KERNEL */
 
 /* wait_on_bit() takes an action argument */
 /* #undef HAVE_WAIT_ON_BIT_ACTION */
 
 /* wait_queue_entry_t exists */
 /* #undef HAVE_WAIT_QUEUE_ENTRY_T */
 
 /* wq_head->head and wq_entry->entry exist */
 /* #undef HAVE_WAIT_QUEUE_HEAD_ENTRY */
 
 /* int (*writepage_t)() takes struct folio* */
 /* #undef HAVE_WRITEPAGE_T_FOLIO */
 
 /* xattr_handler->get() wants dentry */
 /* #undef HAVE_XATTR_GET_DENTRY */
 
 /* xattr_handler->get() wants both dentry and inode */
 /* #undef HAVE_XATTR_GET_DENTRY_INODE */
 
 /* xattr_handler->get() wants dentry and inode and flags */
 /* #undef HAVE_XATTR_GET_DENTRY_INODE_FLAGS */
 
 /* xattr_handler->get() wants xattr_handler */
 /* #undef HAVE_XATTR_GET_HANDLER */
 
 /* xattr_handler has name */
 /* #undef HAVE_XATTR_HANDLER_NAME */
 
 /* xattr_handler->list() wants dentry */
 /* #undef HAVE_XATTR_LIST_DENTRY */
 
 /* xattr_handler->list() wants xattr_handler */
 /* #undef HAVE_XATTR_LIST_HANDLER */
 
 /* xattr_handler->list() wants simple */
 /* #undef HAVE_XATTR_LIST_SIMPLE */
 
 /* xattr_handler->set() wants dentry */
 /* #undef HAVE_XATTR_SET_DENTRY */
 
 /* xattr_handler->set() wants both dentry and inode */
 /* #undef HAVE_XATTR_SET_DENTRY_INODE */
 
 /* xattr_handler->set() wants xattr_handler */
 /* #undef HAVE_XATTR_SET_HANDLER */
 
 /* xattr_handler->set() takes mnt_idmap */
 /* #undef HAVE_XATTR_SET_IDMAP */
 
 /* xattr_handler->set() takes user_namespace */
 /* #undef HAVE_XATTR_SET_USERNS */
 
 /* Define if host toolchain supports XSAVE */
 #define HAVE_XSAVE 1
 
 /* Define if host toolchain supports XSAVEOPT */
 #define HAVE_XSAVEOPT 1
 
 /* Define if host toolchain supports XSAVES */
 #define HAVE_XSAVES 1
 
 /* ZERO_PAGE() is GPL-only */
 /* #undef HAVE_ZERO_PAGE_GPL_ONLY */
 
 /* Define if you have zlib (libz) */
 #define HAVE_ZLIB 1
 
 /* __posix_acl_chmod() exists */
 /* #undef HAVE___POSIX_ACL_CHMOD */
 
 /* kernel exports FPU functions */
 /* #undef KERNEL_EXPORTS_X86_FPU */
 
 /* TBD: fetch(3) support */
 #if 0
 /* whether the chosen libfetch is to be loaded at run-time */
 #define LIBFETCH_DYNAMIC 1
 
 /* libfetch is fetch(3) */
 #define LIBFETCH_IS_FETCH 1
 
 /* libfetch is libcurl */
 #define LIBFETCH_IS_LIBCURL 0
 
 /* soname of chosen libfetch */
 #define LIBFETCH_SONAME "libfetch.so.6"
 #endif
 
 /* Define to the sub-directory where libtool stores uninstalled libraries. */
 #define LT_OBJDIR ".libs/"
 
 /* make_request_fn() return type */
 /* #undef MAKE_REQUEST_FN_RET */
 
 /* struct shrink_control has nid */
 /* #undef SHRINK_CONTROL_HAS_NID */
 
 /* using complete_and_exit() instead */
 /* #undef SPL_KTHREAD_COMPLETE_AND_EXIT */
 
 /* Defined for legacy compatibility. */
 #define SPL_META_ALIAS ZFS_META_ALIAS
 
 /* Defined for legacy compatibility. */
 #define SPL_META_RELEASE ZFS_META_RELEASE
 
 /* Defined for legacy compatibility. */
 #define SPL_META_VERSION ZFS_META_VERSION
 
 /* pde_data() is PDE_DATA() */
 /* #undef SPL_PDE_DATA */
 
 /* True if ZFS is to be compiled for a FreeBSD system */
 #define SYSTEM_FREEBSD 1
 
 /* True if ZFS is to be compiled for a Linux system */
 /* #undef SYSTEM_LINUX */
 
 /* Define to 1 to enable ZFS debugging */
 /* #undef ZFS_DEBUG */
 
 /* /dev/zfs minor */
 /* #undef ZFS_DEVICE_MINOR */
 
 /* enum node_stat_item contains NR_FILE_PAGES */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES */
 
 /* enum node_stat_item contains NR_INACTIVE_ANON */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON */
 
 /* enum node_stat_item contains NR_INACTIVE_FILE */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE */
 
 /* enum zone_stat_item contains NR_FILE_PAGES */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_FILE_PAGES */
 
 /* enum zone_stat_item contains NR_INACTIVE_ANON */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_ANON */
 
 /* enum zone_stat_item contains NR_INACTIVE_FILE */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_FILE */
 
 /* GENHD_FL_EXT_DEVT flag is not available */
 /* #undef ZFS_GENHD_FL_EXT_DEVT */
 
 /* GENHD_FL_NO_PART_SCAN flag is available */
 /* #undef ZFS_GENHD_FL_NO_PART */
 
 /* global_node_page_state() exists */
 /* #undef ZFS_GLOBAL_NODE_PAGE_STATE */
 
 /* global_zone_page_state() exists */
 /* #undef ZFS_GLOBAL_ZONE_PAGE_STATE */
 
 /* Define to 1 if GPL-only symbols can be used */
 /* #undef ZFS_IS_GPL_COMPATIBLE */
 
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.99-623-FreeBSD_g9c56b8ec7"
+#define ZFS_META_ALIAS "zfs-2.2.99-634-FreeBSD_gd2ccc2155"
 
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
 
 /* Define the project release date. */
 /* #undef ZFS_META_DATA */
 
 /* Define the maximum compatible kernel version. */
 #define ZFS_META_KVER_MAX "6.9"
 
 /* Define the minimum compatible kernel version. */
 #define ZFS_META_KVER_MIN "3.10"
 
 /* Define the project license. */
 #define ZFS_META_LICENSE "CDDL"
 
 /* Define the libtool library 'age' version information. */
 /* #undef ZFS_META_LT_AGE */
 
 /* Define the libtool library 'current' version information. */
 /* #undef ZFS_META_LT_CURRENT */
 
 /* Define the libtool library 'revision' version information. */
 /* #undef ZFS_META_LT_REVISION */
 
 /* Define the project name. */
 #define ZFS_META_NAME "zfs"
 
 /* Define the project release. */
-#define ZFS_META_RELEASE "623-FreeBSD_g9c56b8ec7"
+#define ZFS_META_RELEASE "634-FreeBSD_gd2ccc2155"
 
 /* Define the project version. */
 #define ZFS_META_VERSION "2.2.99"
 
 /* count is located in percpu_ref.data */
 /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
index 97a0479c6fc0..23b7a5afa4ce 100644
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define	ZFS_META_GITREV "zfs-2.2.99-623-g9c56b8ec7"
+#define	ZFS_META_GITREV "zfs-2.2.99-634-gd2ccc2155"