diff --git a/cddl/lib/libzpool/Makefile b/cddl/lib/libzpool/Makefile
--- a/cddl/lib/libzpool/Makefile
+++ b/cddl/lib/libzpool/Makefile
@@ -135,6 +135,7 @@
 	uberblock.c \
 	unique.c \
 	vdev.c \
+	vdev_cache.c \
 	vdev_draid.c \
 	vdev_draid_rand.c \
 	vdev_file.c \
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -326,6 +326,7 @@
 contrib/openzfs/module/zfs/uberblock.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/unique.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev.c		optional zfs compile-with "${ZFS_C}"
+contrib/openzfs/module/zfs/vdev_cache.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_draid.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_draid_rand.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_indirect.c		optional zfs compile-with "${ZFS_C}"
diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk
--- a/sys/conf/kern.pre.mk
+++ b/sys/conf/kern.pre.mk
@@ -252,7 +252,8 @@
 # Special flags for managing the compat compiles for ZFS
 ZFS_CFLAGS+=	-I$S/contrib/openzfs/module/icp/include \
 	${CDDL_CFLAGS} -DBUILDING_ZFS -DHAVE_UIO_ZEROCOPY \
-	-DWITH_NETDUMP -D__KERNEL__ -D_SYS_CONDVAR_H_ -DSMP
+	-DWITH_NETDUMP -D__KERNEL__ -D_SYS_CONDVAR_H_ -DSMP \
+	-DIN_FREEBSD_BASE
 
 .if ${MACHINE_ARCH} == "amd64"
 ZFS_CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META
--- a/sys/contrib/openzfs/META
+++ b/sys/contrib/openzfs/META
@@ -1,10 +1,10 @@
 Meta:          1
 Name:          zfs
 Branch:        1.0
-Version:       2.2.0
-Release:       rc1
+Version:       2.1.99
+Release:       1
 Release-Tags:  relext
 License:       CDDL
 Author:        OpenZFS
-Linux-Maximum: 6.3
+Linux-Maximum: 6.2
 Linux-Minimum: 3.10
diff --git a/sys/contrib/openzfs/cmd/arc_summary b/sys/contrib/openzfs/cmd/arc_summary
--- a/sys/contrib/openzfs/cmd/arc_summary
+++ b/sys/contrib/openzfs/cmd/arc_summary
@@ -64,6 +64,7 @@
 SECTION_PATHS = {'arc': 'arcstats',
                  'dmu': 'dmu_tx',
                  'l2arc': 'arcstats',  # L2ARC stuff lives in arcstats
+                 'vdev': 'vdev_cache_stats',
                  'zfetch': 'zfetchstats',
                  'zil': 'zil'}
 
@@ -89,6 +90,8 @@
     # Requires py36-sysctl on FreeBSD
     import sysctl
 
+    VDEV_CACHE_SIZE = 'vdev.cache_size'
+
     def is_value(ctl):
         return ctl.type != sysctl.CTLTYPE_NODE
 
@@ -132,6 +135,8 @@
     SPL_PATH = '/sys/module/spl/parameters'
     TUNABLES_PATH = '/sys/module/zfs/parameters'
 
+    VDEV_CACHE_SIZE = 'zfs_vdev_cache_size'
+
     def load_kstats(section):
         path = os.path.join(KSTAT_PATH, section)
         with open(path) as f:
@@ -837,8 +842,7 @@
                ('Free on write:', 'l2_free_on_write'),
                ('R/W clashes:', 'l2_rw_clash'),
                ('Bad checksums:', 'l2_cksum_bad'),
-               ('Read errors:', 'l2_io_error'),
-               ('Write errors:', 'l2_writes_error'))
+               ('I/O errors:', 'l2_io_error'))
 
     for title, value in l2_todo:
         prt_i1(title, f_hits(arc_stats[value]))
@@ -874,20 +878,28 @@
     prt_i2('Miss ratio:',
            f_perc(arc_stats['l2_misses'], l2_access_total),
            f_hits(arc_stats['l2_misses']))
+    prt_i1('Feeds:', f_hits(arc_stats['l2_feeds']))
 
     print()
-    print('L2ARC I/O:')
-    prt_i2('Reads:',
-           f_bytes(arc_stats['l2_read_bytes']),
-           f_hits(arc_stats['l2_hits']))
-    prt_i2('Writes:',
-           f_bytes(arc_stats['l2_write_bytes']),
-           f_hits(arc_stats['l2_writes_sent']))
+    print('L2ARC writes:')
+
+    if arc_stats['l2_writes_done'] != arc_stats['l2_writes_sent']:
+        prt_i2('Writes sent:', 'FAULTED', f_hits(arc_stats['l2_writes_sent']))
+        prt_i2('Done ratio:',
+               f_perc(arc_stats['l2_writes_done'],
+                      arc_stats['l2_writes_sent']),
+               f_hits(arc_stats['l2_writes_done']))
+        prt_i2('Error ratio:',
+               f_perc(arc_stats['l2_writes_error'],
+                      arc_stats['l2_writes_sent']),
+               f_hits(arc_stats['l2_writes_error']))
+    else:
+        prt_i2('Writes sent:', '100 %', f_hits(arc_stats['l2_writes_sent']))
 
     print()
     print('L2ARC evicts:')
-    prt_i1('L1 cached:', f_hits(arc_stats['l2_evict_l1cached']))
-    prt_i1('While reading:', f_hits(arc_stats['l2_evict_reading']))
+    prt_i1('Lock retries:', f_hits(arc_stats['l2_evict_lock_retry']))
+    prt_i1('Upon reading:', f_hits(arc_stats['l2_evict_reading']))
     print()
 
 
@@ -947,6 +959,35 @@
     print()
 
 
+def section_vdev(kstats_dict):
+    """Print VDEV cache hit, miss and delegation counts with their ratios"""
+
+    # As of Nov 2017 the VDEV cache is disabled because it is actually
+    # harmful; a cache-size tunable of '0' means there is nothing to report,
+    # so the whole section is skipped. For details see
+    # https://github.com/openzfs/zfs/blob/master/module/zfs/vdev_cache.c
+    tunables = get_vdev_params()
+
+    if tunables[VDEV_CACHE_SIZE] == '0':
+        print('VDEV cache disabled, skipping section\n')
+        return
+
+    vdev_stats = isolate_section('vdev_cache_stats', kstats_dict)
+    # Total against which each ratio is computed: hits + misses + delegations
+    vdev_cache_total = int(vdev_stats['hits']) +\
+        int(vdev_stats['misses']) +\
+        int(vdev_stats['delegations'])
+
+    prt_1('VDEV cache summary:', f_hits(vdev_cache_total))
+    prt_i2('Hit ratio:', f_perc(vdev_stats['hits'], vdev_cache_total),
+           f_hits(vdev_stats['hits']))
+    prt_i2('Miss ratio:', f_perc(vdev_stats['misses'], vdev_cache_total),
+           f_hits(vdev_stats['misses']))
+    prt_i2('Delegations:', f_perc(vdev_stats['delegations'], vdev_cache_total),
+           f_hits(vdev_stats['delegations']))
+    print()
+
+
 def section_zil(kstats_dict):
     """Collect information on the ZFS Intent Log. Some of the information
     taken from https://github.com/openzfs/zfs/blob/master/include/sys/zil.h
@@ -974,6 +1015,7 @@
                  'l2arc': section_l2arc,
                  'spl': section_spl,
                  'tunables': section_tunables,
+                 'vdev': section_vdev,
                  'zil': section_zil}
 
 
diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c
--- a/sys/contrib/openzfs/cmd/zdb/zdb.c
+++ b/sys/contrib/openzfs/cmd/zdb/zdb.c
@@ -33,7 +33,6 @@
  *     under sponsorship from the FreeBSD Foundation.
  * Copyright (c) 2021 Allan Jude
  * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
- * Copyright (c) 2023, Klara Inc.
  */
 
 #include <stdio.h>
@@ -327,7 +326,7 @@
 	int err;
 	struct sublivelist_verify *sv = args;
 
-	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
+	zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
 	    sizeof (sublivelist_verify_block_refcnt_t));
 
 	err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
@@ -391,7 +390,7 @@
 {
 	(void) args;
 	sublivelist_verify_t sv;
-	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
+	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
 	    sizeof (sublivelist_verify_block_t));
 	int err = sublivelist_verify_func(&sv, dle);
 	zfs_btree_clear(&sv.sv_leftover);
@@ -683,7 +682,7 @@
 	(void) printf("Verifying deleted livelist entries\n");
 
 	sublivelist_verify_t sv;
-	zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
+	zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
 	    sizeof (sublivelist_verify_block_t));
 	iterate_deleted_livelists(spa, livelist_verify, &sv);
 
@@ -717,7 +716,7 @@
 			mv.mv_start = m->ms_start;
 			mv.mv_end = m->ms_start + m->ms_size;
 			zfs_btree_create(&mv.mv_livelist_allocs,
-			    livelist_block_compare, NULL,
+			    livelist_block_compare,
 			    sizeof (sublivelist_verify_block_t));
 
 			mv_populate_livelist_allocs(&mv, &sv);
@@ -790,11 +789,8 @@
 	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]]\n"
 	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] [-K <key>]\n"
 	    "\t\t[<poolname>[/<dataset | objset id>] [<object | range> ...]\n"
-	    "\t%s -B [-e [-V] [-p <path> ...]] [-I <inflight I/Os>]\n"
-	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
-	    "\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
 	    "\t%s [-v] <bookmark>\n"
-	    "\t%s -C [-A] [-U <cache>] [<poolname>]\n"
+	    "\t%s -C [-A] [-U <cache>]\n"
 	    "\t%s -l [-Aqu] <device>\n"
 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
@@ -806,7 +802,7 @@
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 	    "<poolname>\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
-	    cmdname, cmdname, cmdname, cmdname, cmdname);
+	    cmdname, cmdname, cmdname, cmdname);
 
 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
@@ -829,8 +825,6 @@
 	(void) fprintf(stderr, "    Options to control amount of output:\n");
 	(void) fprintf(stderr, "        -b --block-stats             "
 	    "block statistics\n");
-	(void) fprintf(stderr, "        -B --backup                  "
-	    "backup stream\n");
 	(void) fprintf(stderr, "        -c --checksum                "
 	    "checksum all metadata (twice for all data) blocks\n");
 	(void) fprintf(stderr, "        -C --config                  "
@@ -4881,81 +4875,6 @@
 	return (err);
 }
 
-static int
-dump_backup_bytes(objset_t *os, void *buf, int len, void *arg)
-{
-	const char *p = (const char *)buf;
-	ssize_t nwritten;
-
-	(void) os;
-	(void) arg;
-
-	/* Write the data out, handling short writes and signals. */
-	while ((nwritten = write(STDOUT_FILENO, p, len)) < len) {
-		if (nwritten < 0) {
-			if (errno == EINTR)
-				continue;
-			return (errno);
-		}
-		p += nwritten;
-		len -= nwritten;
-	}
-
-	return (0);
-}
-
-static void
-dump_backup(const char *pool, uint64_t objset_id, const char *flagstr)
-{
-	boolean_t embed = B_FALSE;
-	boolean_t large_block = B_FALSE;
-	boolean_t compress = B_FALSE;
-	boolean_t raw = B_FALSE;
-
-	const char *c;
-	for (c = flagstr; c != NULL && *c != '\0'; c++) {
-		switch (*c) {
-			case 'e':
-				embed = B_TRUE;
-				break;
-			case 'L':
-				large_block = B_TRUE;
-				break;
-			case 'c':
-				compress = B_TRUE;
-				break;
-			case 'w':
-				raw = B_TRUE;
-				break;
-			default:
-				fprintf(stderr, "dump_backup: invalid flag "
-				    "'%c'\n", *c);
-				return;
-		}
-	}
-
-	if (isatty(STDOUT_FILENO)) {
-		fprintf(stderr, "dump_backup: stream cannot be written "
-		    "to a terminal\n");
-		return;
-	}
-
-	offset_t off = 0;
-	dmu_send_outparams_t out = {
-	    .dso_outfunc = dump_backup_bytes,
-	    .dso_dryrun  = B_FALSE,
-	};
-
-	int err = dmu_send_obj(pool, objset_id, /* fromsnap */0, embed,
-	    large_block, compress, raw, /* saved */ B_FALSE, STDOUT_FILENO,
-	    &off, &out);
-	if (err != 0) {
-		fprintf(stderr, "dump_backup: dmu_send_obj: %s\n",
-		    strerror(err));
-		return;
-	}
-}
-
 static int
 zdb_copy_object(objset_t *os, uint64_t srcobj, char *destfile)
 {
@@ -8546,9 +8465,9 @@
 		 */
 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
 		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
-		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
-		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
-		    NULL, NULL));
+		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
+		    ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
+		    ZIO_FLAG_OPTIONAL, NULL, NULL));
 	}
 
 	error = zio_wait(zio);
@@ -8642,6 +8561,7 @@
 				zio_nowait(zio_vdev_child_io(czio, bp, vd,
 				    offset, pabd, psize, ZIO_TYPE_READ,
 				    ZIO_PRIORITY_SYNC_READ,
+				    ZIO_FLAG_DONT_CACHE |
 				    ZIO_FLAG_DONT_PROPAGATE |
 				    ZIO_FLAG_DONT_RETRY |
 				    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW |
@@ -8775,7 +8695,6 @@
 	struct option long_options[] = {
 		{"ignore-assertions",	no_argument,		NULL, 'A'},
 		{"block-stats",		no_argument,		NULL, 'b'},
-		{"backup",		no_argument,		NULL, 'B'},
 		{"checksum",		no_argument,		NULL, 'c'},
 		{"config",		no_argument,		NULL, 'C'},
 		{"datasets",		no_argument,		NULL, 'd'},
@@ -8817,11 +8736,10 @@
 	};
 
 	while ((c = getopt_long(argc, argv,
-	    "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
+	    "AbcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
 	    long_options, NULL)) != -1) {
 		switch (c) {
 		case 'b':
-		case 'B':
 		case 'c':
 		case 'C':
 		case 'd':
@@ -8969,7 +8887,7 @@
 		verbose = MAX(verbose, 1);
 
 	for (c = 0; c < 256; c++) {
-		if (dump_all && strchr("ABeEFkKlLNOPrRSXy", c) == NULL)
+		if (dump_all && strchr("AeEFkKlLNOPrRSXy", c) == NULL)
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;
@@ -9155,8 +9073,7 @@
 				    checkpoint_pool, error);
 			}
 
-		} else if (target_is_spa || dump_opt['R'] || dump_opt['B'] ||
-		    objset_id == 0) {
+		} else if (target_is_spa || dump_opt['R'] || objset_id == 0) {
 			zdb_set_skip_mmp(target);
 			error = spa_open_rewind(target, &spa, FTAG, policy,
 			    NULL);
@@ -9292,10 +9209,7 @@
 					    strerror(errno));
 			}
 		}
-		if (dump_opt['B']) {
-			dump_backup(target, objset_id,
-			    argc > 0 ? argv[0] : NULL);
-		} else if (os != NULL) {
+		if (os != NULL) {
 			dump_objset(os);
 		} else if (zopt_object_args > 0 && !dump_opt['m']) {
 			dump_objset(spa->spa_meta_objset);
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
@@ -369,7 +369,9 @@
 			return (NULL);
 		}
 
-		if ((event = list_remove_head(&agent_events)) != NULL) {
+		if ((event = (list_head(&agent_events))) != NULL) {
+			list_remove(&agent_events, event);
+
 			(void) pthread_mutex_unlock(&agent_lock);
 
 			/* dispatch to all event subscribers */
@@ -432,7 +434,8 @@
 	(void) pthread_join(g_agents_tid, NULL);
 
 	/* drain any pending events */
-	while ((event = list_remove_head(&agent_events)) != NULL) {
+	while ((event = (list_head(&agent_events))) != NULL) {
+		list_remove(&agent_events, event);
 		nvlist_free(event->ae_nvl);
 		free(event);
 	}
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -1288,14 +1288,17 @@
 		tpool_destroy(g_tpool);
 	}
 
-	while ((pool = list_remove_head(&g_pool_list)) != NULL) {
+	while ((pool = (list_head(&g_pool_list))) != NULL) {
+		list_remove(&g_pool_list, pool);
 		zpool_close(pool->uap_zhp);
 		free(pool);
 	}
 	list_destroy(&g_pool_list);
 
-	while ((device = list_remove_head(&g_device_list)) != NULL)
+	while ((device = (list_head(&g_device_list))) != NULL) {
+		list_remove(&g_device_list, device);
 		free(device);
+	}
 	list_destroy(&g_device_list);
 
 	libzfs_fini(g_zfshdl);
diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
--- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c
+++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c
@@ -6057,8 +6057,8 @@
 				if (p != NULL)
 					rid = p->pw_uid;
 				else if (*endch != '\0') {
-					(void) snprintf(errbuf, sizeof (errbuf),
-					    gettext("invalid user %s\n"), curr);
+					(void) snprintf(errbuf, 256, gettext(
+					    "invalid user %s\n"), curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			} else if (opts->group) {
@@ -6071,9 +6071,8 @@
 				if (g != NULL)
 					rid = g->gr_gid;
 				else if (*endch != '\0') {
-					(void) snprintf(errbuf, sizeof (errbuf),
-					    gettext("invalid group %s\n"),
-					    curr);
+					(void) snprintf(errbuf, 256, gettext(
+					    "invalid group %s\n"),  curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			} else {
@@ -6098,9 +6097,8 @@
 					who_type = ZFS_DELEG_GROUP;
 					rid = g->gr_gid;
 				} else {
-					(void) snprintf(errbuf, sizeof (errbuf),
-					    gettext("invalid user/group %s\n"),
-					    curr);
+					(void) snprintf(errbuf, 256, gettext(
+					    "invalid user/group %s\n"), curr);
 					allow_usage(un, B_TRUE, errbuf);
 				}
 			}
diff --git a/sys/contrib/openzfs/cmd/zilstat.in b/sys/contrib/openzfs/cmd/zilstat.in
--- a/sys/contrib/openzfs/cmd/zilstat.in
+++ b/sys/contrib/openzfs/cmd/zilstat.in
@@ -36,49 +36,31 @@
 from argparse import RawTextHelpFormatter
 
 cols = {
-	# hdr:       [size,      scale,      kstat name]
+	# hdr:       [size,      scale,      kstat name]
 	"time":      [8,         -1,         "time"],
 	"pool":      [12,        -1,         "pool"],
 	"ds":        [12,        -1,         "dataset_name"],
 	"obj":       [12,        -1,         "objset"],
-	"cc":        [5,         1000,       "zil_commit_count"],
-	"cwc":       [5,         1000,       "zil_commit_writer_count"],
-	"ic":        [5,         1000,       "zil_itx_count"],
-	"iic":       [5,         1000,       "zil_itx_indirect_count"],
-	"iib":       [5,         1024,       "zil_itx_indirect_bytes"],
-	"icc":       [5,         1000,       "zil_itx_copied_count"],
-	"icb":       [5,         1024,       "zil_itx_copied_bytes"],
-	"inc":       [5,         1000,       "zil_itx_needcopy_count"],
-	"inb":       [5,         1024,       "zil_itx_needcopy_bytes"],
-	"idc":       [5,         1000,       "icc+inc"],
-	"idb":       [5,         1024,       "icb+inb"],
-	"iwc":       [5,         1000,       "iic+idc"],
-	"iwb":       [5,         1024,       "iib+idb"],
-	"imnc":      [6,         1000,       "zil_itx_metaslab_normal_count"],
-	"imnb":      [6,         1024,       "zil_itx_metaslab_normal_bytes"],
-	"imnw":      [6,         1024,       "zil_itx_metaslab_normal_write"],
-	"imna":      [6,         1024,       "zil_itx_metaslab_normal_alloc"],
-	"imsc":      [6,         1000,       "zil_itx_metaslab_slog_count"],
-	"imsb":      [6,         1024,       "zil_itx_metaslab_slog_bytes"],
-	"imsw":      [6,         1024,       "zil_itx_metaslab_slog_write"],
-	"imsa":      [6,         1024,       "zil_itx_metaslab_slog_alloc"],
-	"imc":       [5,         1000,       "imnc+imsc"],
-	"imb":       [5,         1024,       "imnb+imsb"],
-	"imw":       [5,         1024,       "imnw+imsw"],
-	"ima":       [5,         1024,       "imna+imsa"],
-	"se%":       [3,         100,        "imb/ima"],
-	"sen%":      [4,         100,        "imnb/imna"],
-	"ses%":      [4,         100,        "imsb/imsa"],
-	"te%":       [3,         100,        "imb/imw"],
-	"ten%":      [4,         100,        "imnb/imnw"],
-	"tes%":      [4,         100,        "imsb/imsw"],
+	"zcc":       [10,        1000,       "zil_commit_count"],
+	"zcwc":      [10,        1000,       "zil_commit_writer_count"],
+	"ziic":      [10,        1000,       "zil_itx_indirect_count"],
+	"zic":       [10,        1000,       "zil_itx_count"],
+	"ziib":      [10,        1024,       "zil_itx_indirect_bytes"],
+	"zicc":      [10,        1000,       "zil_itx_copied_count"],
+	"zicb":      [10,        1024,       "zil_itx_copied_bytes"],
+	"zinc":      [10,        1000,       "zil_itx_needcopy_count"],
+	"zinb":      [10,        1024,       "zil_itx_needcopy_bytes"],
+	"zimnc":     [10,        1000,       "zil_itx_metaslab_normal_count"],
+	"zimnb":     [10,        1024,       "zil_itx_metaslab_normal_bytes"],
+	"zimsc":     [10,        1000,       "zil_itx_metaslab_slog_count"],
+	"zimsb":     [10,        1024,       "zil_itx_metaslab_slog_bytes"],
 }
 
-hdr = ["time", "ds", "cc", "ic", "idc", "idb", "iic", "iib",
-	"imnc", "imnw", "imsc", "imsw"]
+hdr = ["time", "pool", "ds", "obj", "zcc", "zcwc", "ziic", "zic", "ziib", \
+	"zicc", "zicb", "zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
 
-ghdr = ["time", "cc", "ic", "idc", "idb", "iic", "iib",
-	"imnc", "imnw", "imsc", "imsw"]
+ghdr = ["time", "zcc", "zcwc", "ziic", "zic", "ziib", "zicc", "zicb",
+	"zinc", "zinb", "zimnc", "zimnb", "zimsc", "zimsb"]
 
 cmd = ("Usage: zilstat [-hgdv] [-i interval] [-p pool_name]")
 
@@ -123,7 +105,7 @@
 	global sep
 	for col in hdr:
 		new_col = col
-		if interval > 0 and cols[col][1] > 100:
+		if interval > 0 and col not in ['time', 'pool', 'ds', 'obj']:
 			new_col += "/s"
 		sys.stdout.write("%*s%s" % (cols[col][0], new_col, sep))
 	sys.stdout.write("\n")
@@ -133,7 +115,7 @@
 	global sep
 	for col in hdr:
 		val = v[cols[col][2]]
-		if interval > 0 and cols[col][1] > 100:
+		if col not in ['time', 'pool', 'ds', 'obj'] and interval > 0:
 			val = v[cols[col][2]] // interval
 		sys.stdout.write("%s%s" % (
 			prettynum(cols[col][0], cols[col][1], val), sep))
@@ -255,7 +237,9 @@
 
 		invalid = []
 		for ele in hdr:
-			if ele not in cols:
+			if gFlag and ele not in ghdr:
+				invalid.append(ele)
+			elif ele not in cols:
 				invalid.append(ele)
 
 		if len(invalid) > 0:
@@ -419,17 +403,17 @@
 	diff = copy.deepcopy(curr)
 	for pool in curr:
 		for objset in curr[pool]:
-			for key in curr[pool][objset]:
-				if not isinstance(diff[pool][objset][key], int):
-					continue
-				# If prev is NULL, this is the
-				# first time we are here
-				if not prev:
-					diff[pool][objset][key] = 0
-				else:
-					diff[pool][objset][key] \
-						= curr[pool][objset][key] \
-						- prev[pool][objset][key]
+			for col in hdr:
+				if col not in ['time', 'pool', 'ds', 'obj']:
+					key = cols[col][2]
+					# If prev is NULL, this is the
+					# first time we are here
+					if not prev:
+						diff[pool][objset][key] = 0
+					else:
+						diff[pool][objset][key] \
+							= curr[pool][objset][key] \
+							- prev[pool][objset][key]
 
 def zil_build_dict(pool = "GLOBAL"):
 	global kstat
@@ -441,77 +425,10 @@
 			if objset not in curr[pool]:
 				curr[pool][objset] = dict()
 			curr[pool][objset][key] = val
-
-def zil_extend_dict():
-	global diff
-	for pool in diff:
-		for objset in diff[pool]:
-			diff[pool][objset]["pool"] = pool
-			diff[pool][objset]["objset"] = objset
-			diff[pool][objset]["time"] = time.strftime("%H:%M:%S", \
-				time.localtime())
-			diff[pool][objset]["icc+inc"] = \
-				diff[pool][objset]["zil_itx_copied_count"] + \
-				diff[pool][objset]["zil_itx_needcopy_count"]
-			diff[pool][objset]["icb+inb"] = \
-				diff[pool][objset]["zil_itx_copied_bytes"] + \
-				diff[pool][objset]["zil_itx_needcopy_bytes"]
-			diff[pool][objset]["iic+idc"] = \
-				diff[pool][objset]["zil_itx_indirect_count"] + \
-				diff[pool][objset]["zil_itx_copied_count"] + \
-				diff[pool][objset]["zil_itx_needcopy_count"]
-			diff[pool][objset]["iib+idb"] = \
-				diff[pool][objset]["zil_itx_indirect_bytes"] + \
-				diff[pool][objset]["zil_itx_copied_bytes"] + \
-				diff[pool][objset]["zil_itx_needcopy_bytes"]
-			diff[pool][objset]["imnc+imsc"] = \
-				diff[pool][objset]["zil_itx_metaslab_normal_count"] + \
-				diff[pool][objset]["zil_itx_metaslab_slog_count"]
-			diff[pool][objset]["imnb+imsb"] = \
-				diff[pool][objset]["zil_itx_metaslab_normal_bytes"] + \
-				diff[pool][objset]["zil_itx_metaslab_slog_bytes"]
-			diff[pool][objset]["imnw+imsw"] = \
-				diff[pool][objset]["zil_itx_metaslab_normal_write"] + \
-				diff[pool][objset]["zil_itx_metaslab_slog_write"]
-			diff[pool][objset]["imna+imsa"] = \
-				diff[pool][objset]["zil_itx_metaslab_normal_alloc"] + \
-				diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
-			if diff[pool][objset]["imna+imsa"] > 0:
-				diff[pool][objset]["imb/ima"] = 100 * \
-					diff[pool][objset]["imnb+imsb"] // \
-					diff[pool][objset]["imna+imsa"]
-			else:
-				diff[pool][objset]["imb/ima"] = 100
-			if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
-				diff[pool][objset]["imnb/imna"] = 100 * \
-					diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
-					diff[pool][objset]["zil_itx_metaslab_normal_alloc"]
-			else:
-				diff[pool][objset]["imnb/imna"] = 100
-			if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
-				diff[pool][objset]["imsb/imsa"] = 100 * \
-					diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
-					diff[pool][objset]["zil_itx_metaslab_slog_alloc"]
-			else:
-				diff[pool][objset]["imsb/imsa"] = 100
-			if diff[pool][objset]["imnw+imsw"] > 0:
-				diff[pool][objset]["imb/imw"] = 100 * \
-					diff[pool][objset]["imnb+imsb"] // \
-					diff[pool][objset]["imnw+imsw"]
-			else:
-				diff[pool][objset]["imb/imw"] = 100
-			if diff[pool][objset]["zil_itx_metaslab_normal_alloc"] > 0:
-				diff[pool][objset]["imnb/imnw"] = 100 * \
-					diff[pool][objset]["zil_itx_metaslab_normal_bytes"] // \
-					diff[pool][objset]["zil_itx_metaslab_normal_write"]
-			else:
-				diff[pool][objset]["imnb/imnw"] = 100
-			if diff[pool][objset]["zil_itx_metaslab_slog_alloc"] > 0:
-				diff[pool][objset]["imsb/imsw"] = 100 * \
-					diff[pool][objset]["zil_itx_metaslab_slog_bytes"] // \
-					diff[pool][objset]["zil_itx_metaslab_slog_write"]
-			else:
-				diff[pool][objset]["imsb/imsw"] = 100
+		curr[pool][objset]["pool"] = pool
+		curr[pool][objset]["objset"] = objset
+		curr[pool][objset]["time"] = time.strftime("%H:%M:%S", \
+			time.localtime())
 
 def sign_handler_epipe(sig, frame):
 	print("Caught EPIPE signal: " + str(frame))
@@ -520,31 +437,30 @@
 
 def main():
 	global interval
-	global curr, diff
+	global curr
 	hprint = False
 	init()
 	signal.signal(signal.SIGINT, signal.SIG_DFL)
 	signal.signal(signal.SIGPIPE, sign_handler_epipe)
 
-	zil_process_kstat()
-	if not curr:
-		print ("Error: No stats to show")
-		sys.exit(0)
-	print_header()
 	if interval > 0:
-		time.sleep(interval)
 		while True:
 			calculate_diff()
 			if not diff:
 				print ("Error: No stats to show")
 				sys.exit(0)
-			zil_extend_dict()
+			if hprint == False:
+				print_header()
+				hprint = True
 			print_dict(diff)
 			time.sleep(interval)
 	else:
-		diff = curr
-		zil_extend_dict()
-		print_dict(diff)
+		zil_process_kstat()
+		if not curr:
+			print ("Error: No stats to show")
+			sys.exit(0)
+		print_header()
+		print_dict(curr)
 
 if __name__ == '__main__':
 	main()
diff --git a/sys/contrib/openzfs/cmd/zpool/Makefile.am b/sys/contrib/openzfs/cmd/zpool/Makefile.am
--- a/sys/contrib/openzfs/cmd/zpool/Makefile.am
+++ b/sys/contrib/openzfs/cmd/zpool/Makefile.am
@@ -145,7 +145,6 @@
 	%D%/compatibility.d/openzfs-2.0-linux \
 	%D%/compatibility.d/openzfs-2.1-freebsd \
 	%D%/compatibility.d/openzfs-2.1-linux \
-	%D%/compatibility.d/openzfs-2.2 \
 	%D%/compatibility.d/openzfsonosx-1.7.0 \
 	%D%/compatibility.d/openzfsonosx-1.8.1 \
 	%D%/compatibility.d/openzfsonosx-1.9.3 \
@@ -169,20 +168,12 @@
 	"freebsd-11.3		freebsd-12.0" \
 	"freebsd-11.3		freebsd-12.1" \
 	"freebsd-11.3		freebsd-12.2" \
-	"freebsd-11.3		freebsd-12.3" \
-	"freebsd-11.3		freebsd-12.4" \
-	"openzfs-2.1-freebsd	freebsd-13.0" \
-	"openzfs-2.1-freebsd	freebsd-13.1" \
-	"openzfs-2.1-freebsd	freebsd-13.2" \
 	"freebsd-11.3		freenas-11.3" \
 	"freenas-11.0		freenas-11.1" \
 	"openzfsonosx-1.9.3	openzfsonosx-1.9.4" \
 	"openzfs-2.0-freebsd	truenas-12.0" \
 	"zol-0.7		ubuntu-18.04" \
-	"zol-0.8		ubuntu-20.04" \
-	"openzfs-2.1-linux	ubuntu-22.04" \
-	"openzfs-2.2		openzfs-2.2-linux" \
-	"openzfs-2.2		openzfs-2.2-freebsd"
+	"zol-0.8		ubuntu-20.04"
 
 zpoolconfdir = $(sysconfdir)/zfs/zpool.d
 INSTALL_DATA_HOOKS += zpool-install-data-hook
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
+++ b/sys/contrib/openzfs/cmd/zpool/compatibility.d/grub2
@@ -8,7 +8,5 @@
 filesystem_limits
 hole_birth
 large_blocks
-livelist
 lz4_compress
 spacemap_histogram
-zpool_checkpoint
diff --git a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2 b/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
deleted file mode 100644
--- a/sys/contrib/openzfs/cmd/zpool/compatibility.d/openzfs-2.2
+++ /dev/null
@@ -1,40 +0,0 @@
-# Features supported by OpenZFS 2.2 on Linux and FreeBSD
-allocation_classes
-async_destroy
-blake3
-block_cloning
-bookmark_v2
-bookmark_written
-bookmarks
-device_rebuild
-device_removal
-draid
-edonr
-embedded_data
-empty_bpobj
-enabled_txg
-encryption
-extensible_dataset
-filesystem_limits
-head_errlog
-hole_birth
-large_blocks
-large_dnode
-livelist
-log_spacemap
-lz4_compress
-multi_vdev_crash_dump
-obsolete_counts
-project_quota
-redacted_datasets
-redaction_bookmarks
-resilver_defer
-sha512
-skein
-spacemap_histogram
-spacemap_v2
-userobj_accounting
-vdev_zaps_v2
-zilsaxattr
-zpool_checkpoint
-zstd_compress
diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
--- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c
+++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c
@@ -7662,11 +7662,11 @@
 print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 {
 	time_t start, end, pause;
-	uint64_t pass_scanned, scanned, pass_issued, issued, total_s, total_i;
+	uint64_t pass_scanned, scanned, pass_issued, issued, total;
 	uint64_t elapsed, scan_rate, issue_rate;
 	double fraction_done;
-	char processed_buf[7], scanned_buf[7], issued_buf[7], total_s_buf[7];
-	char total_i_buf[7], srate_buf[7], irate_buf[7], time_buf[32];
+	char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
+	char srate_buf[7], irate_buf[7], time_buf[32];
 
 	printf("  ");
 	printf_color(ANSI_BOLD, gettext("scan:"));
@@ -7738,11 +7738,10 @@
 	pass_scanned = ps->pss_pass_exam;
 	issued = ps->pss_issued;
 	pass_issued = ps->pss_pass_issued;
-	total_s = ps->pss_to_examine;
-	total_i = ps->pss_to_examine - ps->pss_skipped;
+	total = ps->pss_to_examine;
 
 	/* we are only done with a block once we have issued the IO for it */
-	fraction_done = (double)issued / total_i;
+	fraction_done = (double)issued / total;
 
 	/* elapsed time for this pass, rounding up to 1 if it's 0 */
 	elapsed = time(NULL) - ps->pss_pass_start;
@@ -7751,25 +7750,26 @@
 
 	scan_rate = pass_scanned / elapsed;
 	issue_rate = pass_issued / elapsed;
+	uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
+	    ((total - issued) / issue_rate) : UINT64_MAX;
+	secs_to_dhms(total_secs_left, time_buf);
 
 	/* format all of the numbers we will be reporting */
 	zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
 	zfs_nicebytes(issued, issued_buf, sizeof (issued_buf));
-	zfs_nicebytes(total_s, total_s_buf, sizeof (total_s_buf));
-	zfs_nicebytes(total_i, total_i_buf, sizeof (total_i_buf));
+	zfs_nicebytes(total, total_buf, sizeof (total_buf));
+	zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
+	zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
 
 	/* do not print estimated time if we have a paused scrub */
-	(void) printf(gettext("\t%s / %s scanned"), scanned_buf, total_s_buf);
-	if (pause == 0 && scan_rate > 0) {
-		zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
-		(void) printf(gettext(" at %s/s"), srate_buf);
-	}
-	(void) printf(gettext(", %s / %s issued"), issued_buf, total_i_buf);
-	if (pause == 0 && issue_rate > 0) {
-		zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
-		(void) printf(gettext(" at %s/s"), irate_buf);
+	if (pause == 0) {
+		(void) printf(gettext("\t%s scanned at %s/s, "
+		    "%s issued at %s/s, %s total\n"),
+		    scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
+	} else {
+		(void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
+		    scanned_buf, issued_buf, total_buf);
 	}
-	(void) printf(gettext("\n"));
 
 	if (is_resilver) {
 		(void) printf(gettext("\t%s resilvered, %.2f%% done"),
@@ -7782,16 +7782,16 @@
 	if (pause == 0) {
 		/*
 		 * Only provide an estimate iff:
-		 * 1) we haven't yet issued all we expected, and
+		 * 1) the time remaining is valid, and
 		 * 2) the issue rate exceeds 10 MB/s, and
 		 * 3) it's either:
 		 *    a) a resilver which has started repairs, or
 		 *    b) a scrub which has entered the issue phase.
 		 */
-		if (total_i >= issued && issue_rate >= 10 * 1024 * 1024 &&
+		if (total_secs_left != UINT64_MAX &&
+		    issue_rate >= 10 * 1024 * 1024 &&
 		    ((is_resilver && ps->pss_processed > 0) ||
 		    (is_scrub && issued > 0))) {
-			secs_to_dhms((total_i - issued) / issue_rate, time_buf);
 			(void) printf(gettext(", %s to go\n"), time_buf);
 		} else {
 			(void) printf(gettext(", no estimated "
@@ -7803,7 +7803,7 @@
 }
 
 static void
-print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, uint_t c, char *vdev_name)
+print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
 {
 	if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
 		return;
@@ -7815,20 +7815,17 @@
 	uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
 	uint64_t bytes_issued = vrs->vrs_bytes_issued;
 	uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
-	uint64_t bytes_est_s = vrs->vrs_bytes_est;
-	uint64_t bytes_est_i = vrs->vrs_bytes_est;
-	if (c > offsetof(vdev_rebuild_stat_t, vrs_pass_bytes_skipped) / 8)
-		bytes_est_i -= vrs->vrs_pass_bytes_skipped;
+	uint64_t bytes_est = vrs->vrs_bytes_est;
 	uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
 	    (vrs->vrs_pass_time_ms + 1)) * 1000;
 	uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
 	    (vrs->vrs_pass_time_ms + 1)) * 1000;
 	double scan_pct = MIN((double)bytes_scanned * 100 /
-	    (bytes_est_s + 1), 100);
+	    (bytes_est + 1), 100);
 
 	/* Format all of the numbers we will be reporting */
 	char bytes_scanned_buf[7], bytes_issued_buf[7];
-	char bytes_rebuilt_buf[7], bytes_est_s_buf[7], bytes_est_i_buf[7];
+	char bytes_rebuilt_buf[7], bytes_est_buf[7];
 	char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
 	zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
 	    sizeof (bytes_scanned_buf));
@@ -7836,8 +7833,9 @@
 	    sizeof (bytes_issued_buf));
 	zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
 	    sizeof (bytes_rebuilt_buf));
-	zfs_nicebytes(bytes_est_s, bytes_est_s_buf, sizeof (bytes_est_s_buf));
-	zfs_nicebytes(bytes_est_i, bytes_est_i_buf, sizeof (bytes_est_i_buf));
+	zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
+	zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
+	zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
 
 	time_t start = vrs->vrs_start_time;
 	time_t end = vrs->vrs_end_time;
@@ -7860,29 +7858,17 @@
 
 	assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
 
-	(void) printf(gettext("\t%s / %s scanned"), bytes_scanned_buf,
-	    bytes_est_s_buf);
-	if (scan_rate > 0) {
-		zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
-		(void) printf(gettext(" at %s/s"), scan_rate_buf);
-	}
-	(void) printf(gettext(", %s / %s issued"), bytes_issued_buf,
-	    bytes_est_i_buf);
-	if (issue_rate > 0) {
-		zfs_nicebytes(issue_rate, issue_rate_buf,
-		    sizeof (issue_rate_buf));
-		(void) printf(gettext(" at %s/s"), issue_rate_buf);
-	}
-	(void) printf(gettext("\n"));
+	secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
+	    MAX(scan_rate, 1), time_buf);
 
+	(void) printf(gettext("\t%s scanned at %s/s, %s issued at %s/s, "
+	    "%s total\n"), bytes_scanned_buf, scan_rate_buf,
+	    bytes_issued_buf, issue_rate_buf, bytes_est_buf);
 	(void) printf(gettext("\t%s resilvered, %.2f%% done"),
 	    bytes_rebuilt_buf, scan_pct);
 
 	if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
-		if (bytes_est_s >= bytes_scanned &&
-		    scan_rate >= 10 * 1024 * 1024) {
-			secs_to_dhms((bytes_est_s - bytes_scanned) / scan_rate,
-			    time_buf);
+		if (scan_rate >= 10 * 1024 * 1024) {
 			(void) printf(gettext(", %s to go\n"), time_buf);
 		} else {
 			(void) printf(gettext(", no estimated "
@@ -7914,7 +7900,7 @@
 		    ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
 			char *name = zpool_vdev_name(g_zfs, zhp,
 			    child[c], VDEV_NAME_TYPE_ID);
-			print_rebuild_status_impl(vrs, i, name);
+			print_rebuild_status_impl(vrs, name);
 			free(name);
 		}
 	}
@@ -8019,15 +8005,13 @@
 			active_resilver = (ps->pss_state == DSS_SCANNING);
 		}
 
+
 		have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
 		have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
 		scrub_start = ps->pss_start_time;
-		if (c > offsetof(pool_scan_stat_t,
-		    pss_pass_error_scrub_pause) / 8) {
-			have_errorscrub = (ps->pss_error_scrub_func ==
-			    POOL_SCAN_ERRORSCRUB);
-			errorscrub_start = ps->pss_error_scrub_start;
-		}
+		have_errorscrub = (ps->pss_error_scrub_func ==
+		    POOL_SCAN_ERRORSCRUB);
+		errorscrub_start = ps->pss_error_scrub_start;
 	}
 
 	boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);
diff --git a/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c b/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
--- a/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
+++ b/sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
@@ -238,7 +238,6 @@
 	print_kv("end_ts", ps->pss_end_time);
 	print_kv(",errors", ps->pss_errors);
 	print_kv(",examined", examined);
-	print_kv(",skipped", ps->pss_skipped);
 	print_kv(",issued", ps->pss_issued);
 	print_kv(",pass_examined", pass_exam);
 	print_kv(",pass_issued", ps->pss_pass_issued);
@@ -250,6 +249,7 @@
 	print_kv(",remaining_t", remaining_time);
 	print_kv(",start_ts", ps->pss_start_time);
 	print_kv(",to_examine", ps->pss_to_examine);
+	print_kv(",to_process", ps->pss_to_process);
 	printf(" %llu\n", (u_longlong_t)timestamp);
 	return (0);
 }
diff --git a/sys/contrib/openzfs/config/kernel-reclaim_state.m4 b/sys/contrib/openzfs/config/kernel-reclaim_state.m4
deleted file mode 100644
--- a/sys/contrib/openzfs/config/kernel-reclaim_state.m4
+++ /dev/null
@@ -1,26 +0,0 @@
-AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
-	dnl #
-	dnl # 6.4 API change
-	dnl # The reclaimed_slab of struct reclaim_state
-	dnl # is renamed to reclaimed
-	dnl #
-	ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
-		#include <linux/swap.h>
-		static const struct reclaim_state
-		    rs  __attribute__ ((unused)) = {
-		    .reclaimed = 100,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
-	AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
-	ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
-		   [struct reclaim_state has reclaimed])
-	],[
-		AC_MSG_RESULT(no)
-	])
-])
-
diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@@ -153,7 +153,6 @@
 	ZFS_AC_KERNEL_SRC_IATTR_VFSID
 	ZFS_AC_KERNEL_SRC_FILEMAP
 	ZFS_AC_KERNEL_SRC_WRITEPAGE_T
-	ZFS_AC_KERNEL_SRC_RECLAIMED
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@@ -286,7 +285,6 @@
 	ZFS_AC_KERNEL_IATTR_VFSID
 	ZFS_AC_KERNEL_FILEMAP
 	ZFS_AC_KERNEL_WRITEPAGE_T
-	ZFS_AC_KERNEL_RECLAIMED
 	case "$host_cpu" in
 		powerpc*)
 			ZFS_AC_KERNEL_CPU_HAS_FEATURE
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfs-zed.zfs-zed.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-zed
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-import.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-import
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-load-key.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-load-key
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-mount.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-mount
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init
new file mode 120000
--- /dev/null
+++ b/sys/contrib/openzfs/contrib/debian/openzfs-zfsutils.zfs-share.init
@@ -0,0 +1 @@
+../etc/init.d/zfs-share
\ No newline at end of file
diff --git a/sys/contrib/openzfs/contrib/debian/rules.in b/sys/contrib/openzfs/contrib/debian/rules.in
--- a/sys/contrib/openzfs/contrib/debian/rules.in
+++ b/sys/contrib/openzfs/contrib/debian/rules.in
@@ -7,8 +7,8 @@
 LINUX_MIN  := $(shell awk '/Linux-Minimum:/{print $$2}' META)
 LINUX_NEXT := $(shell awk -F'[ .]' '/Linux-Maximum:/{print $$2 "." $$3+1}' META)
 
-DKMSFILES := module include config zfs.release.in autogen.sh copy-builtin META AUTHORS \
-		COPYRIGHT LICENSE README.md CODE_OF_CONDUCT.md NEWS NOTICE RELEASES.md
+DKMSFILES := module include config zfs.release.in autogen.sh META AUTHORS \
+		COPYRIGHT LICENSE README.md
 
 ifndef KVERS
 KVERS=$(shell uname -r)
diff --git a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
--- a/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
+++ b/sys/contrib/openzfs/contrib/dracut/90zfs/module-setup.sh.in
@@ -36,7 +36,7 @@
 		{ dfatal "Failed to install essential binaries"; exit 1; }
 
 	# Adapted from https://github.com/zbm-dev/zfsbootmenu
-	if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so' && ldconfig -p 2> /dev/null | grep -qF 'libc.so.6' ; then
+	if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so'; then
 		# On systems with gcc-config (Gentoo, Funtoo, etc.), use it to find libgcc_s
 		if command -v gcc-config >/dev/null; then
 			inst_simple "/usr/lib/gcc/$(s=$(gcc-config -c); echo "${s%-*}/${s##*-}")/libgcc_s.so.1" ||
diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
--- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
+++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs
@@ -344,7 +344,7 @@
 
 	# Need the _original_ datasets mountpoint!
 	mountpoint=$(get_fs_value "$fs" mountpoint)
-	ZFS_CMD="mount -o zfsutil -t zfs"
+	ZFS_CMD="mount.zfs -o zfsutil"
 	if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
 		# Can't use the mountpoint property. Might be one of our
 		# clones. Check the 'org.zol:mountpoint' property set in
@@ -361,7 +361,7 @@
 			fi
 			# Don't use mount.zfs -o zfsutils for legacy mountpoint
 			if [ "$mountpoint" = "legacy" ]; then
-				ZFS_CMD="mount -t zfs"
+				ZFS_CMD="mount.zfs"
 			fi
 			# Last hail-mary: Hope 'rootmnt' is set!
 			mountpoint=""
@@ -944,7 +944,7 @@
 		echo "       not specified on the kernel command line."
 		echo ""
 		echo "Manually mount the root filesystem on $rootmnt and then exit."
-		echo "Hint: Try:  mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
+		echo "Hint: Try:  mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
 		shell
 	fi
 
diff --git a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
--- a/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
+++ b/sys/contrib/openzfs/contrib/pam_zfs_key/pam_zfs_key.c
@@ -67,7 +67,6 @@
 #include <sys/mman.h>
 
 static const char PASSWORD_VAR_NAME[] = "pam_zfs_key_authtok";
-static const char OLD_PASSWORD_VAR_NAME[] = "pam_zfs_key_oldauthtok";
 
 static libzfs_handle_t *g_zfs;
 
@@ -161,10 +160,10 @@
 }
 
 static pw_password_t *
-pw_fetch(pam_handle_t *pamh, int tok)
+pw_fetch(pam_handle_t *pamh)
 {
 	const char *token;
-	if (pam_get_authtok(pamh, tok, &token, NULL) != PAM_SUCCESS) {
+	if (pam_get_authtok(pamh, PAM_AUTHTOK, &token, NULL) != PAM_SUCCESS) {
 		pam_syslog(pamh, LOG_ERR,
 		    "couldn't get password from PAM stack");
 		return (NULL);
@@ -178,13 +177,13 @@
 }
 
 static const pw_password_t *
-pw_fetch_lazy(pam_handle_t *pamh, int tok, const char *var_name)
+pw_fetch_lazy(pam_handle_t *pamh)
 {
-	pw_password_t *pw = pw_fetch(pamh, tok);
+	pw_password_t *pw = pw_fetch(pamh);
 	if (pw == NULL) {
 		return (NULL);
 	}
-	int ret = pam_set_data(pamh, var_name, pw, destroy_pw);
+	int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, pw, destroy_pw);
 	if (ret != PAM_SUCCESS) {
 		pw_free(pw);
 		pam_syslog(pamh, LOG_ERR, "pam_set_data failed");
@@ -194,23 +193,23 @@
 }
 
 static const pw_password_t *
-pw_get(pam_handle_t *pamh, int tok, const char *var_name)
+pw_get(pam_handle_t *pamh)
 {
 	const pw_password_t *authtok = NULL;
-	int ret = pam_get_data(pamh, var_name,
+	int ret = pam_get_data(pamh, PASSWORD_VAR_NAME,
 	    (const void**)(&authtok));
 	if (ret == PAM_SUCCESS)
 		return (authtok);
 	if (ret == PAM_NO_MODULE_DATA)
-		return (pw_fetch_lazy(pamh, tok, var_name));
+		return (pw_fetch_lazy(pamh));
 	pam_syslog(pamh, LOG_ERR, "password not available");
 	return (NULL);
 }
 
 static int
-pw_clear(pam_handle_t *pamh, const char *var_name)
+pw_clear(pam_handle_t *pamh)
 {
-	int ret = pam_set_data(pamh, var_name, NULL, NULL);
+	int ret = pam_set_data(pamh, PASSWORD_VAR_NAME, NULL, NULL);
 	if (ret != PAM_SUCCESS) {
 		pam_syslog(pamh, LOG_ERR, "clearing password failed");
 		return (-1);
@@ -387,7 +386,7 @@
 	int ret = lzc_load_key(ds_name, noop, (uint8_t *)key->value,
 	    WRAPPING_KEY_LEN);
 	pw_free(key);
-	if (ret && ret != EEXIST) {
+	if (ret) {
 		pam_syslog(pamh, LOG_ERR, "load_key failed: %d", ret);
 		zfs_close(ds);
 		return (-1);
@@ -407,14 +406,14 @@
 }
 
 static int
-unmount_unload(pam_handle_t *pamh, const char *ds_name, boolean_t force)
+unmount_unload(pam_handle_t *pamh, const char *ds_name)
 {
 	zfs_handle_t *ds = zfs_open(g_zfs, ds_name, ZFS_TYPE_FILESYSTEM);
 	if (ds == NULL) {
 		pam_syslog(pamh, LOG_ERR, "dataset %s not found", ds_name);
 		return (-1);
 	}
-	int ret = zfs_unmount(ds, NULL, force ? MS_FORCE : 0);
+	int ret = zfs_unmount(ds, NULL, 0);
 	if (ret) {
 		pam_syslog(pamh, LOG_ERR, "zfs_unmount failed with: %d", ret);
 		zfs_close(ds);
@@ -436,13 +435,9 @@
 	char *runstatedir;
 	char *homedir;
 	char *dsname;
-	uid_t uid_min;
-	uid_t uid_max;
 	uid_t uid;
 	const char *username;
-	boolean_t unmount_and_unload;
-	boolean_t force_unmount;
-	boolean_t recursive_homes;
+	int unmount_and_unload;
 } zfs_key_config_t;
 
 static int
@@ -474,13 +469,9 @@
 		free(config->homes_prefix);
 		return (PAM_USER_UNKNOWN);
 	}
-	config->uid_min = 1000;
-	config->uid_max = MAXUID;
 	config->uid = entry->pw_uid;
 	config->username = name;
-	config->unmount_and_unload = B_TRUE;
-	config->force_unmount = B_FALSE;
-	config->recursive_homes = B_FALSE;
+	config->unmount_and_unload = 1;
 	config->dsname = NULL;
 	config->homedir = NULL;
 	for (int c = 0; c < argc; c++) {
@@ -490,16 +481,8 @@
 		} else if (strncmp(argv[c], "runstatedir=", 12) == 0) {
 			free(config->runstatedir);
 			config->runstatedir = strdup(argv[c] + 12);
-		} else if (strncmp(argv[c], "uid_min=", 8) == 0) {
-			sscanf(argv[c] + 8, "%u", &config->uid_min);
-		} else if (strncmp(argv[c], "uid_max=", 8) == 0) {
-			sscanf(argv[c] + 8, "%u", &config->uid_max);
 		} else if (strcmp(argv[c], "nounmount") == 0) {
-			config->unmount_and_unload = B_FALSE;
-		} else if (strcmp(argv[c], "forceunmount") == 0) {
-			config->force_unmount = B_TRUE;
-		} else if (strcmp(argv[c], "recursive_homes") == 0) {
-			config->recursive_homes = B_TRUE;
+			config->unmount_and_unload = 0;
 		} else if (strcmp(argv[c], "prop_mountpoint") == 0) {
 			if (config->homedir == NULL)
 				config->homedir = strdup(entry->pw_dir);
@@ -534,12 +517,8 @@
 	(void) zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint,
 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE);
 	if (strcmp(target->homedir, mountpoint) != 0) {
-		if (target->recursive_homes) {
-			(void) zfs_iter_filesystems_v2(zhp, 0,
-			    find_dsname_by_prop_value, target);
-		}
 		zfs_close(zhp);
-		return (target->dsname != NULL);
+		return (0);
 	}
 
 	target->dsname = strdup(zfs_get_name(zhp));
@@ -552,23 +531,17 @@
 {
 	if (config->homedir != NULL &&
 	    config->homes_prefix != NULL) {
-		if (strcmp(config->homes_prefix, "*") == 0) {
-			(void) zfs_iter_root(g_zfs,
-			    find_dsname_by_prop_value, config);
-		} else {
-			zfs_handle_t *zhp = zfs_open(g_zfs,
-			    config->homes_prefix, ZFS_TYPE_FILESYSTEM);
-			if (zhp == NULL) {
-				pam_syslog(NULL, LOG_ERR,
-				    "dataset %s not found",
-				    config->homes_prefix);
-				return (NULL);
-			}
-
-			(void) zfs_iter_filesystems_v2(zhp, 0,
-			    find_dsname_by_prop_value, config);
-			zfs_close(zhp);
+		zfs_handle_t *zhp = zfs_open(g_zfs, config->homes_prefix,
+		    ZFS_TYPE_FILESYSTEM);
+		if (zhp == NULL) {
+			pam_syslog(NULL, LOG_ERR, "dataset %s not found",
+			    config->homes_prefix);
+			return (NULL);
 		}
+
+		(void) zfs_iter_filesystems_v2(zhp, 0,
+		    find_dsname_by_prop_value, config);
+		zfs_close(zhp);
 		char *dsname = config->dsname;
 		config->dsname = NULL;
 		return (dsname);
@@ -682,13 +655,8 @@
 	if (config_err != PAM_SUCCESS) {
 		return (config_err);
 	}
-	if (config.uid < config.uid_min || config.uid > config.uid_max) {
-		zfs_key_config_free(&config);
-		return (PAM_SERVICE_ERR);
-	}
 
-	const pw_password_t *token = pw_fetch_lazy(pamh,
-	    PAM_AUTHTOK, PASSWORD_VAR_NAME);
+	const pw_password_t *token = pw_fetch_lazy(pamh);
 	if (token == NULL) {
 		zfs_key_config_free(&config);
 		return (PAM_AUTH_ERR);
@@ -738,12 +706,10 @@
 	if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
 		return (PAM_SERVICE_ERR);
 	}
-	if (config.uid < config.uid_min || config.uid > config.uid_max) {
+	if (config.uid < 1000) {
 		zfs_key_config_free(&config);
-		return (PAM_SERVICE_ERR);
+		return (PAM_SUCCESS);
 	}
-	const pw_password_t *old_token = pw_get(pamh,
-	    PAM_OLDAUTHTOK, OLD_PASSWORD_VAR_NAME);
 	{
 		if (pam_zfs_init(pamh) != 0) {
 			zfs_key_config_free(&config);
@@ -755,62 +721,49 @@
 			zfs_key_config_free(&config);
 			return (PAM_SERVICE_ERR);
 		}
-		if (!old_token) {
-			pam_syslog(pamh, LOG_ERR,
-			    "old password from PAM stack is null");
+		int key_loaded = is_key_loaded(pamh, dataset);
+		if (key_loaded == -1) {
 			free(dataset);
 			pam_zfs_free();
 			zfs_key_config_free(&config);
 			return (PAM_SERVICE_ERR);
 		}
-		if (decrypt_mount(pamh, dataset,
-		    old_token->value, B_TRUE) == -1) {
+		free(dataset);
+		pam_zfs_free();
+		if (!key_loaded) {
 			pam_syslog(pamh, LOG_ERR,
-			    "old token mismatch");
-			free(dataset);
-			pam_zfs_free();
+			    "key not loaded, returning perm_denied");
 			zfs_key_config_free(&config);
 			return (PAM_PERM_DENIED);
 		}
 	}
 
 	if ((flags & PAM_UPDATE_AUTHTOK) != 0) {
-		const pw_password_t *token = pw_get(pamh, PAM_AUTHTOK,
-		    PASSWORD_VAR_NAME);
+		const pw_password_t *token = pw_get(pamh);
 		if (token == NULL) {
-			pam_syslog(pamh, LOG_ERR, "new password unavailable");
-			pam_zfs_free();
 			zfs_key_config_free(&config);
-			pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
+			return (PAM_SERVICE_ERR);
+		}
+		if (pam_zfs_init(pamh) != 0) {
+			zfs_key_config_free(&config);
 			return (PAM_SERVICE_ERR);
 		}
 		char *dataset = zfs_key_config_get_dataset(&config);
 		if (!dataset) {
 			pam_zfs_free();
 			zfs_key_config_free(&config);
-			pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
-			pw_clear(pamh, PASSWORD_VAR_NAME);
 			return (PAM_SERVICE_ERR);
 		}
-		int was_loaded = is_key_loaded(pamh, dataset);
-		if (!was_loaded && decrypt_mount(pamh, dataset,
-		    old_token->value, B_FALSE) == -1) {
+		if (change_key(pamh, dataset, token->value) == -1) {
 			free(dataset);
 			pam_zfs_free();
 			zfs_key_config_free(&config);
-			pw_clear(pamh, OLD_PASSWORD_VAR_NAME);
-			pw_clear(pamh, PASSWORD_VAR_NAME);
 			return (PAM_SERVICE_ERR);
 		}
-		int changed = change_key(pamh, dataset, token->value);
-		if (!was_loaded) {
-			unmount_unload(pamh, dataset, config.force_unmount);
-		}
 		free(dataset);
 		pam_zfs_free();
 		zfs_key_config_free(&config);
-		if (pw_clear(pamh, OLD_PASSWORD_VAR_NAME) == -1 ||
-		    pw_clear(pamh, PASSWORD_VAR_NAME) == -1 || changed == -1) {
+		if (pw_clear(pamh) == -1) {
 			return (PAM_SERVICE_ERR);
 		}
 	} else {
@@ -835,7 +788,7 @@
 		return (PAM_SESSION_ERR);
 	}
 
-	if (config.uid < config.uid_min || config.uid > config.uid_max) {
+	if (config.uid < 1000) {
 		zfs_key_config_free(&config);
 		return (PAM_SUCCESS);
 	}
@@ -846,8 +799,7 @@
 		return (PAM_SUCCESS);
 	}
 
-	const pw_password_t *token = pw_get(pamh,
-	    PAM_AUTHTOK, PASSWORD_VAR_NAME);
+	const pw_password_t *token = pw_get(pamh);
 	if (token == NULL) {
 		zfs_key_config_free(&config);
 		return (PAM_SESSION_ERR);
@@ -871,7 +823,7 @@
 	free(dataset);
 	pam_zfs_free();
 	zfs_key_config_free(&config);
-	if (pw_clear(pamh, PASSWORD_VAR_NAME) == -1) {
+	if (pw_clear(pamh) == -1) {
 		return (PAM_SERVICE_ERR);
 	}
 	return (PAM_SUCCESS);
@@ -894,7 +846,7 @@
 	if (zfs_key_config_load(pamh, &config, argc, argv) != PAM_SUCCESS) {
 		return (PAM_SESSION_ERR);
 	}
-	if (config.uid < config.uid_min || config.uid > config.uid_max) {
+	if (config.uid < 1000) {
 		zfs_key_config_free(&config);
 		return (PAM_SUCCESS);
 	}
@@ -916,7 +868,7 @@
 			zfs_key_config_free(&config);
 			return (PAM_SESSION_ERR);
 		}
-		if (unmount_unload(pamh, dataset, config.force_unmount) == -1) {
+		if (unmount_unload(pamh, dataset) == -1) {
 			free(dataset);
 			pam_zfs_free();
 			zfs_key_config_free(&config);
diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
--- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
+++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/kmem.h
@@ -75,7 +75,7 @@
 extern uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache);
 extern uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache);
 
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
@@ -83,7 +83,6 @@
     int (*constructor)(void *, void *, int), void (*destructor)(void *, void *),
     void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags);
 void kmem_cache_destroy(kmem_cache_t *cache);
-__attribute__((malloc))
 void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
 void kmem_cache_free(kmem_cache_t *cache, void *buf);
 boolean_t kmem_cache_reap_active(void);
diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/mod_compat.h
@@ -68,6 +68,7 @@
 	zfs_trim,
 	zfs_txg,
 	zfs_vdev,
+	zfs_vdev_cache,
 	zfs_vdev_file,
 	zfs_vdev_mirror,
 	zfs_vnops,
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/kmem.h
@@ -31,10 +31,10 @@
 #include <linux/vmalloc.h>
 
 extern int kmem_debugging(void);
-__attribute__((format(printf, 1, 0)))
-extern char *kmem_vasprintf(const char *fmt, va_list ap);
-__attribute__((format(printf, 1, 2)))
-extern char *kmem_asprintf(const char *fmt, ...);
+extern char *kmem_vasprintf(const char *fmt, va_list ap)
+    __attribute__((format(printf, 1, 0)));
+extern char *kmem_asprintf(const char *fmt, ...)
+    __attribute__((format(printf, 1, 2)));
 extern char *kmem_strdup(const char *str);
 extern void kmem_strfree(char *str);
 
@@ -186,10 +186,10 @@
 #define	kmem_free(ptr, sz)	spl_kmem_free((ptr), (sz))
 #define	kmem_cache_reap_active	spl_kmem_cache_reap_active
 
-__attribute__((malloc, alloc_size(1)))
-extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line);
-__attribute__((malloc, alloc_size(1)))
-extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_kmem_alloc(size_t sz, int fl, const char *func, int line)
+    __attribute__((alloc_size(1)));
+extern void *spl_kmem_zalloc(size_t sz, int fl, const char *func, int line)
+    __attribute__((alloc_size(1)));
 extern void spl_kmem_free(const void *ptr, size_t sz);
 
 /*
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/taskq.h
@@ -104,7 +104,6 @@
 	/* list node for the cpu hotplug callback */
 	struct hlist_node	tq_hp_cb_node;
 	boolean_t		tq_hp_support;
-	unsigned long		lastshouldstop; /* when to purge dynamic */
 } taskq_t;
 
 typedef struct taskq_ent {
diff --git a/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h b/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
--- a/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
+++ b/sys/contrib/openzfs/include/os/linux/spl/sys/vmem.h
@@ -91,10 +91,8 @@
 #define	vmem_zalloc(sz, fl)	spl_vmem_zalloc((sz), (fl), __func__, __LINE__)
 #define	vmem_free(ptr, sz)	spl_vmem_free((ptr), (sz))
 
-extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line)
-    __attribute__((malloc, alloc_size(1)));
-extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line)
-    __attribute__((malloc, alloc_size(1)));
+extern void *spl_vmem_alloc(size_t sz, int fl, const char *func, int line);
+extern void *spl_vmem_zalloc(size_t sz, int fl, const char *func, int line);
 extern void spl_vmem_free(const void *ptr, size_t sz);
 
 int spl_vmem_init(void);
diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_zil.h
@@ -215,39 +215,6 @@
     TP_ARGS(zilog, zcw))
 DEFINE_ZIL_COMMIT_IO_ERROR_EVENT(zfs_zil__commit__io__error);
 
-/*
- * Generic support for three argument tracepoints of the form:
- *
- * DTRACE_PROBE3(...,
- *     zilog_t *, ...,
- *     uint64_t, ...,
- *     uint64_t, ...);
- */
-/* BEGIN CSTYLED */
-DECLARE_EVENT_CLASS(zfs_zil_block_size_class,
-	TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1),
-	TP_ARGS(zilog, res, s1),
-	TP_STRUCT__entry(
-	    ZILOG_TP_STRUCT_ENTRY
-	    __field(uint64_t, res)
-	    __field(uint64_t, s1)
-	),
-	TP_fast_assign(
-	    ZILOG_TP_FAST_ASSIGN
-	    __entry->res = res;
-	    __entry->s1 = s1;
-	),
-	TP_printk(
-	    ZILOG_TP_PRINTK_FMT " res %llu s1 %llu",
-	    ZILOG_TP_PRINTK_ARGS, __entry->res, __entry->s1)
-);
-
-#define	DEFINE_ZIL_BLOCK_SIZE_EVENT(name) \
-DEFINE_EVENT(zfs_zil_block_size_class, name, \
-    TP_PROTO(zilog_t *zilog, uint64_t res, uint64_t s1), \
-    TP_ARGS(zilog, res, s1))
-DEFINE_ZIL_BLOCK_SIZE_EVENT(zfs_zil__block__size);
-
 #endif /* _TRACE_ZIL_H */
 
 #undef TRACE_INCLUDE_PATH
@@ -261,7 +228,6 @@
 DEFINE_DTRACE_PROBE2(zil__process__commit__itx);
 DEFINE_DTRACE_PROBE2(zil__process__normal__itx);
 DEFINE_DTRACE_PROBE2(zil__commit__io__error);
-DEFINE_DTRACE_PROBE3(zil__block__size);
 
 #endif /* HAVE_DECLARE_EVENT_CLASS */
 #endif /* _KERNEL */
diff --git a/sys/contrib/openzfs/include/sys/abd.h b/sys/contrib/openzfs/include/sys/abd.h
--- a/sys/contrib/openzfs/include/sys/abd.h
+++ b/sys/contrib/openzfs/include/sys/abd.h
@@ -86,15 +86,10 @@
  * Allocations and deallocations
  */
 
-__attribute__((malloc))
 abd_t *abd_alloc(size_t, boolean_t);
-__attribute__((malloc))
 abd_t *abd_alloc_linear(size_t, boolean_t);
-__attribute__((malloc))
 abd_t *abd_alloc_gang(void);
-__attribute__((malloc))
 abd_t *abd_alloc_for_io(size_t, boolean_t);
-__attribute__((malloc))
 abd_t *abd_alloc_sametype(abd_t *, size_t);
 boolean_t abd_size_alloc_linear(size_t);
 void abd_gang_add(abd_t *, abd_t *, boolean_t);
diff --git a/sys/contrib/openzfs/include/sys/arc.h b/sys/contrib/openzfs/include/sys/arc.h
--- a/sys/contrib/openzfs/include/sys/arc.h
+++ b/sys/contrib/openzfs/include/sys/arc.h
@@ -304,8 +304,9 @@
 zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     arc_buf_t *buf, boolean_t uncached, boolean_t l2arc, const zio_prop_t *zp,
     arc_write_done_func_t *ready, arc_write_done_func_t *child_ready,
-    arc_write_done_func_t *done, void *priv, zio_priority_t priority,
-    int zio_flags, const zbookmark_phys_t *zb);
+    arc_write_done_func_t *physdone, arc_write_done_func_t *done,
+    void *priv, zio_priority_t priority, int zio_flags,
+    const zbookmark_phys_t *zb);
 
 arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *priv);
 void arc_remove_prune_callback(arc_prune_t *p);
diff --git a/sys/contrib/openzfs/include/sys/arc_impl.h b/sys/contrib/openzfs/include/sys/arc_impl.h
--- a/sys/contrib/openzfs/include/sys/arc_impl.h
+++ b/sys/contrib/openzfs/include/sys/arc_impl.h
@@ -123,6 +123,7 @@
 	void			*awcb_private;
 	arc_write_done_func_t	*awcb_ready;
 	arc_write_done_func_t	*awcb_children_ready;
+	arc_write_done_func_t	*awcb_physdone;
 	arc_write_done_func_t	*awcb_done;
 	arc_buf_t		*awcb_buf;
 };
diff --git a/sys/contrib/openzfs/include/sys/btree.h b/sys/contrib/openzfs/include/sys/btree.h
--- a/sys/contrib/openzfs/include/sys/btree.h
+++ b/sys/contrib/openzfs/include/sys/btree.h
@@ -105,13 +105,8 @@
 	boolean_t	bti_before;
 } zfs_btree_index_t;
 
-typedef struct btree zfs_btree_t;
-typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
-    const void *, zfs_btree_index_t *);
-
-struct btree {
+typedef struct btree {
 	int (*bt_compar) (const void *, const void *);
-	bt_find_in_buf_f	bt_find_in_buf;
 	size_t			bt_elem_size;
 	size_t			bt_leaf_size;
 	uint32_t		bt_leaf_cap;
@@ -120,54 +115,7 @@
 	uint64_t		bt_num_nodes;
 	zfs_btree_hdr_t		*bt_root;
 	zfs_btree_leaf_t	*bt_bulk; // non-null if bulk loading
-};
-
-/*
- * Implementation of Shar's algorithm designed to accelerate binary search by
- * eliminating impossible to predict branches.
- *
- * For optimality, this should be used to generate the search function in the
- * same file as the comparator  and the comparator should be marked
- * `__attribute__((always_inline) inline` so that the compiler will inline it.
- *
- * Arguments are:
- *
- * NAME   - The function name for this instance of the search function. Use it
- *          in a subsequent call to zfs_btree_create().
- * T      - The element type stored inside the B-Tree.
- * COMP   - A comparator to compare two nodes, it must return exactly: -1, 0,
- *          or +1 -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
- *          TREE_CMP() from avl.h can be used in a boilerplate function.
- */
-/* BEGIN CSTYLED */
-#define	ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP)			\
-_Pragma("GCC diagnostic push")						\
-_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"")			\
-static void *								\
-NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,			\
-    const void *value, zfs_btree_index_t *where)			\
-{									\
-	T *i = (T *)buf;						\
-	(void) tree;							\
-	_Pragma("GCC unroll 9")						\
-	while (nelems > 1) {						\
-		uint32_t half = nelems / 2;				\
-		nelems -= half;						\
-		i += (COMP(&i[half - 1], value) < 0) * half;		\
-	}								\
-									\
-	int comp = COMP(i, value);					\
-	where->bti_offset = (i - (T *)buf) + (comp < 0);		\
-	where->bti_before = (comp != 0);				\
-									\
-	if (comp == 0) {						\
-		return (i);						\
-	}								\
-									\
-	return (NULL);							\
-}									\
-_Pragma("GCC diagnostic pop")
-/* END CSTYLED */
+} zfs_btree_t;
 
 /*
  * Allocate and deallocate caches for btree nodes.
@@ -181,19 +129,13 @@
  * tree   - the tree to be initialized
  * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
  *          -1 for <, 0 for ==, and +1 for >
- * find   - optional function to accelerate searches inside B-Tree nodes
- *          through Shar's algorithm and comparator inlining. Setting this to
- *          NULL will use a generic function. The function should be created
- *          using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
- *          compar should be marked `__attribute__((always_inline)) inline` or
- *          performance is unlikely to improve very much.
  * size   - the value of sizeof(struct my_type)
  * lsize  - custom leaf size
  */
 void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
-    bt_find_in_buf_f, size_t);
+    size_t);
 void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
-    bt_find_in_buf_f, size_t, size_t);
+    size_t, size_t);
 
 /*
  * Find a node with a matching value in the tree. Returns the matching node
diff --git a/sys/contrib/openzfs/include/sys/dsl_scan.h b/sys/contrib/openzfs/include/sys/dsl_scan.h
--- a/sys/contrib/openzfs/include/sys/dsl_scan.h
+++ b/sys/contrib/openzfs/include/sys/dsl_scan.h
@@ -61,7 +61,7 @@
 	uint64_t scn_end_time;
 	uint64_t scn_to_examine; /* total bytes to be scanned */
 	uint64_t scn_examined; /* bytes scanned so far */
-	uint64_t scn_skipped;	/* bytes skipped by scanner */
+	uint64_t scn_to_process;
 	uint64_t scn_processed;
 	uint64_t scn_errors;	/* scan I/O error count */
 	uint64_t scn_ddt_class_max;
diff --git a/sys/contrib/openzfs/include/sys/fs/zfs.h b/sys/contrib/openzfs/include/sys/fs/zfs.h
--- a/sys/contrib/openzfs/include/sys/fs/zfs.h
+++ b/sys/contrib/openzfs/include/sys/fs/zfs.h
@@ -1088,7 +1088,7 @@
 	uint64_t	pss_end_time;	/* scan end time */
 	uint64_t	pss_to_examine;	/* total bytes to scan */
 	uint64_t	pss_examined;	/* total bytes located by scanner */
-	uint64_t	pss_skipped;	/* total bytes skipped by scanner */
+	uint64_t	pss_to_process; /* total bytes to process */
 	uint64_t	pss_processed;	/* total processed bytes */
 	uint64_t	pss_errors;	/* scan errors	*/
 
@@ -1152,7 +1152,6 @@
 	uint64_t vrs_pass_time_ms;	/* pass run time (millisecs) */
 	uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
 	uint64_t vrs_pass_bytes_issued;	/* bytes rebuilt since start/resume */
-	uint64_t vrs_pass_bytes_skipped; /* bytes skipped since start/resume */
 } vdev_rebuild_stat_t;
 
 /*
diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h
--- a/sys/contrib/openzfs/include/sys/spa.h
+++ b/sys/contrib/openzfs/include/sys/spa.h
@@ -723,10 +723,16 @@
  * Send TRIM commands in-line during normal pool operation while deleting.
  *	OFF: no
  *	ON: yes
+ * NB: IN_FREEBSD_BASE is defined within the FreeBSD sources.
  */
 typedef enum {
 	SPA_AUTOTRIM_OFF = 0,	/* default */
 	SPA_AUTOTRIM_ON,
+#ifdef IN_FREEBSD_BASE
+	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_ON,
+#else
+	SPA_AUTOTRIM_DEFAULT = SPA_AUTOTRIM_OFF,
+#endif
 } spa_autotrim_t;
 
 /*
@@ -1168,6 +1174,10 @@
     zbookmark_phys_t *zb);
 extern void name_to_errphys(char *buf, zbookmark_err_phys_t *zep);
 
+/* vdev cache */
+extern void vdev_cache_stat_init(void);
+extern void vdev_cache_stat_fini(void);
+
 /* vdev mirror */
 extern void vdev_mirror_stat_init(void);
 extern void vdev_mirror_stat_fini(void);
diff --git a/sys/contrib/openzfs/include/sys/vdev.h b/sys/contrib/openzfs/include/sys/vdev.h
--- a/sys/contrib/openzfs/include/sys/vdev.h
+++ b/sys/contrib/openzfs/include/sys/vdev.h
@@ -158,15 +158,20 @@
 extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
 extern boolean_t vdev_is_spacemap_addressable(vdev_t *vd);
 
+extern void vdev_cache_init(vdev_t *vd);
+extern void vdev_cache_fini(vdev_t *vd);
+extern boolean_t vdev_cache_read(zio_t *zio);
+extern void vdev_cache_write(zio_t *zio);
+extern void vdev_cache_purge(vdev_t *vd);
+
 extern void vdev_queue_init(vdev_t *vd);
 extern void vdev_queue_fini(vdev_t *vd);
 extern zio_t *vdev_queue_io(zio_t *zio);
 extern void vdev_queue_io_done(zio_t *zio);
 extern void vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority);
 
-extern uint32_t vdev_queue_length(vdev_t *vd);
+extern int vdev_queue_length(vdev_t *vd);
 extern uint64_t vdev_queue_last_offset(vdev_t *vd);
-extern uint64_t vdev_queue_class_length(vdev_t *vq, zio_priority_t p);
 
 extern void vdev_config_dirty(vdev_t *vd);
 extern void vdev_config_clean(vdev_t *vd);
diff --git a/sys/contrib/openzfs/include/sys/vdev_impl.h b/sys/contrib/openzfs/include/sys/vdev_impl.h
--- a/sys/contrib/openzfs/include/sys/vdev_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_impl.h
@@ -57,6 +57,8 @@
  * Forward declarations that lots of things need.
  */
 typedef struct vdev_queue vdev_queue_t;
+typedef struct vdev_cache vdev_cache_t;
+typedef struct vdev_cache_entry vdev_cache_entry_t;
 struct abd;
 
 extern uint_t zfs_vdev_queue_depth_pct;
@@ -130,24 +132,44 @@
 /*
  * Virtual device properties
  */
-typedef union vdev_queue_class {
-	list_t		vqc_list;
-	avl_tree_t	vqc_tree;
+struct vdev_cache_entry {
+	struct abd	*ve_abd;
+	uint64_t	ve_offset;
+	clock_t		ve_lastused;
+	avl_node_t	ve_offset_node;
+	avl_node_t	ve_lastused_node;
+	uint32_t	ve_hits;
+	uint16_t	ve_missed_update;
+	zio_t		*ve_fill_io;
+};
+
+struct vdev_cache {
+	avl_tree_t	vc_offset_tree;
+	avl_tree_t	vc_lastused_tree;
+	kmutex_t	vc_lock;
+};
+
+typedef struct vdev_queue_class {
+	uint32_t	vqc_active;
+
+	/*
+	 * Sorted by offset or timestamp, depending on if the queue is
+	 * LBA-ordered vs FIFO.
+	 */
+	avl_tree_t	vqc_queued_tree;
 } vdev_queue_class_t;
 
 struct vdev_queue {
 	vdev_t		*vq_vdev;
 	vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
+	avl_tree_t	vq_active_tree;
 	avl_tree_t	vq_read_offset_tree;
 	avl_tree_t	vq_write_offset_tree;
+	avl_tree_t	vq_trim_offset_tree;
 	uint64_t	vq_last_offset;
 	zio_priority_t	vq_last_prio;	/* Last sent I/O priority. */
-	uint32_t	vq_cqueued;	/* Classes with queued I/Os. */
-	uint32_t	vq_cactive[ZIO_PRIORITY_NUM_QUEUEABLE];
-	uint32_t	vq_active;	/* Number of active I/Os. */
 	uint32_t	vq_ia_active;	/* Active interactive I/Os. */
 	uint32_t	vq_nia_credit;	/* Non-interactive I/Os credit. */
-	list_t		vq_active_list;	/* List of active I/Os. */
 	hrtime_t	vq_io_complete_ts; /* time last i/o completed */
 	hrtime_t	vq_io_delta_ts;
 	zio_t		vq_io_search; /* used as local for stack reduction */
@@ -421,6 +443,7 @@
 	boolean_t	vdev_resilver_deferred;  /* resilver deferred */
 	boolean_t	vdev_kobj_flag; /* kobj event record */
 	vdev_queue_t	vdev_queue;	/* I/O deadline schedule queue	*/
+	vdev_cache_t	vdev_cache;	/* physical block cache		*/
 	spa_aux_vdev_t	*vdev_aux;	/* for l2cache and spares vdevs	*/
 	zio_t		*vdev_probe_zio; /* root of current probe	*/
 	vdev_aux_t	vdev_label_aux;	/* on-disk aux state		*/
diff --git a/sys/contrib/openzfs/include/sys/vdev_rebuild.h b/sys/contrib/openzfs/include/sys/vdev_rebuild.h
--- a/sys/contrib/openzfs/include/sys/vdev_rebuild.h
+++ b/sys/contrib/openzfs/include/sys/vdev_rebuild.h
@@ -79,7 +79,6 @@
 	uint64_t	vr_pass_start_time;
 	uint64_t	vr_pass_bytes_scanned;
 	uint64_t	vr_pass_bytes_issued;
-	uint64_t	vr_pass_bytes_skipped;
 
 	/* On-disk state updated by vdev_rebuild_zap_update_sync() */
 	vdev_rebuild_phys_t vr_rebuild_phys;
diff --git a/sys/contrib/openzfs/include/sys/zfs_refcount.h b/sys/contrib/openzfs/include/sys/zfs_refcount.h
--- a/sys/contrib/openzfs/include/sys/zfs_refcount.h
+++ b/sys/contrib/openzfs/include/sys/zfs_refcount.h
@@ -27,7 +27,6 @@
 #define	_SYS_ZFS_REFCOUNT_H
 
 #include <sys/inttypes.h>
-#include <sys/avl.h>
 #include <sys/list.h>
 #include <sys/zfs_context.h>
 
@@ -44,22 +43,19 @@
 
 #ifdef	ZFS_DEBUG
 typedef struct reference {
-	union {
-		avl_node_t a;
-		list_node_t l;
-	} ref_link;
+	list_node_t ref_link;
 	const void *ref_holder;
 	uint64_t ref_number;
-	boolean_t ref_search;
+	uint8_t *ref_removed;
 } reference_t;
 
 typedef struct refcount {
-	uint64_t rc_count;
 	kmutex_t rc_mtx;
-	avl_tree_t rc_tree;
-	list_t rc_removed;
-	uint_t rc_removed_count;
 	boolean_t rc_tracked;
+	list_t rc_list;
+	list_t rc_removed;
+	uint64_t rc_count;
+	uint64_t rc_removed_count;
 } zfs_refcount_t;
 
 /*
@@ -77,15 +73,13 @@
 int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
 int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
 /*
- * Note that (add|remove)_many adds/removes one reference with "number" N,
- * _not_ N references with "number" 1, which is what (add|remove)_few does,
- * or what vanilla zfs_refcount_(add|remove) called N times would do.
+ * Note that (add|remove)_many add/remove one reference with "number" N,
+ * _not_ make N references with "number" 1, which is what vanilla
+ * zfs_refcount_(add|remove) would do if called N times.
  *
  * Attempting to remove a reference with number N when none exists is a
  * panic on debug kernels with reference_tracking enabled.
  */
-void zfs_refcount_add_few(zfs_refcount_t *, uint64_t, const void *);
-void zfs_refcount_remove_few(zfs_refcount_t *, uint64_t, const void *);
 int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
 int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
 void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
@@ -114,10 +108,6 @@
 #define	zfs_refcount_count(rc) atomic_load_64(&(rc)->rc_count)
 #define	zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
 #define	zfs_refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
-#define	zfs_refcount_add_few(rc, number, holder) \
-	atomic_add_64(&(rc)->rc_count, number)
-#define	zfs_refcount_remove_few(rc, number, holder) \
-	atomic_add_64(&(rc)->rc_count, -number)
 #define	zfs_refcount_add_many(rc, number, holder) \
 	atomic_add_64_nv(&(rc)->rc_count, number)
 #define	zfs_refcount_remove_many(rc, number, holder) \
diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h
--- a/sys/contrib/openzfs/include/sys/zfs_znode.h
+++ b/sys/contrib/openzfs/include/sys/zfs_znode.h
@@ -158,7 +158,6 @@
 #define	ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48)
 
 extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);
-extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
 
 #ifdef _KERNEL
 #include <sys/zfs_znode_impl.h>
@@ -281,6 +280,7 @@
 extern void	zfs_remove_op_tables(void);
 extern int	zfs_create_op_tables(void);
 extern dev_t	zfs_cmpldev(uint64_t);
+extern int	zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
 extern int	zfs_get_stats(objset_t *os, nvlist_t *nv);
 extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
 extern void	zfs_znode_dmu_fini(znode_t *);
diff --git a/sys/contrib/openzfs/include/sys/zil.h b/sys/contrib/openzfs/include/sys/zil.h
--- a/sys/contrib/openzfs/include/sys/zil.h
+++ b/sys/contrib/openzfs/include/sys/zil.h
@@ -489,22 +489,18 @@
 	 * Transactions which have been allocated to the "normal"
 	 * (i.e. not slog) storage pool. Note that "bytes" accumulate
 	 * the actual log record sizes - which do not include the actual
-	 * data in case of indirect writes.  bytes <= write <= alloc.
+	 * data in case of indirect writes.
 	 */
 	kstat_named_t zil_itx_metaslab_normal_count;
 	kstat_named_t zil_itx_metaslab_normal_bytes;
-	kstat_named_t zil_itx_metaslab_normal_write;
-	kstat_named_t zil_itx_metaslab_normal_alloc;
 
 	/*
 	 * Transactions which have been allocated to the "slog" storage pool.
 	 * If there are no separate log devices, this is the same as the
-	 * "normal" pool.  bytes <= write <= alloc.
+	 * "normal" pool.
 	 */
 	kstat_named_t zil_itx_metaslab_slog_count;
 	kstat_named_t zil_itx_metaslab_slog_bytes;
-	kstat_named_t zil_itx_metaslab_slog_write;
-	kstat_named_t zil_itx_metaslab_slog_alloc;
 } zil_kstat_values_t;
 
 typedef struct zil_sums {
@@ -519,12 +515,8 @@
 	wmsum_t zil_itx_needcopy_bytes;
 	wmsum_t zil_itx_metaslab_normal_count;
 	wmsum_t zil_itx_metaslab_normal_bytes;
-	wmsum_t zil_itx_metaslab_normal_write;
-	wmsum_t zil_itx_metaslab_normal_alloc;
 	wmsum_t zil_itx_metaslab_slog_count;
 	wmsum_t zil_itx_metaslab_slog_bytes;
-	wmsum_t zil_itx_metaslab_slog_write;
-	wmsum_t zil_itx_metaslab_slog_alloc;
 } zil_sums_t;
 
 #define	ZIL_STAT_INCR(zil, stat, val) \
diff --git a/sys/contrib/openzfs/include/sys/zil_impl.h b/sys/contrib/openzfs/include/sys/zil_impl.h
--- a/sys/contrib/openzfs/include/sys/zil_impl.h
+++ b/sys/contrib/openzfs/include/sys/zil_impl.h
@@ -44,7 +44,7 @@
  * must be held.
  *
  * After the lwb is "opened", it can transition into the "issued" state
- * via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
+ * via zil_lwb_write_issue(). Again, the zilog's "zl_issuer_lock" must
  * be held when making this transition.
  *
  * After the lwb's write zio completes, it transitions into the "write
@@ -93,23 +93,20 @@
 	blkptr_t	lwb_blk;	/* on disk address of this log blk */
 	boolean_t	lwb_fastwrite;	/* is blk marked for fastwrite? */
 	boolean_t	lwb_slog;	/* lwb_blk is on SLOG device */
-	boolean_t	lwb_indirect;	/* do not postpone zil_lwb_commit() */
 	int		lwb_nused;	/* # used bytes in buffer */
-	int		lwb_nfilled;	/* # filled bytes in buffer */
 	int		lwb_sz;		/* size of block and buffer */
 	lwb_state_t	lwb_state;	/* the state of this lwb */
 	char		*lwb_buf;	/* log write buffer */
 	zio_t		*lwb_write_zio;	/* zio for the lwb buffer */
 	zio_t		*lwb_root_zio;	/* root zio for lwb write and flushes */
-	hrtime_t	lwb_issued_timestamp; /* when was the lwb issued? */
 	uint64_t	lwb_issued_txg;	/* the txg when the write is issued */
 	uint64_t	lwb_max_txg;	/* highest txg in this lwb */
 	list_node_t	lwb_node;	/* zilog->zl_lwb_list linkage */
-	list_node_t	lwb_issue_node;	/* linkage of lwbs ready for issue */
 	list_t		lwb_itxs;	/* list of itx's */
 	list_t		lwb_waiters;	/* list of zil_commit_waiter's */
 	avl_tree_t	lwb_vdev_tree;	/* vdevs to flush after lwb write */
 	kmutex_t	lwb_vdev_lock;	/* protects lwb_vdev_tree */
+	hrtime_t	lwb_issued_timestamp; /* when was the lwb issued? */
 } lwb_t;
 
 /*
diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@@ -190,6 +190,7 @@
 #define	ZIO_FLAG_SPECULATIVE	(1ULL << 8)
 #define	ZIO_FLAG_CONFIG_WRITER	(1ULL << 9)
 #define	ZIO_FLAG_DONT_RETRY	(1ULL << 10)
+#define	ZIO_FLAG_DONT_CACHE	(1ULL << 11)
 #define	ZIO_FLAG_NODATA		(1ULL << 12)
 #define	ZIO_FLAG_INDUCE_DAMAGE	(1ULL << 13)
 #define	ZIO_FLAG_IO_ALLOCATING	(1ULL << 14)
@@ -341,9 +342,9 @@
 	enum zio_checksum	zp_checksum;
 	enum zio_compress	zp_compress;
 	uint8_t			zp_complevel;
+	dmu_object_type_t	zp_type;
 	uint8_t			zp_level;
 	uint8_t			zp_copies;
-	dmu_object_type_t	zp_type;
 	boolean_t		zp_dedup;
 	boolean_t		zp_dedup_verify;
 	boolean_t		zp_nopwrite;
@@ -436,12 +437,6 @@
 	list_node_t	zl_child_node;
 } zio_link_t;
 
-enum zio_qstate {
-	ZIO_QS_NONE = 0,
-	ZIO_QS_QUEUED,
-	ZIO_QS_ACTIVE,
-};
-
 struct zio {
 	/* Core information about this I/O */
 	zbookmark_phys_t	io_bookmark;
@@ -466,6 +461,7 @@
 	/* Callback info */
 	zio_done_func_t	*io_ready;
 	zio_done_func_t	*io_children_ready;
+	zio_done_func_t	*io_physdone;
 	zio_done_func_t	*io_done;
 	void		*io_private;
 	int64_t		io_prev_space_delta;	/* DMU private */
@@ -485,12 +481,6 @@
 	const zio_vsd_ops_t *io_vsd_ops;
 	metaslab_class_t *io_metaslab_class;	/* dva throttle class */
 
-	enum zio_qstate	io_queue_state;	/* vdev queue state */
-	union {
-		list_node_t l;
-		avl_node_t a;
-	} io_queue_node ____cacheline_aligned;	/* allocator and vdev queues */
-	avl_node_t	io_offset_node;	/* vdev offset queues */
 	uint64_t	io_offset;
 	hrtime_t	io_timestamp;	/* submitted at */
 	hrtime_t	io_queued_timestamp;
@@ -498,6 +488,9 @@
 	hrtime_t	io_delta;	/* vdev queue service delta */
 	hrtime_t	io_delay;	/* Device access time (disk or */
 					/* file). */
+	avl_node_t	io_queue_node;
+	avl_node_t	io_offset_node;
+	avl_node_t	io_alloc_node;
 	zio_alloc_list_t 	io_alloc_list;
 
 	/* Internal pipeline state */
@@ -511,6 +504,9 @@
 	int		io_error;
 	int		io_child_error[ZIO_CHILD_TYPES];
 	uint64_t	io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
+	uint64_t	io_child_count;
+	uint64_t	io_phys_children;
+	uint64_t	io_parent_count;
 	uint64_t	*io_stall;
 	zio_t		*io_gang_leader;
 	zio_gang_node_t	*io_gang_tree;
@@ -558,8 +554,9 @@
 extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
     zio_done_func_t *ready, zio_done_func_t *children_ready,
-    zio_done_func_t *done, void *priv, zio_priority_t priority,
-    zio_flag_t flags, const zbookmark_phys_t *zb);
+    zio_done_func_t *physdone, zio_done_func_t *done,
+    void *priv, zio_priority_t priority, zio_flag_t flags,
+    const zbookmark_phys_t *zb);
 
 extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     struct abd *data, uint64_t size, zio_done_func_t *done, void *priv,
@@ -611,7 +608,6 @@
 extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
 extern zio_t *zio_unique_parent(zio_t *cio);
 extern void zio_add_child(zio_t *pio, zio_t *cio);
-extern void zio_add_child_first(zio_t *pio, zio_t *cio);
 
 extern void *zio_buf_alloc(size_t size);
 extern void zio_buf_free(void *buf, size_t size);
diff --git a/sys/contrib/openzfs/lib/libspl/include/umem.h b/sys/contrib/openzfs/lib/libspl/include/umem.h
--- a/sys/contrib/openzfs/lib/libspl/include/umem.h
+++ b/sys/contrib/openzfs/lib/libspl/include/umem.h
@@ -83,7 +83,7 @@
 const char *_umem_options_init(void);
 const char *_umem_logging_init(void);
 
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
 static inline void *
 umem_alloc(size_t size, int flags)
 {
@@ -96,7 +96,7 @@
 	return (ptr);
 }
 
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
 static inline void *
 umem_alloc_aligned(size_t size, size_t align, int flags)
 {
@@ -118,7 +118,7 @@
 	return (ptr);
 }
 
-__attribute__((malloc, alloc_size(1)))
+__attribute__((alloc_size(1)))
 static inline void *
 umem_zalloc(size_t size, int flags)
 {
@@ -188,7 +188,6 @@
 	umem_free(cp, sizeof (umem_cache_t));
 }
 
-__attribute__((malloc))
 static inline void *
 umem_cache_alloc(umem_cache_t *cp, int flags)
 {
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c
@@ -1789,8 +1789,7 @@
 	nvlist_t *nvl;
 	int nvl_len = 0;
 	int added_resv = 0;
-	zfs_prop_t prop;
-	boolean_t nsprop = B_FALSE;
+	zfs_prop_t prop = 0;
 	nvpair_t *elem;
 
 	(void) snprintf(errbuf, sizeof (errbuf),
@@ -1837,7 +1836,6 @@
 	    elem = nvlist_next_nvpair(nvl, elem)) {
 
 		prop = zfs_name_to_prop(nvpair_name(elem));
-		nsprop |= zfs_is_namespace_prop(prop);
 
 		assert(cl_idx < nvl_len);
 		/*
@@ -1936,7 +1934,8 @@
 			 * if one of the options handled by the generic
 			 * Linux namespace layer has been modified.
 			 */
-			if (nsprop && zfs_is_mounted(zhp, NULL))
+			if (zfs_is_namespace_prop(prop) &&
+			    zfs_is_mounted(zhp, NULL))
 				ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0);
 		}
 	}
diff --git a/sys/contrib/openzfs/lib/libzpool/Makefile.am b/sys/contrib/openzfs/lib/libzpool/Makefile.am
--- a/sys/contrib/openzfs/lib/libzpool/Makefile.am
+++ b/sys/contrib/openzfs/lib/libzpool/Makefile.am
@@ -135,6 +135,7 @@
 	module/zfs/uberblock.c \
 	module/zfs/unique.c \
 	module/zfs/vdev.c \
+	module/zfs/vdev_cache.c \
 	module/zfs/vdev_draid.c \
 	module/zfs/vdev_draid_rand.c \
 	module/zfs/vdev_indirect.c \
diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4
--- a/sys/contrib/openzfs/man/man4/spl.4
+++ b/sys/contrib/openzfs/man/man4/spl.4
@@ -193,19 +193,4 @@
 reading it could cause a lock-up if the list grow too large
 without limiting the output.
 "(truncated)" will be shown if the list is larger than the limit.
-.
-.It Sy spl_taskq_thread_timeout_ms Ns = Ns Sy 10000 Pq uint
-(Linux-only)
-How long a taskq has to have had no work before we tear it down.
-Previously, we would tear down a dynamic taskq worker as soon
-as we noticed it had no work, but it was observed that this led
-to a lot of churn in tearing down things we then immediately
-spawned anew.
-In practice, it seems any nonzero value will remove the vast
-majority of this churn, while the nontrivially larger value
-was chosen to help filter out the little remaining churn on
-a mostly idle system.
-Setting this value to
-.Sy 0
-will revert to the previous behavior.
 .El
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4
--- a/sys/contrib/openzfs/man/man4/zfs.4
+++ b/sys/contrib/openzfs/man/man4/zfs.4
@@ -239,16 +239,6 @@
 Make some blocks above a certain size be gang blocks.
 This option is used by the test suite to facilitate testing.
 .
-.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
-Default DDT ZAP data block size as a power of 2. Note that changing this after
-creating a DDT on the pool will not affect existing DDTs, only newly created
-ones.
-.
-.It Sy zfs_ddt_zap_default_ibs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
-Default DDT ZAP indirect block size as a power of 2. Note that changing this
-after creating a DDT on the pool will not affect existing DDTs, only newly
-created ones.
-.
 .It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int
 Default dnode block size as a power of 2.
 .
@@ -2026,12 +2016,33 @@
 Flush dirty data to disk at least every this many seconds (maximum TXG
 duration).
 .
+.It Sy zfs_vdev_aggregate_trim Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+Allow TRIM I/O operations to be aggregated.
+This is normally not helpful because the extents to be trimmed
+will have already been aggregated by the metaslab.
+This option is provided for debugging and performance analysis.
+.
 .It Sy zfs_vdev_aggregation_limit Ns = Ns Sy 1048576 Ns B Po 1 MiB Pc Pq uint
 Max vdev I/O aggregation size.
 .
 .It Sy zfs_vdev_aggregation_limit_non_rotating Ns = Ns Sy 131072 Ns B Po 128 KiB Pc Pq uint
 Max vdev I/O aggregation size for non-rotating media.
 .
+.It Sy zfs_vdev_cache_bshift Ns = Ns Sy 16 Po 64 KiB Pc Pq uint
+Shift size to inflate reads to.
+.
+.It Sy zfs_vdev_cache_max Ns = Ns Sy 16384 Ns B Po 16 KiB Pc Pq uint
+Inflate reads smaller than this value to meet the
+.Sy zfs_vdev_cache_bshift
+size
+.Pq default Sy 64 KiB .
+.
+.It Sy zfs_vdev_cache_size Ns = Ns Sy 0 Pq uint
+Total size of the per-disk cache in bytes.
+.Pp
+Currently this feature is disabled, as it has been found to not be helpful
+for performance and in some cases harmful.
+.
 .It Sy zfs_vdev_mirror_rotating_inc Ns = Ns Sy 0 Pq int
 A number by which the balancing algorithm increments the load calculation for
 the purpose of selecting the least busy mirror member when an I/O operation
diff --git a/sys/contrib/openzfs/man/man7/zpool-features.7 b/sys/contrib/openzfs/man/man7/zpool-features.7
--- a/sys/contrib/openzfs/man/man7/zpool-features.7
+++ b/sys/contrib/openzfs/man/man7/zpool-features.7
@@ -228,10 +228,8 @@
 filesystem_limits
 hole_birth
 large_blocks
-livelist
 lz4_compress
 spacemap_histogram
-zpool_checkpoint
 
 .No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
 .Ed
diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8
--- a/sys/contrib/openzfs/man/man8/zdb.8
+++ b/sys/contrib/openzfs/man/man8/zdb.8
@@ -14,7 +14,7 @@
 .\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
 .\" Copyright (c) 2017 Intel Corporation.
 .\"
-.Dd June 27, 2023
+.Dd October 7, 2020
 .Dt ZDB 8
 .Os
 .
@@ -41,17 +41,9 @@
 .Ar poolname Ns Op Ar / Ns Ar dataset Ns | Ns Ar objset-ID
 .Op Ar object Ns | Ns Ar range Ns …
 .Nm
-.Fl B
-.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
-.Op Fl U Ar cache
-.Op Fl K Ar key
-.Ar poolname Ns Ar / Ns Ar objset-ID
-.Op Ar backup-flags
-.Nm
 .Fl C
 .Op Fl A
 .Op Fl U Ar cache
-.Op Ar poolname
 .Nm
 .Fl E
 .Op Fl A
@@ -131,22 +123,6 @@
 Display statistics regarding the number, size
 .Pq logical, physical and allocated
 and deduplication of blocks.
-.It Fl B , -backup
-Generate a backup stream, similar to
-.Nm zfs Cm send ,
-but for the numeric objset ID, and without opening the dataset.
-This can be useful in recovery scenarios if dataset metadata has become
-corrupted but the dataset itself is readable.
-The optional
-.Ar flags
-argument is a string of one or more of the letters
-.Sy e ,
-.Sy L ,
-.Sy c ,
-and
-.Sy w ,
-which correspond to the same flags in
-.Xr zfs-send 8 .
 .It Fl c , -checksum
 Verify the checksum of all metadata blocks while printing block statistics
 .Po see
diff --git a/sys/contrib/openzfs/man/man8/zfs-create.8 b/sys/contrib/openzfs/man/man8/zfs-create.8
--- a/sys/contrib/openzfs/man/man8/zfs-create.8
+++ b/sys/contrib/openzfs/man/man8/zfs-create.8
@@ -234,11 +234,14 @@
 Print verbose information about the created dataset.
 .El
 .El
-.Ss ZFS for Swap
-Swapping to a ZFS volume is prone to deadlock and not recommended.
-See OpenZFS FAQ.
-.Pp
-Swapping to a file on a ZFS filesystem is not supported.
+.Ss ZFS Volumes as Swap
+ZFS volumes may be used as swap devices.
+After creating the volume with
+.Nm zfs Cm create Fl V ,
+enable the swap area using the
+.Xr swapon 8
+command.
+Swapping to files on ZFS filesystems is not supported.
 .
 .Sh EXAMPLES
 .\" These are, respectively, examples 1, 10 from zfs.8
diff --git a/sys/contrib/openzfs/man/man8/zpool-create.8 b/sys/contrib/openzfs/man/man8/zpool-create.8
--- a/sys/contrib/openzfs/man/man8/zpool-create.8
+++ b/sys/contrib/openzfs/man/man8/zpool-create.8
@@ -87,13 +87,13 @@
 However this check is not robust enough
 to detect simultaneous attempts to use a new device in different pools, even if
 .Sy multihost Ns = Sy enabled .
-The administrator must ensure that simultaneous invocations of any combination
+The administrator must ensure, that simultaneous invocations of any combination
 of
 .Nm zpool Cm replace ,
 .Nm zpool Cm create ,
 .Nm zpool Cm add ,
 or
-.Nm zpool Cm labelclear
+.Nm zpool Cm labelclear ,
 do not refer to the same device.
 Using the same device in two pools will result in pool corruption.
 .Pp
diff --git a/sys/contrib/openzfs/man/man8/zpool-events.8 b/sys/contrib/openzfs/man/man8/zpool-events.8
--- a/sys/contrib/openzfs/man/man8/zpool-events.8
+++ b/sys/contrib/openzfs/man/man8/zpool-events.8
@@ -456,6 +456,7 @@
 ZIO_FLAG_SPECULATIVE:0x00000100
 ZIO_FLAG_CONFIG_WRITER:0x00000200
 ZIO_FLAG_DONT_RETRY:0x00000400
+ZIO_FLAG_DONT_CACHE:0x00000800
 ZIO_FLAG_NODATA:0x00001000
 ZIO_FLAG_INDUCE_DAMAGE:0x00002000
 
diff --git a/sys/contrib/openzfs/man/man8/zpool-scrub.8 b/sys/contrib/openzfs/man/man8/zpool-scrub.8
--- a/sys/contrib/openzfs/man/man8/zpool-scrub.8
+++ b/sys/contrib/openzfs/man/man8/zpool-scrub.8
@@ -26,7 +26,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd June 22, 2023
+.Dd July 25, 2021
 .Dt ZPOOL-SCRUB 8
 .Os
 .
@@ -123,7 +123,7 @@
 .No # Nm zpool Cm status
   ...
   scan: scrub in progress since Sun Jul 25 16:07:49 2021
-        403M / 405M scanned at 100M/s, 68.4M / 405M issued at 10.0M/s
+        403M scanned at 100M/s, 68.4M issued at 10.0M/s, 405M total
         0B repaired, 16.91% done, 00:00:04 to go
   ...
 .Ed
diff --git a/sys/contrib/openzfs/module/Kbuild.in b/sys/contrib/openzfs/module/Kbuild.in
--- a/sys/contrib/openzfs/module/Kbuild.in
+++ b/sys/contrib/openzfs/module/Kbuild.in
@@ -34,20 +34,6 @@
 ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
 endif
 
-# Generated binary search code is particularly bad with this optimization.
-# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
-# is not affected when unrolling is done.
-# Disable it until the following upstream issue is resolved:
-# https://github.com/llvm/llvm-project/issues/62790
-ifeq ($(CONFIG_X86),y)
-ifeq ($(CONFIG_CC_IS_CLANG),y)
-CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
-CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
-endif
-endif
-
 ifneq ($(KBUILD_EXTMOD),)
 @CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
 @CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
@@ -382,6 +368,7 @@
 	uberblock.o \
 	unique.o \
 	vdev.o \
+	vdev_cache.o \
 	vdev_draid.o \
 	vdev_draid_rand.o \
 	vdev_indirect.o \
diff --git a/sys/contrib/openzfs/module/Makefile.bsd b/sys/contrib/openzfs/module/Makefile.bsd
--- a/sys/contrib/openzfs/module/Makefile.bsd
+++ b/sys/contrib/openzfs/module/Makefile.bsd
@@ -308,6 +308,7 @@
 	uberblock.c \
 	unique.c \
 	vdev.c \
+	vdev_cache.c \
 	vdev_draid.c \
 	vdev_draid_rand.c \
 	vdev_indirect.c \
@@ -399,20 +400,6 @@
 
 .include <bsd.kmod.mk>
 
-# Generated binary search code is particularly bad with this optimization.
-# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
-# is not affected when unrolling is done.
-# Disable it until the following upstream issue is resolved:
-# https://github.com/llvm/llvm-project/issues/62790
-.if ${CC} == "clang"
-.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
-CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
-CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
-CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
-CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
-.endif
-.endif
-
 CFLAGS.sysctl_os.c= -include ../zfs_config.h
 CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h
 
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -872,6 +872,8 @@
 	"Enable to bypass vdev_validate().");
 /* END CSTYLED */
 
+/* vdev_cache.c */
+
 /* vdev_mirror.c */
 
 /* vdev_queue.c */
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_acl.c
@@ -495,8 +495,10 @@
 {
 	zfs_acl_node_t *aclnode;
 
-	while ((aclnode = list_remove_head(&aclp->z_acl)))
+	while ((aclnode = list_head(&aclp->z_acl))) {
+		list_remove(&aclp->z_acl, aclnode);
 		zfs_acl_node_free(aclnode);
+	}
 	aclp->z_acl_count = 0;
 	aclp->z_acl_bytes = 0;
 }
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -2220,6 +2220,92 @@
 	return (0);
 }
 
+/*
+ * Read a property stored within the master node.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
+
+	/*
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
+	 */
+	const char *pname;
+	int error = ENOENT;
+	if (prop == ZFS_PROP_VERSION) {
+		pname = ZPL_VERSION_STR;
+	} else {
+		pname = zfs_prop_to_name(prop);
+	}
+
+	if (os != NULL) {
+		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+	}
+
+	if (error == ENOENT) {
+		/* No value set, use the default value */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			*value = ZPL_VERSION;
+			break;
+		case ZFS_PROP_NORMALIZE:
+		case ZFS_PROP_UTF8ONLY:
+			*value = 0;
+			break;
+		case ZFS_PROP_CASE:
+			*value = ZFS_CASE_SENSITIVE;
+			break;
+		case ZFS_PROP_ACLTYPE:
+			*value = ZFS_ACLTYPE_NFSV4;
+			break;
+		default:
+			return (error);
+		}
+		error = 0;
+	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
+	return (error);
+}
+
 /*
  * Return true if the corresponding vfs's unmounted flag is set.
  * Otherwise return false.
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c
@@ -2069,93 +2069,6 @@
 	return (error);
 }
 
-/*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
-	uint64_t *cached_copy = NULL;
-
-	/*
-	 * Figure out where in the objset_t the cached copy would live, if it
-	 * is available for the requested property.
-	 */
-	if (os != NULL) {
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			cached_copy = &os->os_version;
-			break;
-		case ZFS_PROP_NORMALIZE:
-			cached_copy = &os->os_normalization;
-			break;
-		case ZFS_PROP_UTF8ONLY:
-			cached_copy = &os->os_utf8only;
-			break;
-		case ZFS_PROP_CASE:
-			cached_copy = &os->os_casesensitivity;
-			break;
-		default:
-			break;
-		}
-	}
-	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
-		*value = *cached_copy;
-		return (0);
-	}
-
-	/*
-	 * If the property wasn't cached, look up the file system's value for
-	 * the property. For the version property, we look up a slightly
-	 * different string.
-	 */
-	const char *pname;
-	int error = ENOENT;
-	if (prop == ZFS_PROP_VERSION) {
-		pname = ZPL_VERSION_STR;
-	} else {
-		pname = zfs_prop_to_name(prop);
-	}
-
-	if (os != NULL) {
-		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
-		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
-	}
-
-	if (error == ENOENT) {
-		/* No value set, use the default value */
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			*value = ZPL_VERSION;
-			break;
-		case ZFS_PROP_NORMALIZE:
-		case ZFS_PROP_UTF8ONLY:
-			*value = 0;
-			break;
-		case ZFS_PROP_CASE:
-			*value = ZFS_CASE_SENSITIVE;
-			break;
-		case ZFS_PROP_ACLTYPE:
-			*value = ZFS_ACLTYPE_NFSV4;
-			break;
-		default:
-			return (error);
-		}
-		error = 0;
-	}
-
-	/*
-	 * If one of the methods for getting the property value above worked,
-	 * copy it into the objset_t's cache.
-	 */
-	if (error == 0 && cached_copy != NULL) {
-		*cached_copy = *value;
-	}
-
-	return (error);
-}
-
-
 
 void
 zfs_znode_update_vfs(znode_t *zp)
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
@@ -182,11 +182,8 @@
 	 * of that infrastructure we are responsible for incrementing it.
 	 */
 	if (current->reclaim_state)
-#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
-		current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
-#else
 		current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
-#endif
+
 	vfree(ptr);
 }
 
@@ -1015,18 +1012,8 @@
 	ASSERT0(flags & ~KM_PUBLIC_MASK);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
-
-	*obj = NULL;
-
-	/*
-	 * Since we can't sleep attempt an emergency allocation to satisfy
-	 * the request.  The only alterative is to fail the allocation but
-	 * it's preferable try.  The use of KM_NOSLEEP is expected to be rare.
-	 */
-	if (flags & KM_NOSLEEP)
-		return (spl_emergency_alloc(skc, flags, obj));
-
 	might_sleep();
+	*obj = NULL;
 
 	/*
 	 * Before allocating a new slab wait for any reaping to complete and
diff --git a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
--- a/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
+++ b/sys/contrib/openzfs/module/os/linux/spl/spl-taskq.c
@@ -36,12 +36,6 @@
 module_param(spl_taskq_thread_bind, int, 0644);
 MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
 
-static uint_t spl_taskq_thread_timeout_ms = 10000;
-/* BEGIN CSTYLED */
-module_param(spl_taskq_thread_timeout_ms, uint, 0644);
-/* END CSTYLED */
-MODULE_PARM_DESC(spl_taskq_thread_timeout_ms,
-	"Time to require a dynamic thread be idle before it gets cleaned up");
 
 static int spl_taskq_thread_dynamic = 1;
 module_param(spl_taskq_thread_dynamic, int, 0444);
@@ -854,37 +848,12 @@
 	    tqt_thread_list) == tqt)
 		return (0);
 
-	int no_work =
+	return
 	    ((tq->tq_nspawn == 0) &&	/* No threads are being spawned */
 	    (tq->tq_nactive == 0) &&	/* No threads are handling tasks */
 	    (tq->tq_nthreads > 1) &&	/* More than 1 thread is running */
 	    (!taskq_next_ent(tq)) &&	/* There are no pending tasks */
 	    (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
-
-	/*
-	 * If we would have said stop before, let's instead wait a bit, maybe
-	 * we'll see more work come our way soon...
-	 */
-	if (no_work) {
-		/* if it's 0, we want the old behavior. */
-		/* if the taskq is being torn down, we also want to go away. */
-		if (spl_taskq_thread_timeout_ms == 0 ||
-		    !(tq->tq_flags & TASKQ_ACTIVE))
-			return (1);
-		unsigned long lasttime = tq->lastshouldstop;
-		if (lasttime > 0) {
-			if (time_after(jiffies, lasttime +
-			    msecs_to_jiffies(spl_taskq_thread_timeout_ms)))
-				return (1);
-			else
-				return (0);
-		} else {
-			tq->lastshouldstop = jiffies;
-		}
-	} else {
-		tq->lastshouldstop = 0;
-	}
-	return (0);
 }
 
 static int
@@ -1122,7 +1091,6 @@
 	tq->tq_flags = (flags | TASKQ_ACTIVE);
 	tq->tq_next_id = TASKQID_INITIAL;
 	tq->tq_lowest_id = TASKQID_INITIAL;
-	tq->lastshouldstop = 0;
 	INIT_LIST_HEAD(&tq->tq_free_list);
 	INIT_LIST_HEAD(&tq->tq_pend_list);
 	INIT_LIST_HEAD(&tq->tq_prio_list);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/arc_os.c
@@ -219,11 +219,7 @@
 	arc_reduce_target_size(ptob(sc->nr_to_scan));
 	arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
 	if (current->reclaim_state != NULL)
-#ifdef	HAVE_RECLAIM_STATE_RECLAIMED
-		current->reclaim_state->reclaimed += sc->nr_to_scan;
-#else
 		current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
-#endif
 
 	/*
 	 * We are experiencing memory pressure which the arc_evict_zthr was
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_acl.c
@@ -493,8 +493,10 @@
 {
 	zfs_acl_node_t *aclnode;
 
-	while ((aclnode = list_remove_head(&aclp->z_acl)))
+	while ((aclnode = list_head(&aclp->z_acl))) {
+		list_remove(&aclp->z_acl, aclnode);
 		zfs_acl_node_free(aclnode);
+	}
 	aclp->z_acl_count = 0;
 	aclp->z_acl_bytes = 0;
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@@ -2052,6 +2052,91 @@
 	return (0);
 }
 
+/*
+ * Read a property stored within the master node.
+ */
+int
+zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
+{
+	uint64_t *cached_copy = NULL;
+
+	/*
+	 * Figure out where in the objset_t the cached copy would live, if it
+	 * is available for the requested property.
+	 */
+	if (os != NULL) {
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			cached_copy = &os->os_version;
+			break;
+		case ZFS_PROP_NORMALIZE:
+			cached_copy = &os->os_normalization;
+			break;
+		case ZFS_PROP_UTF8ONLY:
+			cached_copy = &os->os_utf8only;
+			break;
+		case ZFS_PROP_CASE:
+			cached_copy = &os->os_casesensitivity;
+			break;
+		default:
+			break;
+		}
+	}
+	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
+		*value = *cached_copy;
+		return (0);
+	}
+
+	/*
+	 * If the property wasn't cached, look up the file system's value for
+	 * the property. For the version property, we look up a slightly
+	 * different string.
+	 */
+	const char *pname;
+	int error = ENOENT;
+	if (prop == ZFS_PROP_VERSION)
+		pname = ZPL_VERSION_STR;
+	else
+		pname = zfs_prop_to_name(prop);
+
+	if (os != NULL) {
+		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
+		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+	}
+
+	if (error == ENOENT) {
+		/* No value set, use the default value */
+		switch (prop) {
+		case ZFS_PROP_VERSION:
+			*value = ZPL_VERSION;
+			break;
+		case ZFS_PROP_NORMALIZE:
+		case ZFS_PROP_UTF8ONLY:
+			*value = 0;
+			break;
+		case ZFS_PROP_CASE:
+			*value = ZFS_CASE_SENSITIVE;
+			break;
+		case ZFS_PROP_ACLTYPE:
+			*value = ZFS_ACLTYPE_OFF;
+			break;
+		default:
+			return (error);
+		}
+		error = 0;
+	}
+
+	/*
+	 * If one of the methods for getting the property value above worked,
+	 * copy it into the objset_t's cache.
+	 */
+	if (error == 0 && cached_copy != NULL) {
+		*cached_copy = *value;
+	}
+
+	return (error);
+}
+
 /*
  * Return true if the corresponding vfs's unmounted flag is set.
  * Otherwise return false.
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c
@@ -2254,91 +2254,6 @@
 	return (error);
 }
 
-/*
- * Read a property stored within the master node.
- */
-int
-zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
-{
-	uint64_t *cached_copy = NULL;
-
-	/*
-	 * Figure out where in the objset_t the cached copy would live, if it
-	 * is available for the requested property.
-	 */
-	if (os != NULL) {
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			cached_copy = &os->os_version;
-			break;
-		case ZFS_PROP_NORMALIZE:
-			cached_copy = &os->os_normalization;
-			break;
-		case ZFS_PROP_UTF8ONLY:
-			cached_copy = &os->os_utf8only;
-			break;
-		case ZFS_PROP_CASE:
-			cached_copy = &os->os_casesensitivity;
-			break;
-		default:
-			break;
-		}
-	}
-	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
-		*value = *cached_copy;
-		return (0);
-	}
-
-	/*
-	 * If the property wasn't cached, look up the file system's value for
-	 * the property. For the version property, we look up a slightly
-	 * different string.
-	 */
-	const char *pname;
-	int error = ENOENT;
-	if (prop == ZFS_PROP_VERSION)
-		pname = ZPL_VERSION_STR;
-	else
-		pname = zfs_prop_to_name(prop);
-
-	if (os != NULL) {
-		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
-		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
-	}
-
-	if (error == ENOENT) {
-		/* No value set, use the default value */
-		switch (prop) {
-		case ZFS_PROP_VERSION:
-			*value = ZPL_VERSION;
-			break;
-		case ZFS_PROP_NORMALIZE:
-		case ZFS_PROP_UTF8ONLY:
-			*value = 0;
-			break;
-		case ZFS_PROP_CASE:
-			*value = ZFS_CASE_SENSITIVE;
-			break;
-		case ZFS_PROP_ACLTYPE:
-			*value = ZFS_ACLTYPE_OFF;
-			break;
-		default:
-			return (error);
-		}
-		error = 0;
-	}
-
-	/*
-	 * If one of the methods for getting the property value above worked,
-	 * copy it into the objset_t's cache.
-	 */
-	if (error == 0 && cached_copy != NULL) {
-		*cached_copy = *value;
-	}
-
-	return (error);
-}
-
 #if defined(_KERNEL)
 EXPORT_SYMBOL(zfs_create_fs);
 EXPORT_SYMBOL(zfs_obj_to_path);
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
--- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c
@@ -54,7 +54,7 @@
 static unsigned long zvol_max_discard_blocks = 16384;
 
 #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
-static unsigned int zvol_open_timeout_ms = 1000;
+static const unsigned int zvol_open_timeout_ms = 1000;
 #endif
 
 static unsigned int zvol_threads = 0;
@@ -1612,9 +1612,4 @@
     "Process volblocksize blocks per thread");
 #endif
 
-#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
-module_param(zvol_open_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
-#endif
-
 /* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zcommon/zpool_prop.c b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
--- a/sys/contrib/openzfs/module/zcommon/zpool_prop.c
+++ b/sys/contrib/openzfs/module/zcommon/zpool_prop.c
@@ -160,7 +160,7 @@
 	    "wait | continue | panic", "FAILMODE", failuremode_table,
 	    sfeatures);
 	zprop_register_index(ZPOOL_PROP_AUTOTRIM, "autotrim",
-	    SPA_AUTOTRIM_OFF, PROP_DEFAULT, ZFS_TYPE_POOL,
+	    SPA_AUTOTRIM_DEFAULT, PROP_DEFAULT, ZFS_TYPE_POOL,
 	    "on | off", "AUTOTRIM", boolean_table, sfeatures);
 
 	/* hidden properties */
diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c
--- a/sys/contrib/openzfs/module/zfs/arc.c
+++ b/sys/contrib/openzfs/module/zfs/arc.c
@@ -965,7 +965,7 @@
     l2arc_dev_t *dev);
 
 /* L2ARC persistence write I/O routines. */
-static uint64_t l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
+static void l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio,
     l2arc_write_callback_t *cb);
 
 /* L2ARC persistence auxiliary routines. */
@@ -6106,7 +6106,8 @@
 				    asize, abd,
 				    ZIO_CHECKSUM_OFF,
 				    l2arc_read_done, cb, priority,
-				    zio_flags | ZIO_FLAG_CANFAIL |
+				    zio_flags | ZIO_FLAG_DONT_CACHE |
+				    ZIO_FLAG_CANFAIL |
 				    ZIO_FLAG_DONT_PROPAGATE |
 				    ZIO_FLAG_DONT_RETRY, B_FALSE);
 				acb->acb_zio_head = rzio;
@@ -6675,6 +6676,18 @@
 	callback->awcb_children_ready(zio, buf, callback->awcb_private);
 }
 
+/*
+ * The SPA calls this callback for each physical write that happens on behalf
+ * of a logical write.  See the comment in dbuf_write_physdone() for details.
+ */
+static void
+arc_write_physdone(zio_t *zio)
+{
+	arc_write_callback_t *cb = zio->io_private;
+	if (cb->awcb_physdone != NULL)
+		cb->awcb_physdone(zio, cb->awcb_buf, cb->awcb_private);
+}
+
 static void
 arc_write_done(zio_t *zio)
 {
@@ -6764,9 +6777,9 @@
 arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
     blkptr_t *bp, arc_buf_t *buf, boolean_t uncached, boolean_t l2arc,
     const zio_prop_t *zp, arc_write_done_func_t *ready,
-    arc_write_done_func_t *children_ready, arc_write_done_func_t *done,
-    void *private, zio_priority_t priority, int zio_flags,
-    const zbookmark_phys_t *zb)
+    arc_write_done_func_t *children_ready, arc_write_done_func_t *physdone,
+    arc_write_done_func_t *done, void *private, zio_priority_t priority,
+    int zio_flags, const zbookmark_phys_t *zb)
 {
 	arc_buf_hdr_t *hdr = buf->b_hdr;
 	arc_write_callback_t *callback;
@@ -6813,6 +6826,7 @@
 	callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
 	callback->awcb_ready = ready;
 	callback->awcb_children_ready = children_ready;
+	callback->awcb_physdone = physdone;
 	callback->awcb_done = done;
 	callback->awcb_private = private;
 	callback->awcb_buf = buf;
@@ -6849,7 +6863,8 @@
 	    abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)),
 	    HDR_GET_LSIZE(hdr), arc_buf_size(buf), &localprop, arc_write_ready,
 	    (children_ready != NULL) ? arc_write_children_ready : NULL,
-	    arc_write_done, callback, priority, zio_flags, zb);
+	    arc_write_physdone, arc_write_done, callback,
+	    priority, zio_flags, zb);
 
 	return (zio);
 }
@@ -7851,7 +7866,8 @@
 	taskq_destroy(arc_prune_taskq);
 
 	mutex_enter(&arc_prune_mtx);
-	while ((p = list_remove_head(&arc_prune_list)) != NULL) {
+	while ((p = list_head(&arc_prune_list)) != NULL) {
+		list_remove(&arc_prune_list, p);
 		zfs_refcount_remove(&p->p_refcnt, &arc_prune_list);
 		zfs_refcount_destroy(&p->p_refcnt);
 		kmem_free(p, sizeof (*p));
@@ -8159,7 +8175,7 @@
 static uint64_t
 l2arc_write_size(l2arc_dev_t *dev)
 {
-	uint64_t size;
+	uint64_t size, dev_size, tsize;
 
 	/*
 	 * Make sure our globals have meaningful values in case the user
@@ -8176,45 +8192,35 @@
 	if (arc_warm == B_FALSE)
 		size += l2arc_write_boost;
 
+	/*
+	 * Make sure the write size does not exceed the size of the cache
+	 * device. This is important in l2arc_evict(), otherwise infinite
+	 * iteration can occur.
+	 */
+	dev_size = dev->l2ad_end - dev->l2ad_start;
+
 	/* We need to add in the worst case scenario of log block overhead. */
-	size += l2arc_log_blk_overhead(size, dev);
+	tsize = size + l2arc_log_blk_overhead(size, dev);
 	if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
 		/*
 		 * Trim ahead of the write size 64MB or (l2arc_trim_ahead/100)
 		 * times the writesize, whichever is greater.
 		 */
-		size += MAX(64 * 1024 * 1024,
-		    (size * l2arc_trim_ahead) / 100);
+		tsize += MAX(64 * 1024 * 1024,
+		    (tsize * l2arc_trim_ahead) / 100);
 	}
 
-	/*
-	 * Make sure the write size does not exceed the size of the cache
-	 * device. This is important in l2arc_evict(), otherwise infinite
-	 * iteration can occur.
-	 */
-	if (size > dev->l2ad_end - dev->l2ad_start) {
+	if (tsize >= dev_size) {
 		cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
 		    "plus the overhead of log blocks (persistent L2ARC, "
 		    "%llu bytes) exceeds the size of the cache device "
 		    "(guid %llu), resetting them to the default (%d)",
 		    (u_longlong_t)l2arc_log_blk_overhead(size, dev),
 		    (u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
-
 		size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;
 
-		if (l2arc_trim_ahead > 1) {
-			cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
-			l2arc_trim_ahead = 1;
-		}
-
 		if (arc_warm == B_FALSE)
 			size += l2arc_write_boost;
-
-		size += l2arc_log_blk_overhead(size, dev);
-		if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
-			size += MAX(64 * 1024 * 1024,
-			    (size * l2arc_trim_ahead) / 100);
-		}
 	}
 
 	return (size);
@@ -8313,14 +8319,20 @@
 static void
 l2arc_do_free_on_write(void)
 {
-	l2arc_data_free_t *df;
+	list_t *buflist;
+	l2arc_data_free_t *df, *df_prev;
 
 	mutex_enter(&l2arc_free_on_write_mtx);
-	while ((df = list_remove_head(l2arc_free_on_write)) != NULL) {
+	buflist = l2arc_free_on_write;
+
+	for (df = list_tail(buflist); df; df = df_prev) {
+		df_prev = list_prev(buflist, df);
 		ASSERT3P(df->l2df_abd, !=, NULL);
 		abd_free(df->l2df_abd);
+		list_remove(buflist, df);
 		kmem_free(df, sizeof (l2arc_data_free_t));
 	}
+
 	mutex_exit(&l2arc_free_on_write_mtx);
 }
 
@@ -8833,7 +8845,7 @@
 
 top:
 	rerun = B_FALSE;
-	if (dev->l2ad_hand + distance > dev->l2ad_end) {
+	if (dev->l2ad_hand >= (dev->l2ad_end - distance)) {
 		/*
 		 * When there is no space to accommodate upcoming writes,
 		 * evict to the end. Then bump the write and evict hands
@@ -9027,7 +9039,7 @@
 		 */
 		ASSERT3U(dev->l2ad_hand + distance, <, dev->l2ad_end);
 		if (!dev->l2ad_first)
-			ASSERT3U(dev->l2ad_hand, <=, dev->l2ad_evict);
+			ASSERT3U(dev->l2ad_hand, <, dev->l2ad_evict);
 	}
 }
 
@@ -9287,13 +9299,7 @@
 			uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
 			    psize);
 
-			/*
-			 * If the allocated size of this buffer plus the max
-			 * size for the pending log block exceeds the evicted
-			 * target size, terminate writing buffers for this run.
-			 */
-			if (write_asize + asize +
-			    sizeof (l2arc_log_blk_phys_t) > target_sz) {
+			if ((write_asize + asize) > target_sz) {
 				full = B_TRUE;
 				mutex_exit(hash_lock);
 				break;
@@ -9407,14 +9413,8 @@
 			 * arcstat_l2_{size,asize} kstats are updated
 			 * internally.
 			 */
-			if (l2arc_log_blk_insert(dev, hdr)) {
-				/*
-				 * l2ad_hand will be adjusted in
-				 * l2arc_log_blk_commit().
-				 */
-				write_asize +=
-				    l2arc_log_blk_commit(dev, pio, cb);
-			}
+			if (l2arc_log_blk_insert(dev, hdr))
+				l2arc_log_blk_commit(dev, pio, cb);
 
 			zio_nowait(wzio);
 		}
@@ -10173,7 +10173,8 @@
 	err = zio_wait(zio_read_phys(NULL, dev->l2ad_vdev,
 	    VDEV_LABEL_START_SIZE, l2dhdr_asize, abd,
 	    ZIO_CHECKSUM_LABEL, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
+	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
+	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
 	    ZIO_FLAG_SPECULATIVE, B_FALSE));
 
 	abd_free(abd);
@@ -10493,10 +10494,11 @@
 	cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP);
 	cb->l2rcb_abd = abd_get_from_buf(lb, asize);
 	pio = zio_root(vd->vdev_spa, l2arc_blk_fetch_done, cb,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
+	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
+	    ZIO_FLAG_DONT_RETRY);
 	(void) zio_nowait(zio_read_phys(pio, vd, lbp->lbp_daddr, asize,
 	    cb->l2rcb_abd, ZIO_CHECKSUM_OFF, NULL, NULL,
-	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL |
+	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL |
 	    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE));
 
 	return (pio);
@@ -10562,7 +10564,7 @@
  * This function allocates some memory to temporarily hold the serialized
  * buffer to be written. This is then released in l2arc_write_done.
  */
-static uint64_t
+static void
 l2arc_log_blk_commit(l2arc_dev_t *dev, zio_t *pio, l2arc_write_callback_t *cb)
 {
 	l2arc_log_blk_phys_t	*lb = &dev->l2ad_log_blk;
@@ -10673,8 +10675,6 @@
 	dev->l2ad_log_ent_idx = 0;
 	dev->l2ad_log_blk_payload_asize = 0;
 	dev->l2ad_log_blk_payload_start = 0;
-
-	return (asize);
 }
 
 /*
diff --git a/sys/contrib/openzfs/module/zfs/bplist.c b/sys/contrib/openzfs/module/zfs/bplist.c
--- a/sys/contrib/openzfs/module/zfs/bplist.c
+++ b/sys/contrib/openzfs/module/zfs/bplist.c
@@ -65,8 +65,9 @@
 	bplist_entry_t *bpe;
 
 	mutex_enter(&bpl->bpl_lock);
-	while ((bpe = list_remove_head(&bpl->bpl_list))) {
+	while ((bpe = list_head(&bpl->bpl_list))) {
 		bplist_iterate_last_removed = bpe;
+		list_remove(&bpl->bpl_list, bpe);
 		mutex_exit(&bpl->bpl_lock);
 		func(arg, &bpe->bpe_blk, tx);
 		kmem_free(bpe, sizeof (*bpe));
@@ -81,7 +82,10 @@
 	bplist_entry_t *bpe;
 
 	mutex_enter(&bpl->bpl_lock);
-	while ((bpe = list_remove_head(&bpl->bpl_list)))
+	while ((bpe = list_head(&bpl->bpl_list))) {
+		bplist_iterate_last_removed = bpe;
+		list_remove(&bpl->bpl_list, bpe);
 		kmem_free(bpe, sizeof (*bpe));
+	}
 	mutex_exit(&bpl->bpl_lock);
 }
diff --git a/sys/contrib/openzfs/module/zfs/btree.c b/sys/contrib/openzfs/module/zfs/btree.c
--- a/sys/contrib/openzfs/module/zfs/btree.c
+++ b/sys/contrib/openzfs/module/zfs/btree.c
@@ -193,20 +193,14 @@
 
 void
 zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
-    bt_find_in_buf_f bt_find_in_buf, size_t size)
+    size_t size)
 {
-	zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
-	    BTREE_LEAF_SIZE);
+	zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
 }
 
-static void *
-zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
-    const void *value, zfs_btree_index_t *where);
-
 void
 zfs_btree_create_custom(zfs_btree_t *tree,
     int (*compar) (const void *, const void *),
-    bt_find_in_buf_f bt_find_in_buf,
     size_t size, size_t lsize)
 {
 	size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
@@ -214,8 +208,6 @@
 	ASSERT3U(size, <=, esize / 2);
 	memset(tree, 0, sizeof (*tree));
 	tree->bt_compar = compar;
-	tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
-	    zfs_btree_find_in_buf : bt_find_in_buf;
 	tree->bt_elem_size = size;
 	tree->bt_leaf_size = lsize;
 	tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
@@ -311,7 +303,7 @@
 			 * element in the last leaf, it's in the last leaf or
 			 * it's not in the tree.
 			 */
-			void *d = tree->bt_find_in_buf(tree,
+			void *d = zfs_btree_find_in_buf(tree,
 			    last_leaf->btl_elems +
 			    last_leaf->btl_hdr.bth_first * size,
 			    last_leaf->btl_hdr.bth_count, value, &idx);
@@ -335,7 +327,7 @@
 	for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
 	    node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
 		ASSERT3P(node, !=, NULL);
-		void *d = tree->bt_find_in_buf(tree, node->btc_elems,
+		void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
 		    node->btc_hdr.bth_count, value, &idx);
 		EQUIV(d != NULL, !idx.bti_before);
 		if (d != NULL) {
@@ -355,7 +347,7 @@
 	 */
 	zfs_btree_leaf_t *leaf = (depth == 0 ?
 	    (zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
-	void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
+	void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
 	    leaf->btl_hdr.bth_first * size,
 	    leaf->btl_hdr.bth_count, value, &idx);
 
@@ -679,7 +671,7 @@
 	zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
 	zfs_btree_index_t idx;
 	ASSERT(zfs_btree_is_core(par_hdr));
-	VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
+	VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
 	    par_hdr->bth_count, buf, &idx), ==, NULL);
 	ASSERT(idx.bti_before);
 	uint32_t offset = idx.bti_offset;
@@ -905,7 +897,7 @@
 	}
 	zfs_btree_index_t idx;
 	zfs_btree_core_t *parent = hdr->bth_parent;
-	VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
+	VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
 	    parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
 	ASSERT(idx.bti_before);
 	ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
diff --git a/sys/contrib/openzfs/module/zfs/dataset_kstats.c b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
--- a/sys/contrib/openzfs/module/zfs/dataset_kstats.c
+++ b/sys/contrib/openzfs/module/zfs/dataset_kstats.c
@@ -49,12 +49,8 @@
 	{ "zil_itx_needcopy_bytes",		KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_normal_count",	KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_normal_bytes",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_normal_write",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_normal_alloc",	KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_slog_count",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_slog_bytes",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_slog_write",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_slog_alloc",	KSTAT_DATA_UINT64 }
+	{ "zil_itx_metaslab_slog_bytes",	KSTAT_DATA_UINT64 }
 	}
 };
 
diff --git a/sys/contrib/openzfs/module/zfs/dbuf.c b/sys/contrib/openzfs/module/zfs/dbuf.c
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@@ -4369,6 +4369,22 @@
 	rw_exit(&parent_db->db_rwlock);
 }
 
+static void
+dbuf_lightweight_physdone(zio_t *zio)
+{
+	dbuf_dirty_record_t *dr = zio->io_private;
+	dsl_pool_t *dp = spa_get_dsl(zio->io_spa);
+	ASSERT3U(dr->dr_txg, ==, zio->io_txg);
+
+	/*
+	 * The callback will be called io_phys_children times.  Retire one
+	 * portion of our dirty space each time we are called.  Any rounding
+	 * error will be cleaned up by dbuf_lightweight_done().
+	 */
+	int delta = dr->dr_accounted / zio->io_phys_children;
+	dsl_pool_undirty_space(dp, delta, zio->io_txg);
+}
+
 static void
 dbuf_lightweight_done(zio_t *zio)
 {
@@ -4387,8 +4403,16 @@
 		dsl_dataset_block_born(ds, zio->io_bp, tx);
 	}
 
-	dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
-	    zio->io_txg);
+	/*
+	 * See comment in dbuf_write_done().
+	 */
+	if (zio->io_phys_children == 0) {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted, zio->io_txg);
+	} else {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+	}
 
 	abd_free(dr->dt.dll.dr_abd);
 	kmem_free(dr, sizeof (*dr));
@@ -4422,7 +4446,8 @@
 	    dmu_tx_get_txg(tx), &dr->dr_bp_copy, dr->dt.dll.dr_abd,
 	    dn->dn_datablksz, abd_get_size(dr->dt.dll.dr_abd),
 	    &dr->dt.dll.dr_props, dbuf_lightweight_ready, NULL,
-	    dbuf_lightweight_done, dr, ZIO_PRIORITY_ASYNC_WRITE,
+	    dbuf_lightweight_physdone, dbuf_lightweight_done, dr,
+	    ZIO_PRIORITY_ASYNC_WRITE,
 	    ZIO_FLAG_MUSTSUCCEED | dr->dt.dll.dr_flags, &zb);
 
 	zio_nowait(dr->dr_zio);
@@ -4764,6 +4789,37 @@
 	DB_DNODE_EXIT(db);
 }
 
+/*
+ * The SPA will call this callback several times for each zio - once
+ * for every physical child i/o (zio->io_phys_children times).  This
+ * allows the DMU to monitor the progress of each logical i/o.  For example,
+ * there may be 2 copies of an indirect block, or many fragments of a RAID-Z
+ * block.  There may be a long delay before all copies/fragments are completed,
+ * so this callback allows us to retire dirty space gradually, as the physical
+ * i/os complete.
+ */
+static void
+dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg)
+{
+	(void) buf;
+	dmu_buf_impl_t *db = arg;
+	objset_t *os = db->db_objset;
+	dsl_pool_t *dp = dmu_objset_pool(os);
+	dbuf_dirty_record_t *dr;
+	int delta = 0;
+
+	dr = db->db_data_pending;
+	ASSERT3U(dr->dr_txg, ==, zio->io_txg);
+
+	/*
+	 * The callback will be called io_phys_children times.  Retire one
+	 * portion of our dirty space each time we are called.  Any rounding
+	 * error will be cleaned up by dbuf_write_done().
+	 */
+	delta = dr->dr_accounted / zio->io_phys_children;
+	dsl_pool_undirty_space(dp, delta, zio->io_txg);
+}
+
 static void
 dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
 {
@@ -4838,8 +4894,27 @@
 	db->db_data_pending = NULL;
 	dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
 
-	dsl_pool_undirty_space(dmu_objset_pool(os), dr->dr_accounted,
-	    zio->io_txg);
+	/*
+	 * If we didn't do a physical write in this ZIO and we
+	 * still ended up here, it means that the space of the
+	 * dbuf that we just released (and undirtied) above hasn't
+	 * been marked as undirtied in the pool's accounting.
+	 *
+	 * Thus, we undirty that space in the pool's view of the
+	 * world here. For physical writes this type of update
+	 * happens in dbuf_write_physdone().
+	 *
+	 * If we did a physical write, clean up any rounding errors
+	 * that came up due to writing multiple copies of a block
+	 * on disk [see dbuf_write_physdone()].
+	 */
+	if (zio->io_phys_children == 0) {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted, zio->io_txg);
+	} else {
+		dsl_pool_undirty_space(dmu_objset_pool(os),
+		    dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+	}
 
 	kmem_free(dr, sizeof (dbuf_dirty_record_t));
 }
@@ -5087,7 +5162,7 @@
 
 		dr->dr_zio = zio_write(pio, os->os_spa, txg, &dr->dr_bp_copy,
 		    contents, db->db.db_size, db->db.db_size, &zp,
-		    dbuf_write_override_ready, NULL,
+		    dbuf_write_override_ready, NULL, NULL,
 		    dbuf_write_override_done,
 		    dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 		mutex_enter(&db->db_mtx);
@@ -5101,7 +5176,7 @@
 		    zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
 		dr->dr_zio = zio_write(pio, os->os_spa, txg,
 		    &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
-		    dbuf_write_nofill_ready, NULL,
+		    dbuf_write_nofill_ready, NULL, NULL,
 		    dbuf_write_nofill_done, db,
 		    ZIO_PRIORITY_ASYNC_WRITE,
 		    ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
@@ -5120,8 +5195,9 @@
 		dr->dr_zio = arc_write(pio, os->os_spa, txg,
 		    &dr->dr_bp_copy, data, !DBUF_IS_CACHEABLE(db),
 		    dbuf_is_l2cacheable(db), &zp, dbuf_write_ready,
-		    children_ready_cb, dbuf_write_done, db,
-		    ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
+		    children_ready_cb, dbuf_write_physdone,
+		    dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
+		    ZIO_FLAG_MUSTSUCCEED, &zb);
 	}
 }
 
diff --git a/sys/contrib/openzfs/module/zfs/ddt.c b/sys/contrib/openzfs/module/zfs/ddt.c
--- a/sys/contrib/openzfs/module/zfs/ddt.c
+++ b/sys/contrib/openzfs/module/zfs/ddt.c
@@ -1209,19 +1209,10 @@
 		ASSERT3S(dde->dde_class, <, DDT_CLASSES);
 
 		ddp = &dde->dde_phys[BP_GET_NDVAS(bp)];
-
-		/*
-		 * This entry already existed (dde_type is real), so it must
-		 * have refcnt >0 at the start of this txg. We are called from
-		 * brt_pending_apply(), before frees are issued, so the refcnt
-		 * can't be lowered yet. Therefore, it must be >0. We assert
-		 * this because if the order of BRT and DDT interactions were
-		 * ever to change and the refcnt was ever zero here, then
-		 * likely further action is required to fill out the DDT entry,
-		 * and this is a place that is likely to be missed in testing.
-		 */
-		ASSERT3U(ddp->ddp_refcnt, >, 0);
-
+		if (ddp->ddp_refcnt == 0) {
+			/* This should never happen? */
+			ddt_phys_fill(ddp, bp);
+		}
 		ddt_phys_addref(ddp);
 		result = B_TRUE;
 	} else {
diff --git a/sys/contrib/openzfs/module/zfs/ddt_zap.c b/sys/contrib/openzfs/module/zfs/ddt_zap.c
--- a/sys/contrib/openzfs/module/zfs/ddt_zap.c
+++ b/sys/contrib/openzfs/module/zfs/ddt_zap.c
@@ -31,8 +31,8 @@
 #include <sys/zap.h>
 #include <sys/dmu_tx.h>
 
-static unsigned int ddt_zap_default_bs = 15;
-static unsigned int ddt_zap_default_ibs = 15;
+static const int ddt_zap_leaf_blockshift = 12;
+static const int ddt_zap_indirect_blockshift = 12;
 
 static int
 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
@@ -43,7 +43,7 @@
 		flags |= ZAP_FLAG_PRE_HASHED_KEY;
 
 	*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
-	    ddt_zap_default_bs, ddt_zap_default_ibs,
+	    ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
 	    DMU_OT_NONE, 0, tx);
 
 	return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
@@ -166,10 +166,3 @@
 	ddt_zap_walk,
 	ddt_zap_count,
 };
-
-/* BEGIN CSTYLED */
-ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
-	"DDT ZAP leaf blockshift");
-ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
-	"DDT ZAP indirect blockshift");
-/* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@@ -1698,7 +1698,7 @@
 	zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
 	    abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
 	    zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
-	    dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done,
+	    dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
 	    dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
 
 	return (0);
@@ -1864,7 +1864,7 @@
 
 	zio_nowait(arc_write(pio, os->os_spa, txg, zgd->zgd_bp,
 	    dr->dt.dl.dr_data, !DBUF_IS_CACHEABLE(db), dbuf_is_l2cacheable(db),
-	    &zp, dmu_sync_ready, NULL, dmu_sync_done, dsa,
+	    &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
 	    ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
 
 	return (0);
diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c
--- a/sys/contrib/openzfs/module/zfs/dmu_objset.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c
@@ -1698,7 +1698,7 @@
 
 	zio = arc_write(pio, os->os_spa, tx->tx_txg,
 	    blkptr_copy, os->os_phys_buf, B_FALSE, dmu_os_is_l2cacheable(os),
-	    &zp, dmu_objset_write_ready, NULL, dmu_objset_write_done,
+	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
 
 	/*
@@ -1755,8 +1755,9 @@
 	taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
 
 	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
-	while ((dr = list_remove_head(list)) != NULL) {
+	while ((dr = list_head(list)) != NULL) {
 		ASSERT0(dr->dr_dbuf->db_level);
+		list_remove(list, dr);
 		zio_nowait(dr->dr_zio);
 	}
 
diff --git a/sys/contrib/openzfs/module/zfs/dmu_recv.c b/sys/contrib/openzfs/module/zfs/dmu_recv.c
--- a/sys/contrib/openzfs/module/zfs/dmu_recv.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_recv.c
@@ -1371,8 +1371,8 @@
 	dnode_t *dn;
 	abd_t *abd = rrd->abd;
 	zio_cksum_t bp_cksum = bp->blk_cksum;
-	zio_flag_t flags = ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_RETRY |
-	    ZIO_FLAG_CANFAIL;
+	zio_flag_t flags = ZIO_FLAG_SPECULATIVE |
+	    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL;
 
 	if (rwa->raw)
 		flags |= ZIO_FLAG_RAW;
diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c
--- a/sys/contrib/openzfs/module/zfs/dmu_send.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_send.c
@@ -1955,7 +1955,7 @@
 {
 	dsl_dataset_t *to_ds = dspp->to_ds;
 	dsl_pool_t *dp = dspp->dp;
-
+#ifdef _KERNEL
 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
 		uint64_t version;
 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0)
@@ -1964,6 +1964,7 @@
 		if (version >= ZPL_VERSION_SA)
 			*featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
 	}
+#endif
 
 	/* raw sends imply large_block_ok */
 	if ((dspp->rawok || dspp->large_block_ok) &&
@@ -2792,7 +2793,6 @@
 			}
 
 			if (err == 0) {
-				owned = B_TRUE;
 				err = zap_lookup(dspp.dp->dp_meta_objset,
 				    dspp.to_ds->ds_object,
 				    DS_FIELD_RESUME_TOGUID, 8, 1,
@@ -2806,24 +2806,21 @@
 				    sizeof (dspp.saved_toname),
 				    dspp.saved_toname);
 			}
-			/* Only disown if there was an error in the lookups */
-			if (owned && (err != 0))
+			if (err != 0)
 				dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
 
 			kmem_strfree(name);
 		} else {
 			err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
 			    FTAG, &dspp.to_ds);
-			if (err == 0)
-				owned = B_TRUE;
 		}
+		owned = B_TRUE;
 	} else {
 		err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
 		    &dspp.to_ds);
 	}
 
 	if (err != 0) {
-		/* Note: dsl dataset is not owned at this point */
 		dsl_pool_rele(dspp.dp, FTAG);
 		return (err);
 	}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_tx.c b/sys/contrib/openzfs/module/zfs/dmu_tx.c
--- a/sys/contrib/openzfs/module/zfs/dmu_tx.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_tx.c
@@ -1396,7 +1396,8 @@
 {
 	dmu_tx_callback_t *dcb;
 
-	while ((dcb = list_remove_tail(cb_list)) != NULL) {
+	while ((dcb = list_tail(cb_list)) != NULL) {
+		list_remove(cb_list, dcb);
 		dcb->dcb_func(dcb->dcb_data, error);
 		kmem_free(dcb, sizeof (dmu_tx_callback_t));
 	}
diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@@ -520,7 +520,8 @@
 	issued = pf_end - pf_start + ipf_end - ipf_start;
 	if (issued > 1) {
 		/* More references on top of taken in dmu_zfetch_prepare(). */
-		zfs_refcount_add_few(&zs->zs_refs, issued - 1, NULL);
+		for (int i = 0; i < issued - 1; i++)
+			zfs_refcount_add(&zs->zs_refs, NULL);
 	} else if (issued == 0) {
 		/* Some other thread has done our work, so drop the ref. */
 		if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
--- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c
@@ -3782,7 +3782,8 @@
 	if (l == NULL || !list_link_active(&l->list_head))
 		return;
 
-	while ((snap = list_remove_tail(l)) != NULL) {
+	while ((snap = list_tail(l)) != NULL) {
+		list_remove(l, snap);
 		dsl_dataset_rele(snap->ds, tag);
 		kmem_free(snap, sizeof (*snap));
 	}
diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c
--- a/sys/contrib/openzfs/module/zfs/dsl_dir.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c
@@ -1490,7 +1490,7 @@
 	if (tr_cookie == NULL)
 		return;
 
-	while ((tr = list_remove_head(tr_list)) != NULL) {
+	while ((tr = list_head(tr_list)) != NULL) {
 		if (tr->tr_ds) {
 			mutex_enter(&tr->tr_ds->dd_lock);
 			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
@@ -1500,6 +1500,7 @@
 		} else {
 			arc_tempreserve_clear(tr->tr_size);
 		}
+		list_remove(tr_list, tr);
 		kmem_free(tr, sizeof (struct tempreserve));
 	}
 
diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c
--- a/sys/contrib/openzfs/module/zfs/dsl_scan.c
+++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c
@@ -234,7 +234,7 @@
 static int zfs_free_bpobj_enabled = 1;
 
 /* Error blocks to be scrubbed in one txg. */
-static uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
+uint_t zfs_scrub_error_blocks_per_txg = 1 << 12;
 
 /* the order has to match pool_scan_type */
 static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
@@ -573,8 +573,7 @@
 		 * counter to how far we've scanned. We know we're consistent
 		 * up to here.
 		 */
-		scn->scn_issued_before_pass = scn->scn_phys.scn_examined -
-		    scn->scn_phys.scn_skipped;
+		scn->scn_issued_before_pass = scn->scn_phys.scn_examined;
 
 		if (dsl_scan_is_running(scn) &&
 		    spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
@@ -3438,8 +3437,10 @@
 	 * If we were suspended in the middle of processing,
 	 * requeue any unfinished sios and exit.
 	 */
-	while ((sio = list_remove_head(&sio_list)) != NULL)
+	while ((sio = list_head(&sio_list)) != NULL) {
+		list_remove(&sio_list, sio);
 		scan_io_queue_insert_impl(queue, sio);
+	}
 
 	queue->q_zio = NULL;
 	mutex_exit(q_lock);
@@ -4363,7 +4364,7 @@
 	 * Disabled by default, set zfs_scan_report_txgs to report
 	 * average performance over the last zfs_scan_report_txgs TXGs.
 	 */
-	if (zfs_scan_report_txgs != 0 &&
+	if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
 	    tx->tx_txg % zfs_scan_report_txgs == 0) {
 		scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
 		spa_scan_stat_init(spa);
@@ -4565,15 +4566,6 @@
 	    all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
 }
 
-static void
-count_block_skipped(dsl_scan_t *scn, const blkptr_t *bp, boolean_t all)
-{
-	if (BP_IS_EMBEDDED(bp))
-		return;
-	atomic_add_64(&scn->scn_phys.scn_skipped,
-	    all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
-}
-
 static void
 count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
 {
@@ -4719,7 +4711,7 @@
 	count_block(dp->dp_blkstats, bp);
 	if (phys_birth <= scn->scn_phys.scn_min_txg ||
 	    phys_birth >= scn->scn_phys.scn_max_txg) {
-		count_block_skipped(scn, bp, B_TRUE);
+		count_block_issued(spa, bp, B_TRUE);
 		return (0);
 	}
 
@@ -4760,7 +4752,7 @@
 	if (needs_io && !zfs_no_scrub_io) {
 		dsl_scan_enqueue(dp, bp, zio_flags, zb);
 	} else {
-		count_block_skipped(scn, bp, B_TRUE);
+		count_block_issued(spa, bp, B_TRUE);
 	}
 
 	/* do not relocate this block */
@@ -4885,7 +4877,6 @@
  * with single operation.  Plus it makes scrubs more sequential and reduces
  * chances that minor extent change move it within the B-tree.
  */
-__attribute__((always_inline)) inline
 static int
 ext_size_compare(const void *x, const void *y)
 {
@@ -4894,17 +4885,13 @@
 	return (TREE_CMP(*a, *b));
 }
 
-ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
-    ext_size_compare)
-
 static void
 ext_size_create(range_tree_t *rt, void *arg)
 {
 	(void) rt;
 	zfs_btree_t *size_tree = arg;
 
-	zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
-	    sizeof (uint64_t));
+	zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
 }
 
 static void
@@ -5129,9 +5116,9 @@
 		ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
 		range_tree_remove_fill(queue->q_exts_by_addr, start, size);
 
-		/* count the block as though we skipped it */
+		/* count the block as though we issued it */
 		sio2bp(sio, &tmpbp);
-		count_block_skipped(scn, &tmpbp, B_FALSE);
+		count_block_issued(spa, &tmpbp, B_FALSE);
 
 		sio_free(sio);
 	}
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -148,7 +148,8 @@
 	list_remove(&zevent_list, ev);
 
 	/* Remove references to this event in all private file data */
-	while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
+	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
+		list_remove(&ev->ev_ze_list, ze);
 		ze->ze_zevent = NULL;
 		ze->ze_dropped++;
 	}
diff --git a/sys/contrib/openzfs/module/zfs/metaslab.c b/sys/contrib/openzfs/module/zfs/metaslab.c
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@@ -1342,7 +1342,6 @@
  * Comparison function for the private size-ordered tree using 32-bit
  * ranges. Tree is sorted by size, larger sizes at the end of the tree.
  */
-__attribute__((always_inline)) inline
 static int
 metaslab_rangesize32_compare(const void *x1, const void *x2)
 {
@@ -1353,15 +1352,16 @@
 	uint64_t rs_size2 = r2->rs_end - r2->rs_start;
 
 	int cmp = TREE_CMP(rs_size1, rs_size2);
+	if (likely(cmp))
+		return (cmp);
 
-	return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
+	return (TREE_CMP(r1->rs_start, r2->rs_start));
 }
 
 /*
  * Comparison function for the private size-ordered tree using 64-bit
  * ranges. Tree is sorted by size, larger sizes at the end of the tree.
  */
-__attribute__((always_inline)) inline
 static int
 metaslab_rangesize64_compare(const void *x1, const void *x2)
 {
@@ -1372,10 +1372,11 @@
 	uint64_t rs_size2 = r2->rs_end - r2->rs_start;
 
 	int cmp = TREE_CMP(rs_size1, rs_size2);
+	if (likely(cmp))
+		return (cmp);
 
-	return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
+	return (TREE_CMP(r1->rs_start, r2->rs_start));
 }
-
 typedef struct metaslab_rt_arg {
 	zfs_btree_t *mra_bt;
 	uint32_t mra_floor_shift;
@@ -1411,13 +1412,6 @@
 	range_tree_walk(rt, metaslab_size_sorted_add, &arg);
 }
 
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
-    range_seg32_t, metaslab_rangesize32_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
-    range_seg64_t, metaslab_rangesize64_compare)
-
 /*
  * Create any block allocator specific components. The current allocators
  * rely on using both a size-ordered range_tree_t and an array of uint64_t's.
@@ -1430,22 +1424,19 @@
 
 	size_t size;
 	int (*compare) (const void *, const void *);
-	bt_find_in_buf_f bt_find;
 	switch (rt->rt_type) {
 	case RANGE_SEG32:
 		size = sizeof (range_seg32_t);
 		compare = metaslab_rangesize32_compare;
-		bt_find = metaslab_rt_find_rangesize32_in_buf;
 		break;
 	case RANGE_SEG64:
 		size = sizeof (range_seg64_t);
 		compare = metaslab_rangesize64_compare;
-		bt_find = metaslab_rt_find_rangesize64_in_buf;
 		break;
 	default:
 		panic("Invalid range seg type %d", rt->rt_type);
 	}
-	zfs_btree_create(size_tree, compare, bt_find, size);
+	zfs_btree_create(size_tree, compare, size);
 	mrap->mra_floor_shift = metaslab_by_size_min_shift;
 }
 
@@ -5650,7 +5641,8 @@
 		 * We reserve the slots individually so that we can unreserve
 		 * them individually when an I/O completes.
 		 */
-		zfs_refcount_add_few(&mca->mca_alloc_slots, slots, zio);
+		for (int d = 0; d < slots; d++)
+			zfs_refcount_add(&mca->mca_alloc_slots, zio);
 		zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
 		return (B_TRUE);
 	}
@@ -5664,7 +5656,8 @@
 	metaslab_class_allocator_t *mca = &mc->mc_allocator[allocator];
 
 	ASSERT(mc->mc_alloc_throttle_enabled);
-	zfs_refcount_remove_few(&mca->mca_alloc_slots, slots, zio);
+	for (int d = 0; d < slots; d++)
+		zfs_refcount_remove(&mca->mca_alloc_slots, zio);
 }
 
 static int
diff --git a/sys/contrib/openzfs/module/zfs/range_tree.c b/sys/contrib/openzfs/module/zfs/range_tree.c
--- a/sys/contrib/openzfs/module/zfs/range_tree.c
+++ b/sys/contrib/openzfs/module/zfs/range_tree.c
@@ -151,7 +151,6 @@
 	rt->rt_histogram[idx]--;
 }
 
-__attribute__((always_inline)) inline
 static int
 range_tree_seg32_compare(const void *x1, const void *x2)
 {
@@ -164,7 +163,6 @@
 	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
 }
 
-__attribute__((always_inline)) inline
 static int
 range_tree_seg64_compare(const void *x1, const void *x2)
 {
@@ -177,7 +175,6 @@
 	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
 }
 
-__attribute__((always_inline)) inline
 static int
 range_tree_seg_gap_compare(const void *x1, const void *x2)
 {
@@ -190,15 +187,6 @@
 	return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
 }
 
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t,
-    range_tree_seg32_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
-    range_tree_seg64_compare)
-
-ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t,
-    range_tree_seg_gap_compare)
-
 range_tree_t *
 range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
     void *arg, uint64_t start, uint64_t shift, uint64_t gap)
@@ -209,27 +197,23 @@
 	ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
 	size_t size;
 	int (*compare) (const void *, const void *);
-	bt_find_in_buf_f bt_find;
 	switch (type) {
 	case RANGE_SEG32:
 		size = sizeof (range_seg32_t);
 		compare = range_tree_seg32_compare;
-		bt_find = range_tree_seg32_find_in_buf;
 		break;
 	case RANGE_SEG64:
 		size = sizeof (range_seg64_t);
 		compare = range_tree_seg64_compare;
-		bt_find = range_tree_seg64_find_in_buf;
 		break;
 	case RANGE_SEG_GAP:
 		size = sizeof (range_seg_gap_t);
 		compare = range_tree_seg_gap_compare;
-		bt_find = range_tree_seg_gap_find_in_buf;
 		break;
 	default:
 		panic("Invalid range seg type %d", type);
 	}
-	zfs_btree_create(&rt->rt_root, compare, bt_find, size);
+	zfs_btree_create(&rt->rt_root, compare, size);
 
 	rt->rt_ops = ops;
 	rt->rt_gap = gap;
diff --git a/sys/contrib/openzfs/module/zfs/refcount.c b/sys/contrib/openzfs/module/zfs/refcount.c
--- a/sys/contrib/openzfs/module/zfs/refcount.c
+++ b/sys/contrib/openzfs/module/zfs/refcount.c
@@ -36,40 +36,33 @@
 static uint_t reference_history = 3; /* tunable */
 
 static kmem_cache_t *reference_cache;
+static kmem_cache_t *reference_history_cache;
 
 void
 zfs_refcount_init(void)
 {
 	reference_cache = kmem_cache_create("reference_cache",
 	    sizeof (reference_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+	reference_history_cache = kmem_cache_create("reference_history_cache",
+	    sizeof (uint64_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 }
 
 void
 zfs_refcount_fini(void)
 {
 	kmem_cache_destroy(reference_cache);
-}
-
-static int
-zfs_refcount_compare(const void *x1, const void *x2)
-{
-	const reference_t *r1 = (const reference_t *)x1;
-	const reference_t *r2 = (const reference_t *)x2;
-
-	int cmp1 = TREE_CMP(r1->ref_holder, r2->ref_holder);
-	int cmp2 = TREE_CMP(r1->ref_number, r2->ref_number);
-	int cmp = cmp1 ? cmp1 : cmp2;
-	return ((cmp || r1->ref_search) ? cmp : TREE_PCMP(r1, r2));
+	kmem_cache_destroy(reference_history_cache);
 }
 
 void
 zfs_refcount_create(zfs_refcount_t *rc)
 {
 	mutex_init(&rc->rc_mtx, NULL, MUTEX_DEFAULT, NULL);
-	avl_create(&rc->rc_tree, zfs_refcount_compare, sizeof (reference_t),
-	    offsetof(reference_t, ref_link.a));
+	list_create(&rc->rc_list, sizeof (reference_t),
+	    offsetof(reference_t, ref_link));
 	list_create(&rc->rc_removed, sizeof (reference_t),
-	    offsetof(reference_t, ref_link.l));
+	    offsetof(reference_t, ref_link));
 	rc->rc_count = 0;
 	rc->rc_removed_count = 0;
 	rc->rc_tracked = reference_tracking_enable;
@@ -93,15 +86,19 @@
 zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number)
 {
 	reference_t *ref;
-	void *cookie = NULL;
 
 	ASSERT3U(rc->rc_count, ==, number);
-	while ((ref = avl_destroy_nodes(&rc->rc_tree, &cookie)) != NULL)
+	while ((ref = list_head(&rc->rc_list))) {
+		list_remove(&rc->rc_list, ref);
 		kmem_cache_free(reference_cache, ref);
-	avl_destroy(&rc->rc_tree);
+	}
+	list_destroy(&rc->rc_list);
 
-	while ((ref = list_remove_head(&rc->rc_removed)))
+	while ((ref = list_head(&rc->rc_removed))) {
+		list_remove(&rc->rc_removed, ref);
+		kmem_cache_free(reference_history_cache, ref->ref_removed);
 		kmem_cache_free(reference_cache, ref);
+	}
 	list_destroy(&rc->rc_removed);
 	mutex_destroy(&rc->rc_mtx);
 }
@@ -127,10 +124,10 @@
 int64_t
 zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder)
 {
-	reference_t *ref;
+	reference_t *ref = NULL;
 	int64_t count;
 
-	if (likely(!rc->rc_tracked)) {
+	if (!rc->rc_tracked) {
 		count = atomic_add_64_nv(&(rc)->rc_count, number);
 		ASSERT3U(count, >=, number);
 		return (count);
@@ -139,9 +136,8 @@
 	ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
 	ref->ref_holder = holder;
 	ref->ref_number = number;
-	ref->ref_search = B_FALSE;
 	mutex_enter(&rc->rc_mtx);
-	avl_add(&rc->rc_tree, ref);
+	list_insert_head(&rc->rc_list, ref);
 	rc->rc_count += number;
 	count = rc->rc_count;
 	mutex_exit(&rc->rc_mtx);
@@ -155,55 +151,51 @@
 	return (zfs_refcount_add_many(rc, 1, holder));
 }
 
-void
-zfs_refcount_add_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
-{
-	if (likely(!rc->rc_tracked))
-		(void) zfs_refcount_add_many(rc, number, holder);
-	else for (; number > 0; number--)
-		(void) zfs_refcount_add(rc, holder);
-}
-
 int64_t
 zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
     const void *holder)
 {
-	reference_t *ref, s;
+	reference_t *ref;
 	int64_t count;
 
-	if (likely(!rc->rc_tracked)) {
+	if (!rc->rc_tracked) {
 		count = atomic_add_64_nv(&(rc)->rc_count, -number);
 		ASSERT3S(count, >=, 0);
 		return (count);
 	}
 
-	s.ref_holder = holder;
-	s.ref_number = number;
-	s.ref_search = B_TRUE;
 	mutex_enter(&rc->rc_mtx);
 	ASSERT3U(rc->rc_count, >=, number);
-	ref = avl_find(&rc->rc_tree, &s, NULL);
-	if (unlikely(ref == NULL)) {
-		panic("No such hold %p on refcount %llx", holder,
-		    (u_longlong_t)(uintptr_t)rc);
-		return (-1);
-	}
-	avl_remove(&rc->rc_tree, ref);
-	if (reference_history > 0) {
-		list_insert_head(&rc->rc_removed, ref);
-		if (rc->rc_removed_count >= reference_history) {
-			ref = list_remove_tail(&rc->rc_removed);
-			kmem_cache_free(reference_cache, ref);
-		} else {
-			rc->rc_removed_count++;
+	for (ref = list_head(&rc->rc_list); ref;
+	    ref = list_next(&rc->rc_list, ref)) {
+		if (ref->ref_holder == holder && ref->ref_number == number) {
+			list_remove(&rc->rc_list, ref);
+			if (reference_history > 0) {
+				ref->ref_removed =
+				    kmem_cache_alloc(reference_history_cache,
+				    KM_SLEEP);
+				list_insert_head(&rc->rc_removed, ref);
+				rc->rc_removed_count++;
+				if (rc->rc_removed_count > reference_history) {
+					ref = list_tail(&rc->rc_removed);
+					list_remove(&rc->rc_removed, ref);
+					kmem_cache_free(reference_history_cache,
+					    ref->ref_removed);
+					kmem_cache_free(reference_cache, ref);
+					rc->rc_removed_count--;
+				}
+			} else {
+				kmem_cache_free(reference_cache, ref);
+			}
+			rc->rc_count -= number;
+			count = rc->rc_count;
+			mutex_exit(&rc->rc_mtx);
+			return (count);
 		}
-	} else {
-		kmem_cache_free(reference_cache, ref);
 	}
-	rc->rc_count -= number;
-	count = rc->rc_count;
-	mutex_exit(&rc->rc_mtx);
-	return (count);
+	panic("No such hold %p on refcount %llx", holder,
+	    (u_longlong_t)(uintptr_t)rc);
+	return (-1);
 }
 
 int64_t
@@ -212,50 +204,34 @@
 	return (zfs_refcount_remove_many(rc, 1, holder));
 }
 
-void
-zfs_refcount_remove_few(zfs_refcount_t *rc, uint64_t number, const void *holder)
-{
-	if (likely(!rc->rc_tracked))
-		(void) zfs_refcount_remove_many(rc, number, holder);
-	else for (; number > 0; number--)
-		(void) zfs_refcount_remove(rc, holder);
-}
-
 void
 zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
 {
-	avl_tree_t tree;
-	list_t removed;
-	reference_t *ref;
-	void *cookie = NULL;
-	uint64_t count;
-	uint_t removed_count;
+	int64_t count, removed_count;
+	list_t list, removed;
 
-	avl_create(&tree, zfs_refcount_compare, sizeof (reference_t),
-	    offsetof(reference_t, ref_link.a));
+	list_create(&list, sizeof (reference_t),
+	    offsetof(reference_t, ref_link));
 	list_create(&removed, sizeof (reference_t),
-	    offsetof(reference_t, ref_link.l));
+	    offsetof(reference_t, ref_link));
 
 	mutex_enter(&src->rc_mtx);
 	count = src->rc_count;
 	removed_count = src->rc_removed_count;
 	src->rc_count = 0;
 	src->rc_removed_count = 0;
-	avl_swap(&tree, &src->rc_tree);
+	list_move_tail(&list, &src->rc_list);
 	list_move_tail(&removed, &src->rc_removed);
 	mutex_exit(&src->rc_mtx);
 
 	mutex_enter(&dst->rc_mtx);
 	dst->rc_count += count;
 	dst->rc_removed_count += removed_count;
-	if (avl_is_empty(&dst->rc_tree))
-		avl_swap(&dst->rc_tree, &tree);
-	else while ((ref = avl_destroy_nodes(&tree, &cookie)) != NULL)
-		avl_add(&dst->rc_tree, ref);
+	list_move_tail(&dst->rc_list, &list);
 	list_move_tail(&dst->rc_removed, &removed);
 	mutex_exit(&dst->rc_mtx);
 
-	avl_destroy(&tree);
+	list_destroy(&list);
 	list_destroy(&removed);
 }
 
@@ -263,19 +239,23 @@
 zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
     const void *current_holder, const void *new_holder)
 {
-	reference_t *ref, s;
+	reference_t *ref;
+	boolean_t found = B_FALSE;
 
-	if (likely(!rc->rc_tracked))
+	if (!rc->rc_tracked)
 		return;
 
-	s.ref_holder = current_holder;
-	s.ref_number = number;
-	s.ref_search = B_TRUE;
 	mutex_enter(&rc->rc_mtx);
-	ref = avl_find(&rc->rc_tree, &s, NULL);
-	ASSERT(ref);
-	ref->ref_holder = new_holder;
-	avl_update(&rc->rc_tree, ref);
+	for (ref = list_head(&rc->rc_list); ref;
+	    ref = list_next(&rc->rc_list, ref)) {
+		if (ref->ref_holder == current_holder &&
+		    ref->ref_number == number) {
+			ref->ref_holder = new_holder;
+			found = B_TRUE;
+			break;
+		}
+	}
+	ASSERT(found);
 	mutex_exit(&rc->rc_mtx);
 }
 
@@ -295,23 +275,21 @@
 boolean_t
 zfs_refcount_held(zfs_refcount_t *rc, const void *holder)
 {
-	reference_t *ref, s;
-	avl_index_t idx;
-	boolean_t res;
+	reference_t *ref;
 
-	if (likely(!rc->rc_tracked))
+	if (!rc->rc_tracked)
 		return (zfs_refcount_count(rc) > 0);
 
-	s.ref_holder = holder;
-	s.ref_number = 0;
-	s.ref_search = B_TRUE;
 	mutex_enter(&rc->rc_mtx);
-	ref = avl_find(&rc->rc_tree, &s, &idx);
-	if (likely(ref == NULL))
-		ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER);
-	res = ref && ref->ref_holder == holder;
+	for (ref = list_head(&rc->rc_list); ref;
+	    ref = list_next(&rc->rc_list, ref)) {
+		if (ref->ref_holder == holder) {
+			mutex_exit(&rc->rc_mtx);
+			return (B_TRUE);
+		}
+	}
 	mutex_exit(&rc->rc_mtx);
-	return (res);
+	return (B_FALSE);
 }
 
 /*
@@ -322,23 +300,21 @@
 boolean_t
 zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
 {
-	reference_t *ref, s;
-	avl_index_t idx;
-	boolean_t res;
+	reference_t *ref;
 
-	if (likely(!rc->rc_tracked))
+	if (!rc->rc_tracked)
 		return (B_TRUE);
 
 	mutex_enter(&rc->rc_mtx);
-	s.ref_holder = holder;
-	s.ref_number = 0;
-	s.ref_search = B_TRUE;
-	ref = avl_find(&rc->rc_tree, &s, &idx);
-	if (likely(ref == NULL))
-		ref = avl_nearest(&rc->rc_tree, idx, AVL_AFTER);
-	res = ref == NULL || ref->ref_holder != holder;
+	for (ref = list_head(&rc->rc_list); ref;
+	    ref = list_next(&rc->rc_list, ref)) {
+		if (ref->ref_holder == holder) {
+			mutex_exit(&rc->rc_mtx);
+			return (B_FALSE);
+		}
+	}
 	mutex_exit(&rc->rc_mtx);
-	return (res);
+	return (B_TRUE);
 }
 
 EXPORT_SYMBOL(zfs_refcount_create);
diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c
--- a/sys/contrib/openzfs/module/zfs/spa.c
+++ b/sys/contrib/openzfs/module/zfs/spa.c
@@ -33,7 +33,6 @@
  * Copyright 2017 Joyent, Inc.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
- * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
  */
 
 /*
@@ -1609,16 +1608,16 @@
 {
 	void *cookie = NULL;
 	spa_log_sm_t *sls;
-	log_summary_entry_t *e;
-
 	while ((sls = avl_destroy_nodes(&spa->spa_sm_logs_by_txg,
 	    &cookie)) != NULL) {
 		VERIFY0(sls->sls_mscount);
 		kmem_free(sls, sizeof (spa_log_sm_t));
 	}
 
-	while ((e = list_remove_head(&spa->spa_log_summary)) != NULL) {
+	for (log_summary_entry_t *e = list_head(&spa->spa_log_summary);
+	    e != NULL; e = list_head(&spa->spa_log_summary)) {
 		VERIFY0(e->lse_mscount);
+		list_remove(&spa->spa_log_summary, e);
 		kmem_free(e, sizeof (log_summary_entry_t));
 	}
 
@@ -6875,11 +6874,9 @@
 		if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
 			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
 
-		if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
-		    dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
+		if (dsl_scan_resilvering(spa_get_dsl(spa)))
 			return (spa_vdev_exit(spa, NULL, txg,
 			    ZFS_ERR_RESILVER_IN_PROGRESS));
-		}
 	} else {
 		if (vdev_rebuild_active(rvd))
 			return (spa_vdev_exit(spa, NULL, txg,
diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c
--- a/sys/contrib/openzfs/module/zfs/spa_misc.c
+++ b/sys/contrib/openzfs/module/zfs/spa_misc.c
@@ -730,7 +730,7 @@
 		mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT,
 		    NULL);
 		avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare,
-		    sizeof (zio_t), offsetof(zio_t, io_queue_node.a));
+		    sizeof (zio_t), offsetof(zio_t, io_alloc_node));
 	}
 	avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed,
 	    sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node));
@@ -814,7 +814,8 @@
 	if (spa->spa_root)
 		spa_strfree(spa->spa_root);
 
-	while ((dp = list_remove_head(&spa->spa_config_list)) != NULL) {
+	while ((dp = list_head(&spa->spa_config_list)) != NULL) {
+		list_remove(&spa->spa_config_list, dp);
 		if (dp->scd_path != NULL)
 			spa_strfree(dp->scd_path);
 		kmem_free(dp, sizeof (spa_config_dirent_t));
@@ -2438,6 +2439,7 @@
 	zio_init();
 	dmu_init();
 	zil_init();
+	vdev_cache_stat_init();
 	vdev_mirror_stat_init();
 	vdev_raidz_math_init();
 	vdev_file_init();
@@ -2461,6 +2463,7 @@
 	spa_evict_all();
 
 	vdev_file_fini();
+	vdev_cache_stat_fini();
 	vdev_mirror_stat_fini();
 	vdev_raidz_math_fini();
 	chksum_fini();
@@ -2611,7 +2614,7 @@
 	ps->pss_end_time = scn->scn_phys.scn_end_time;
 	ps->pss_to_examine = scn->scn_phys.scn_to_examine;
 	ps->pss_examined = scn->scn_phys.scn_examined;
-	ps->pss_skipped = scn->scn_phys.scn_skipped;
+	ps->pss_to_process = scn->scn_phys.scn_to_process;
 	ps->pss_processed = scn->scn_phys.scn_processed;
 	ps->pss_errors = scn->scn_phys.scn_errors;
 
diff --git a/sys/contrib/openzfs/module/zfs/txg.c b/sys/contrib/openzfs/module/zfs/txg.c
--- a/sys/contrib/openzfs/module/zfs/txg.c
+++ b/sys/contrib/openzfs/module/zfs/txg.c
@@ -895,10 +895,15 @@
 boolean_t
 txg_all_lists_empty(txg_list_t *tl)
 {
-	boolean_t res = B_TRUE;
-	for (int i = 0; i < TXG_SIZE; i++)
-		res &= (tl->tl_head[i] == NULL);
-	return (res);
+	mutex_enter(&tl->tl_lock);
+	for (int i = 0; i < TXG_SIZE; i++) {
+		if (!txg_list_empty_impl(tl, i)) {
+			mutex_exit(&tl->tl_lock);
+			return (B_FALSE);
+		}
+	}
+	mutex_exit(&tl->tl_lock);
+	return (B_TRUE);
 }
 
 /*
diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@@ -29,7 +29,7 @@
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2019, Datto Inc. All rights reserved.
  * Copyright (c) 2021, Klara Inc.
- * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
+ * Copyright [2021] Hewlett Packard Enterprise Development LP
  */
 
 #include <sys/zfs_context.h>
@@ -715,6 +715,7 @@
 	    offsetof(struct vdev, vdev_dtl_node));
 	vd->vdev_stat.vs_timestamp = gethrtime();
 	vdev_queue_init(vd);
+	vdev_cache_init(vd);
 
 	return (vd);
 }
@@ -1095,6 +1096,7 @@
 	 * Clean up vdev structure.
 	 */
 	vdev_queue_fini(vd);
+	vdev_cache_fini(vd);
 
 	if (vd->vdev_path)
 		spa_strfree(vd->vdev_path);
@@ -1718,7 +1720,8 @@
 		vps = kmem_zalloc(sizeof (*vps), KM_SLEEP);
 
 		vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
-		    ZIO_FLAG_DONT_AGGREGATE | ZIO_FLAG_TRYHARD;
+		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
+		    ZIO_FLAG_TRYHARD;
 
 		if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
 			/*
@@ -2609,6 +2612,8 @@
 
 	vd->vdev_ops->vdev_op_close(vd);
 
+	vdev_cache_purge(vd);
+
 	/*
 	 * We record the previous state before we close it, so that if we are
 	 * doing a reopen(), we don't generate FMA ereports if we notice that
@@ -2694,17 +2699,6 @@
 		(void) vdev_validate(vd);
 	}
 
-	/*
-	 * Recheck if resilver is still needed and cancel any
-	 * scheduled resilver if resilver is unneeded.
-	 */
-	if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
-	    spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
-		mutex_enter(&spa->spa_async_lock);
-		spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
-		mutex_exit(&spa->spa_async_lock);
-	}
-
 	/*
 	 * Reassess parent vdev's health.
 	 */
@@ -4608,9 +4602,11 @@
 
 		memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
 
-		for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
-			vsx->vsx_active_queue[t] = vd->vdev_queue.vq_cactive[t];
-			vsx->vsx_pend_queue[t] = vdev_queue_class_length(vd, t);
+		for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
+			vsx->vsx_active_queue[t] =
+			    vd->vdev_queue.vq_class[t].vqc_active;
+			vsx->vsx_pend_queue[t] = avl_numnodes(
+			    &vd->vdev_queue.vq_class[t].vqc_queued_tree);
 		}
 	}
 }
@@ -5468,20 +5464,20 @@
 		vdev_queue_t *vq = &vd->vdev_queue;
 
 		mutex_enter(&vq->vq_lock);
-		if (vq->vq_active > 0) {
+		if (avl_numnodes(&vq->vq_active_tree) > 0) {
 			spa_t *spa = vd->vdev_spa;
 			zio_t *fio;
 			uint64_t delta;
 
-			zfs_dbgmsg("slow vdev: %s has %u active IOs",
-			    vd->vdev_path, vq->vq_active);
+			zfs_dbgmsg("slow vdev: %s has %lu active IOs",
+			    vd->vdev_path, avl_numnodes(&vq->vq_active_tree));
 
 			/*
 			 * Look at the head of all the pending queues,
 			 * if any I/O has been outstanding for longer than
 			 * the spa_deadman_synctime invoke the deadman logic.
 			 */
-			fio = list_head(&vq->vq_active_list);
+			fio = avl_first(&vq->vq_active_tree);
 			delta = gethrtime() - fio->io_timestamp;
 			if (delta > spa_deadman_synctime(spa))
 				zio_deadman(fio, tag);
diff --git a/sys/contrib/openzfs/module/zfs/vdev_cache.c b/sys/contrib/openzfs/module/zfs/vdev_cache.c
new file mode 100644
--- /dev/null
+++ b/sys/contrib/openzfs/module/zfs/vdev_cache.c
@@ -0,0 +1,436 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/spa.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#include <sys/kstat.h>
+#include <sys/abd.h>
+
+/*
+ * Virtual device read-ahead caching.
+ *
+ * This file implements a simple LRU read-ahead cache.  When the DMU reads
+ * a given block, it will often want other, nearby blocks soon thereafter.
+ * We take advantage of this by reading a larger disk region and caching
+ * the result.  In the best case, this can turn 128 back-to-back 512-byte
+ * reads into a single 64k read followed by 127 cache hits; this reduces
+ * latency dramatically.  In the worst case, it can turn an isolated 512-byte
+ * read into a 64k read, which doesn't affect latency all that much but is
+ * terribly wasteful of bandwidth.  A more intelligent version of the cache
+ * could keep track of access patterns and not do read-ahead unless it sees
+ * at least two temporally close I/Os to the same region.  Currently, only
+ * metadata I/O is inflated.  A further enhancement could take advantage of
+ * more semantic information about the I/O.  And it could use something
+ * faster than an AVL tree; that was chosen solely for convenience.
+ *
+ * There are five cache operations: allocate, fill, read, write, evict.
+ *
+ * (1) Allocate.  This reserves a cache entry for the specified region.
+ *     We separate the allocate and fill operations so that multiple threads
+ *     don't generate I/O for the same cache miss.
+ *
+ * (2) Fill.  When the I/O for a cache miss completes, the fill routine
+ *     places the data in the previously allocated cache entry.
+ *
+ * (3) Read.  Read data from the cache.
+ *
+ * (4) Write.  Update cache contents after write completion.
+ *
+ * (5) Evict.  When allocating a new entry, we evict the oldest (LRU) entry
+ *     if the total cache size exceeds zfs_vdev_cache_size.
+ */
+
+/*
+ * These tunables are for performance analysis.
+ */
+/*
+ * All i/os no larger than zfs_vdev_cache_max will be turned into
+ * 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
+ * track buffer).  At most zfs_vdev_cache_size bytes will be kept in each
+ * vdev's vdev_cache.
+ *
+ * TODO: Note that with the current ZFS code, it turns out that the
+ * vdev cache is not helpful, and in some cases actually harmful.  It
+ * is better if we disable this.  Once some time has passed, we should
+ * actually remove this to simplify the code.  For now we just disable
+ * it by setting the zfs_vdev_cache_size to zero.  Note that Solaris 11
+ * has made these same changes.
+ */
+static uint_t zfs_vdev_cache_max = 1 << 14;			/* 16KB */
+static uint_t zfs_vdev_cache_size = 0;
+static uint_t zfs_vdev_cache_bshift = 16;
+
+#define	VCBS (1 << zfs_vdev_cache_bshift)	/* 64KB */
+
+static kstat_t *vdc_ksp = NULL;
+
+typedef struct vdc_stats {
+	kstat_named_t vdc_stat_delegations;
+	kstat_named_t vdc_stat_hits;
+	kstat_named_t vdc_stat_misses;
+} vdc_stats_t;
+
+static vdc_stats_t vdc_stats = {
+	{ "delegations",	KSTAT_DATA_UINT64 },
+	{ "hits",		KSTAT_DATA_UINT64 },
+	{ "misses",		KSTAT_DATA_UINT64 }
+};
+
+#define	VDCSTAT_BUMP(stat)	atomic_inc_64(&vdc_stats.stat.value.ui64)
+
+static inline int
+vdev_cache_offset_compare(const void *a1, const void *a2)
+{
+	const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+	const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
+
+	return (TREE_CMP(ve1->ve_offset, ve2->ve_offset));
+}
+
+static int
+vdev_cache_lastused_compare(const void *a1, const void *a2)
+{
+	const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+	const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
+
+	int cmp = TREE_CMP(ve1->ve_lastused, ve2->ve_lastused);
+	if (likely(cmp))
+		return (cmp);
+
+	/*
+	 * Among equally old entries, sort by offset to ensure uniqueness.
+	 */
+	return (vdev_cache_offset_compare(a1, a2));
+}
+
+/*
+ * Evict the specified entry from the cache.
+ */
+static void
+vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
+{
+	ASSERT(MUTEX_HELD(&vc->vc_lock));
+	ASSERT3P(ve->ve_fill_io, ==, NULL);
+	ASSERT3P(ve->ve_abd, !=, NULL);
+
+	avl_remove(&vc->vc_lastused_tree, ve);
+	avl_remove(&vc->vc_offset_tree, ve);
+	abd_free(ve->ve_abd);
+	kmem_free(ve, sizeof (vdev_cache_entry_t));
+}
+
+/*
+ * Allocate an entry in the cache.  At this point we don't have the data,
+ * we're just creating a placeholder so that multiple threads don't all
+ * go off and read the same blocks.
+ */
+static vdev_cache_entry_t *
+vdev_cache_allocate(zio_t *zio)
+{
+	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+	uint64_t offset = P2ALIGN(zio->io_offset, VCBS);
+	vdev_cache_entry_t *ve;
+
+	ASSERT(MUTEX_HELD(&vc->vc_lock));
+
+	if (zfs_vdev_cache_size == 0)
+		return (NULL);
+
+	/*
+	 * If adding a new entry would exceed the cache size,
+	 * evict the oldest entry (LRU).
+	 */
+	if ((avl_numnodes(&vc->vc_lastused_tree) << zfs_vdev_cache_bshift) >
+	    zfs_vdev_cache_size) {
+		ve = avl_first(&vc->vc_lastused_tree);
+		if (ve->ve_fill_io != NULL)
+			return (NULL);
+		ASSERT3U(ve->ve_hits, !=, 0);
+		vdev_cache_evict(vc, ve);
+	}
+
+	ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
+	ve->ve_offset = offset;
+	ve->ve_lastused = ddi_get_lbolt();
+	ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE);
+
+	avl_add(&vc->vc_offset_tree, ve);
+	avl_add(&vc->vc_lastused_tree, ve);
+
+	return (ve);
+}
+
+static void
+vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
+{
+	uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
+
+	ASSERT(MUTEX_HELD(&vc->vc_lock));
+	ASSERT3P(ve->ve_fill_io, ==, NULL);
+
+	if (ve->ve_lastused != ddi_get_lbolt()) {
+		avl_remove(&vc->vc_lastused_tree, ve);
+		ve->ve_lastused = ddi_get_lbolt();
+		avl_add(&vc->vc_lastused_tree, ve);
+	}
+
+	ve->ve_hits++;
+	abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size);
+}
+
+/*
+ * Fill a previously allocated cache entry with data.
+ */
+static void
+vdev_cache_fill(zio_t *fio)
+{
+	vdev_t *vd = fio->io_vd;
+	vdev_cache_t *vc = &vd->vdev_cache;
+	vdev_cache_entry_t *ve = fio->io_private;
+	zio_t *pio;
+
+	ASSERT3U(fio->io_size, ==, VCBS);
+
+	/*
+	 * Add data to the cache.
+	 */
+	mutex_enter(&vc->vc_lock);
+
+	ASSERT3P(ve->ve_fill_io, ==, fio);
+	ASSERT3U(ve->ve_offset, ==, fio->io_offset);
+	ASSERT3P(ve->ve_abd, ==, fio->io_abd);
+
+	ve->ve_fill_io = NULL;
+
+	/*
+	 * Even if this cache line was invalidated by a missed write update,
+	 * any reads that were queued up before the missed update are still
+	 * valid, so we can satisfy them from this line before we evict it.
+	 */
+	zio_link_t *zl = NULL;
+	while ((pio = zio_walk_parents(fio, &zl)) != NULL)
+		vdev_cache_hit(vc, ve, pio);
+
+	if (fio->io_error || ve->ve_missed_update)
+		vdev_cache_evict(vc, ve);
+
+	mutex_exit(&vc->vc_lock);
+}
+
+/*
+ * Read data from the cache.  Returns B_TRUE on cache hit, B_FALSE on miss.
+ */
+boolean_t
+vdev_cache_read(zio_t *zio)
+{
+	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+	vdev_cache_entry_t *ve, ve_search;
+	uint64_t cache_offset = P2ALIGN(zio->io_offset, VCBS);
+	zio_t *fio;
+	uint64_t cache_phase __maybe_unused = P2PHASE(zio->io_offset, VCBS);
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
+
+	if (zfs_vdev_cache_size == 0)
+		return (B_FALSE);
+
+	if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
+		return (B_FALSE);
+
+	if (zio->io_size > zfs_vdev_cache_max)
+		return (B_FALSE);
+
+	/*
+	 * If the I/O straddles two or more cache blocks, don't cache it.
+	 */
+	if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
+		return (B_FALSE);
+
+	ASSERT3U(cache_phase + zio->io_size, <=, VCBS);
+
+	mutex_enter(&vc->vc_lock);
+
+	ve_search.ve_offset = cache_offset;
+	ve = avl_find(&vc->vc_offset_tree, &ve_search, NULL);
+
+	if (ve != NULL) {
+		if (ve->ve_missed_update) {
+			mutex_exit(&vc->vc_lock);
+			return (B_FALSE);
+		}
+
+		if ((fio = ve->ve_fill_io) != NULL) {
+			zio_vdev_io_bypass(zio);
+			zio_add_child(zio, fio);
+			mutex_exit(&vc->vc_lock);
+			VDCSTAT_BUMP(vdc_stat_delegations);
+			return (B_TRUE);
+		}
+
+		vdev_cache_hit(vc, ve, zio);
+		zio_vdev_io_bypass(zio);
+
+		mutex_exit(&vc->vc_lock);
+		VDCSTAT_BUMP(vdc_stat_hits);
+		return (B_TRUE);
+	}
+
+	ve = vdev_cache_allocate(zio);
+
+	if (ve == NULL) {
+		mutex_exit(&vc->vc_lock);
+		return (B_FALSE);
+	}
+
+	fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
+	    ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
+	    ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
+
+	ve->ve_fill_io = fio;
+	zio_vdev_io_bypass(zio);
+	zio_add_child(zio, fio);
+
+	mutex_exit(&vc->vc_lock);
+	zio_nowait(fio);
+	VDCSTAT_BUMP(vdc_stat_misses);
+
+	return (B_TRUE);
+}
+
+/*
+ * Update cache contents upon write completion.
+ */
+void
+vdev_cache_write(zio_t *zio)
+{
+	vdev_cache_t *vc = &zio->io_vd->vdev_cache;
+	vdev_cache_entry_t *ve, ve_search;
+	uint64_t io_start = zio->io_offset;
+	uint64_t io_end = io_start + zio->io_size;
+	uint64_t min_offset = P2ALIGN(io_start, VCBS);
+	uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
+	avl_index_t where;
+
+	ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
+
+	mutex_enter(&vc->vc_lock);
+
+	ve_search.ve_offset = min_offset;
+	ve = avl_find(&vc->vc_offset_tree, &ve_search, &where);
+
+	if (ve == NULL)
+		ve = avl_nearest(&vc->vc_offset_tree, where, AVL_AFTER);
+
+	while (ve != NULL && ve->ve_offset < max_offset) {
+		uint64_t start = MAX(ve->ve_offset, io_start);
+		uint64_t end = MIN(ve->ve_offset + VCBS, io_end);
+
+		if (ve->ve_fill_io != NULL) {
+			ve->ve_missed_update = 1;
+		} else {
+			abd_copy_off(ve->ve_abd, zio->io_abd,
+			    start - ve->ve_offset, start - io_start,
+			    end - start);
+		}
+		ve = AVL_NEXT(&vc->vc_offset_tree, ve);
+	}
+	mutex_exit(&vc->vc_lock);
+}
+
+void
+vdev_cache_purge(vdev_t *vd)
+{
+	vdev_cache_t *vc = &vd->vdev_cache;
+	vdev_cache_entry_t *ve;
+
+	mutex_enter(&vc->vc_lock);
+	while ((ve = avl_first(&vc->vc_offset_tree)) != NULL)
+		vdev_cache_evict(vc, ve);
+	mutex_exit(&vc->vc_lock);
+}
+
+void
+vdev_cache_init(vdev_t *vd)
+{
+	vdev_cache_t *vc = &vd->vdev_cache;
+
+	mutex_init(&vc->vc_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	avl_create(&vc->vc_offset_tree, vdev_cache_offset_compare,
+	    sizeof (vdev_cache_entry_t),
+	    offsetof(struct vdev_cache_entry, ve_offset_node));
+
+	avl_create(&vc->vc_lastused_tree, vdev_cache_lastused_compare,
+	    sizeof (vdev_cache_entry_t),
+	    offsetof(struct vdev_cache_entry, ve_lastused_node));
+}
+
+void
+vdev_cache_fini(vdev_t *vd)
+{
+	vdev_cache_t *vc = &vd->vdev_cache;
+
+	vdev_cache_purge(vd);
+
+	avl_destroy(&vc->vc_offset_tree);
+	avl_destroy(&vc->vc_lastused_tree);
+
+	mutex_destroy(&vc->vc_lock);
+}
+
+void
+vdev_cache_stat_init(void)
+{
+	vdc_ksp = kstat_create("zfs", 0, "vdev_cache_stats", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (vdc_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (vdc_ksp != NULL) {
+		vdc_ksp->ks_data = &vdc_stats;
+		kstat_install(vdc_ksp);
+	}
+}
+
+void
+vdev_cache_stat_fini(void)
+{
+	if (vdc_ksp != NULL) {
+		kstat_delete(vdc_ksp);
+		vdc_ksp = NULL;
+	}
+}
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_max, UINT, ZMOD_RW,
+	"Inflate reads smaller than max");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_size, UINT, ZMOD_RD,
+	"Total size of the per-disk cache");
+
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, cache_bshift, UINT, ZMOD_RW,
+	"Shift size to inflate reads to");
diff --git a/sys/contrib/openzfs/module/zfs/vdev_indirect.c b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@@ -293,16 +293,17 @@
 	indirect_vsd_t *iv = zio->io_vsd;
 
 	indirect_split_t *is;
-	while ((is = list_remove_head(&iv->iv_splits)) != NULL) {
+	while ((is = list_head(&iv->iv_splits)) != NULL) {
 		for (int c = 0; c < is->is_children; c++) {
 			indirect_child_t *ic = &is->is_child[c];
 			if (ic->ic_data != NULL)
 				abd_free(ic->ic_data);
 		}
+		list_remove(&iv->iv_splits, is);
 
 		indirect_child_t *ic;
-		while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
-			;
+		while ((ic = list_head(&is->is_unique_child)) != NULL)
+			list_remove(&is->is_unique_child, ic);
 
 		list_destroy(&is->is_unique_child);
 
@@ -1658,8 +1659,8 @@
 	for (indirect_split_t *is = list_head(&iv->iv_splits);
 	    is != NULL; is = list_next(&iv->iv_splits, is)) {
 		indirect_child_t *ic;
-		while ((ic = list_remove_head(&is->is_unique_child)) != NULL)
-			;
+		while ((ic = list_head(&is->is_unique_child)) != NULL)
+			list_remove(&is->is_unique_child, ic);
 
 		is->is_unique_children = 0;
 	}
diff --git a/sys/contrib/openzfs/module/zfs/vdev_label.c b/sys/contrib/openzfs/module/zfs/vdev_label.c
--- a/sys/contrib/openzfs/module/zfs/vdev_label.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_label.c
@@ -486,9 +486,6 @@
 	if (vd->vdev_isspare)
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
 
-	if (flags & VDEV_CONFIG_L2CACHE)
-		fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
-
 	if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
 	    vd == vd->vdev_top) {
 		fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c
--- a/sys/contrib/openzfs/module/zfs/vdev_queue.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c
@@ -228,6 +228,13 @@
  */
 uint_t zfs_vdev_def_queue_depth = 32;
 
+/*
+ * Allow TRIM I/Os to be aggregated.  This should normally not be needed since
+ * TRIM I/O for extents up to zfs_trim_extent_bytes_max (128M) can be submitted
+ * by the TRIM code in zfs_trim.c.
+ */
+static uint_t zfs_vdev_aggregate_trim = 0;
+
 static int
 vdev_queue_offset_compare(const void *x1, const void *x2)
 {
@@ -242,60 +249,38 @@
 	return (TREE_PCMP(z1, z2));
 }
 
-#define	VDQ_T_SHIFT 29
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+	return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+	ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE || t == ZIO_TYPE_TRIM);
+	if (t == ZIO_TYPE_READ)
+		return (&vq->vq_read_offset_tree);
+	else if (t == ZIO_TYPE_WRITE)
+		return (&vq->vq_write_offset_tree);
+	else
+		return (&vq->vq_trim_offset_tree);
+}
 
 static int
-vdev_queue_to_compare(const void *x1, const void *x2)
+vdev_queue_timestamp_compare(const void *x1, const void *x2)
 {
 	const zio_t *z1 = (const zio_t *)x1;
 	const zio_t *z2 = (const zio_t *)x2;
 
-	int tcmp = TREE_CMP(z1->io_timestamp >> VDQ_T_SHIFT,
-	    z2->io_timestamp >> VDQ_T_SHIFT);
-	int ocmp = TREE_CMP(z1->io_offset, z2->io_offset);
-	int cmp = tcmp ? tcmp : ocmp;
+	int cmp = TREE_CMP(z1->io_timestamp, z2->io_timestamp);
 
-	if (likely(cmp | (z1->io_queue_state == ZIO_QS_NONE)))
+	if (likely(cmp))
 		return (cmp);
 
 	return (TREE_PCMP(z1, z2));
 }
 
-static inline boolean_t
-vdev_queue_class_fifo(zio_priority_t p)
-{
-	return (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE ||
-	    p == ZIO_PRIORITY_TRIM);
-}
-
-static void
-vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
-{
-	zio_priority_t p = zio->io_priority;
-	vq->vq_cqueued |= 1U << p;
-	if (vdev_queue_class_fifo(p))
-		list_insert_tail(&vq->vq_class[p].vqc_list, zio);
-	else
-		avl_add(&vq->vq_class[p].vqc_tree, zio);
-}
-
-static void
-vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
-{
-	zio_priority_t p = zio->io_priority;
-	uint32_t empty;
-	if (vdev_queue_class_fifo(p)) {
-		list_t *list = &vq->vq_class[p].vqc_list;
-		list_remove(list, zio);
-		empty = list_is_empty(list);
-	} else {
-		avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
-		avl_remove(tree, zio);
-		empty = avl_is_empty(tree);
-	}
-	vq->vq_cqueued &= ~(empty << p);
-}
-
 static uint_t
 vdev_queue_class_min_active(vdev_queue_t *vq, zio_priority_t p)
 {
@@ -375,7 +360,7 @@
 }
 
 static uint_t
-vdev_queue_class_max_active(vdev_queue_t *vq, zio_priority_t p)
+vdev_queue_class_max_active(spa_t *spa, vdev_queue_t *vq, zio_priority_t p)
 {
 	switch (p) {
 	case ZIO_PRIORITY_SYNC_READ:
@@ -385,7 +370,7 @@
 	case ZIO_PRIORITY_ASYNC_READ:
 		return (zfs_vdev_async_read_max_active);
 	case ZIO_PRIORITY_ASYNC_WRITE:
-		return (vdev_queue_max_async_writes(vq->vq_vdev->vdev_spa));
+		return (vdev_queue_max_async_writes(spa));
 	case ZIO_PRIORITY_SCRUB:
 		if (vq->vq_ia_active > 0) {
 			return (MIN(vq->vq_nia_credit,
@@ -429,10 +414,10 @@
 static zio_priority_t
 vdev_queue_class_to_issue(vdev_queue_t *vq)
 {
-	uint32_t cq = vq->vq_cqueued;
-	zio_priority_t p, p1;
+	spa_t *spa = vq->vq_vdev->vdev_spa;
+	zio_priority_t p, n;
 
-	if (cq == 0 || vq->vq_active >= zfs_vdev_max_active)
+	if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
 		return (ZIO_PRIORITY_NUM_QUEUEABLE);
 
 	/*
@@ -440,18 +425,14 @@
 	 * Do round-robin to reduce starvation due to zfs_vdev_max_active
 	 * and vq_nia_credit limits.
 	 */
-	p1 = vq->vq_last_prio + 1;
-	if (p1 >= ZIO_PRIORITY_NUM_QUEUEABLE)
-		p1 = 0;
-	for (p = p1; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
-		    vdev_queue_class_min_active(vq, p))
-			goto found;
-	}
-	for (p = 0; p < p1; p++) {
-		if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
-		    vdev_queue_class_min_active(vq, p))
-			goto found;
+	for (n = 0; n < ZIO_PRIORITY_NUM_QUEUEABLE; n++) {
+		p = (vq->vq_last_prio + n + 1) % ZIO_PRIORITY_NUM_QUEUEABLE;
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
+		    vq->vq_class[p].vqc_active <
+		    vdev_queue_class_min_active(vq, p)) {
+			vq->vq_last_prio = p;
+			return (p);
+		}
 	}
 
 	/*
@@ -459,14 +440,16 @@
 	 * maximum # outstanding i/os.
 	 */
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if ((cq & (1U << p)) != 0 && vq->vq_cactive[p] <
-		    vdev_queue_class_max_active(vq, p))
-			break;
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
+		    vq->vq_class[p].vqc_active <
+		    vdev_queue_class_max_active(spa, vq, p)) {
+			vq->vq_last_prio = p;
+			return (p);
+		}
 	}
 
-found:
-	vq->vq_last_prio = p;
-	return (p);
+	/* No eligible queued i/os */
+	return (ZIO_PRIORITY_NUM_QUEUEABLE);
 }
 
 void
@@ -475,30 +458,42 @@
 	vdev_queue_t *vq = &vd->vdev_queue;
 	zio_priority_t p;
 
+	mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
 	vq->vq_vdev = vd;
+	taskq_init_ent(&vd->vdev_queue.vq_io_search.io_tqent);
 
-	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (vdev_queue_class_fifo(p)) {
-			list_create(&vq->vq_class[p].vqc_list,
-			    sizeof (zio_t),
-			    offsetof(struct zio, io_queue_node.l));
-		} else {
-			avl_create(&vq->vq_class[p].vqc_tree,
-			    vdev_queue_to_compare, sizeof (zio_t),
-			    offsetof(struct zio, io_queue_node.a));
-		}
-	}
-	avl_create(&vq->vq_read_offset_tree,
+	avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
+	    sizeof (zio_t), offsetof(struct zio, io_queue_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+	    vdev_queue_offset_compare, sizeof (zio_t),
+	    offsetof(struct zio, io_offset_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
 	    vdev_queue_offset_compare, sizeof (zio_t),
 	    offsetof(struct zio, io_offset_node));
-	avl_create(&vq->vq_write_offset_tree,
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM),
 	    vdev_queue_offset_compare, sizeof (zio_t),
 	    offsetof(struct zio, io_offset_node));
 
+	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+		int (*compfn) (const void *, const void *);
+
+		/*
+		 * The synchronous/trim i/o queues are dispatched in FIFO rather
+		 * than LBA order. This provides more consistent latency for
+		 * these i/os.
+		 */
+		if (p == ZIO_PRIORITY_SYNC_READ ||
+		    p == ZIO_PRIORITY_SYNC_WRITE ||
+		    p == ZIO_PRIORITY_TRIM) {
+			compfn = vdev_queue_timestamp_compare;
+		} else {
+			compfn = vdev_queue_offset_compare;
+		}
+		avl_create(vdev_queue_class_tree(vq, p), compfn,
+		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
+	}
+
 	vq->vq_last_offset = 0;
-	list_create(&vq->vq_active_list, sizeof (struct zio),
-	    offsetof(struct zio, io_queue_node.l));
-	mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
 }
 
 void
@@ -506,39 +501,30 @@
 {
 	vdev_queue_t *vq = &vd->vdev_queue;
 
-	for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (vdev_queue_class_fifo(p))
-			list_destroy(&vq->vq_class[p].vqc_list);
-		else
-			avl_destroy(&vq->vq_class[p].vqc_tree);
-	}
-	avl_destroy(&vq->vq_read_offset_tree);
-	avl_destroy(&vq->vq_write_offset_tree);
+	for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
+		avl_destroy(vdev_queue_class_tree(vq, p));
+	avl_destroy(&vq->vq_active_tree);
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_TRIM));
 
-	list_destroy(&vq->vq_active_list);
 	mutex_destroy(&vq->vq_lock);
 }
 
 static void
 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 {
-	zio->io_queue_state = ZIO_QS_QUEUED;
-	vdev_queue_class_add(vq, zio);
-	if (zio->io_type == ZIO_TYPE_READ)
-		avl_add(&vq->vq_read_offset_tree, zio);
-	else if (zio->io_type == ZIO_TYPE_WRITE)
-		avl_add(&vq->vq_write_offset_tree, zio);
+	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+	avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
 }
 
 static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
-	vdev_queue_class_remove(vq, zio);
-	if (zio->io_type == ZIO_TYPE_READ)
-		avl_remove(&vq->vq_read_offset_tree, zio);
-	else if (zio->io_type == ZIO_TYPE_WRITE)
-		avl_remove(&vq->vq_write_offset_tree, zio);
-	zio->io_queue_state = ZIO_QS_NONE;
+	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+	avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
 }
 
 static boolean_t
@@ -560,16 +546,14 @@
 {
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	vq->vq_cactive[zio->io_priority]++;
-	vq->vq_active++;
+	vq->vq_class[zio->io_priority].vqc_active++;
 	if (vdev_queue_is_interactive(zio->io_priority)) {
 		if (++vq->vq_ia_active == 1)
 			vq->vq_nia_credit = 1;
 	} else if (vq->vq_ia_active > 0) {
 		vq->vq_nia_credit--;
 	}
-	zio->io_queue_state = ZIO_QS_ACTIVE;
-	list_insert_tail(&vq->vq_active_list, zio);
+	avl_add(&vq->vq_active_tree, zio);
 }
 
 static void
@@ -577,8 +561,7 @@
 {
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	vq->vq_cactive[zio->io_priority]--;
-	vq->vq_active--;
+	vq->vq_class[zio->io_priority].vqc_active--;
 	if (vdev_queue_is_interactive(zio->io_priority)) {
 		if (--vq->vq_ia_active == 0)
 			vq->vq_nia_credit = 0;
@@ -586,8 +569,7 @@
 			vq->vq_nia_credit = zfs_vdev_nia_credit;
 	} else if (vq->vq_ia_active == 0)
 		vq->vq_nia_credit++;
-	list_remove(&vq->vq_active_list, zio);
-	zio->io_queue_state = ZIO_QS_NONE;
+	avl_remove(&vq->vq_active_tree, zio);
 }
 
 static void
@@ -620,28 +602,29 @@
 	uint64_t maxgap = 0;
 	uint64_t size;
 	uint64_t limit;
+	int maxblocksize;
 	boolean_t stretch = B_FALSE;
+	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
+	zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
 	uint64_t next_offset;
 	abd_t *abd;
-	avl_tree_t *t;
-
-	/*
-	 * TRIM aggregation should not be needed since code in zfs_trim.c can
-	 * submit TRIM I/O for extents up to zfs_trim_extent_bytes_max (128M).
-	 */
-	if (zio->io_type == ZIO_TYPE_TRIM)
-		return (NULL);
-
-	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
-		return (NULL);
 
+	maxblocksize = spa_maxblocksize(vq->vq_vdev->vdev_spa);
 	if (vq->vq_vdev->vdev_nonrot)
 		limit = zfs_vdev_aggregation_limit_non_rotating;
 	else
 		limit = zfs_vdev_aggregation_limit;
-	if (limit == 0)
+	limit = MIN(limit, maxblocksize);
+
+	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE || limit == 0)
+		return (NULL);
+
+	/*
+	 * While TRIM commands could be aggregated based on offset this
+	 * behavior is disabled until it's determined to be beneficial.
+	 */
+	if (zio->io_type == ZIO_TYPE_TRIM && !zfs_vdev_aggregate_trim)
 		return (NULL);
-	limit = MIN(limit, SPA_MAXBLOCKSIZE);
 
 	/*
 	 * I/Os to distributed spares are directly dispatched to the dRAID
@@ -652,13 +635,8 @@
 
 	first = last = zio;
 
-	if (zio->io_type == ZIO_TYPE_READ) {
+	if (zio->io_type == ZIO_TYPE_READ)
 		maxgap = zfs_vdev_read_gap_limit;
-		t = &vq->vq_read_offset_tree;
-	} else {
-		ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
-		t = &vq->vq_write_offset_tree;
-	}
 
 	/*
 	 * We can aggregate I/Os that are sufficiently adjacent and of
@@ -679,7 +657,6 @@
 	 * Walk backwards through sufficiently contiguous I/Os
 	 * recording the last non-optional I/O.
 	 */
-	zio_flag_t flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
 	while ((dio = AVL_PREV(t, first)) != NULL &&
 	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
 	    IO_SPAN(dio, last) <= limit &&
@@ -709,7 +686,7 @@
 	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
 	    (IO_SPAN(first, dio) <= limit ||
 	    (dio->io_flags & ZIO_FLAG_OPTIONAL)) &&
-	    IO_SPAN(first, dio) <= SPA_MAXBLOCKSIZE &&
+	    IO_SPAN(first, dio) <= maxblocksize &&
 	    IO_GAP(last, dio) <= maxgap &&
 	    dio->io_type == zio->io_type) {
 		last = dio;
@@ -763,7 +740,7 @@
 		return (NULL);
 
 	size = IO_SPAN(first, last);
-	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
+	ASSERT3U(size, <=, maxblocksize);
 
 	abd = abd_alloc_gang();
 	if (abd == NULL)
@@ -771,7 +748,8 @@
 
 	aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
 	    abd, size, first->io_type, zio->io_priority,
-	    flags | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL);
+	    flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
+	    vdev_queue_agg_io_done, NULL);
 	aio->io_timestamp = first->io_timestamp;
 
 	nio = first;
@@ -847,30 +825,19 @@
 		return (NULL);
 	}
 
-	if (vdev_queue_class_fifo(p)) {
-		zio = list_head(&vq->vq_class[p].vqc_list);
-	} else {
-		/*
-		 * For LBA-ordered queues (async / scrub / initializing),
-		 * issue the I/O which follows the most recently issued I/O
-		 * in LBA (offset) order, but to avoid starvation only within
-		 * the same 0.5 second interval as the first I/O.
-		 */
-		tree = &vq->vq_class[p].vqc_tree;
-		zio = aio = avl_first(tree);
-		if (zio->io_offset < vq->vq_last_offset) {
-			vq->vq_io_search.io_timestamp = zio->io_timestamp;
-			vq->vq_io_search.io_offset = vq->vq_last_offset;
-			zio = avl_find(tree, &vq->vq_io_search, &idx);
-			if (zio == NULL) {
-				zio = avl_nearest(tree, idx, AVL_AFTER);
-				if (zio == NULL ||
-				    (zio->io_timestamp >> VDQ_T_SHIFT) !=
-				    (aio->io_timestamp >> VDQ_T_SHIFT))
-					zio = aio;
-			}
-		}
-	}
+	/*
+	 * For LBA-ordered queues (async / scrub / initializing), issue the
+	 * i/o which follows the most recently issued i/o in LBA (offset) order.
+	 *
+	 * For FIFO queues (sync/trim), issue the i/o with the lowest timestamp.
+	 */
+	tree = vdev_queue_class_tree(vq, p);
+	vq->vq_io_search.io_timestamp = 0;
+	vq->vq_io_search.io_offset = vq->vq_last_offset - 1;
+	VERIFY3P(avl_find(tree, &vq->vq_io_search, &idx), ==, NULL);
+	zio = avl_nearest(tree, idx, AVL_AFTER);
+	if (zio == NULL)
+		zio = avl_first(tree);
 	ASSERT3U(zio->io_priority, ==, p);
 
 	aio = vdev_queue_aggregate(vq, zio);
@@ -940,7 +907,7 @@
 		ASSERT(zio->io_priority == ZIO_PRIORITY_TRIM);
 	}
 
-	zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
+	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
 	zio->io_timestamp = gethrtime();
 
 	mutex_enter(&vq->vq_lock);
@@ -1001,6 +968,7 @@
 vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority)
 {
 	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
+	avl_tree_t *tree;
 
 	/*
 	 * ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio
@@ -1035,11 +1003,12 @@
 	 * Otherwise, the zio is currently active and we cannot change its
 	 * priority.
 	 */
-	if (zio->io_queue_state == ZIO_QS_QUEUED) {
-		vdev_queue_class_remove(vq, zio);
+	tree = vdev_queue_class_tree(vq, zio->io_priority);
+	if (avl_find(tree, zio, NULL) == zio) {
+		avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
 		zio->io_priority = priority;
-		vdev_queue_class_add(vq, zio);
-	} else if (zio->io_queue_state == ZIO_QS_NONE) {
+		avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	} else if (avl_find(&vq->vq_active_tree, zio, NULL) != zio) {
 		zio->io_priority = priority;
 	}
 
@@ -1052,10 +1021,10 @@
  * vq_lock mutex use here, instead we prefer to keep it lock free for
  * performance.
  */
-uint32_t
+int
 vdev_queue_length(vdev_t *vd)
 {
-	return (vd->vdev_queue.vq_active);
+	return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
 }
 
 uint64_t
@@ -1064,22 +1033,15 @@
 	return (vd->vdev_queue.vq_last_offset);
 }
 
-uint64_t
-vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
-{
-	vdev_queue_t *vq = &vd->vdev_queue;
-	if (vdev_queue_class_fifo(p))
-		return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
-	else
-		return (avl_numnodes(&vq->vq_class[p].vqc_tree));
-}
-
 ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit, UINT, ZMOD_RW,
 	"Max vdev I/O aggregation size");
 
 ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregation_limit_non_rotating, UINT,
 	ZMOD_RW, "Max vdev I/O aggregation size for non-rotating media");
 
+ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, aggregate_trim, UINT, ZMOD_RW,
+	"Allow TRIM I/O to be aggregated");
+
 ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, read_gap_limit, UINT, ZMOD_RW,
 	"Aggregate read I/O over gap");
 
diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
--- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c
@@ -571,10 +571,8 @@
 	vdev_rebuild_blkptr_init(&blk, vd, start, size);
 	uint64_t psize = BP_GET_PSIZE(&blk);
 
-	if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN)) {
-		vr->vr_pass_bytes_skipped += size;
+	if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
 		return (0);
-	}
 
 	mutex_enter(&vr->vr_io_lock);
 
@@ -788,7 +786,6 @@
 	vr->vr_pass_start_time = gethrtime();
 	vr->vr_pass_bytes_scanned = 0;
 	vr->vr_pass_bytes_issued = 0;
-	vr->vr_pass_bytes_skipped = 0;
 
 	uint64_t update_est_time = gethrtime();
 	vdev_rebuild_update_bytes_est(vd, 0);
@@ -1156,7 +1153,6 @@
 		    vr->vr_pass_start_time);
 		vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned;
 		vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued;
-		vrs->vrs_pass_bytes_skipped = vr->vr_pass_bytes_skipped;
 		mutex_exit(&tvd->vdev_rebuild_lock);
 	}
 
diff --git a/sys/contrib/openzfs/module/zfs/zap_micro.c b/sys/contrib/openzfs/module/zfs/zap_micro.c
--- a/sys/contrib/openzfs/module/zfs/zap_micro.c
+++ b/sys/contrib/openzfs/module/zfs/zap_micro.c
@@ -285,7 +285,6 @@
 	}
 }
 
-__attribute__((always_inline)) inline
 static int
 mze_compare(const void *arg1, const void *arg2)
 {
@@ -296,9 +295,6 @@
 	    (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
 }
 
-ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
-    mze_compare)
-
 static void
 mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
 {
@@ -465,7 +461,7 @@
 		 * 62 entries before we have to add 2KB B-tree core node.
 		 */
 		zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
-		    mze_find_in_buf, sizeof (mzap_ent_t), 512);
+		    sizeof (mzap_ent_t), 512);
 
 		zap_name_t *zn = zap_name_alloc(zap);
 		for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fm.c b/sys/contrib/openzfs/module/zfs/zfs_fm.c
--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@@ -1522,8 +1522,9 @@
 {
 	recent_events_node_t *entry;
 
-	while ((entry = list_remove_head(&recent_events_list)) != NULL) {
+	while ((entry = list_head(&recent_events_list)) != NULL) {
 		avl_remove(&recent_events_tree, entry);
+		list_remove(&recent_events_list, entry);
 		kmem_free(entry, sizeof (*entry));
 	}
 	avl_destroy(&recent_events_tree);
diff --git a/sys/contrib/openzfs/module/zfs/zfs_fuid.c b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@@ -699,15 +699,19 @@
 	zfs_fuid_t *zfuid;
 	zfs_fuid_domain_t *zdomain;
 
-	while ((zfuid = list_remove_head(&fuidp->z_fuids)) != NULL)
+	while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
+		list_remove(&fuidp->z_fuids, zfuid);
 		kmem_free(zfuid, sizeof (zfs_fuid_t));
+	}
 
 	if (fuidp->z_domain_table != NULL)
 		kmem_free(fuidp->z_domain_table,
 		    (sizeof (char *)) * fuidp->z_domain_cnt);
 
-	while ((zdomain = list_remove_head(&fuidp->z_domains)) != NULL)
+	while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
+		list_remove(&fuidp->z_domains, zdomain);
 		kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
+	}
 
 	kmem_free(fuidp, sizeof (zfs_fuid_info_t));
 }
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -87,7 +87,8 @@
 	zfs_onexit_action_node_t *ap;
 
 	mutex_enter(&zo->zo_lock);
-	while ((ap = list_remove_head(&zo->zo_actions)) != NULL) {
+	while ((ap = list_head(&zo->zo_actions)) != NULL) {
+		list_remove(&zo->zo_actions, ap);
 		mutex_exit(&zo->zo_lock);
 		ap->za_func(ap->za_data);
 		kmem_free(ap, sizeof (zfs_onexit_action_node_t));
diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@@ -462,12 +462,14 @@
 		return (SET_ERROR(EINVAL));
 	}
 
+	const uint64_t max_blksz = zfsvfs->z_max_blksz;
+
 	/*
 	 * Pre-fault the pages to ensure slow (eg NFS) pages
 	 * don't hold up txg.
+	 * Skip this if uio contains loaned arc_buf.
 	 */
-	ssize_t pfbytes = MIN(n, DMU_MAX_ACCESS >> 1);
-	if (zfs_uio_prefaultpages(pfbytes, uio)) {
+	if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFAULT));
 	}
@@ -542,31 +544,10 @@
 			break;
 		}
 
-		uint64_t blksz;
-		if (lr->lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) {
-			if (zp->z_blksz > zfsvfs->z_max_blksz &&
-			    !ISP2(zp->z_blksz)) {
-				/*
-				 * File's blocksize is already larger than the
-				 * "recordsize" property.  Only let it grow to
-				 * the next power of 2.
-				 */
-				blksz = 1 << highbit64(zp->z_blksz);
-			} else {
-				blksz = zfsvfs->z_max_blksz;
-			}
-			blksz = MIN(blksz, P2ROUNDUP(end_size,
-			    SPA_MINBLOCKSIZE));
-			blksz = MAX(blksz, zp->z_blksz);
-		} else {
-			blksz = zp->z_blksz;
-		}
-
 		arc_buf_t *abuf = NULL;
-		ssize_t nbytes = n;
-		if (n >= blksz && woff >= zp->z_size &&
-		    P2PHASE(woff, blksz) == 0 &&
-		    (blksz >= SPA_OLD_MAXBLOCKSIZE || n < 4 * blksz)) {
+		if (n >= max_blksz && woff >= zp->z_size &&
+		    P2PHASE(woff, max_blksz) == 0 &&
+		    zp->z_blksz == max_blksz) {
 			/*
 			 * This write covers a full block.  "Borrow" a buffer
 			 * from the dmu so that we can fill it before we enter
@@ -574,26 +555,18 @@
 			 * holding up the transaction if the data copy hangs
 			 * up on a pagefault (e.g., from an NFS server mapping).
 			 */
+			size_t cbytes;
+
 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
-			    blksz);
+			    max_blksz);
 			ASSERT(abuf != NULL);
-			ASSERT(arc_buf_size(abuf) == blksz);
-			if ((error = zfs_uiocopy(abuf->b_data, blksz,
-			    UIO_WRITE, uio, &nbytes))) {
+			ASSERT(arc_buf_size(abuf) == max_blksz);
+			if ((error = zfs_uiocopy(abuf->b_data, max_blksz,
+			    UIO_WRITE, uio, &cbytes))) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
-			ASSERT3S(nbytes, ==, blksz);
-		} else {
-			nbytes = MIN(n, (DMU_MAX_ACCESS >> 1) -
-			    P2PHASE(woff, blksz));
-			if (pfbytes < nbytes) {
-				if (zfs_uio_prefaultpages(nbytes, uio)) {
-					error = SET_ERROR(EFAULT);
-					break;
-				}
-				pfbytes = nbytes;
-			}
+			ASSERT3S(cbytes, ==, max_blksz);
 		}
 
 		/*
@@ -603,7 +576,8 @@
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
 		DB_DNODE_ENTER(db);
-		dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff, nbytes);
+		dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff,
+		    MIN(n, max_blksz));
 		DB_DNODE_EXIT(db);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
@@ -626,10 +600,31 @@
 		 * shrink down lr_length to the appropriate size.
 		 */
 		if (lr->lr_length == UINT64_MAX) {
-			zfs_grow_blocksize(zp, blksz, tx);
+			uint64_t new_blksz;
+
+			if (zp->z_blksz > max_blksz) {
+				/*
+				 * File's blocksize is already larger than the
+				 * "recordsize" property.  Only let it grow to
+				 * the next power of 2.
+				 */
+				ASSERT(!ISP2(zp->z_blksz));
+				new_blksz = MIN(end_size,
+				    1 << highbit64(zp->z_blksz));
+			} else {
+				new_blksz = MIN(end_size, max_blksz);
+			}
+			zfs_grow_blocksize(zp, new_blksz, tx);
 			zfs_rangelock_reduce(lr, woff, n);
 		}
 
+		/*
+		 * XXX - should we really limit each write to z_max_blksz?
+		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
+		 */
+		const ssize_t nbytes =
+		    MIN(n, max_blksz - P2PHASE(woff, max_blksz));
+
 		ssize_t tx_bytes;
 		if (abuf == NULL) {
 			tx_bytes = zfs_uio_resid(uio);
@@ -649,8 +644,12 @@
 				 * zfs_uio_prefaultpages, or prefaultpages may
 				 * error, and we may break the loop early.
 				 */
-				n -= tx_bytes - zfs_uio_resid(uio);
-				pfbytes -= tx_bytes - zfs_uio_resid(uio);
+				if (tx_bytes != zfs_uio_resid(uio))
+					n -= tx_bytes - zfs_uio_resid(uio);
+				if (zfs_uio_prefaultpages(MIN(n, max_blksz),
+				    uio)) {
+					break;
+				}
 				continue;
 			}
 #endif
@@ -666,6 +665,15 @@
 			}
 			tx_bytes -= zfs_uio_resid(uio);
 		} else {
+			/* Implied by abuf != NULL: */
+			ASSERT3S(n, >=, max_blksz);
+			ASSERT0(P2PHASE(woff, max_blksz));
+			/*
+			 * We can simplify nbytes to MIN(n, max_blksz) since
+			 * P2PHASE(woff, max_blksz) is 0, and knowing
+			 * n >= max_blksz lets us simplify further:
+			 */
+			ASSERT3S(nbytes, ==, max_blksz);
 			/*
 			 * Thus, we're writing a full block at a block-aligned
 			 * offset and extending the file past EOF.
@@ -750,7 +758,13 @@
 			break;
 		ASSERT3S(tx_bytes, ==, nbytes);
 		n -= nbytes;
-		pfbytes -= nbytes;
+
+		if (n > 0) {
+			if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
+				error = SET_ERROR(EFAULT);
+				break;
+			}
+		}
 	}
 
 	zfs_znode_update_vfs(zp);
diff --git a/sys/contrib/openzfs/module/zfs/zil.c b/sys/contrib/openzfs/module/zfs/zil.c
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@@ -116,12 +116,8 @@
 	{ "zil_itx_needcopy_bytes",		KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_normal_count",	KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_normal_bytes",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_normal_write",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_normal_alloc",	KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_slog_count",	KSTAT_DATA_UINT64 },
 	{ "zil_itx_metaslab_slog_bytes",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_slog_write",	KSTAT_DATA_UINT64 },
-	{ "zil_itx_metaslab_slog_alloc",	KSTAT_DATA_UINT64 },
 };
 
 static zil_sums_t zil_sums_global;
@@ -150,10 +146,6 @@
 static kmem_cache_t *zil_lwb_cache;
 static kmem_cache_t *zil_zcw_cache;
 
-static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
-static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
-static itx_t *zil_itx_clone(itx_t *oitx);
-
 static int
 zil_bp_compare(const void *x1, const void *x2)
 {
@@ -249,10 +241,11 @@
  */
 static int
 zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
-    blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
+    blkptr_t *nbp, void *dst, char **end)
 {
 	zio_flag_t zio_flags = ZIO_FLAG_CANFAIL;
 	arc_flags_t aflags = ARC_FLAG_WAIT;
+	arc_buf_t *abuf = NULL;
 	zbookmark_phys_t zb;
 	int error;
 
@@ -269,7 +262,7 @@
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
 	error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
-	    abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
+	    &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
 
 	if (error == 0) {
 		zio_cksum_t cksum = bp->blk_cksum;
@@ -284,23 +277,23 @@
 		 */
 		cksum.zc_word[ZIL_ZC_SEQ]++;
 
-		uint64_t size = BP_GET_LSIZE(bp);
 		if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
-			zil_chain_t *zilc = (*abuf)->b_data;
+			zil_chain_t *zilc = abuf->b_data;
 			char *lr = (char *)(zilc + 1);
+			uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
 
 			if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
-			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
-			    zilc->zc_nused < sizeof (*zilc) ||
-			    zilc->zc_nused > size) {
+			    sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
 				error = SET_ERROR(ECKSUM);
 			} else {
-				*begin = lr;
-				*end = lr + zilc->zc_nused - sizeof (*zilc);
+				ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
+				memcpy(dst, lr, len);
+				*end = (char *)dst + len;
 				*nbp = zilc->zc_next_blk;
 			}
 		} else {
-			char *lr = (*abuf)->b_data;
+			char *lr = abuf->b_data;
+			uint64_t size = BP_GET_LSIZE(bp);
 			zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
 
 			if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
@@ -308,11 +301,15 @@
 			    (zilc->zc_nused > (size - sizeof (*zilc)))) {
 				error = SET_ERROR(ECKSUM);
 			} else {
-				*begin = lr;
-				*end = lr + zilc->zc_nused;
+				ASSERT3U(zilc->zc_nused, <=,
+				    SPA_OLD_MAXBLOCKSIZE);
+				memcpy(dst, lr, zilc->zc_nused);
+				*end = (char *)dst + zilc->zc_nused;
 				*nbp = zilc->zc_next_blk;
 			}
 		}
+
+		arc_buf_destroy(abuf, &abuf);
 	}
 
 	return (error);
@@ -378,12 +375,8 @@
 	wmsum_init(&zs->zil_itx_needcopy_bytes, 0);
 	wmsum_init(&zs->zil_itx_metaslab_normal_count, 0);
 	wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0);
-	wmsum_init(&zs->zil_itx_metaslab_normal_write, 0);
-	wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0);
 	wmsum_init(&zs->zil_itx_metaslab_slog_count, 0);
 	wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0);
-	wmsum_init(&zs->zil_itx_metaslab_slog_write, 0);
-	wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0);
 }
 
 void
@@ -400,12 +393,8 @@
 	wmsum_fini(&zs->zil_itx_needcopy_bytes);
 	wmsum_fini(&zs->zil_itx_metaslab_normal_count);
 	wmsum_fini(&zs->zil_itx_metaslab_normal_bytes);
-	wmsum_fini(&zs->zil_itx_metaslab_normal_write);
-	wmsum_fini(&zs->zil_itx_metaslab_normal_alloc);
 	wmsum_fini(&zs->zil_itx_metaslab_slog_count);
 	wmsum_fini(&zs->zil_itx_metaslab_slog_bytes);
-	wmsum_fini(&zs->zil_itx_metaslab_slog_write);
-	wmsum_fini(&zs->zil_itx_metaslab_slog_alloc);
 }
 
 void
@@ -433,18 +422,10 @@
 	    wmsum_value(&zil_sums->zil_itx_metaslab_normal_count);
 	zs->zil_itx_metaslab_normal_bytes.value.ui64 =
 	    wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes);
-	zs->zil_itx_metaslab_normal_write.value.ui64 =
-	    wmsum_value(&zil_sums->zil_itx_metaslab_normal_write);
-	zs->zil_itx_metaslab_normal_alloc.value.ui64 =
-	    wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc);
 	zs->zil_itx_metaslab_slog_count.value.ui64 =
 	    wmsum_value(&zil_sums->zil_itx_metaslab_slog_count);
 	zs->zil_itx_metaslab_slog_bytes.value.ui64 =
 	    wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes);
-	zs->zil_itx_metaslab_slog_write.value.ui64 =
-	    wmsum_value(&zil_sums->zil_itx_metaslab_slog_write);
-	zs->zil_itx_metaslab_slog_alloc.value.ui64 =
-	    wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc);
 }
 
 /*
@@ -464,6 +445,7 @@
 	uint64_t blk_count = 0;
 	uint64_t lr_count = 0;
 	blkptr_t blk, next_blk = {{{{0}}}};
+	char *lrbuf, *lrp;
 	int error = 0;
 
 	/*
@@ -481,13 +463,13 @@
 	 * If the log has been claimed, stop if we encounter a sequence
 	 * number greater than the highest claimed sequence number.
 	 */
+	lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
 	zil_bp_tree_init(zilog);
 
 	for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
 		uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
 		int reclen;
-		char *lrp, *end;
-		arc_buf_t *abuf = NULL;
+		char *end = NULL;
 
 		if (blk_seq > claim_blk_seq)
 			break;
@@ -503,10 +485,8 @@
 			break;
 
 		error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
-		    &lrp, &end, &abuf);
+		    lrbuf, &end);
 		if (error != 0) {
-			if (abuf)
-				arc_buf_destroy(abuf, &abuf);
 			if (claimed) {
 				char name[ZFS_MAX_DATASET_NAME_LEN];
 
@@ -519,25 +499,20 @@
 			break;
 		}
 
-		for (; lrp < end; lrp += reclen) {
+		for (lrp = lrbuf; lrp < end; lrp += reclen) {
 			lr_t *lr = (lr_t *)lrp;
 			reclen = lr->lrc_reclen;
 			ASSERT3U(reclen, >=, sizeof (lr_t));
-			if (lr->lrc_seq > claim_lr_seq) {
-				arc_buf_destroy(abuf, &abuf);
+			if (lr->lrc_seq > claim_lr_seq)
 				goto done;
-			}
 
 			error = parse_lr_func(zilog, lr, arg, txg);
-			if (error != 0) {
-				arc_buf_destroy(abuf, &abuf);
+			if (error != 0)
 				goto done;
-			}
 			ASSERT3U(max_lr_seq, <, lr->lrc_seq);
 			max_lr_seq = lr->lrc_seq;
 			lr_count++;
 		}
-		arc_buf_destroy(abuf, &abuf);
 	}
 done:
 	zilog->zl_parse_error = error;
@@ -547,6 +522,7 @@
 	zilog->zl_parse_lr_count = lr_count;
 
 	zil_bp_tree_fini(zilog);
+	zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
 
 	return (error);
 }
@@ -771,21 +747,20 @@
 	lwb->lwb_blk = *bp;
 	lwb->lwb_fastwrite = fastwrite;
 	lwb->lwb_slog = slog;
-	lwb->lwb_indirect = B_FALSE;
-	if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
-		lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t);
-		lwb->lwb_sz = BP_GET_LSIZE(bp);
-	} else {
-		lwb->lwb_nused = lwb->lwb_nfilled = 0;
-		lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
-	}
 	lwb->lwb_state = LWB_STATE_CLOSED;
 	lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
+	lwb->lwb_max_txg = txg;
 	lwb->lwb_write_zio = NULL;
 	lwb->lwb_root_zio = NULL;
 	lwb->lwb_issued_timestamp = 0;
 	lwb->lwb_issued_txg = 0;
-	lwb->lwb_max_txg = txg;
+	if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
+		lwb->lwb_nused = sizeof (zil_chain_t);
+		lwb->lwb_sz = BP_GET_LSIZE(bp);
+	} else {
+		lwb->lwb_nused = 0;
+		lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
+	}
 
 	mutex_enter(&zilog->zl_lock);
 	list_insert_tail(&zilog->zl_lwb_list, lwb);
@@ -799,8 +774,8 @@
 {
 	ASSERT(MUTEX_HELD(&zilog->zl_lock));
 	ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock));
-	VERIFY(list_is_empty(&lwb->lwb_waiters));
-	VERIFY(list_is_empty(&lwb->lwb_itxs));
+	ASSERT(list_is_empty(&lwb->lwb_waiters));
+	ASSERT(list_is_empty(&lwb->lwb_itxs));
 	ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
 	ASSERT3P(lwb->lwb_write_zio, ==, NULL);
 	ASSERT3P(lwb->lwb_root_zio, ==, NULL);
@@ -1398,14 +1373,9 @@
 	zil_commit_waiter_t *zcw;
 	itx_t *itx;
 	uint64_t txg;
-	list_t itxs, waiters;
 
 	spa_config_exit(zilog->zl_spa, SCL_STATE, lwb);
 
-	list_create(&itxs, sizeof (itx_t), offsetof(itx_t, itx_node));
-	list_create(&waiters, sizeof (zil_commit_waiter_t),
-	    offsetof(zil_commit_waiter_t, zcw_node));
-
 	hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp;
 
 	mutex_enter(&zilog->zl_lock);
@@ -1414,6 +1384,9 @@
 
 	lwb->lwb_root_zio = NULL;
 
+	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
+	lwb->lwb_state = LWB_STATE_FLUSH_DONE;
+
 	if (zilog->zl_last_lwb_opened == lwb) {
 		/*
 		 * Remember the highest committed log sequence number
@@ -1424,22 +1397,13 @@
 		zilog->zl_commit_lr_seq = zilog->zl_lr_seq;
 	}
 
-	list_move_tail(&itxs, &lwb->lwb_itxs);
-	list_move_tail(&waiters, &lwb->lwb_waiters);
-	txg = lwb->lwb_issued_txg;
-
-	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
-	lwb->lwb_state = LWB_STATE_FLUSH_DONE;
-
-	mutex_exit(&zilog->zl_lock);
-
-	while ((itx = list_remove_head(&itxs)) != NULL)
+	while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL)
 		zil_itx_destroy(itx);
-	list_destroy(&itxs);
 
-	while ((zcw = list_remove_head(&waiters)) != NULL) {
+	while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) {
 		mutex_enter(&zcw->zcw_lock);
 
+		ASSERT3P(zcw->zcw_lwb, ==, lwb);
 		zcw->zcw_lwb = NULL;
 		/*
 		 * We expect any ZIO errors from child ZIOs to have been
@@ -1464,9 +1428,11 @@
 
 		mutex_exit(&zcw->zcw_lock);
 	}
-	list_destroy(&waiters);
+
+	mutex_exit(&zilog->zl_lock);
 
 	mutex_enter(&zilog->zl_lwb_io_lock);
+	txg = lwb->lwb_issued_txg;
 	ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0);
 	zilog->zl_lwb_inflight[txg & TXG_MASK]--;
 	if (zilog->zl_lwb_inflight[txg & TXG_MASK] == 0)
@@ -1700,41 +1666,46 @@
 	EQUIV(lwb->lwb_root_zio == NULL, lwb->lwb_state == LWB_STATE_CLOSED);
 	EQUIV(lwb->lwb_root_zio != NULL, lwb->lwb_state == LWB_STATE_OPENED);
 
-	if (lwb->lwb_root_zio != NULL)
-		return;
-
-	lwb->lwb_root_zio = zio_root(zilog->zl_spa,
-	    zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
-
-	abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
-	    BP_GET_LSIZE(&lwb->lwb_blk));
-
-	if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
-		prio = ZIO_PRIORITY_SYNC_WRITE;
-	else
-		prio = ZIO_PRIORITY_ASYNC_WRITE;
-
 	SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
 	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
 	    lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
 
 	/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
 	mutex_enter(&zilog->zl_lock);
-	if (!lwb->lwb_fastwrite) {
-		metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
-		lwb->lwb_fastwrite = 1;
-	}
+	if (lwb->lwb_root_zio == NULL) {
+		abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
+		    BP_GET_LSIZE(&lwb->lwb_blk));
+
+		if (!lwb->lwb_fastwrite) {
+			metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
+			lwb->lwb_fastwrite = 1;
+		}
+
+		if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
+			prio = ZIO_PRIORITY_SYNC_WRITE;
+		else
+			prio = ZIO_PRIORITY_ASYNC_WRITE;
 
-	lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
-	    &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
-	    zil_lwb_write_done, lwb, prio,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
+		lwb->lwb_root_zio = zio_root(zilog->zl_spa,
+		    zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
+		ASSERT3P(lwb->lwb_root_zio, !=, NULL);
 
-	lwb->lwb_state = LWB_STATE_OPENED;
+		lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio,
+		    zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd,
+		    BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb,
+		    prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb);
+		ASSERT3P(lwb->lwb_write_zio, !=, NULL);
 
-	zil_lwb_set_zio_dependency(zilog, lwb);
-	zilog->zl_last_lwb_opened = lwb;
+		lwb->lwb_state = LWB_STATE_OPENED;
+
+		zil_lwb_set_zio_dependency(zilog, lwb);
+		zilog->zl_last_lwb_opened = lwb;
+	}
 	mutex_exit(&zilog->zl_lock);
+
+	ASSERT3P(lwb->lwb_root_zio, !=, NULL);
+	ASSERT3P(lwb->lwb_write_zio, !=, NULL);
+	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
 }
 
 /*
@@ -1765,11 +1736,11 @@
 static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
 
 /*
- * Close the log block for being issued and allocate the next one.
- * Has to be called under zl_issuer_lock to chain more lwbs.
+ * Start a log block write and advance to the next log block.
+ * Calls are serialized.
  */
 static lwb_t *
-zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
+zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
 {
 	lwb_t *nlwb = NULL;
 	zil_chain_t *zilc;
@@ -1777,7 +1748,7 @@
 	blkptr_t *bp;
 	dmu_tx_t *tx;
 	uint64_t txg;
-	uint64_t zil_blksz;
+	uint64_t zil_blksz, wsz;
 	int i, error;
 	boolean_t slog;
 
@@ -1786,17 +1757,16 @@
 	ASSERT3P(lwb->lwb_write_zio, !=, NULL);
 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
 
-	/*
-	 * If this lwb includes indirect writes, we have to commit before
-	 * creating the transaction, otherwise we may end up in dead lock.
-	 */
-	if (lwb->lwb_indirect) {
-		for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
-		    itx = list_next(&lwb->lwb_itxs, itx))
-			zil_lwb_commit(zilog, lwb, itx);
-		lwb->lwb_nused = lwb->lwb_nfilled;
+	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
+		zilc = (zil_chain_t *)lwb->lwb_buf;
+		bp = &zilc->zc_next_blk;
+	} else {
+		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
+		bp = &zilc->zc_next_blk;
 	}
 
+	ASSERT(lwb->lwb_nused <= lwb->lwb_sz);
+
 	/*
 	 * Allocate the next block and save its address in this block
 	 * before writing it in order to establish the log chain.
@@ -1844,18 +1814,19 @@
 	zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
 	for (i = 0; i < ZIL_PREV_BLKS; i++)
 		zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
-	DTRACE_PROBE3(zil__block__size, zilog_t *, zilog,
-	    uint64_t, zil_blksz,
-	    uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
 	zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
 
-	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
-		zilc = (zil_chain_t *)lwb->lwb_buf;
-	else
-		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
-	bp = &zilc->zc_next_blk;
 	BP_ZERO(bp);
 	error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
+	if (slog) {
+		ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
+		ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
+		    lwb->lwb_nused);
+	} else {
+		ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
+		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
+		    lwb->lwb_nused);
+	}
 	if (error == 0) {
 		ASSERT3U(bp->blk_birth, ==, txg);
 		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
@@ -1867,68 +1838,17 @@
 		nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
 	}
 
-	lwb->lwb_state = LWB_STATE_ISSUED;
-
-	dmu_tx_commit(tx);
-
-	/*
-	 * We need to acquire the config lock for the lwb to issue it later.
-	 * However, if we already have a queue of closed parent lwbs already
-	 * holding the config lock (but not yet issued), we can't block here
-	 * waiting on the lock or we will deadlock.  In that case we must
-	 * first issue to parent IOs before waiting on the lock.
-	 */
-	if (ilwbs && !list_is_empty(ilwbs)) {
-		if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
-			lwb_t *tlwb;
-			while ((tlwb = list_remove_head(ilwbs)) != NULL)
-				zil_lwb_write_issue(zilog, tlwb);
-			spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
-		}
-	} else {
-		spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
-	}
-
-	if (ilwbs)
-		list_insert_tail(ilwbs, lwb);
-
-	/*
-	 * If there was an allocation failure then nlwb will be null which
-	 * forces a txg_wait_synced().
-	 */
-	return (nlwb);
-}
-
-/*
- * Finalize previously closed block and issue the write zio.
- * Does not require locking.
- */
-static void
-zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
-{
-	zil_chain_t *zilc;
-	int wsz;
-
-	/* Actually fill the lwb with the data if not yet. */
-	if (!lwb->lwb_indirect) {
-		for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
-		    itx = list_next(&lwb->lwb_itxs, itx))
-			zil_lwb_commit(zilog, lwb, itx);
-		lwb->lwb_nused = lwb->lwb_nfilled;
-	}
-
 	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
 		/* For Slim ZIL only write what is used. */
-		wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int);
-		ASSERT3S(wsz, <=, lwb->lwb_sz);
+		wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
+		ASSERT3U(wsz, <=, lwb->lwb_sz);
 		zio_shrink(lwb->lwb_write_zio, wsz);
 		wsz = lwb->lwb_write_zio->io_size;
 
-		zilc = (zil_chain_t *)lwb->lwb_buf;
 	} else {
 		wsz = lwb->lwb_sz;
-		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
 	}
+
 	zilc->zc_pad = 0;
 	zilc->zc_nused = lwb->lwb_nused;
 	zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
@@ -1938,28 +1858,22 @@
 	 */
 	memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
 
-	if (lwb->lwb_slog) {
-		ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_bytes,
-		    lwb->lwb_nused);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_write,
-		    wsz);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_slog_alloc,
-		    BP_GET_LSIZE(&lwb->lwb_blk));
-	} else {
-		ZIL_STAT_BUMP(zilog, zil_itx_metaslab_normal_count);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_bytes,
-		    lwb->lwb_nused);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_write,
-		    wsz);
-		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
-		    BP_GET_LSIZE(&lwb->lwb_blk));
-	}
-	ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
+	spa_config_enter(zilog->zl_spa, SCL_STATE, lwb, RW_READER);
+
 	zil_lwb_add_block(lwb, &lwb->lwb_blk);
 	lwb->lwb_issued_timestamp = gethrtime();
+	lwb->lwb_state = LWB_STATE_ISSUED;
+
 	zio_nowait(lwb->lwb_root_zio);
 	zio_nowait(lwb->lwb_write_zio);
+
+	dmu_tx_commit(tx);
+
+	/*
+	 * If there was an allocation failure then nlwb will be null which
+	 * forces a txg_wait_synced().
+	 */
+	return (nlwb);
 }
 
 /*
@@ -1995,19 +1909,13 @@
 	    sizeof (lr_write_t));
 }
 
-/*
- * Estimate space needed in the lwb for the itx.  Allocate more lwbs or
- * split the itx as needed, but don't touch the actual transaction data.
- * Has to be called under zl_issuer_lock to call zil_lwb_write_close()
- * to chain more lwbs.
- */
 static lwb_t *
-zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
+zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 {
-	itx_t *citx;
-	lr_t *lr, *clr;
-	lr_write_t *lrw;
-	uint64_t dlen, dnow, lwb_sp, reclen, max_log_data;
+	lr_t *lrcb, *lrc;
+	lr_write_t *lrwb, *lrw;
+	char *lr_buf;
+	uint64_t dlen, dnow, dpad, lwb_sp, reclen, txg, max_log_data;
 
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
 	ASSERT3P(lwb, !=, NULL);
@@ -2015,8 +1923,8 @@
 
 	zil_lwb_write_open(zilog, lwb);
 
-	lr = &itx->itx_lr;
-	lrw = (lr_write_t *)lr;
+	lrc = &itx->itx_lr;
+	lrw = (lr_write_t *)lrc;
 
 	/*
 	 * A commit itx doesn't represent any on-disk state; instead
@@ -2030,23 +1938,24 @@
 	 *
 	 * For more details, see the comment above zil_commit().
 	 */
-	if (lr->lrc_txtype == TX_COMMIT) {
+	if (lrc->lrc_txtype == TX_COMMIT) {
 		mutex_enter(&zilog->zl_lock);
 		zil_commit_waiter_link_lwb(itx->itx_private, lwb);
 		itx->itx_private = NULL;
 		mutex_exit(&zilog->zl_lock);
-		list_insert_tail(&lwb->lwb_itxs, itx);
 		return (lwb);
 	}
 
-	if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
+	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
 		dlen = P2ROUNDUP_TYPED(
 		    lrw->lr_length, sizeof (uint64_t), uint64_t);
+		dpad = dlen - lrw->lr_length;
 	} else {
-		dlen = 0;
+		dlen = dpad = 0;
 	}
-	reclen = lr->lrc_reclen;
+	reclen = lrc->lrc_reclen;
 	zilog->zl_cur_used += (reclen + dlen);
+	txg = lrc->lrc_txg;
 
 cont:
 	/*
@@ -2059,7 +1968,7 @@
 	    lwb_sp < zil_max_waste_space(zilog) &&
 	    (dlen % max_log_data == 0 ||
 	    lwb_sp < reclen + dlen % max_log_data))) {
-		lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+		lwb = zil_lwb_write_issue(zilog, lwb);
 		if (lwb == NULL)
 			return (NULL);
 		zil_lwb_write_open(zilog, lwb);
@@ -2078,99 +1987,19 @@
 	}
 
 	dnow = MIN(dlen, lwb_sp - reclen);
-	if (dlen > dnow) {
-		ASSERT3U(lr->lrc_txtype, ==, TX_WRITE);
-		ASSERT3U(itx->itx_wr_state, ==, WR_NEED_COPY);
-		citx = zil_itx_clone(itx);
-		clr = &citx->itx_lr;
-		lr_write_t *clrw = (lr_write_t *)clr;
-		clrw->lr_length = dnow;
-		lrw->lr_offset += dnow;
-		lrw->lr_length -= dnow;
-	} else {
-		citx = itx;
-		clr = lr;
-	}
-
-	/*
-	 * We're actually making an entry, so update lrc_seq to be the
-	 * log record sequence number.  Note that this is generally not
-	 * equal to the itx sequence number because not all transactions
-	 * are synchronous, and sometimes spa_sync() gets there first.
-	 */
-	clr->lrc_seq = ++zilog->zl_lr_seq;
-
-	lwb->lwb_nused += reclen + dnow;
-	ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
-	ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
-
-	zil_lwb_add_txg(lwb, lr->lrc_txg);
-	list_insert_tail(&lwb->lwb_itxs, citx);
-
-	dlen -= dnow;
-	if (dlen > 0) {
-		zilog->zl_cur_used += reclen;
-		goto cont;
-	}
-
-	/*
-	 * We have to really issue all queued LWBs before we may have to
-	 * wait for a txg sync.  Otherwise we may end up in a dead lock.
-	 */
-	if (lr->lrc_txtype == TX_WRITE) {
-		boolean_t frozen = lr->lrc_txg > spa_freeze_txg(zilog->zl_spa);
-		if (frozen || itx->itx_wr_state == WR_INDIRECT) {
-			lwb_t *tlwb;
-			while ((tlwb = list_remove_head(ilwbs)) != NULL)
-				zil_lwb_write_issue(zilog, tlwb);
-		}
-		if (itx->itx_wr_state == WR_INDIRECT)
-			lwb->lwb_indirect = B_TRUE;
-		if (frozen)
-			txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
-	}
-
-	return (lwb);
-}
-
-/*
- * Fill the actual transaction data into the lwb, following zil_lwb_assign().
- * Does not require locking.
- */
-static void
-zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
-{
-	lr_t *lr, *lrb;
-	lr_write_t *lrw, *lrwb;
-	char *lr_buf;
-	uint64_t dlen, reclen;
-
-	lr = &itx->itx_lr;
-	lrw = (lr_write_t *)lr;
-
-	if (lr->lrc_txtype == TX_COMMIT)
-		return;
-
-	if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
-		dlen = P2ROUNDUP_TYPED(
-		    lrw->lr_length, sizeof (uint64_t), uint64_t);
-	} else {
-		dlen = 0;
-	}
-	reclen = lr->lrc_reclen;
-	ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled);
-
-	lr_buf = lwb->lwb_buf + lwb->lwb_nfilled;
-	memcpy(lr_buf, lr, reclen);
-	lrb = (lr_t *)lr_buf;		/* Like lr, but inside lwb. */
-	lrwb = (lr_write_t *)lrb;	/* Like lrw, but inside lwb. */
+	lr_buf = lwb->lwb_buf + lwb->lwb_nused;
+	memcpy(lr_buf, lrc, reclen);
+	lrcb = (lr_t *)lr_buf;		/* Like lrc, but inside lwb. */
+	lrwb = (lr_write_t *)lrcb;	/* Like lrw, but inside lwb. */
 
 	ZIL_STAT_BUMP(zilog, zil_itx_count);
 
 	/*
 	 * If it's a write, fetch the data or get its blkptr as appropriate.
 	 */
-	if (lr->lrc_txtype == TX_WRITE) {
+	if (lrc->lrc_txtype == TX_WRITE) {
+		if (txg > spa_freeze_txg(zilog->zl_spa))
+			txg_wait_synced(zilog->zl_dmu_pool, txg);
 		if (itx->itx_wr_state == WR_COPIED) {
 			ZIL_STAT_BUMP(zilog, zil_itx_copied_count);
 			ZIL_STAT_INCR(zilog, zil_itx_copied_bytes,
@@ -2181,10 +2010,14 @@
 
 			if (itx->itx_wr_state == WR_NEED_COPY) {
 				dbuf = lr_buf + reclen;
-				lrb->lrc_reclen += dlen;
+				lrcb->lrc_reclen += dnow;
+				if (lrwb->lr_length > dnow)
+					lrwb->lr_length = dnow;
+				lrw->lr_offset += dnow;
+				lrw->lr_length -= dnow;
 				ZIL_STAT_BUMP(zilog, zil_itx_needcopy_count);
 				ZIL_STAT_INCR(zilog, zil_itx_needcopy_bytes,
-				    dlen);
+				    dnow);
 			} else {
 				ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT);
 				dbuf = NULL;
@@ -2211,11 +2044,9 @@
 			error = zilog->zl_get_data(itx->itx_private,
 			    itx->itx_gen, lrwb, dbuf, lwb,
 			    lwb->lwb_write_zio);
-			if (dbuf != NULL && error == 0) {
+			if (dbuf != NULL && error == 0 && dnow == dlen)
 				/* Zero any padding bytes in the last block. */
-				memset((char *)dbuf + lrwb->lr_length, 0,
-				    dlen - lrwb->lr_length);
-			}
+				memset((char *)dbuf + lrwb->lr_length, 0, dpad);
 
 			/*
 			 * Typically, the only return values we should see from
@@ -2243,26 +2074,39 @@
 				    error);
 				zfs_fallthrough;
 			case EIO:
-				if (lwb->lwb_indirect) {
-					txg_wait_synced(zilog->zl_dmu_pool,
-					    lr->lrc_txg);
-				} else {
-					lwb->lwb_write_zio->io_error = error;
-				}
+				txg_wait_synced(zilog->zl_dmu_pool, txg);
 				zfs_fallthrough;
 			case ENOENT:
 				zfs_fallthrough;
 			case EEXIST:
 				zfs_fallthrough;
 			case EALREADY:
-				return;
+				return (lwb);
 			}
 		}
 	}
 
-	lwb->lwb_nfilled += reclen + dlen;
-	ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused);
-	ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t)));
+	/*
+	 * We're actually making an entry, so update lrc_seq to be the
+	 * log record sequence number.  Note that this is generally not
+	 * equal to the itx sequence number because not all transactions
+	 * are synchronous, and sometimes spa_sync() gets there first.
+	 */
+	lrcb->lrc_seq = ++zilog->zl_lr_seq;
+	lwb->lwb_nused += reclen + dnow;
+
+	zil_lwb_add_txg(lwb, txg);
+
+	ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
+	ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
+
+	dlen -= dnow;
+	if (dlen > 0) {
+		zilog->zl_cur_used += reclen;
+		goto cont;
+	}
+
+	return (lwb);
 }
 
 itx_t *
@@ -2287,16 +2131,6 @@
 	return (itx);
 }
 
-static itx_t *
-zil_itx_clone(itx_t *oitx)
-{
-	itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
-	memcpy(itx, oitx, oitx->itx_size);
-	itx->itx_callback = NULL;
-	itx->itx_callback_data = NULL;
-	return (itx);
-}
-
 void
 zil_itx_destroy(itx_t *itx)
 {
@@ -2328,7 +2162,7 @@
 		/*
 		 * In the general case, commit itxs will not be found
 		 * here, as they'll be committed to an lwb via
-		 * zil_lwb_assign(), and free'd in that function. Having
+		 * zil_lwb_commit(), and free'd in that function. Having
 		 * said that, it is still possible for commit itxs to be
 		 * found here, due to the following race:
 		 *
@@ -2546,10 +2380,10 @@
  * This function will traverse the queue of itxs that need to be
  * committed, and move them onto the ZIL's zl_itx_commit_list.
  */
-static uint64_t
+static void
 zil_get_commit_list(zilog_t *zilog)
 {
-	uint64_t otxg, txg, wtxg = 0;
+	uint64_t otxg, txg;
 	list_t *commit_list = &zilog->zl_itx_commit_list;
 
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
@@ -2583,22 +2417,10 @@
 		 */
 		ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
 		    spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
-		list_t *sync_list = &itxg->itxg_itxs->i_sync_list;
-		if (unlikely(zilog->zl_suspend > 0)) {
-			/*
-			 * ZIL was just suspended, but we lost the race.
-			 * Allow all earlier itxs to be committed, but ask
-			 * caller to do txg_wait_synced(txg) for any new.
-			 */
-			if (!list_is_empty(sync_list))
-				wtxg = MAX(wtxg, txg);
-		} else {
-			list_move_tail(commit_list, sync_list);
-		}
+		list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
 
 		mutex_exit(&itxg->itxg_lock);
 	}
-	return (wtxg);
 }
 
 /*
@@ -2739,7 +2561,7 @@
  * lwb will be issued to the zio layer to be written to disk.
  */
 static void
-zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
+zil_process_commit_list(zilog_t *zilog)
 {
 	spa_t *spa = zilog->zl_spa;
 	list_t nolwb_itxs;
@@ -2841,23 +2663,18 @@
 		 */
 		if (frozen || !synced || lrc->lrc_txtype == TX_COMMIT) {
 			if (lwb != NULL) {
-				lwb = zil_lwb_assign(zilog, lwb, itx, ilwbs);
-				if (lwb == NULL) {
+				lwb = zil_lwb_commit(zilog, itx, lwb);
+
+				if (lwb == NULL)
 					list_insert_tail(&nolwb_itxs, itx);
-				} else if ((zcw->zcw_lwb != NULL &&
-				    zcw->zcw_lwb != lwb) || zcw->zcw_done) {
-					/*
-					 * Our lwb is done, leave the rest of
-					 * itx list to somebody else who care.
-					 */
-					first = B_FALSE;
-					break;
-				}
+				else
+					list_insert_tail(&lwb->lwb_itxs, itx);
 			} else {
 				if (lrc->lrc_txtype == TX_COMMIT) {
 					zil_commit_waiter_link_nolwb(
 					    itx->itx_private, &nolwb_waiters);
 				}
+
 				list_insert_tail(&nolwb_itxs, itx);
 			}
 		} else {
@@ -2873,8 +2690,6 @@
 		 * the ZIL write pipeline; see the comment within
 		 * zil_commit_writer_stall() for more details.
 		 */
-		while ((lwb = list_remove_head(ilwbs)) != NULL)
-			zil_lwb_write_issue(zilog, lwb);
 		zil_commit_writer_stall(zilog);
 
 		/*
@@ -2920,13 +2735,13 @@
 		 * on the system, such that this function will be
 		 * immediately called again (not necessarily by the same
 		 * thread) and this lwb's zio will be issued via
-		 * zil_lwb_assign(). This way, the lwb is guaranteed to
+		 * zil_lwb_commit(). This way, the lwb is guaranteed to
 		 * be "full" when it is issued to disk, and we'll make
 		 * use of the lwb's size the best we can.
 		 *
 		 * 2. If there isn't sufficient ZIL activity occurring on
 		 * the system, such that this lwb's zio isn't issued via
-		 * zil_lwb_assign(), zil_commit_waiter() will issue the
+		 * zil_lwb_commit(), zil_commit_waiter() will issue the
 		 * lwb's zio. If this occurs, the lwb is not guaranteed
 		 * to be "full" by the time its zio is issued, and means
 		 * the size of the lwb was "too large" given the amount
@@ -2958,14 +2773,10 @@
 			    zfs_commit_timeout_pct / 100;
 			if (sleep < zil_min_commit_timeout ||
 			    lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
-				lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+				lwb = zil_lwb_write_issue(zilog, lwb);
 				zilog->zl_cur_used = 0;
-				if (lwb == NULL) {
-					while ((lwb = list_remove_head(ilwbs))
-					    != NULL)
-						zil_lwb_write_issue(zilog, lwb);
+				if (lwb == NULL)
 					zil_commit_writer_stall(zilog);
-				}
 			}
 		}
 	}
@@ -2985,17 +2796,12 @@
  * not issued, we rely on future calls to zil_commit_writer() to issue
  * the lwb, or the timeout mechanism found in zil_commit_waiter().
  */
-static uint64_t
+static void
 zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
 {
-	list_t ilwbs;
-	lwb_t *lwb;
-	uint64_t wtxg = 0;
-
 	ASSERT(!MUTEX_HELD(&zilog->zl_lock));
 	ASSERT(spa_writeable(zilog->zl_spa));
 
-	list_create(&ilwbs, sizeof (lwb_t), offsetof(lwb_t, lwb_issue_node));
 	mutex_enter(&zilog->zl_issuer_lock);
 
 	if (zcw->zcw_lwb != NULL || zcw->zcw_done) {
@@ -3020,16 +2826,12 @@
 
 	ZIL_STAT_BUMP(zilog, zil_commit_writer_count);
 
-	wtxg = zil_get_commit_list(zilog);
+	zil_get_commit_list(zilog);
 	zil_prune_commit_list(zilog);
-	zil_process_commit_list(zilog, zcw, &ilwbs);
+	zil_process_commit_list(zilog);
 
 out:
 	mutex_exit(&zilog->zl_issuer_lock);
-	while ((lwb = list_remove_head(&ilwbs)) != NULL)
-		zil_lwb_write_issue(zilog, lwb);
-	list_destroy(&ilwbs);
-	return (wtxg);
 }
 
 static void
@@ -3056,7 +2858,7 @@
 		return;
 
 	/*
-	 * In order to call zil_lwb_write_close() we must hold the
+	 * In order to call zil_lwb_write_issue() we must hold the
 	 * zilog's "zl_issuer_lock". We can't simply acquire that lock,
 	 * since we're already holding the commit waiter's "zcw_lock",
 	 * and those two locks are acquired in the opposite order
@@ -3074,10 +2876,8 @@
 	 * the waiter is marked "done"), so without this check we could
 	 * wind up with a use-after-free error below.
 	 */
-	if (zcw->zcw_done) {
-		lwb = NULL;
+	if (zcw->zcw_done)
 		goto out;
-	}
 
 	ASSERT3P(lwb, ==, zcw->zcw_lwb);
 
@@ -3096,17 +2896,15 @@
 	 * if it's ISSUED or OPENED, and block any other threads that might
 	 * attempt to issue this lwb. For that reason we hold the
 	 * zl_issuer_lock when checking the lwb_state; we must not call
-	 * zil_lwb_write_close() if the lwb had already been issued.
+	 * zil_lwb_write_issue() if the lwb had already been issued.
 	 *
 	 * See the comment above the lwb_state_t structure definition for
 	 * more details on the lwb states, and locking requirements.
 	 */
 	if (lwb->lwb_state == LWB_STATE_ISSUED ||
 	    lwb->lwb_state == LWB_STATE_WRITE_DONE ||
-	    lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
-		lwb = NULL;
+	    lwb->lwb_state == LWB_STATE_FLUSH_DONE)
 		goto out;
-	}
 
 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
 
@@ -3116,7 +2914,7 @@
 	 * since we've reached the commit waiter's timeout and it still
 	 * hasn't been issued.
 	 */
-	lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
+	lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
 
 	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
 
@@ -3136,7 +2934,7 @@
 
 	if (nlwb == NULL) {
 		/*
-		 * When zil_lwb_write_close() returns NULL, this
+		 * When zil_lwb_write_issue() returns NULL, this
 		 * indicates zio_alloc_zil() failed to allocate the
 		 * "next" lwb on-disk. When this occurs, the ZIL write
 		 * pipeline must be stalled; see the comment within the
@@ -3158,16 +2956,12 @@
 		 *   lock, which occurs prior to calling dmu_tx_commit()
 		 */
 		mutex_exit(&zcw->zcw_lock);
-		zil_lwb_write_issue(zilog, lwb);
-		lwb = NULL;
 		zil_commit_writer_stall(zilog);
 		mutex_enter(&zcw->zcw_lock);
 	}
 
 out:
 	mutex_exit(&zilog->zl_issuer_lock);
-	if (lwb)
-		zil_lwb_write_issue(zilog, lwb);
 	ASSERT(MUTEX_HELD(&zcw->zcw_lock));
 }
 
@@ -3182,7 +2976,7 @@
  *    waited "long enough" and the lwb is still in the "open" state.
  *
  * Given a sufficient amount of itxs being generated and written using
- * the ZIL, the lwb's zio will be issued via the zil_lwb_assign()
+ * the ZIL, the lwb's zio will be issued via the zil_lwb_commit()
  * function. If this does not occur, this secondary responsibility will
  * ensure the lwb is issued even if there is not other synchronous
  * activity on the system.
@@ -3545,7 +3339,7 @@
 	zil_commit_waiter_t *zcw = zil_alloc_commit_waiter();
 	zil_commit_itx_assign(zilog, zcw);
 
-	uint64_t wtxg = zil_commit_writer(zilog, zcw);
+	zil_commit_writer(zilog, zcw);
 	zil_commit_waiter(zilog, zcw);
 
 	if (zcw->zcw_zio_error != 0) {
@@ -3560,8 +3354,6 @@
 		DTRACE_PROBE2(zil__commit__io__error,
 		    zilog_t *, zilog, zil_commit_waiter_t *, zcw);
 		txg_wait_synced(zilog->zl_dmu_pool, 0);
-	} else if (wtxg != 0) {
-		txg_wait_synced(zilog->zl_dmu_pool, wtxg);
 	}
 
 	zil_free_commit_waiter(zcw);
@@ -3864,7 +3656,7 @@
 	/*
 	 * zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends
 	 * on the time when the dmu_tx transaction is assigned in
-	 * zil_lwb_write_close().
+	 * zil_lwb_write_issue().
 	 */
 	mutex_enter(&zilog->zl_lwb_io_lock);
 	txg = MAX(zilog->zl_lwb_max_issued_txg, txg);
diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@@ -626,6 +626,8 @@
 void
 zio_add_child(zio_t *pio, zio_t *cio)
 {
+	zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
+
 	/*
 	 * Logical I/Os can have logical, gang, or vdev children.
 	 * Gang I/Os can have gang or vdev children.
@@ -634,7 +636,6 @@
 	 */
 	ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
 
-	zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
 	zl->zl_parent = pio;
 	zl->zl_child = cio;
 
@@ -643,45 +644,16 @@
 
 	ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
 
-	uint64_t *countp = pio->io_children[cio->io_child_type];
 	for (int w = 0; w < ZIO_WAIT_TYPES; w++)
-		countp[w] += !cio->io_state[w];
+		pio->io_children[cio->io_child_type][w] += !cio->io_state[w];
 
 	list_insert_head(&pio->io_child_list, zl);
 	list_insert_head(&cio->io_parent_list, zl);
 
-	mutex_exit(&cio->io_lock);
-	mutex_exit(&pio->io_lock);
-}
-
-void
-zio_add_child_first(zio_t *pio, zio_t *cio)
-{
-	/*
-	 * Logical I/Os can have logical, gang, or vdev children.
-	 * Gang I/Os can have gang or vdev children.
-	 * Vdev I/Os can only have vdev children.
-	 * The following ASSERT captures all of these constraints.
-	 */
-	ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
-
-	zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
-	zl->zl_parent = pio;
-	zl->zl_child = cio;
-
-	ASSERT(list_is_empty(&cio->io_parent_list));
-	list_insert_head(&cio->io_parent_list, zl);
-
-	mutex_enter(&pio->io_lock);
-
-	ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
-
-	uint64_t *countp = pio->io_children[cio->io_child_type];
-	for (int w = 0; w < ZIO_WAIT_TYPES; w++)
-		countp[w] += !cio->io_state[w];
-
-	list_insert_head(&pio->io_child_list, zl);
+	pio->io_child_count++;
+	cio->io_parent_count++;
 
+	mutex_exit(&cio->io_lock);
 	mutex_exit(&pio->io_lock);
 }
 
@@ -697,6 +669,9 @@
 	list_remove(&pio->io_child_list, zl);
 	list_remove(&cio->io_parent_list, zl);
 
+	pio->io_child_count--;
+	cio->io_parent_count--;
+
 	mutex_exit(&cio->io_lock);
 	mutex_exit(&pio->io_lock);
 	kmem_cache_free(zio_link_cache, zl);
@@ -871,14 +846,12 @@
 		zio->io_child_type = ZIO_CHILD_LOGICAL;
 
 	if (bp != NULL) {
+		zio->io_bp = (blkptr_t *)bp;
+		zio->io_bp_copy = *bp;
+		zio->io_bp_orig = *bp;
 		if (type != ZIO_TYPE_WRITE ||
-		    zio->io_child_type == ZIO_CHILD_DDT) {
-			zio->io_bp_copy = *bp;
+		    zio->io_child_type == ZIO_CHILD_DDT)
 			zio->io_bp = &zio->io_bp_copy;	/* so caller can free */
-		} else {
-			zio->io_bp = (blkptr_t *)bp;
-		}
-		zio->io_bp_orig = *bp;
 		if (zio->io_child_type == ZIO_CHILD_LOGICAL)
 			zio->io_logical = zio;
 		if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
@@ -913,7 +886,7 @@
 			zio->io_logical = pio->io_logical;
 		if (zio->io_child_type == ZIO_CHILD_GANG)
 			zio->io_gang_leader = pio->io_gang_leader;
-		zio_add_child_first(pio, zio);
+		zio_add_child(pio, zio);
 	}
 
 	taskq_init_ent(&zio->io_tqent);
@@ -1189,8 +1162,9 @@
 zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
     abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
     zio_done_func_t *ready, zio_done_func_t *children_ready,
-    zio_done_func_t *done, void *private, zio_priority_t priority,
-    zio_flag_t flags, const zbookmark_phys_t *zb)
+    zio_done_func_t *physdone, zio_done_func_t *done,
+    void *private, zio_priority_t priority, zio_flag_t flags,
+    const zbookmark_phys_t *zb)
 {
 	zio_t *zio;
 
@@ -1210,6 +1184,7 @@
 
 	zio->io_ready = ready;
 	zio->io_children_ready = children_ready;
+	zio->io_physdone = physdone;
 	zio->io_prop = *zp;
 
 	/*
@@ -1542,11 +1517,16 @@
 		flags &= ~ZIO_FLAG_IO_ALLOCATING;
 	}
 
+
 	zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
 	    done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
 	    ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
 	ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
 
+	zio->io_physdone = pio->io_physdone;
+	if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
+		zio->io_logical->io_phys_children++;
+
 	return (zio);
 }
 
@@ -1634,8 +1614,15 @@
 		abd_return_buf_copy(zio->io_abd, data, psize);
 	} else {
 		ASSERT(!BP_IS_EMBEDDED(bp));
+		ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
 	}
 
+	if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
+		zio->io_flags |= ZIO_FLAG_DONT_CACHE;
+
+	if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
+		zio->io_flags |= ZIO_FLAG_DONT_CACHE;
+
 	if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
 		zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
 
@@ -2730,7 +2717,7 @@
 	blkptr_t *bp = zio->io_bp;
 
 	ASSERT(gio == zio_unique_parent(zio));
-	ASSERT(list_is_empty(&zio->io_child_list));
+	ASSERT(zio->io_child_count == 0);
 
 	if (zio->io_error)
 		return;
@@ -2988,7 +2975,7 @@
 		zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
 		    has_data ? abd_get_offset(pio->io_abd, pio->io_size -
 		    resid) : NULL, lsize, lsize, &zp,
-		    zio_write_gang_member_ready, NULL,
+		    zio_write_gang_member_ready, NULL, NULL,
 		    zio_write_gang_done, &gn->gn_child[g], pio->io_priority,
 		    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
 
@@ -3450,7 +3437,7 @@
 	} else {
 		cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
 		    zio->io_orig_size, zio->io_orig_size, zp,
-		    zio_ddt_child_write_ready, NULL,
+		    zio_ddt_child_write_ready, NULL, NULL,
 		    zio_ddt_child_write_done, dde, zio->io_priority,
 		    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
 
@@ -3968,6 +3955,9 @@
 	    zio->io_type == ZIO_TYPE_WRITE ||
 	    zio->io_type == ZIO_TYPE_TRIM)) {
 
+		if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
+			return (zio);
+
 		if ((zio = vdev_queue_io(zio)) == NULL)
 			return (NULL);
 
@@ -4004,6 +3994,9 @@
 	    vd->vdev_ops != &vdev_draid_spare_ops) {
 		vdev_queue_io_done(zio);
 
+		if (zio->io_type == ZIO_TYPE_WRITE)
+			vdev_cache_write(zio);
+
 		if (zio_injection_enabled && zio->io_error == 0)
 			zio->io_error = zio_handle_device_injections(vd, zio,
 			    EIO, EILSEQ);
@@ -4113,7 +4106,8 @@
 		ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE));	/* not a leaf */
 		ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS));	/* not a leaf */
 		zio->io_error = 0;
-		zio->io_flags |= ZIO_FLAG_IO_RETRY | ZIO_FLAG_DONT_AGGREGATE;
+		zio->io_flags |= ZIO_FLAG_IO_RETRY |
+		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE;
 		zio->io_stage = ZIO_STAGE_VDEV_IO_START >> 1;
 		zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE,
 		    zio_requeue_io_start_cut_in_line);
@@ -4153,6 +4147,13 @@
 	if (zio->io_error)
 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 
+	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
+	    zio->io_physdone != NULL) {
+		ASSERT(!(zio->io_flags & ZIO_FLAG_DELEGATED));
+		ASSERT(zio->io_child_type == ZIO_CHILD_VDEV);
+		zio->io_physdone(zio->io_logical);
+	}
+
 	return (zio);
 }
 
@@ -4474,10 +4475,8 @@
 		zio->io_ready(zio);
 	}
 
-#ifdef ZFS_DEBUG
 	if (bp != NULL && bp != &zio->io_bp_copy)
 		zio->io_bp_copy = *bp;
-#endif
 
 	if (zio->io_error != 0) {
 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -4904,7 +4903,7 @@
 		return (NULL);
 	}
 
-	ASSERT(list_is_empty(&zio->io_child_list));
+	ASSERT(zio->io_child_count == 0);
 	ASSERT(zio->io_reexecute == 0);
 	ASSERT(zio->io_error == 0 || (zio->io_flags & ZIO_FLAG_CANFAIL));
 
diff --git a/sys/contrib/openzfs/module/zfs/zvol.c b/sys/contrib/openzfs/module/zfs/zvol.c
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@@ -1203,7 +1203,8 @@
 	 * Prefetch is completed, we can do zvol_os_create_minor
 	 * sequentially.
 	 */
-	while ((job = list_remove_head(&minors_list)) != NULL) {
+	while ((job = list_head(&minors_list)) != NULL) {
+		list_remove(&minors_list, job);
 		if (!job->error)
 			(void) zvol_os_create_minor(job->name);
 		kmem_strfree(job->name);
@@ -1310,8 +1311,10 @@
 	rw_exit(&zvol_state_lock);
 
 	/* Drop zvol_state_lock before calling zvol_free() */
-	while ((zv = list_remove_head(&free_list)) != NULL)
+	while ((zv = list_head(&free_list)) != NULL) {
+		list_remove(&free_list, zv);
 		zvol_os_free(zv);
+	}
 }
 
 /* Remove minor for this specific volume only */
diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@@ -128,7 +128,7 @@
     'zdb_block_size_histogram', 'zdb_checksum', 'zdb_decompress',
     'zdb_display_block', 'zdb_encrypted', 'zdb_label_checksum',
     'zdb_object_range_neg', 'zdb_object_range_pos', 'zdb_objset_id',
-    'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2', 'zdb_backup']
+    'zdb_decompress_zstd', 'zdb_recover', 'zdb_recover_2']
 pre =
 post =
 tags = ['functional', 'cli_root', 'zdb']
@@ -472,8 +472,7 @@
 tags = ['functional', 'cli_root', 'zpool_replace']
 
 [tests/functional/cli_root/zpool_resilver]
-tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
-    'zpool_resilver_concurrent']
+tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
 tags = ['functional', 'cli_root', 'zpool_resilver']
 
 [tests/functional/cli_root/zpool_scrub]
diff --git a/sys/contrib/openzfs/tests/runfiles/freebsd.run b/sys/contrib/openzfs/tests/runfiles/freebsd.run
--- a/sys/contrib/openzfs/tests/runfiles/freebsd.run
+++ b/sys/contrib/openzfs/tests/runfiles/freebsd.run
@@ -25,8 +25,3 @@
 [tests/functional/cli_root/zfs_jail:FreeBSD]
 tests = ['zfs_jail_001_pos']
 tags = ['functional', 'cli_root', 'zfs_jail']
-
-[tests/functional/pam:FreeBSD]
-tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
-    'pam_short_password']
-tags = ['functional', 'pam']
diff --git a/sys/contrib/openzfs/tests/runfiles/linux.run b/sys/contrib/openzfs/tests/runfiles/linux.run
--- a/sys/contrib/openzfs/tests/runfiles/linux.run
+++ b/sys/contrib/openzfs/tests/runfiles/linux.run
@@ -140,8 +140,7 @@
 tags = ['functional', 'mount']
 
 [tests/functional/pam:Linux]
-tests = ['pam_basic', 'pam_change_unmounted', 'pam_nounmount', 'pam_recursive',
-    'pam_short_password']
+tests = ['pam_basic', 'pam_nounmount', 'pam_short_password']
 tags = ['functional', 'pam']
 
 [tests/functional/procfs:Linux]
diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
--- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
+++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
@@ -152,7 +152,6 @@
         ['FAIL', rewind_reason],
     'cli_user/misc/zfs_share_001_neg': ['SKIP', na_reason],
     'cli_user/misc/zfs_unshare_001_neg': ['SKIP', na_reason],
-    'pool_checkpoint/checkpoint_discard_busy': ['SKIP', 12053],
     'privilege/setup': ['SKIP', na_reason],
     'refreserv/refreserv_004_pos': ['FAIL', known_reason],
     'rootpool/setup': ['SKIP', na_reason],
@@ -164,8 +163,6 @@
     known.update({
         'cli_root/zfs_receive/receive-o-x_props_override':
             ['FAIL', known_reason],
-        'cli_root/zpool_resilver/zpool_resilver_concurrent':
-            ['SKIP', na_reason],
         'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
         'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
         'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
@@ -173,7 +170,6 @@
         'link_count/link_count_001': ['SKIP', na_reason],
         'casenorm/mixed_create_failure': ['FAIL', 13215],
         'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
-        'rsend/send_raw_ashift': ['SKIP', 14961],
     })
 elif sys.platform.startswith('linux'):
     known.update({
@@ -281,8 +277,6 @@
         'mmp/mmp_inactive_import': ['FAIL', known_reason],
         'zvol/zvol_misc/zvol_misc_snapdev': ['FAIL', 12621],
         'zvol/zvol_misc/zvol_misc_volmode': ['FAIL', known_reason],
-        'zvol/zvol_misc/zvol_misc_fua': ['SKIP', 14872],
-        'zvol/zvol_misc/zvol_misc_trim': ['SKIP', 14872],
         'idmap_mount/idmap_mount_001': ['SKIP', idmap_reason],
         'idmap_mount/idmap_mount_002': ['SKIP', idmap_reason],
         'idmap_mount/idmap_mount_003': ['SKIP', idmap_reason],
diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
--- a/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/btree_test.c
@@ -501,7 +501,7 @@
 	srandom(seed);
 
 	zfs_btree_init();
-	zfs_btree_create(&bt, zfs_btree_compare, NULL, sizeof (uint64_t));
+	zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
 
 	/*
 	 * This runs the named negative test. None of them should
diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
--- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
+++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
@@ -3706,7 +3706,7 @@
 
 	while $do_once || [ $stat1 -ne $stat2 ] || [ $stat2 -eq 0 ]; do
 		typeset stat1=$(get_arcstat $stat)
-		sleep 0.5
+		sleep 2
 		typeset stat2=$(get_arcstat $stat)
 		do_once=false
 	done
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
@@ -572,7 +572,6 @@
 	functional/cli_root/zdb/zdb_006_pos.ksh \
 	functional/cli_root/zdb/zdb_args_neg.ksh \
 	functional/cli_root/zdb/zdb_args_pos.ksh \
-	functional/cli_root/zdb/zdb_backup.ksh \
 	functional/cli_root/zdb/zdb_block_size_histogram.ksh \
 	functional/cli_root/zdb/zdb_checksum.ksh \
 	functional/cli_root/zdb/zdb_decompress.ksh \
@@ -1143,7 +1142,6 @@
 	functional/cli_root/zpool_resilver/setup.ksh \
 	functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh \
 	functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh \
-	functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh \
 	functional/cli_root/zpool_scrub/cleanup.ksh \
 	functional/cli_root/zpool_scrub/setup.ksh \
 	functional/cli_root/zpool_scrub/zpool_scrub_001_neg.ksh \
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zdb/zdb_backup.ksh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/ksh
-
-#
-# This file and its contents are supplied under the terms of the
-# Common Development and Distribution License ("CDDL"), version 1.0.
-# You may only use this file in accordance with the terms of version
-# 1.0 of the CDDL.
-#
-# A full copy of the text of the CDDL should have accompanied this
-# source.  A copy of the CDDL is also available via the Internet at
-# http://www.illumos.org/license/CDDL.
-#
-
-#
-# Copyright (c) 2023, Klara Inc.
-#
-
-. $STF_SUITE/include/libtest.shlib
-
-write_count=8
-blksize=131072
-
-tmpfile=$TEST_BASE_DIR/tmpfile
-
-function cleanup
-{
-	datasetexists $TESTPOOL && destroy_pool $TESTPOOL
-	rm $tmpfile.1 $tmpfile.2
-}
-
-log_onexit cleanup
-
-log_assert "Verify that zfs send and zdb -B produce the same stream"
-
-verify_runnable "global"
-verify_disk_count "$DISKS" 2
-
-default_mirror_setup_noexit $DISKS
-file_write -o create -w -f $TESTDIR/file -b $blksize -c $write_count
-
-snap=$TESTPOOL/$TESTFS@snap
-log_must zfs snapshot $snap
-typeset -i objsetid=$(zfs get -Ho value objsetid $snap)
-
-sync_pool $TESTPOOL
-
-log_must eval "zfs send -ecL $snap > $tmpfile.1"
-log_must eval "zdb -B $TESTPOOL/$objsetid ecL > $tmpfile.2"
-
-typeset sum1=$(cat $tmpfile.1 | md5sum)
-typeset sum2=$(cat $tmpfile.2 | md5sum)
-
-log_must test "$sum1" = "$sum2"
-
-log_pass "zfs send and zdb -B produce the same stream"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
-#
-
-. $STF_SUITE/include/libtest.shlib
-. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
-
-#
-# DESCRIPTION:
-#	Verify 'zpool clear' doesn't cause concurrent resilvers
-#
-# STRATEGY:
-#	1. Create N(10) virtual disk files.
-#	2. Create draid pool based on the virtual disk files.
-#	3. Fill the filesystem with directories and files.
-#	4. Force-fault 2 vdevs and verify distributed spare is kicked in.
-#	5. Free the distributed spare by replacing the faulty drive.
-#	6. Run zpool clear and verify that it does not initiate 2 resilvers
-#	   concurrently while distributed spare gets kicked in.
-#
-
-verify_runnable "global"
-
-typeset -ir devs=10
-typeset -ir nparity=1
-typeset -ir ndata=8
-typeset -ir dspare=1
-
-function cleanup
-{
-	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
-
-	for i in {0..$devs}; do
-		log_must rm -f "$BASEDIR/vdev$i"
-	done
-
-	for dir in $BASEDIR; do
-		if [[ -d $dir ]]; then
-			log_must rm -rf $dir
-		fi
-	done
-
-	zed_stop
-	zed_cleanup
-}
-
-log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
-log_onexit cleanup
-
-setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
-
-# ZED needed for sequential resilver
-zed_setup
-log_must zed_start
-
-log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
-log_must wait_vdev_state  $TESTPOOL draid1-0-0 "ONLINE" 60
-log_must zpool wait -t resilver $TESTPOOL
-log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
-
-log_must zpool labelclear -f $BASEDIR/vdev5
-log_must zpool labelclear -f $BASEDIR/vdev6
-
-log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
-sync_pool $TESTPOOL
-
-log_must zpool events -c
-log_must zpool clear $TESTPOOL
-log_must wait_vdev_state  $TESTPOOL draid1-0-0 "ONLINE" 60
-log_must zpool wait -t resilver $TESTPOOL
-log_must zpool wait -t scrub $TESTPOOL
-
-nof_resilver=$(zpool events | grep -c resilver_start)
-if [ $nof_resilver = 1 ] ; then
-	log_must verify_pool $TESTPOOL
-	log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
-else
-	log_fail "FAIL: sequential and healing resilver initiated concurrently"
-fi
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/misc/zilstat_001_pos.ksh
@@ -25,7 +25,7 @@
 is_freebsd && ! python3 -c 'import sysctl' 2>/dev/null && log_unsupported "python3 sysctl module missing"
 
 set -A args  "" "-s \",\"" "-v" \
-    "-f time,cwc,imnb,imsb"
+    "-f time,zcwc,zimnb,zimsb"
 
 log_assert "zilstat generates output and doesn't return an error code"
 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/l2arc/persist_l2arc_001_pos.ksh
@@ -27,14 +27,15 @@
 #
 # STRATEGY:
 #	1. Create pool with a cache device.
-#	2. Create a random file in that pool and random read for 10 sec.
-#	3. Export pool.
-#	4. Read the amount of log blocks written from the header of the
+#	2. Export and re-import pool without writing any data.
+#	3. Create a random file in that pool and random read for 10 sec.
+#	4. Export pool.
+#	5. Read the amount of log blocks written from the header of the
 #		L2ARC device.
-#	5. Import pool.
-#	6. Read the amount of log blocks rebuilt in arcstats and compare to
+#	6. Import pool.
+#	7. Read the amount of log blocks rebuilt in arcstats and compare to
 #		(5).
-#	7. Check if the labels of the L2ARC device are intact.
+#	8. Check if the labels of the L2ARC device are intact.
 #
 #	* We can predict the minimum bytes of L2ARC restored if we subtract
 #	from the effective size of the cache device the bytes l2arc_evict()
@@ -76,8 +77,10 @@
 
 log_must truncate -s ${cache_sz}M $VDEV_CACHE
 
-log_must zpool create -f -o ashift=12 $TESTPOOL $VDEV
-log_must zpool add $TESTPOOL cache $VDEV_CACHE
+log_must zpool create -f $TESTPOOL $VDEV cache $VDEV_CACHE
+
+log_must zpool export $TESTPOOL
+log_must zpool import -d $VDIR $TESTPOOL
 
 log_must fio $FIO_SCRIPTS/mkfiles.fio
 log_must fio $FIO_SCRIPTS/random_reads.fio
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/cleanup.ksh
@@ -25,6 +25,5 @@
 rmconfig
 destroy_pool $TESTPOOL
 del_user ${username}
-del_user ${username}rec
 del_group pamtestgroup
 log_must rm -rf "$runstatedir" $TESTDIRS
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_change_unmounted.ksh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or https://opensource.org/licenses/CDDL-1.0.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-. $STF_SUITE/tests/functional/pam/utilities.kshlib
-
-if [ -n "$ASAN_OPTIONS" ]; then
-	export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
-fi
-
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-genconfig "homes=$TESTPOOL/pam runstatedir=${runstatedir}"
-
-printf "testpass\nsecondpass\nsecondpass\n" | pamtester -v ${pamservice} ${username} chauthtok
-
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-echo "secondpass" | pamtester ${pamservice} ${username} open_session
-references 1
-log_must ismounted "$TESTPOOL/pam/${username}"
-keystatus available
-
-printf "secondpass\ntestpass\ntestpass\n" | pamtester -v ${pamservice} ${username} chauthtok
-
-log_must ismounted "$TESTPOOL/pam/${username}"
-log_must ismounted "$TESTPOOL/pam/${username}"
-keystatus available
-
-log_must pamtester ${pamservice} ${username} close_session
-references 0
-log_mustnot ismounted "$TESTPOOL/pam/${username}"
-keystatus unavailable
-
-log_pass "done."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
deleted file mode 100755
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_recursive.ksh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/ksh -p
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or https://opensource.org/licenses/CDDL-1.0.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-. $STF_SUITE/tests/functional/pam/utilities.kshlib
-
-if [ -n "$ASAN_OPTIONS" ]; then
-	export LD_PRELOAD=$(ldd "$(command -v zfs)" | awk '/libasan\.so/ {print $3}')
-fi
-
-username="${username}rec"
-
-# Set up a deeper hierarchy, a mountpoint that doesn't interfere with other tests,
-# and a user which references that mountpoint
-log_must zfs create "$TESTPOOL/pampam"
-log_must zfs create -o mountpoint="$TESTDIR/rec" "$TESTPOOL/pampam/pam"
-echo "recurpass" | zfs create -o encryption=aes-256-gcm -o keyformat=passphrase \
-	-o keylocation=prompt "$TESTPOOL/pampam/pam/${username}"
-log_must zfs unmount "$TESTPOOL/pampam/pam/${username}"
-log_must zfs unload-key "$TESTPOOL/pampam/pam/${username}"
-log_must add_user pamtestgroup ${username} "$TESTDIR/rec"
-
-function keystatus {
-	log_must [ "$(get_prop keystatus "$TESTPOOL/pampam/pam/${username}")" = "$1" ]
-}
-
-log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
-keystatus unavailable
-
-function test_session {
-	echo "recurpass" | pamtester ${pamservice} ${username} open_session
-	references 1
-	log_must ismounted "$TESTPOOL/pampam/pam/${username}"
-	keystatus available
-
-	log_must pamtester ${pamservice} ${username} close_session
-	references 0
-	log_mustnot ismounted "$TESTPOOL/pampam/pam/${username}"
-	keystatus unavailable
-}
-
-genconfig "homes=$TESTPOOL/pampam/pam prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=$TESTPOOL/pampam recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=$TESTPOOL recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-genconfig "homes=* recursive_homes prop_mountpoint runstatedir=${runstatedir}"
-test_session
-
-log_pass "done."
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pam/pam_short_password.ksh
@@ -52,7 +52,7 @@
 keystatus available
 
 # Change user and dataset password to short one.
-printf "testpass\nshort\nshort\n" | pamtester -v ${pamservice} ${username} chauthtok
+printf "short\nshort\n" | pamtester ${pamservice} ${username} chauthtok
 
 # Unmount and unload key.
 log_must pamtester ${pamservice} ${username} close_session
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/pool_checkpoint/checkpoint_discard_busy.ksh
@@ -38,8 +38,6 @@
 
 verify_runnable "global"
 
-log_unsupported "Skipping, issue https://github.com/openzfs/zfs/issues/12053"
-
 function test_cleanup
 {
 	# reset memory limit to 16M
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/rsend/send_raw_ashift.ksh
@@ -37,10 +37,6 @@
 
 log_assert "Verify raw sending to pools with greater ashift succeeds"
 
-if is_freebsd; then
-	log_unsupported "Runs too long on FreeBSD 14 (Issue #14961)"
-fi
-
 function cleanup
 {
 	rm -f $BACKDIR/fs@*
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_fua.ksh
@@ -45,15 +45,6 @@
 
 if ! is_linux ; then
 	log_unsupported "Only linux supports dd with oflag=dsync for FUA writes"
-else
-	if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
-		log_unsupported "Disabled while issue #14872 is being worked"
-	fi
-
-	# Disabled for the CentOS 9 kernel
-	if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
-		log_unsupported "Disabled while issue #14872 is being worked"
-	fi
 fi
 
 typeset datafile1="$(mktemp zvol_misc_fua1.XXXXXX)"
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh
@@ -44,15 +44,6 @@
 verify_runnable "global"
 
 if is_linux ; then
-	if [[ $(linux_version) -gt $(linux_version "6.2") ]]; then
-		log_unsupported "Disabled while issue #14872 is being worked"
-	fi
-
-	# Disabled for the CentOS 9 kernel
-	if [[ $(linux_version) -eq $(linux_version "5.14") ]]; then
-		log_unsupported "Disabled while issue #14872 is being worked"
-	fi
-
 	# We need '--force' here since the prior tests may leave a filesystem
 	# on the zvol, and blkdiscard will see that filesystem and print a
 	# warning unless you force it.
@@ -132,6 +123,7 @@
 # Remove old data from previous tests
 log_must $trimcmd $zvolpath
 
+
 set_blk_mq 1
 log_must_busy zpool export $TESTPOOL
 log_must zpool import $TESTPOOL
diff --git a/sys/modules/zfs/Makefile b/sys/modules/zfs/Makefile
--- a/sys/modules/zfs/Makefile
+++ b/sys/modules/zfs/Makefile
@@ -38,7 +38,7 @@
 
 CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS \
 	-DHAVE_UIO_ZEROCOPY -DWITHOUT_NETDUMP -D__KERNEL -D_SYS_CONDVAR_H_ \
-	-D_SYS_VMEM_H_
+	-D_SYS_VMEM_H_ -DIN_FREEBSD_BASE
 
 .if ${MACHINE_ARCH} == "amd64"
 CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
@@ -295,6 +295,7 @@
 	uberblock.c \
 	unique.c \
 	vdev.c \
+	vdev_cache.c \
 	vdev_draid.c \
 	vdev_draid_rand.c \
 	vdev_indirect.c \
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -653,9 +653,6 @@
 /* qat is enabled and existed */
 /* #undef HAVE_QAT */
 
-/* struct reclaim_state has reclaimed */
-/* #undef HAVE_RECLAIM_STATE_RECLAIMED */
-
 /* register_shrinker is vararg */
 /* #undef HAVE_REGISTER_SHRINKER_VARARG */
 
@@ -1051,7 +1048,7 @@
 /* #undef ZFS_IS_GPL_COMPATIBLE */
 
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.2.0-FreeBSD_g009d3288"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gad0a55461"
 
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
@@ -1060,7 +1057,7 @@
 /* #undef ZFS_META_DATA */
 
 /* Define the maximum compatible kernel version. */
-#define ZFS_META_KVER_MAX "6.3"
+#define ZFS_META_KVER_MAX "6.2"
 
 /* Define the minimum compatible kernel version. */
 #define ZFS_META_KVER_MIN "3.10"
@@ -1081,10 +1078,10 @@
 #define ZFS_META_NAME "zfs"
 
 /* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g009d3288"
+#define ZFS_META_RELEASE "FreeBSD_gad0a55461"
 
 /* Define the project version. */
-#define ZFS_META_VERSION "2.2.0"
+#define ZFS_META_VERSION "2.1.99"
 
 /* count is located in percpu_ref.data */
 /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */
diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h
--- a/sys/modules/zfs/zfs_gitrev.h
+++ b/sys/modules/zfs/zfs_gitrev.h
@@ -1 +1 @@
-#define	ZFS_META_GITREV "zfs-2.2.0-rc1-0-g009d3288d"
+#define	ZFS_META_GITREV "zfs-2.1.99-1955-gad0a55461"