diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h
index b9bac7e252e5..3cf2b1274dd2 100644
--- a/include/sys/fm/fs/zfs.h
+++ b/include/sys/fm/fs/zfs.h
@@ -1,139 +1,137 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
  *  Copyright (c) 2020 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_FM_FS_ZFS_H
 #define	_SYS_FM_FS_ZFS_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #define	ZFS_ERROR_CLASS				"fs.zfs"
 
 #define	FM_EREPORT_ZFS_CHECKSUM			"checksum"
 #define	FM_EREPORT_ZFS_AUTHENTICATION		"authentication"
 #define	FM_EREPORT_ZFS_IO			"io"
 #define	FM_EREPORT_ZFS_DATA			"data"
 #define	FM_EREPORT_ZFS_DELAY			"delay"
 #define	FM_EREPORT_ZFS_DEADMAN			"deadman"
 #define	FM_EREPORT_ZFS_POOL			"zpool"
 #define	FM_EREPORT_ZFS_DEVICE_UNKNOWN		"vdev.unknown"
 #define	FM_EREPORT_ZFS_DEVICE_OPEN_FAILED	"vdev.open_failed"
 #define	FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA	"vdev.corrupt_data"
 #define	FM_EREPORT_ZFS_DEVICE_NO_REPLICAS	"vdev.no_replicas"
 #define	FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM	"vdev.bad_guid_sum"
 #define	FM_EREPORT_ZFS_DEVICE_TOO_SMALL		"vdev.too_small"
 #define	FM_EREPORT_ZFS_DEVICE_BAD_LABEL		"vdev.bad_label"
 #define	FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT	"vdev.bad_ashift"
 #define	FM_EREPORT_ZFS_IO_FAILURE		"io_failure"
 #define	FM_EREPORT_ZFS_PROBE_FAILURE		"probe_failure"
 #define	FM_EREPORT_ZFS_LOG_REPLAY		"log_replay"
 #define	FM_EREPORT_ZFS_CONFIG_CACHE_WRITE	"config_cache_write"
 
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL		"pool"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE	"pool_failmode"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_GUID	"pool_guid"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT	"pool_context"
 #define	FM_EREPORT_PAYLOAD_ZFS_POOL_STATE	"pool_state"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID	"vdev_guid"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE	"vdev_type"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH	"vdev_path"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH	"vdev_physpath"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH	"vdev_enc_sysfs_path"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID	"vdev_devid"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU		"vdev_fru"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE	"vdev_state"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE	"vdev_laststate"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT	"vdev_ashift"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS	"vdev_complete_ts"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS	"vdev_delta_ts"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS	"vdev_spare_paths"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS	"vdev_spare_guids"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS	"vdev_read_errors"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS "vdev_write_errors"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS "vdev_cksum_errors"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N	"vdev_cksum_n"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T	"vdev_cksum_t"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N	"vdev_io_n"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T	"vdev_io_t"
 #define	FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS	"vdev_delays"
 #define	FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID	"parent_guid"
 #define	FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE	"parent_type"
 #define	FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH	"parent_path"
 #define	FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID	"parent_devid"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET	"zio_objset"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT	"zio_object"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL	"zio_level"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID	"zio_blkid"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR		"zio_err"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET	"zio_offset"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE		"zio_size"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS	"zio_flags"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE	"zio_stage"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY	"zio_priority"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE	"zio_pipeline"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY	"zio_delay"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP	"zio_timestamp"
 #define	FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA	"zio_delta"
 #define	FM_EREPORT_PAYLOAD_ZFS_PREV_STATE	"prev_state"
 #define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED	"cksum_expected"
 #define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL	"cksum_actual"
 #define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO	"cksum_algorithm"
 #define	FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP	"cksum_byteswap"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS	"bad_range_sets"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS	"bad_range_clears"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS	"bad_set_bits"
 #define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS	"bad_cleared_bits"
-#define	FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
-#define	FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
 #define	FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME	"snapshot_name"
 #define	FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME	"device_name"
 #define	FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME	"raw_name"
 #define	FM_EREPORT_PAYLOAD_ZFS_VOLUME	"volume"
 
 #define	FM_EREPORT_FAILMODE_WAIT		"wait"
 #define	FM_EREPORT_FAILMODE_CONTINUE		"continue"
 #define	FM_EREPORT_FAILMODE_PANIC		"panic"
 
 #define	FM_RESOURCE_REMOVED			"removed"
 #define	FM_RESOURCE_AUTOREPLACE			"autoreplace"
 #define	FM_RESOURCE_STATECHANGE			"statechange"
 
 #define	FM_RESOURCE_ZFS_SNAPSHOT_MOUNT		"snapshot_mount"
 #define	FM_RESOURCE_ZFS_SNAPSHOT_UNMOUNT		"snapshot_unmount"
 #define	FM_RESOURCE_ZVOL_CREATE_SYMLINK		"zvol_create"
 #define	FM_RESOURCE_ZVOL_REMOVE_SYMLINK		"zvol_remove"
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_FM_FS_ZFS_H */
diff --git a/man/man8/zpool-events.8 b/man/man8/zpool-events.8
index 341f902fe66e..c3bd3208e63b 100644
--- a/man/man8/zpool-events.8
+++ b/man/man8/zpool-events.8
@@ -1,487 +1,470 @@
 .\"
 .\" CDDL HEADER START
 .\"
 .\" The contents of this file are subject to the terms of the
 .\" Common Development and Distribution License (the "License").
 .\" You may not use this file except in compliance with the License.
 .\"
 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 .\" or https://opensource.org/licenses/CDDL-1.0.
 .\" See the License for the specific language governing permissions
 .\" and limitations under the License.
 .\"
 .\" When distributing Covered Code, include this CDDL HEADER in each
 .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 .\" If applicable, add the following below this CDDL HEADER, with the
 .\" fields enclosed by brackets "[]" replaced with your own identifying
 .\" information: Portions Copyright [yyyy] [name of copyright owner]
 .\"
 .\" CDDL HEADER END
 .\"
 .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
 .\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 .\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
 .\" Copyright (c) 2017 Datto Inc.
 .\" Copyright (c) 2018 George Melikov. All Rights Reserved.
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd May 27, 2021
+.Dd July 11, 2023
 .Dt ZPOOL-EVENTS 8
 .Os
 .
 .Sh NAME
 .Nm zpool-events
 .Nd list recent events generated by kernel
 .Sh SYNOPSIS
 .Nm zpool
 .Cm events
 .Op Fl vHf
 .Op Ar pool
 .Nm zpool
 .Cm events
 .Fl c
 .
 .Sh DESCRIPTION
 Lists all recent events generated by the ZFS kernel modules.
 These events are consumed by the
 .Xr zed 8
 and used to automate administrative tasks such as replacing a failed device
 with a hot spare.
 For more information about the subclasses and event payloads
 that can be generated see
 .Sx EVENTS
 and the following sections.
 .
 .Sh OPTIONS
 .Bl -tag -compact -width Ds
 .It Fl c
 Clear all previous events.
 .It Fl f
 Follow mode.
 .It Fl H
 Scripted mode.
 Do not display headers, and separate fields by a
 single tab instead of arbitrary space.
 .It Fl v
 Print the entire payload for each event.
 .El
 .
 .Sh EVENTS
 These are the different event subclasses.
 The full event name would be
 .Sy ereport.fs.zfs.\& Ns Em SUBCLASS ,
 but only the last part is listed here.
 .Pp
 .Bl -tag -compact -width "vdev.bad_guid_sum"
 .It Sy checksum
 Issued when a checksum error has been detected.
 .It Sy io
 Issued when there is an I/O error in a vdev in the pool.
 .It Sy data
 Issued when there have been data errors in the pool.
 .It Sy deadman
 Issued when an I/O request is determined to be "hung", this can be caused
 by lost completion events due to flaky hardware or drivers.
 See
 .Sy zfs_deadman_failmode
 in
 .Xr zfs 4
 for additional information regarding "hung" I/O detection and configuration.
 .It Sy delay
 Issued when a completed I/O request exceeds the maximum allowed time
 specified by the
 .Sy zio_slow_io_ms
 module parameter.
 This can be an indicator of problems with the underlying storage device.
 The number of delay events is ratelimited by the
 .Sy zfs_slow_io_events_per_second
 module parameter.
 .It Sy config
 Issued every time a vdev change have been done to the pool.
 .It Sy zpool
 Issued when a pool cannot be imported.
 .It Sy zpool.destroy
 Issued when a pool is destroyed.
 .It Sy zpool.export
 Issued when a pool is exported.
 .It Sy zpool.import
 Issued when a pool is imported.
 .It Sy zpool.reguid
 Issued when a REGUID (new unique identifier for the pool have been regenerated)
 have been detected.
 .It Sy vdev.unknown
 Issued when the vdev is unknown.
 Such as trying to clear device errors on a vdev that have failed/been kicked
 from the system/pool and is no longer available.
 .It Sy vdev.open_failed
 Issued when a vdev could not be opened (because it didn't exist for example).
 .It Sy vdev.corrupt_data
 Issued when corrupt data have been detected on a vdev.
 .It Sy vdev.no_replicas
 Issued when there are no more replicas to sustain the pool.
 This would lead to the pool being
 .Em DEGRADED .
 .It Sy vdev.bad_guid_sum
 Issued when a missing device in the pool have been detected.
 .It Sy vdev.too_small
 Issued when the system (kernel) have removed a device, and ZFS
 notices that the device isn't there any more.
 This is usually followed by a
 .Sy probe_failure
 event.
 .It Sy vdev.bad_label
 Issued when the label is OK but invalid.
 .It Sy vdev.bad_ashift
 Issued when the ashift alignment requirement has increased.
 .It Sy vdev.remove
 Issued when a vdev is detached from a mirror (or a spare detached from a
 vdev where it have been used to replace a failed drive - only works if
 the original drive have been re-added).
 .It Sy vdev.clear
 Issued when clearing device errors in a pool.
 Such as running
 .Nm zpool Cm clear
 on a device in the pool.
 .It Sy vdev.check
 Issued when a check to see if a given vdev could be opened is started.
 .It Sy vdev.spare
 Issued when a spare have kicked in to replace a failed device.
 .It Sy vdev.autoexpand
 Issued when a vdev can be automatically expanded.
 .It Sy io_failure
 Issued when there is an I/O failure in a vdev in the pool.
 .It Sy probe_failure
 Issued when a probe fails on a vdev.
 This would occur if a vdev
 have been kicked from the system outside of ZFS (such as the kernel
 have removed the device).
 .It Sy log_replay
 Issued when the intent log cannot be replayed.
 The can occur in the case of a missing or damaged log device.
 .It Sy resilver.start
 Issued when a resilver is started.
 .It Sy resilver.finish
 Issued when the running resilver have finished.
 .It Sy scrub.start
 Issued when a scrub is started on a pool.
 .It Sy scrub.finish
 Issued when a pool has finished scrubbing.
 .It Sy scrub.abort
 Issued when a scrub is aborted on a pool.
 .It Sy scrub.resume
 Issued when a scrub is resumed on a pool.
 .It Sy scrub.paused
 Issued when a scrub is paused on a pool.
 .It Sy bootfs.vdev.attach
 .El
 .
 .Sh PAYLOADS
 This is the payload (data, information) that accompanies an
 event.
 .Pp
 For
 .Xr zed 8 ,
 these are set to uppercase and prefixed with
 .Sy ZEVENT_ .
 .Pp
 .Bl -tag -compact -width "vdev_cksum_errors"
 .It Sy pool
 Pool name.
 .It Sy pool_failmode
 Failmode -
 .Sy wait ,
 .Sy continue ,
 or
 .Sy panic .
 See the
 .Sy failmode
 property in
 .Xr zpoolprops 7
 for more information.
 .It Sy pool_guid
 The GUID of the pool.
 .It Sy pool_context
 The load state for the pool (0=none, 1=open, 2=import, 3=tryimport, 4=recover
 5=error).
 .It Sy vdev_guid
 The GUID of the vdev in question (the vdev failing or operated upon with
 .Nm zpool Cm clear ,
 etc.).
 .It Sy vdev_type
 Type of vdev -
 .Sy disk ,
 .Sy file ,
 .Sy mirror ,
 etc.
 See the
 .Sy Virtual Devices
 section of
 .Xr zpoolconcepts 7
 for more information on possible values.
 .It Sy vdev_path
 Full path of the vdev, including any
 .Em -partX .
 .It Sy vdev_devid
 ID of vdev (if any).
 .It Sy vdev_fru
 Physical FRU location.
 .It Sy vdev_state
 State of vdev (0=uninitialized, 1=closed, 2=offline, 3=removed, 4=failed to
 open, 5=faulted, 6=degraded, 7=healthy).
 .It Sy vdev_ashift
 The ashift value of the vdev.
 .It Sy vdev_complete_ts
 The time the last I/O request completed for the specified vdev.
 .It Sy vdev_delta_ts
 The time since the last I/O request completed for the specified vdev.
 .It Sy vdev_spare_paths
 List of spares, including full path and any
 .Em -partX .
 .It Sy vdev_spare_guids
 GUID(s) of spares.
 .It Sy vdev_read_errors
 How many read errors that have been detected on the vdev.
 .It Sy vdev_write_errors
 How many write errors that have been detected on the vdev.
 .It Sy vdev_cksum_errors
 How many checksum errors that have been detected on the vdev.
 .It Sy parent_guid
 GUID of the vdev parent.
 .It Sy parent_type
 Type of parent.
 See
 .Sy vdev_type .
 .It Sy parent_path
 Path of the vdev parent (if any).
 .It Sy parent_devid
 ID of the vdev parent (if any).
 .It Sy zio_objset
 The object set number for a given I/O request.
 .It Sy zio_object
 The object number for a given I/O request.
 .It Sy zio_level
 The indirect level for the block.
 Level 0 is the lowest level and includes data blocks.
 Values > 0 indicate metadata blocks at the appropriate level.
 .It Sy zio_blkid
 The block ID for a given I/O request.
 .It Sy zio_err
 The error number for a failure when handling a given I/O request,
 compatible with
 .Xr errno 3
 with the value of
 .Sy EBADE
 used to indicate a ZFS checksum error.
 .It Sy zio_offset
 The offset in bytes of where to write the I/O request for the specified vdev.
 .It Sy zio_size
 The size in bytes of the I/O request.
 .It Sy zio_flags
 The current flags describing how the I/O request should be handled.
 See the
 .Sy I/O FLAGS
 section for the full list of I/O flags.
 .It Sy zio_stage
 The current stage of the I/O in the pipeline.
 See the
 .Sy I/O STAGES
 section for a full list of all the I/O stages.
 .It Sy zio_pipeline
 The valid pipeline stages for the I/O.
 See the
 .Sy I/O STAGES
 section for a full list of all the I/O stages.
 .It Sy zio_delay
 The time elapsed (in nanoseconds) waiting for the block layer to complete the
 I/O request.
 Unlike
 .Sy zio_delta ,
 this does not include any vdev queuing time and is
 therefore solely a measure of the block layer performance.
 .It Sy zio_timestamp
 The time when a given I/O request was submitted.
 .It Sy zio_delta
 The time required to service a given I/O request.
 .It Sy prev_state
 The previous state of the vdev.
 .It Sy cksum_expected
 The expected checksum value for the block.
 .It Sy cksum_actual
 The actual checksum value for an errant block.
 .It Sy cksum_algorithm
 Checksum algorithm used.
 See
 .Xr zfsprops 7
 for more information on the available checksum algorithms.
 .It Sy cksum_byteswap
 Whether or not the data is byteswapped.
 .It Sy bad_ranges
 .No [\& Ns Ar start , end )
 pairs of corruption offsets.
 Offsets are always aligned on a 64-bit boundary,
 and can include some gaps of non-corruption.
 (See
 .Sy bad_ranges_min_gap )
 .It Sy bad_ranges_min_gap
 In order to bound the size of the
 .Sy bad_ranges
 array, gaps of non-corruption
 less than or equal to
 .Sy bad_ranges_min_gap
 bytes have been merged with
 adjacent corruption.
 Always at least 8 bytes, since corruption is detected on a 64-bit word basis.
 .It Sy bad_range_sets
 This array has one element per range in
 .Sy bad_ranges .
 Each element contains
 the count of bits in that range which were clear in the good data and set
 in the bad data.
 .It Sy bad_range_clears
 This array has one element per range in
 .Sy bad_ranges .
 Each element contains
 the count of bits for that range which were set in the good data and clear in
 the bad data.
 .It Sy bad_set_bits
 If this field exists, it is an array of
 .Pq Ar bad data No & ~( Ns Ar good data ) ;
 that is, the bits set in the bad data which are cleared in the good data.
 Each element corresponds a byte whose offset is in a range in
 .Sy bad_ranges ,
 and the array is ordered by offset.
 Thus, the first element is the first byte in the first
 .Sy bad_ranges
 range, and the last element is the last byte in the last
 .Sy bad_ranges
 range.
 .It Sy bad_cleared_bits
 Like
 .Sy bad_set_bits ,
 but contains
 .Pq Ar good data No & ~( Ns Ar bad data ) ;
 that is, the bits set in the good data which are cleared in the bad data.
-.It Sy bad_set_histogram
-If this field exists, it is an array of counters.
-Each entry counts bits set in a particular bit of a big-endian uint64 type.
-The first entry counts bits
-set in the high-order bit of the first byte, the 9th byte, etc, and the last
-entry counts bits set of the low-order bit of the 8th byte, the 16th byte, etc.
-This information is useful for observing a stuck bit in a parallel data path,
-such as IDE or parallel SCSI.
-.It Sy bad_cleared_histogram
-If this field exists, it is an array of counters.
-Each entry counts bit clears in a particular bit of a big-endian uint64 type.
-The first entry counts bits
-clears of the high-order bit of the first byte, the 9th byte, etc, and the
-last entry counts clears of the low-order bit of the 8th byte, the 16th byte,
-etc.
-This information is useful for observing a stuck bit in a parallel data
-path, such as IDE or parallel SCSI.
 .El
 .
 .Sh I/O STAGES
 The ZFS I/O pipeline is comprised of various stages which are defined below.
 The individual stages are used to construct these basic I/O
 operations: Read, Write, Free, Claim, and Ioctl.
 These stages may be
 set on an event to describe the life cycle of a given I/O request.
 .Pp
 .TS
 tab(:);
 l l l .
 Stage:Bit Mask:Operations
 _:_:_
 ZIO_STAGE_OPEN:0x00000001:RWFCI
 
 ZIO_STAGE_READ_BP_INIT:0x00000002:R----
 ZIO_STAGE_WRITE_BP_INIT:0x00000004:-W---
 ZIO_STAGE_FREE_BP_INIT:0x00000008:--F--
 ZIO_STAGE_ISSUE_ASYNC:0x00000010:RWF--
 ZIO_STAGE_WRITE_COMPRESS:0x00000020:-W---
 
 ZIO_STAGE_ENCRYPT:0x00000040:-W---
 ZIO_STAGE_CHECKSUM_GENERATE:0x00000080:-W---
 
 ZIO_STAGE_NOP_WRITE:0x00000100:-W---
 
 ZIO_STAGE_BRT_FREE:0x00000200:--F--
 
 ZIO_STAGE_DDT_READ_START:0x00000400:R----
 ZIO_STAGE_DDT_READ_DONE:0x00000800:R----
 ZIO_STAGE_DDT_WRITE:0x00001000:-W---
 ZIO_STAGE_DDT_FREE:0x00002000:--F--
 
 ZIO_STAGE_GANG_ASSEMBLE:0x00004000:RWFC-
 ZIO_STAGE_GANG_ISSUE:0x00008000:RWFC-
 
 ZIO_STAGE_DVA_THROTTLE:0x00010000:-W---
 ZIO_STAGE_DVA_ALLOCATE:0x00020000:-W---
 ZIO_STAGE_DVA_FREE:0x00040000:--F--
 ZIO_STAGE_DVA_CLAIM:0x00080000:---C-
 
 ZIO_STAGE_READY:0x00100000:RWFCI
 
 ZIO_STAGE_VDEV_IO_START:0x00200000:RW--I
 ZIO_STAGE_VDEV_IO_DONE:0x00400000:RW--I
 ZIO_STAGE_VDEV_IO_ASSESS:0x00800000:RW--I
 
 ZIO_STAGE_CHECKSUM_VERIFY:0x01000000:R----
 
 ZIO_STAGE_DONE:0x02000000:RWFCI
 .TE
 .
 .Sh I/O FLAGS
 Every I/O request in the pipeline contains a set of flags which describe its
 function and are used to govern its behavior.
 These flags will be set in an event as a
 .Sy zio_flags
 payload entry.
 .Pp
 .TS
 tab(:);
 l l .
 Flag:Bit Mask
 _:_
 ZIO_FLAG_DONT_AGGREGATE:0x00000001
 ZIO_FLAG_IO_REPAIR:0x00000002
 ZIO_FLAG_SELF_HEAL:0x00000004
 ZIO_FLAG_RESILVER:0x00000008
 ZIO_FLAG_SCRUB:0x00000010
 ZIO_FLAG_SCAN_THREAD:0x00000020
 ZIO_FLAG_PHYSICAL:0x00000040
 
 ZIO_FLAG_CANFAIL:0x00000080
 ZIO_FLAG_SPECULATIVE:0x00000100
 ZIO_FLAG_CONFIG_WRITER:0x00000200
 ZIO_FLAG_DONT_RETRY:0x00000400
 ZIO_FLAG_NODATA:0x00001000
 ZIO_FLAG_INDUCE_DAMAGE:0x00002000
 
 ZIO_FLAG_IO_ALLOCATING:0x00004000
 ZIO_FLAG_IO_RETRY:0x00008000
 ZIO_FLAG_PROBE:0x00010000
 ZIO_FLAG_TRYHARD:0x00020000
 ZIO_FLAG_OPTIONAL:0x00040000
 
 ZIO_FLAG_DONT_QUEUE:0x00080000
 ZIO_FLAG_DONT_PROPAGATE:0x00100000
 ZIO_FLAG_IO_BYPASS:0x00200000
 ZIO_FLAG_IO_REWRITE:0x00400000
 ZIO_FLAG_RAW_COMPRESS:0x00800000
 ZIO_FLAG_RAW_ENCRYPT:0x01000000
 
 ZIO_FLAG_GANG_CHILD:0x02000000
 ZIO_FLAG_DDT_CHILD:0x04000000
 ZIO_FLAG_GODFATHER:0x08000000
 ZIO_FLAG_NOPWRITE:0x10000000
 ZIO_FLAG_REEXECUTED:0x20000000
 ZIO_FLAG_DELEGATED:0x40000000
 ZIO_FLAG_FASTWRITE:0x80000000
 .TE
 .
 .Sh SEE ALSO
 .Xr zfs 4 ,
 .Xr zed 8 ,
 .Xr zpool-wait 8
diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c
index 2754ceec83ca..9365ca500d7d 100644
--- a/module/zfs/zfs_fm.c
+++ b/module/zfs/zfs_fm.c
@@ -1,1597 +1,1580 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
  * Copyright (c) 2012,2021 by Delphix. All rights reserved.
  */
 
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/zio.h>
 #include <sys/zio_checksum.h>
 
 #include <sys/fm/fs/zfs.h>
 #include <sys/fm/protocol.h>
 #include <sys/fm/util.h>
 #include <sys/sysevent.h>
 
 /*
  * This general routine is responsible for generating all the different ZFS
  * ereports.  The payload is dependent on the class, and which arguments are
  * supplied to the function:
  *
  * 	EREPORT			POOL	VDEV	IO
  * 	block			X	X	X
  * 	data			X		X
  * 	device			X	X
  * 	pool			X
  *
  * If we are in a loading state, all errors are chained together by the same
  * SPA-wide ENA (Error Numeric Association).
  *
  * For isolated I/O requests, we get the ENA from the zio_t. The propagation
  * gets very complicated due to RAID-Z, gang blocks, and vdev caching.  We want
  * to chain together all ereports associated with a logical piece of data.  For
  * read I/Os, there  are basically three 'types' of I/O, which form a roughly
  * layered diagram:
  *
  * 	+---------------+
  * 	| Aggregate I/O |	No associated logical data or device
  * 	+---------------+
  *              |
  *              V
  * 	+---------------+	Reads associated with a piece of logical data.
  * 	|   Read I/O    |	This includes reads on behalf of RAID-Z,
  * 	+---------------+       mirrors, gang blocks, retries, etc.
  *              |
  *              V
  * 	+---------------+	Reads associated with a particular device, but
  * 	| Physical I/O  |	no logical data.  Issued as part of vdev caching
  * 	+---------------+	and I/O aggregation.
  *
  * Note that 'physical I/O' here is not the same terminology as used in the rest
  * of ZIO.  Typically, 'physical I/O' simply means that there is no attached
  * blockpointer.  But I/O with no associated block pointer can still be related
  * to a logical piece of data (i.e. RAID-Z requests).
  *
  * Purely physical I/O always have unique ENAs.  They are not related to a
  * particular piece of logical data, and therefore cannot be chained together.
  * We still generate an ereport, but the DE doesn't correlate it with any
  * logical piece of data.  When such an I/O fails, the delegated I/O requests
  * will issue a retry, which will trigger the 'real' ereport with the correct
  * ENA.
  *
  * We keep track of the ENA for a ZIO chain through the 'io_logical' member.
  * When a new logical I/O is issued, we set this to point to itself.  Child I/Os
  * then inherit this pointer, so that when it is first set subsequent failures
  * will use the same ENA.  For vdev cache fill and queue aggregation I/O,
  * this pointer is set to NULL, and no ereport will be generated (since it
  * doesn't actually correspond to any particular device or piece of data,
  * and the caller will always retry without caching or queueing anyway).
  *
  * For checksum errors, we want to include more information about the actual
  * error which occurs.  Accordingly, we build an ereport when the error is
  * noticed, but instead of sending it in immediately, we hang it off of the
  * io_cksum_report field of the logical IO.  When the logical IO completes
  * (successfully or not), zfs_ereport_finish_checksum() is called with the
  * good and bad versions of the buffer (if available), and we annotate the
  * ereport with information about the differences.
  */
 
 #ifdef _KERNEL
 /*
  * Duplicate ereport Detection
  *
  * Some ereports are retained momentarily for detecting duplicates.  These
  * are kept in a recent_events_node_t in both a time-ordered list and an AVL
  * tree of recent unique ereports.
  *
  * The lifespan of these recent ereports is bounded (15 mins) and a cleaner
  * task is used to purge stale entries.
  */
 static list_t recent_events_list;
 static avl_tree_t recent_events_tree;
 static kmutex_t recent_events_lock;
 static taskqid_t recent_events_cleaner_tqid;
 
 /*
  * Each node is about 128 bytes so 2,000 would consume 1/4 MiB.
  *
  * This setting can be changed dynamically and setting it to zero
  * disables duplicate detection.
  */
 static unsigned int zfs_zevent_retain_max = 2000;
 
 /*
  * The lifespan for a recent ereport entry. The default of 15 minutes is
  * intended to outlive the zfs diagnosis engine's threshold of 10 errors
  * over a period of 10 minutes.
  */
 static unsigned int zfs_zevent_retain_expire_secs = 900;
 
 typedef enum zfs_subclass {
 	ZSC_IO,
 	ZSC_DATA,
 	ZSC_CHECKSUM
 } zfs_subclass_t;
 
 typedef struct {
 	/* common criteria */
 	uint64_t	re_pool_guid;
 	uint64_t	re_vdev_guid;
 	int		re_io_error;
 	uint64_t	re_io_size;
 	uint64_t	re_io_offset;
 	zfs_subclass_t	re_subclass;
 	zio_priority_t	re_io_priority;
 
 	/* logical zio criteria (optional) */
 	zbookmark_phys_t re_io_bookmark;
 
 	/* internal state */
 	avl_node_t	re_tree_link;
 	list_node_t	re_list_link;
 	uint64_t	re_timestamp;
 } recent_events_node_t;
 
 static int
 recent_events_compare(const void *a, const void *b)
 {
 	const recent_events_node_t *node1 = a;
 	const recent_events_node_t *node2 = b;
 	int cmp;
 
 	/*
 	 * The comparison order here is somewhat arbitrary.
 	 * What's important is that if every criteria matches, then it
 	 * is a duplicate (i.e. compare returns 0)
 	 */
 	if ((cmp = TREE_CMP(node1->re_subclass, node2->re_subclass)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_pool_guid, node2->re_pool_guid)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_vdev_guid, node2->re_vdev_guid)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_io_error, node2->re_io_error)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_io_priority, node2->re_io_priority)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_io_size, node2->re_io_size)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(node1->re_io_offset, node2->re_io_offset)) != 0)
 		return (cmp);
 
 	const zbookmark_phys_t *zb1 = &node1->re_io_bookmark;
 	const zbookmark_phys_t *zb2 = &node2->re_io_bookmark;
 
 	if ((cmp = TREE_CMP(zb1->zb_objset, zb2->zb_objset)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(zb1->zb_object, zb2->zb_object)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(zb1->zb_level, zb2->zb_level)) != 0)
 		return (cmp);
 	if ((cmp = TREE_CMP(zb1->zb_blkid, zb2->zb_blkid)) != 0)
 		return (cmp);
 
 	return (0);
 }
 
 /*
  * workaround: vdev properties don't have inheritance
  */
 static uint64_t
 vdev_prop_get_inherited(vdev_t *vd, vdev_prop_t prop)
 {
 	uint64_t propdef, propval;
 
 	propdef = vdev_prop_default_numeric(prop);
 	switch (prop) {
 		case VDEV_PROP_CHECKSUM_N:
 			propval = vd->vdev_checksum_n;
 			break;
 		case VDEV_PROP_CHECKSUM_T:
 			propval = vd->vdev_checksum_t;
 			break;
 		case VDEV_PROP_IO_N:
 			propval = vd->vdev_io_n;
 			break;
 		case VDEV_PROP_IO_T:
 			propval = vd->vdev_io_t;
 			break;
 		default:
 			propval = propdef;
 			break;
 	}
 
 	if (propval != propdef)
 		return (propval);
 
 	if (vd->vdev_parent == NULL)
 		return (propdef);
 
 	return (vdev_prop_get_inherited(vd->vdev_parent, prop));
 }
 
 static void zfs_ereport_schedule_cleaner(void);
 
 /*
  * background task to clean stale recent event nodes.
  */
 static void
 zfs_ereport_cleaner(void *arg)
 {
 	recent_events_node_t *entry;
 	uint64_t now = gethrtime();
 
 	/*
 	 * purge expired entries
 	 */
 	mutex_enter(&recent_events_lock);
 	while ((entry = list_tail(&recent_events_list)) != NULL) {
 		uint64_t age = NSEC2SEC(now - entry->re_timestamp);
 		if (age <= zfs_zevent_retain_expire_secs)
 			break;
 
 		/* remove expired node */
 		avl_remove(&recent_events_tree, entry);
 		list_remove(&recent_events_list, entry);
 		kmem_free(entry, sizeof (*entry));
 	}
 
 	/* Restart the cleaner if more entries remain */
 	recent_events_cleaner_tqid = 0;
 	if (!list_is_empty(&recent_events_list))
 		zfs_ereport_schedule_cleaner();
 
 	mutex_exit(&recent_events_lock);
 }
 
 static void
 zfs_ereport_schedule_cleaner(void)
 {
 	ASSERT(MUTEX_HELD(&recent_events_lock));
 
 	uint64_t timeout = SEC2NSEC(zfs_zevent_retain_expire_secs + 1);
 
 	recent_events_cleaner_tqid = taskq_dispatch_delay(
 	    system_delay_taskq, zfs_ereport_cleaner, NULL, TQ_SLEEP,
 	    ddi_get_lbolt() + NSEC_TO_TICK(timeout));
 }
 
 /*
  * Clear entries for a given vdev or all vdevs in a pool when vdev == NULL
  */
 void
 zfs_ereport_clear(spa_t *spa, vdev_t *vd)
 {
 	uint64_t vdev_guid, pool_guid;
 
 	ASSERT(vd != NULL || spa != NULL);
 	if (vd == NULL) {
 		vdev_guid = 0;
 		pool_guid = spa_guid(spa);
 	} else {
 		vdev_guid = vd->vdev_guid;
 		pool_guid = 0;
 	}
 
 	mutex_enter(&recent_events_lock);
 
 	recent_events_node_t *next = list_head(&recent_events_list);
 	while (next != NULL) {
 		recent_events_node_t *entry = next;
 
 		next = list_next(&recent_events_list, next);
 
 		if (entry->re_vdev_guid == vdev_guid ||
 		    entry->re_pool_guid == pool_guid) {
 			avl_remove(&recent_events_tree, entry);
 			list_remove(&recent_events_list, entry);
 			kmem_free(entry, sizeof (*entry));
 		}
 	}
 
 	mutex_exit(&recent_events_lock);
 }
 
 /*
  * Check if an ereport would be a duplicate of one recently posted.
  *
  * An ereport is considered a duplicate if the set of criteria in
  * recent_events_node_t all match.
  *
  * Only FM_EREPORT_ZFS_IO, FM_EREPORT_ZFS_DATA, and FM_EREPORT_ZFS_CHECKSUM
  * are candidates for duplicate checking.
  */
 static boolean_t
 zfs_ereport_is_duplicate(const char *subclass, spa_t *spa, vdev_t *vd,
     const zbookmark_phys_t *zb, zio_t *zio, uint64_t offset, uint64_t size)
 {
 	recent_events_node_t search = {0}, *entry;
 
 	if (vd == NULL || zio == NULL)
 		return (B_FALSE);
 
 	if (zfs_zevent_retain_max == 0)
 		return (B_FALSE);
 
 	if (strcmp(subclass, FM_EREPORT_ZFS_IO) == 0)
 		search.re_subclass = ZSC_IO;
 	else if (strcmp(subclass, FM_EREPORT_ZFS_DATA) == 0)
 		search.re_subclass = ZSC_DATA;
 	else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0)
 		search.re_subclass = ZSC_CHECKSUM;
 	else
 		return (B_FALSE);
 
 	search.re_pool_guid = spa_guid(spa);
 	search.re_vdev_guid = vd->vdev_guid;
 	search.re_io_error = zio->io_error;
 	search.re_io_priority = zio->io_priority;
 	/* if size is supplied use it over what's in zio */
 	if (size) {
 		search.re_io_size = size;
 		search.re_io_offset = offset;
 	} else {
 		search.re_io_size = zio->io_size;
 		search.re_io_offset = zio->io_offset;
 	}
 
 	/* grab optional logical zio criteria */
 	if (zb != NULL) {
 		search.re_io_bookmark.zb_objset = zb->zb_objset;
 		search.re_io_bookmark.zb_object = zb->zb_object;
 		search.re_io_bookmark.zb_level = zb->zb_level;
 		search.re_io_bookmark.zb_blkid = zb->zb_blkid;
 	}
 
 	uint64_t now = gethrtime();
 
 	mutex_enter(&recent_events_lock);
 
 	/* check if we have seen this one recently */
 	entry = avl_find(&recent_events_tree, &search, NULL);
 	if (entry != NULL) {
 		uint64_t age = NSEC2SEC(now - entry->re_timestamp);
 
 		/*
 		 * There is still an active cleaner (since we're here).
 		 * Reset the last seen time for this duplicate entry
 		 * so that its lifespand gets extended.
 		 */
 		list_remove(&recent_events_list, entry);
 		list_insert_head(&recent_events_list, entry);
 		entry->re_timestamp = now;
 
 		zfs_zevent_track_duplicate();
 		mutex_exit(&recent_events_lock);
 
 		return (age <= zfs_zevent_retain_expire_secs);
 	}
 
 	if (avl_numnodes(&recent_events_tree) >= zfs_zevent_retain_max) {
 		/* recycle oldest node */
 		entry = list_tail(&recent_events_list);
 		ASSERT(entry != NULL);
 		list_remove(&recent_events_list, entry);
 		avl_remove(&recent_events_tree, entry);
 	} else {
 		entry = kmem_alloc(sizeof (recent_events_node_t), KM_SLEEP);
 	}
 
 	/* record this as a recent ereport */
 	*entry = search;
 	avl_add(&recent_events_tree, entry);
 	list_insert_head(&recent_events_list, entry);
 	entry->re_timestamp = now;
 
 	/* Start a cleaner if not already scheduled */
 	if (recent_events_cleaner_tqid == 0)
 		zfs_ereport_schedule_cleaner();
 
 	mutex_exit(&recent_events_lock);
 	return (B_FALSE);
 }
 
 void
 zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
 {
 	if (nvl)
 		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 
 	if (detector)
 		fm_nvlist_destroy(detector, FM_NVA_FREE);
 }
 
 /*
  * We want to rate limit ZIO delay, deadman, and checksum events so as to not
  * flood zevent consumers when a disk is acting up.
  *
  * Returns 1 if we're ratelimiting, 0 if not.
  */
 static int
 zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
 {
 	int rc = 0;
 	/*
 	 * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
 	 * are.  Invert it to get our return value.
 	 */
 	if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_delay_rl);
 	} else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
 	} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
 	}
 
 	if (rc)	{
 		/* We're rate limiting */
 		fm_erpt_dropped_increment();
 	}
 
 	return (rc);
 }
 
 /*
  * Return B_TRUE if the event actually posted, B_FALSE if not.
  */
 static boolean_t
 zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
     const char *subclass, spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     zio_t *zio, uint64_t stateoroffset, uint64_t size)
 {
 	nvlist_t *ereport, *detector;
 
 	uint64_t ena;
 	char class[64];
 
 	if ((ereport = fm_nvlist_create(NULL)) == NULL)
 		return (B_FALSE);
 
 	if ((detector = fm_nvlist_create(NULL)) == NULL) {
 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
 		return (B_FALSE);
 	}
 
 	/*
 	 * Serialize ereport generation
 	 */
 	mutex_enter(&spa->spa_errlist_lock);
 
 	/*
 	 * Determine the ENA to use for this event.  If we are in a loading
 	 * state, use a SPA-wide ENA.  Otherwise, if we are in an I/O state, use
 	 * a root zio-wide ENA.  Otherwise, simply use a unique ENA.
 	 */
 	if (spa_load_state(spa) != SPA_LOAD_NONE) {
 		if (spa->spa_ena == 0)
 			spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1);
 		ena = spa->spa_ena;
 	} else if (zio != NULL && zio->io_logical != NULL) {
 		if (zio->io_logical->io_ena == 0)
 			zio->io_logical->io_ena =
 			    fm_ena_generate(0, FM_ENA_FMT1);
 		ena = zio->io_logical->io_ena;
 	} else {
 		ena = fm_ena_generate(0, FM_ENA_FMT1);
 	}
 
 	/*
 	 * Construct the full class, detector, and other standard FMA fields.
 	 */
 	(void) snprintf(class, sizeof (class), "%s.%s",
 	    ZFS_ERROR_CLASS, subclass);
 
 	fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa),
 	    vd != NULL ? vd->vdev_guid : 0);
 
 	fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL);
 
 	/*
 	 * Construct the per-ereport payload, depending on which parameters are
 	 * passed in.
 	 */
 
 	/*
 	 * Generic payload members common to all ereports.
 	 */
 	fm_payload_set(ereport,
 	    FM_EREPORT_PAYLOAD_ZFS_POOL, DATA_TYPE_STRING, spa_name(spa),
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, DATA_TYPE_UINT64, spa_guid(spa),
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_STATE, DATA_TYPE_UINT64,
 	    (uint64_t)spa_state(spa),
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
 	    (int32_t)spa_load_state(spa), NULL);
 
 	fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
 	    DATA_TYPE_STRING,
 	    spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
 	    FM_EREPORT_FAILMODE_WAIT :
 	    spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
 	    FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
 	    NULL);
 
 	if (vd != NULL) {
 		vdev_t *pvd = vd->vdev_parent;
 		vdev_queue_t *vq = &vd->vdev_queue;
 		vdev_stat_t *vs = &vd->vdev_stat;
 		vdev_t *spare_vd;
 		uint64_t *spare_guids;
 		char **spare_paths;
 		int i, spare_count;
 
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
 		    DATA_TYPE_UINT64, vd->vdev_guid,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
 		    DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL);
 		if (vd->vdev_path != NULL)
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
 			    DATA_TYPE_STRING, vd->vdev_path, NULL);
 		if (vd->vdev_devid != NULL)
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID,
 			    DATA_TYPE_STRING, vd->vdev_devid, NULL);
 		if (vd->vdev_fru != NULL)
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU,
 			    DATA_TYPE_STRING, vd->vdev_fru, NULL);
 		if (vd->vdev_enc_sysfs_path != NULL)
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
 			    DATA_TYPE_STRING, vd->vdev_enc_sysfs_path, NULL);
 		if (vd->vdev_ashift)
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT,
 			    DATA_TYPE_UINT64, vd->vdev_ashift, NULL);
 
 		if (vq != NULL) {
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS,
 			    DATA_TYPE_UINT64, vq->vq_io_complete_ts, NULL);
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS,
 			    DATA_TYPE_UINT64, vq->vq_io_delta_ts, NULL);
 		}
 
 		if (vs != NULL) {
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS,
 			    DATA_TYPE_UINT64, vs->vs_read_errors,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS,
 			    DATA_TYPE_UINT64, vs->vs_write_errors,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS,
 			    DATA_TYPE_UINT64, vs->vs_checksum_errors,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_DELAYS,
 			    DATA_TYPE_UINT64, vs->vs_slow_ios,
 			    NULL);
 		}
 
 		if (pvd != NULL) {
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID,
 			    DATA_TYPE_UINT64, pvd->vdev_guid,
 			    FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE,
 			    DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type,
 			    NULL);
 			if (pvd->vdev_path)
 				fm_payload_set(ereport,
 				    FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH,
 				    DATA_TYPE_STRING, pvd->vdev_path, NULL);
 			if (pvd->vdev_devid)
 				fm_payload_set(ereport,
 				    FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID,
 				    DATA_TYPE_STRING, pvd->vdev_devid, NULL);
 		}
 
 		spare_count = spa->spa_spares.sav_count;
 		spare_paths = kmem_zalloc(sizeof (char *) * spare_count,
 		    KM_SLEEP);
 		spare_guids = kmem_zalloc(sizeof (uint64_t) * spare_count,
 		    KM_SLEEP);
 
 		for (i = 0; i < spare_count; i++) {
 			spare_vd = spa->spa_spares.sav_vdevs[i];
 			if (spare_vd) {
 				spare_paths[i] = spare_vd->vdev_path;
 				spare_guids[i] = spare_vd->vdev_guid;
 			}
 		}
 
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS,
 		    DATA_TYPE_STRING_ARRAY, spare_count, spare_paths,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS,
 		    DATA_TYPE_UINT64_ARRAY, spare_count, spare_guids, NULL);
 
 		kmem_free(spare_guids, sizeof (uint64_t) * spare_count);
 		kmem_free(spare_paths, sizeof (char *) * spare_count);
 	}
 
 	if (zio != NULL) {
 		/*
 		 * Payload common to all I/Os.
 		 */
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR,
 		    DATA_TYPE_INT32, zio->io_error, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS,
 		    DATA_TYPE_INT32, zio->io_flags, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE,
 		    DATA_TYPE_UINT32, zio->io_stage, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE,
 		    DATA_TYPE_UINT32, zio->io_pipeline, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY,
 		    DATA_TYPE_UINT64, zio->io_delay, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP,
 		    DATA_TYPE_UINT64, zio->io_timestamp, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA,
 		    DATA_TYPE_UINT64, zio->io_delta, NULL);
 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PRIORITY,
 		    DATA_TYPE_UINT32, zio->io_priority, NULL);
 
 		/*
 		 * If the 'size' parameter is non-zero, it indicates this is a
 		 * RAID-Z or other I/O where the physical offset and length are
 		 * provided for us, instead of within the zio_t.
 		 */
 		if (vd != NULL) {
 			if (size)
 				fm_payload_set(ereport,
 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 				    DATA_TYPE_UINT64, stateoroffset,
 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 				    DATA_TYPE_UINT64, size, NULL);
 			else
 				fm_payload_set(ereport,
 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET,
 				    DATA_TYPE_UINT64, zio->io_offset,
 				    FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE,
 				    DATA_TYPE_UINT64, zio->io_size, NULL);
 		}
 	} else if (vd != NULL) {
 		/*
 		 * If we have a vdev but no zio, this is a device fault, and the
 		 * 'stateoroffset' parameter indicates the previous state of the
 		 * vdev.
 		 */
 		fm_payload_set(ereport,
 		    FM_EREPORT_PAYLOAD_ZFS_PREV_STATE,
 		    DATA_TYPE_UINT64, stateoroffset, NULL);
 	}
 
 	/*
 	 * Payload for I/Os with corresponding logical information.
 	 */
 	if (zb != NULL && (zio == NULL || zio->io_logical != NULL)) {
 		fm_payload_set(ereport,
 		    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET,
 		    DATA_TYPE_UINT64, zb->zb_objset,
 		    FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT,
 		    DATA_TYPE_UINT64, zb->zb_object,
 		    FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL,
 		    DATA_TYPE_INT64, zb->zb_level,
 		    FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID,
 		    DATA_TYPE_UINT64, zb->zb_blkid, NULL);
 	}
 
 	/*
 	 * Payload for tuning the zed
 	 */
 	if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
 		uint64_t cksum_n, cksum_t;
 
 		cksum_n = vdev_prop_get_inherited(vd, VDEV_PROP_CHECKSUM_N);
 		if (cksum_n != vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_N))
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_N,
 			    DATA_TYPE_UINT64,
 			    cksum_n,
 			    NULL);
 
 		cksum_t = vdev_prop_get_inherited(vd, VDEV_PROP_CHECKSUM_T);
 		if (cksum_t != vdev_prop_default_numeric(VDEV_PROP_CHECKSUM_T))
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_T,
 			    DATA_TYPE_UINT64,
 			    cksum_t,
 			    NULL);
 	}
 
 	if (vd != NULL && strcmp(subclass, FM_EREPORT_ZFS_IO) == 0) {
 		uint64_t io_n, io_t;
 
 		io_n = vdev_prop_get_inherited(vd, VDEV_PROP_IO_N);
 		if (io_n != vdev_prop_default_numeric(VDEV_PROP_IO_N))
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_N,
 			    DATA_TYPE_UINT64,
 			    io_n,
 			    NULL);
 
 		io_t = vdev_prop_get_inherited(vd, VDEV_PROP_IO_T);
 		if (io_t != vdev_prop_default_numeric(VDEV_PROP_IO_T))
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_IO_T,
 			    DATA_TYPE_UINT64,
 			    io_t,
 			    NULL);
 	}
 
 	mutex_exit(&spa->spa_errlist_lock);
 
 	*ereport_out = ereport;
 	*detector_out = detector;
 	return (B_TRUE);
 }
 
 /* if it's <= 128 bytes, save the corruption directly */
 #define	ZFM_MAX_INLINE		(128 / sizeof (uint64_t))
 
 #define	MAX_RANGES		16
 
 typedef struct zfs_ecksum_info {
-	/* histograms of set and cleared bits by bit number in a 64-bit word */
-	uint8_t zei_histogram_set[sizeof (uint64_t) * NBBY];
-	uint8_t zei_histogram_cleared[sizeof (uint64_t) * NBBY];
-
 	/* inline arrays of bits set and cleared. */
 	uint64_t zei_bits_set[ZFM_MAX_INLINE];
 	uint64_t zei_bits_cleared[ZFM_MAX_INLINE];
 
 	/*
 	 * for each range, the number of bits set and cleared.  The Hamming
 	 * distance between the good and bad buffers is the sum of them all.
 	 */
 	uint32_t zei_range_sets[MAX_RANGES];
 	uint32_t zei_range_clears[MAX_RANGES];
 
 	struct zei_ranges {
 		uint32_t	zr_start;
 		uint32_t	zr_end;
 	} zei_ranges[MAX_RANGES];
 
 	size_t	zei_range_count;
 	uint32_t zei_mingap;
 	uint32_t zei_allowed_mingap;
 
 } zfs_ecksum_info_t;
 
 static void
-update_histogram(uint64_t value_arg, uint8_t *hist, uint32_t *count)
+update_bad_bits(uint64_t value_arg, uint32_t *count)
 {
 	size_t i;
 	size_t bits = 0;
 	uint64_t value = BE_64(value_arg);
 
 	/* We store the bits in big-endian (largest-first) order */
 	for (i = 0; i < 64; i++) {
-		if (value & (1ull << i)) {
-			hist[63 - i] = MAX(hist[63 - i], hist[63 - i] + 1);
+		if (value & (1ull << i))
 			++bits;
-		}
 	}
 	/* update the count of bits changed */
 	*count += bits;
 }
 
 /*
  * We've now filled up the range array, and need to increase "mingap" and
  * shrink the range list accordingly.  zei_mingap is always the smallest
  * distance between array entries, so we set the new_allowed_gap to be
  * one greater than that.  We then go through the list, joining together
  * any ranges which are closer than the new_allowed_gap.
  *
  * By construction, there will be at least one.  We also update zei_mingap
  * to the new smallest gap, to prepare for our next invocation.
  */
 static void
 zei_shrink_ranges(zfs_ecksum_info_t *eip)
 {
 	uint32_t mingap = UINT32_MAX;
 	uint32_t new_allowed_gap = eip->zei_mingap + 1;
 
 	size_t idx, output;
 	size_t max = eip->zei_range_count;
 
 	struct zei_ranges *r = eip->zei_ranges;
 
 	ASSERT3U(eip->zei_range_count, >, 0);
 	ASSERT3U(eip->zei_range_count, <=, MAX_RANGES);
 
 	output = idx = 0;
 	while (idx < max - 1) {
 		uint32_t start = r[idx].zr_start;
 		uint32_t end = r[idx].zr_end;
 
 		while (idx < max - 1) {
 			idx++;
 
 			uint32_t nstart = r[idx].zr_start;
 			uint32_t nend = r[idx].zr_end;
 
 			uint32_t gap = nstart - end;
 			if (gap < new_allowed_gap) {
 				end = nend;
 				continue;
 			}
 			if (gap < mingap)
 				mingap = gap;
 			break;
 		}
 		r[output].zr_start = start;
 		r[output].zr_end = end;
 		output++;
 	}
 	ASSERT3U(output, <, eip->zei_range_count);
 	eip->zei_range_count = output;
 	eip->zei_mingap = mingap;
 	eip->zei_allowed_mingap = new_allowed_gap;
 }
 
 static void
 zei_add_range(zfs_ecksum_info_t *eip, int start, int end)
 {
 	struct zei_ranges *r = eip->zei_ranges;
 	size_t count = eip->zei_range_count;
 
 	if (count >= MAX_RANGES) {
 		zei_shrink_ranges(eip);
 		count = eip->zei_range_count;
 	}
 	if (count == 0) {
 		eip->zei_mingap = UINT32_MAX;
 		eip->zei_allowed_mingap = 1;
 	} else {
 		int gap = start - r[count - 1].zr_end;
 
 		if (gap < eip->zei_allowed_mingap) {
 			r[count - 1].zr_end = end;
 			return;
 		}
 		if (gap < eip->zei_mingap)
 			eip->zei_mingap = gap;
 	}
 	r[count].zr_start = start;
 	r[count].zr_end = end;
 	eip->zei_range_count++;
 }
 
 static size_t
 zei_range_total_size(zfs_ecksum_info_t *eip)
 {
 	struct zei_ranges *r = eip->zei_ranges;
 	size_t count = eip->zei_range_count;
 	size_t result = 0;
 	size_t idx;
 
 	for (idx = 0; idx < count; idx++)
 		result += (r[idx].zr_end - r[idx].zr_start);
 
 	return (result);
 }
 
 static zfs_ecksum_info_t *
 annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
     const abd_t *goodabd, const abd_t *badabd, size_t size,
     boolean_t drop_if_identical)
 {
 	const uint64_t *good;
 	const uint64_t *bad;
 
 	size_t nui64s = size / sizeof (uint64_t);
 
 	size_t inline_size;
 	int no_inline = 0;
 	size_t idx;
 	size_t range;
 
 	size_t offset = 0;
 	ssize_t start = -1;
 
 	zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP);
 
 	/* don't do any annotation for injected checksum errors */
 	if (info != NULL && info->zbc_injected)
 		return (eip);
 
 	if (info != NULL && info->zbc_has_cksum) {
 		fm_payload_set(ereport,
 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED,
 		    DATA_TYPE_UINT64_ARRAY,
 		    sizeof (info->zbc_expected) / sizeof (uint64_t),
 		    (uint64_t *)&info->zbc_expected,
 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL,
 		    DATA_TYPE_UINT64_ARRAY,
 		    sizeof (info->zbc_actual) / sizeof (uint64_t),
 		    (uint64_t *)&info->zbc_actual,
 		    FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO,
 		    DATA_TYPE_STRING,
 		    info->zbc_checksum_name,
 		    NULL);
 
 		if (info->zbc_byteswapped) {
 			fm_payload_set(ereport,
 			    FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP,
 			    DATA_TYPE_BOOLEAN, 1,
 			    NULL);
 		}
 	}
 
 	if (badabd == NULL || goodabd == NULL)
 		return (eip);
 
 	ASSERT3U(nui64s, <=, UINT32_MAX);
 	ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT3U(size, <=, UINT32_MAX);
 
 	good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size);
 	bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size);
 
 	/* build up the range list by comparing the two buffers. */
 	for (idx = 0; idx < nui64s; idx++) {
 		if (good[idx] == bad[idx]) {
 			if (start == -1)
 				continue;
 
 			zei_add_range(eip, start, idx);
 			start = -1;
 		} else {
 			if (start != -1)
 				continue;
 
 			start = idx;
 		}
 	}
 	if (start != -1)
 		zei_add_range(eip, start, idx);
 
 	/* See if it will fit in our inline buffers */
 	inline_size = zei_range_total_size(eip);
 	if (inline_size > ZFM_MAX_INLINE)
 		no_inline = 1;
 
 	/*
 	 * If there is no change and we want to drop if the buffers are
 	 * identical, do so.
 	 */
 	if (inline_size == 0 && drop_if_identical) {
 		kmem_free(eip, sizeof (*eip));
 		abd_return_buf((abd_t *)goodabd, (void *)good, size);
 		abd_return_buf((abd_t *)badabd, (void *)bad, size);
 		return (NULL);
 	}
 
 	/*
 	 * Now walk through the ranges, filling in the details of the
 	 * differences.  Also convert our uint64_t-array offsets to byte
 	 * offsets.
 	 */
 	for (range = 0; range < eip->zei_range_count; range++) {
 		size_t start = eip->zei_ranges[range].zr_start;
 		size_t end = eip->zei_ranges[range].zr_end;
 
 		for (idx = start; idx < end; idx++) {
 			uint64_t set, cleared;
 
 			// bits set in bad, but not in good
 			set = ((~good[idx]) & bad[idx]);
 			// bits set in good, but not in bad
 			cleared = (good[idx] & (~bad[idx]));
 
 			if (!no_inline) {
 				ASSERT3U(offset, <, inline_size);
 				eip->zei_bits_set[offset] = set;
 				eip->zei_bits_cleared[offset] = cleared;
 				offset++;
 			}
 
-			update_histogram(set, eip->zei_histogram_set,
-			    &eip->zei_range_sets[range]);
-			update_histogram(cleared, eip->zei_histogram_cleared,
-			    &eip->zei_range_clears[range]);
+			update_bad_bits(set, &eip->zei_range_sets[range]);
+			update_bad_bits(cleared, &eip->zei_range_clears[range]);
 		}
 
 		/* convert to byte offsets */
 		eip->zei_ranges[range].zr_start	*= sizeof (uint64_t);
 		eip->zei_ranges[range].zr_end	*= sizeof (uint64_t);
 	}
 
 	abd_return_buf((abd_t *)goodabd, (void *)good, size);
 	abd_return_buf((abd_t *)badabd, (void *)bad, size);
 
 	eip->zei_allowed_mingap	*= sizeof (uint64_t);
 	inline_size		*= sizeof (uint64_t);
 
 	/* fill in ereport */
 	fm_payload_set(ereport,
 	    FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES,
 	    DATA_TYPE_UINT32_ARRAY, 2 * eip->zei_range_count,
 	    (uint32_t *)eip->zei_ranges,
 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP,
 	    DATA_TYPE_UINT32, eip->zei_allowed_mingap,
 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS,
 	    DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_sets,
 	    FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS,
 	    DATA_TYPE_UINT32_ARRAY, eip->zei_range_count, eip->zei_range_clears,
 	    NULL);
 
 	if (!no_inline) {
 		fm_payload_set(ereport,
 		    FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS,
 		    DATA_TYPE_UINT8_ARRAY,
 		    inline_size, (uint8_t *)eip->zei_bits_set,
 		    FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS,
 		    DATA_TYPE_UINT8_ARRAY,
 		    inline_size, (uint8_t *)eip->zei_bits_cleared,
 		    NULL);
-	} else {
-		fm_payload_set(ereport,
-		    FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM,
-		    DATA_TYPE_UINT8_ARRAY,
-		    NBBY * sizeof (uint64_t), eip->zei_histogram_set,
-		    FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM,
-		    DATA_TYPE_UINT8_ARRAY,
-		    NBBY * sizeof (uint64_t), eip->zei_histogram_cleared,
-		    NULL);
 	}
 	return (eip);
 }
 #else
 void
 zfs_ereport_clear(spa_t *spa, vdev_t *vd)
 {
 	(void) spa, (void) vd;
 }
 #endif
 
 /*
  * Make sure our event is still valid for the given zio/vdev/pool.  For example,
  * we don't want to keep logging events for a faulted or missing vdev.
  */
 boolean_t
 zfs_ereport_is_valid(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio)
 {
 #ifdef _KERNEL
 	/*
 	 * If we are doing a spa_tryimport() or in recovery mode,
 	 * ignore errors.
 	 */
 	if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT ||
 	    spa_load_state(spa) == SPA_LOAD_RECOVER)
 		return (B_FALSE);
 
 	/*
 	 * If we are in the middle of opening a pool, and the previous attempt
 	 * failed, don't bother logging any new ereports - we're just going to
 	 * get the same diagnosis anyway.
 	 */
 	if (spa_load_state(spa) != SPA_LOAD_NONE &&
 	    spa->spa_last_open_failed)
 		return (B_FALSE);
 
 	if (zio != NULL) {
 		/*
 		 * If this is not a read or write zio, ignore the error.  This
 		 * can occur if the DKIOCFLUSHWRITECACHE ioctl fails.
 		 */
 		if (zio->io_type != ZIO_TYPE_READ &&
 		    zio->io_type != ZIO_TYPE_WRITE)
 			return (B_FALSE);
 
 		if (vd != NULL) {
 			/*
 			 * If the vdev has already been marked as failing due
 			 * to a failed probe, then ignore any subsequent I/O
 			 * errors, as the DE will automatically fault the vdev
 			 * on the first such failure.  This also catches cases
 			 * where vdev_remove_wanted is set and the device has
 			 * not yet been asynchronously placed into the REMOVED
 			 * state.
 			 */
 			if (zio->io_vd == vd && !vdev_accessible(vd, zio))
 				return (B_FALSE);
 
 			/*
 			 * Ignore checksum errors for reads from DTL regions of
 			 * leaf vdevs.
 			 */
 			if (zio->io_type == ZIO_TYPE_READ &&
 			    zio->io_error == ECKSUM &&
 			    vd->vdev_ops->vdev_op_leaf &&
 			    vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1))
 				return (B_FALSE);
 		}
 	}
 
 	/*
 	 * For probe failure, we want to avoid posting ereports if we've
 	 * already removed the device in the meantime.
 	 */
 	if (vd != NULL &&
 	    strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) == 0 &&
 	    (vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED))
 		return (B_FALSE);
 
 	/* Ignore bogus delay events (like from ioctls or unqueued IOs) */
 	if ((strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) &&
 	    (zio != NULL) && (!zio->io_timestamp)) {
 		return (B_FALSE);
 	}
 #else
 	(void) subclass, (void) spa, (void) vd, (void) zio;
 #endif
 	return (B_TRUE);
 }
 
 /*
  * Post an ereport for the given subclass
  *
  * Returns
  * - 0 if an event was posted
  * - EINVAL if there was a problem posting event
  * - EBUSY if the event was rate limited
  * - EALREADY if the event was already posted (duplicate)
  */
 int
 zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
     const zbookmark_phys_t *zb, zio_t *zio, uint64_t state)
 {
 	int rc = 0;
 #ifdef _KERNEL
 	nvlist_t *ereport = NULL;
 	nvlist_t *detector = NULL;
 
 	if (!zfs_ereport_is_valid(subclass, spa, vd, zio))
 		return (EINVAL);
 
 	if (zfs_ereport_is_duplicate(subclass, spa, vd, zb, zio, 0, 0))
 		return (SET_ERROR(EALREADY));
 
 	if (zfs_is_ratelimiting_event(subclass, vd))
 		return (SET_ERROR(EBUSY));
 
 	if (!zfs_ereport_start(&ereport, &detector, subclass, spa, vd,
 	    zb, zio, state, 0))
 		return (SET_ERROR(EINVAL));	/* couldn't post event */
 
 	if (ereport == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* Cleanup is handled by the callback function */
 	rc = zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
 #else
 	(void) subclass, (void) spa, (void) vd, (void) zb, (void) zio,
 	    (void) state;
 #endif
 	return (rc);
 }
 
 /*
  * Prepare a checksum ereport
  *
  * Returns
  * - 0 if an event was posted
  * - EINVAL if there was a problem posting event
  * - EBUSY if the event was rate limited
  * - EALREADY if the event was already posted (duplicate)
  */
 int
 zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     struct zio *zio, uint64_t offset, uint64_t length, zio_bad_cksum_t *info)
 {
 	zio_cksum_report_t *report;
 
 #ifdef _KERNEL
 	if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
 		return (SET_ERROR(EINVAL));
 
 	if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
 	    offset, length))
 		return (SET_ERROR(EALREADY));
 
 	if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
 		return (SET_ERROR(EBUSY));
 #else
 	(void) zb, (void) offset;
 #endif
 
 	report = kmem_zalloc(sizeof (*report), KM_SLEEP);
 
 	zio_vsd_default_cksum_report(zio, report);
 
 	/* copy the checksum failure information if it was provided */
 	if (info != NULL) {
 		report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP);
 		memcpy(report->zcr_ckinfo, info, sizeof (*info));
 	}
 
 	report->zcr_sector = 1ULL << vd->vdev_top->vdev_ashift;
 	report->zcr_align =
 	    vdev_psize_to_asize(vd->vdev_top, report->zcr_sector);
 	report->zcr_length = length;
 
 #ifdef _KERNEL
 	(void) zfs_ereport_start(&report->zcr_ereport, &report->zcr_detector,
 	    FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio, offset, length);
 
 	if (report->zcr_ereport == NULL) {
 		zfs_ereport_free_checksum(report);
 		return (0);
 	}
 #endif
 
 	mutex_enter(&spa->spa_errlist_lock);
 	report->zcr_next = zio->io_logical->io_cksum_report;
 	zio->io_logical->io_cksum_report = report;
 	mutex_exit(&spa->spa_errlist_lock);
 	return (0);
 }
 
 void
 zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data,
     const abd_t *bad_data, boolean_t drop_if_identical)
 {
 #ifdef _KERNEL
 	zfs_ecksum_info_t *info;
 
 	info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo,
 	    good_data, bad_data, report->zcr_length, drop_if_identical);
 	if (info != NULL)
 		zfs_zevent_post(report->zcr_ereport,
 		    report->zcr_detector, zfs_zevent_post_cb);
 	else
 		zfs_zevent_post_cb(report->zcr_ereport, report->zcr_detector);
 
 	report->zcr_ereport = report->zcr_detector = NULL;
 	if (info != NULL)
 		kmem_free(info, sizeof (*info));
 #else
 	(void) report, (void) good_data, (void) bad_data,
 	    (void) drop_if_identical;
 #endif
 }
 
 void
 zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
 {
 #ifdef _KERNEL
 	if (rpt->zcr_ereport != NULL) {
 		fm_nvlist_destroy(rpt->zcr_ereport,
 		    FM_NVA_FREE);
 		fm_nvlist_destroy(rpt->zcr_detector,
 		    FM_NVA_FREE);
 	}
 #endif
 	rpt->zcr_free(rpt->zcr_cbdata, rpt->zcr_cbinfo);
 
 	if (rpt->zcr_ckinfo != NULL)
 		kmem_free(rpt->zcr_ckinfo, sizeof (*rpt->zcr_ckinfo));
 
 	kmem_free(rpt, sizeof (*rpt));
 }
 
 /*
  * Post a checksum ereport
  *
  * Returns
  * - 0 if an event was posted
  * - EINVAL if there was a problem posting event
  * - EBUSY if the event was rate limited
  * - EALREADY if the event was already posted (duplicate)
  */
 int
 zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
     struct zio *zio, uint64_t offset, uint64_t length,
     const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
 {
 	int rc = 0;
 #ifdef _KERNEL
 	nvlist_t *ereport = NULL;
 	nvlist_t *detector = NULL;
 	zfs_ecksum_info_t *info;
 
 	if (!zfs_ereport_is_valid(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio))
 		return (SET_ERROR(EINVAL));
 
 	if (zfs_ereport_is_duplicate(FM_EREPORT_ZFS_CHECKSUM, spa, vd, zb, zio,
 	    offset, length))
 		return (SET_ERROR(EALREADY));
 
 	if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
 		return (SET_ERROR(EBUSY));
 
 	if (!zfs_ereport_start(&ereport, &detector, FM_EREPORT_ZFS_CHECKSUM,
 	    spa, vd, zb, zio, offset, length) || (ereport == NULL)) {
 		return (SET_ERROR(EINVAL));
 	}
 
 	info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
 	    B_FALSE);
 
 	if (info != NULL) {
 		rc = zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
 		kmem_free(info, sizeof (*info));
 	}
 #else
 	(void) spa, (void) vd, (void) zb, (void) zio, (void) offset,
 	    (void) length, (void) good_data, (void) bad_data, (void) zbc;
 #endif
 	return (rc);
 }
 
 /*
  * The 'sysevent.fs.zfs.*' events are signals posted to notify user space of
  * change in the pool.  All sysevents are listed in sys/sysevent/eventdefs.h
  * and are designed to be consumed by the ZFS Event Daemon (ZED).  For
  * additional details refer to the zed(8) man page.
  */
 nvlist_t *
 zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, const char *name,
     nvlist_t *aux)
 {
 	nvlist_t *resource = NULL;
 #ifdef _KERNEL
 	char class[64];
 
 	if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
 		return (NULL);
 
 	if ((resource = fm_nvlist_create(NULL)) == NULL)
 		return (NULL);
 
 	(void) snprintf(class, sizeof (class), "%s.%s.%s", type,
 	    ZFS_ERROR_CLASS, name);
 	VERIFY0(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION));
 	VERIFY0(nvlist_add_string(resource, FM_CLASS, class));
 	VERIFY0(nvlist_add_string(resource,
 	    FM_EREPORT_PAYLOAD_ZFS_POOL, spa_name(spa)));
 	VERIFY0(nvlist_add_uint64(resource,
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)));
 	VERIFY0(nvlist_add_uint64(resource,
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_STATE, spa_state(spa)));
 	VERIFY0(nvlist_add_int32(resource,
 	    FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, spa_load_state(spa)));
 
 	if (vd) {
 		VERIFY0(nvlist_add_uint64(resource,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid));
 		VERIFY0(nvlist_add_uint64(resource,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state));
 		if (vd->vdev_path != NULL)
 			VERIFY0(nvlist_add_string(resource,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, vd->vdev_path));
 		if (vd->vdev_devid != NULL)
 			VERIFY0(nvlist_add_string(resource,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, vd->vdev_devid));
 		if (vd->vdev_fru != NULL)
 			VERIFY0(nvlist_add_string(resource,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, vd->vdev_fru));
 		if (vd->vdev_enc_sysfs_path != NULL)
 			VERIFY0(nvlist_add_string(resource,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
 			    vd->vdev_enc_sysfs_path));
 	}
 
 	/* also copy any optional payload data */
 	if (aux) {
 		nvpair_t *elem = NULL;
 
 		while ((elem = nvlist_next_nvpair(aux, elem)) != NULL)
 			(void) nvlist_add_nvpair(resource, elem);
 	}
 #else
 	(void) spa, (void) vd, (void) type, (void) name, (void) aux;
 #endif
 	return (resource);
 }
 
 static void
 zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name,
     nvlist_t *aux)
 {
 #ifdef _KERNEL
 	nvlist_t *resource;
 
 	resource = zfs_event_create(spa, vd, type, name, aux);
 	if (resource)
 		zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
 #else
 	(void) spa, (void) vd, (void) type, (void) name, (void) aux;
 #endif
 }
 
 /*
  * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev
  * has been removed from the system.  This will cause the DE to ignore any
  * recent I/O errors, inferring that they are due to the asynchronous device
  * removal.
  */
 void
 zfs_post_remove(spa_t *spa, vdev_t *vd)
 {
 	zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL);
 }
 
 /*
  * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool
  * has the 'autoreplace' property set, and therefore any broken vdevs will be
  * handled by higher level logic, and no vdev fault should be generated.
  */
 void
 zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
 {
 	zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_AUTOREPLACE, NULL);
 }
 
 /*
  * The 'resource.fs.zfs.statechange' event is an internal signal that the
  * given vdev has transitioned its state to DEGRADED or HEALTHY.  This will
  * cause the retire agent to repair any outstanding fault management cases
  * open because the device was not found (fault.fs.zfs.device).
  */
 void
 zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate)
 {
 #ifdef _KERNEL
 	nvlist_t *aux;
 
 	/*
 	 * Add optional supplemental keys to payload
 	 */
 	aux = fm_nvlist_create(NULL);
 	if (vd && aux) {
 		if (vd->vdev_physpath) {
 			fnvlist_add_string(aux,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH,
 			    vd->vdev_physpath);
 		}
 		if (vd->vdev_enc_sysfs_path) {
 			fnvlist_add_string(aux,
 			    FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
 			    vd->vdev_enc_sysfs_path);
 		}
 
 		fnvlist_add_uint64(aux,
 		    FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate);
 	}
 
 	zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_STATECHANGE,
 	    aux);
 
 	if (aux)
 		fm_nvlist_destroy(aux, FM_NVA_FREE);
 #else
 	(void) spa, (void) vd, (void) laststate;
 #endif
 }
 
 #ifdef _KERNEL
 void
 zfs_ereport_init(void)
 {
 	mutex_init(&recent_events_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&recent_events_list, sizeof (recent_events_node_t),
 	    offsetof(recent_events_node_t, re_list_link));
 	avl_create(&recent_events_tree,  recent_events_compare,
 	    sizeof (recent_events_node_t), offsetof(recent_events_node_t,
 	    re_tree_link));
 }
 
 /*
  * This 'early' fini needs to run before zfs_fini() which on Linux waits
  * for the system_delay_taskq to drain.
  */
 void
 zfs_ereport_taskq_fini(void)
 {
 	mutex_enter(&recent_events_lock);
 	if (recent_events_cleaner_tqid != 0) {
 		taskq_cancel_id(system_delay_taskq, recent_events_cleaner_tqid);
 		recent_events_cleaner_tqid = 0;
 	}
 	mutex_exit(&recent_events_lock);
 }
 
 void
 zfs_ereport_fini(void)
 {
 	recent_events_node_t *entry;
 
 	while ((entry = list_remove_head(&recent_events_list)) != NULL) {
 		avl_remove(&recent_events_tree, entry);
 		kmem_free(entry, sizeof (*entry));
 	}
 	avl_destroy(&recent_events_tree);
 	list_destroy(&recent_events_list);
 	mutex_destroy(&recent_events_lock);
 }
 
 void
 zfs_ereport_snapshot_post(const char *subclass, spa_t *spa, const char *name)
 {
 	nvlist_t *aux;
 
 	aux = fm_nvlist_create(NULL);
 	fnvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_SNAPSHOT_NAME, name);
 
 	zfs_post_common(spa, NULL, FM_RSRC_CLASS, subclass, aux);
 	fm_nvlist_destroy(aux, FM_NVA_FREE);
 }
 
 /*
  * Post when a event when a zvol is created or removed
  *
  * This is currently only used by macOS, since it uses the event to create
  * symlinks between the volume name (mypool/myvol) and the actual /dev
  * device (/dev/disk3).  For example:
  *
  * /var/run/zfs/dsk/mypool/myvol -> /dev/disk3
  *
  * name: The full name of the zvol ("mypool/myvol")
  * dev_name: The full /dev name for the zvol ("/dev/disk3")
  * raw_name: The raw  /dev name for the zvol ("/dev/rdisk3")
  */
 void
 zfs_ereport_zvol_post(const char *subclass, const char *name,
     const char *dev_name, const char *raw_name)
 {
 	nvlist_t *aux;
 	char *r;
 
 	boolean_t locked = mutex_owned(&spa_namespace_lock);
 	if (!locked) mutex_enter(&spa_namespace_lock);
 	spa_t *spa = spa_lookup(name);
 	if (!locked) mutex_exit(&spa_namespace_lock);
 
 	if (spa == NULL)
 		return;
 
 	aux = fm_nvlist_create(NULL);
 	fnvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_DEVICE_NAME, dev_name);
 	fnvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_RAW_DEVICE_NAME,
 	    raw_name);
 	r = strchr(name, '/');
 	if (r && r[1])
 		fnvlist_add_string(aux, FM_EREPORT_PAYLOAD_ZFS_VOLUME, &r[1]);
 
 	zfs_post_common(spa, NULL, FM_RSRC_CLASS, subclass, aux);
 	fm_nvlist_destroy(aux, FM_NVA_FREE);
 }
 
 EXPORT_SYMBOL(zfs_ereport_post);
 EXPORT_SYMBOL(zfs_ereport_is_valid);
 EXPORT_SYMBOL(zfs_ereport_post_checksum);
 EXPORT_SYMBOL(zfs_post_remove);
 EXPORT_SYMBOL(zfs_post_autoreplace);
 EXPORT_SYMBOL(zfs_post_state_change);
 
 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_max, UINT, ZMOD_RW,
 	"Maximum recent zevents records to retain for duplicate checking");
 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, retain_expire_secs, UINT, ZMOD_RW,
 	"Expiration time for recent zevents records");
 #endif /* _KERNEL */