diff --git a/cmd/zed/agents/fmd_api.c b/cmd/zed/agents/fmd_api.c
index 9e46e831d517..56c134b731b8 100644
--- a/cmd/zed/agents/fmd_api.c
+++ b/cmd/zed/agents/fmd_api.c
@@ -1,780 +1,780 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  *
  * Copyright (c) 2016, Intel Corporation.
  */
 
 /*
  * This file implements the minimal FMD module API required to support the
  * fault logic modules in ZED. This support includes module registration,
  * memory allocation, module property accessors, basic case management,
  * one-shot timers and SERD engines.
  *
  * In the ZED runtime, the modules are called from a single thread so no
  * locking is required in this emulated FMD environment.
  */
 
 #include <sys/types.h>
 #include <sys/fm/protocol.h>
 #include <uuid/uuid.h>
 #include <signal.h>
 #include <string.h>
 #include <time.h>
 
 #include "fmd_api.h"
 #include "fmd_serd.h"
 
 #include "zfs_agents.h"
 #include "../zed_log.h"
 
 typedef struct fmd_modstat {
 	fmd_stat_t	ms_accepted;	/* total events accepted by module */
 	fmd_stat_t	ms_caseopen;	/* cases currently open */
 	fmd_stat_t	ms_casesolved;	/* total cases solved by module */
 	fmd_stat_t	ms_caseclosed;	/* total cases closed by module */
 } fmd_modstat_t;
 
 typedef struct fmd_module {
 	const char	*mod_name;	/* basename of module (ro) */
 	const fmd_hdl_info_t *mod_info;	/* module info registered with handle */
 	void		*mod_spec;	/* fmd_hdl_get/setspecific data value */
 	fmd_stat_t	*mod_ustat;	/* module specific custom stats */
 	uint_t		mod_ustat_cnt;	/* count of ustat stats */
 	fmd_modstat_t	mod_stats;	/* fmd built-in per-module statistics */
 	fmd_serd_hash_t	mod_serds;	/* hash of serd engs owned by module */
 	char		*mod_vers;	/* a copy of module version string */
 } fmd_module_t;
 
 /*
  * ZED has two FMD hardwired module instances
  */
 fmd_module_t	zfs_retire_module;
 fmd_module_t	zfs_diagnosis_module;
 
 /*
  * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
  */
 
 #ifdef DEBUG
 const char *
 _umem_debug_init(void)
 {
 	return ("default,verbose"); /* $UMEM_DEBUG setting */
 }
 
 const char *
 _umem_logging_init(void)
 {
 	return ("fail,contents"); /* $UMEM_LOGGING setting */
 }
 #endif
 
 /*
  * Register a module with fmd and finish module initialization.
  * Returns an integer indicating whether it succeeded (zero) or
  * failed (non-zero).
  */
 int
 fmd_hdl_register(fmd_hdl_t *hdl, int version, const fmd_hdl_info_t *mip)
 {
 	(void) version;
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	mp->mod_info = mip;
 	mp->mod_name = mip->fmdi_desc + 4;	/* drop 'ZFS ' prefix */
 	mp->mod_spec = NULL;
 
 	/* bare minimum module stats */
 	(void) strcpy(mp->mod_stats.ms_accepted.fmds_name, "fmd.accepted");
 	(void) strcpy(mp->mod_stats.ms_caseopen.fmds_name, "fmd.caseopen");
 	(void) strcpy(mp->mod_stats.ms_casesolved.fmds_name, "fmd.casesolved");
 	(void) strcpy(mp->mod_stats.ms_caseclosed.fmds_name, "fmd.caseclosed");
 
 	fmd_serd_hash_create(&mp->mod_serds);
 
 	fmd_hdl_debug(hdl, "register module");
 
 	return (0);
 }
 
 void
 fmd_hdl_unregister(fmd_hdl_t *hdl)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	fmd_modstat_t *msp = &mp->mod_stats;
 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
 
 	/* dump generic module stats */
 	fmd_hdl_debug(hdl, "%s: %llu", msp->ms_accepted.fmds_name,
 	    msp->ms_accepted.fmds_value.ui64);
 	if (ops->fmdo_close != NULL) {
 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseopen.fmds_name,
 		    msp->ms_caseopen.fmds_value.ui64);
 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_casesolved.fmds_name,
 		    msp->ms_casesolved.fmds_value.ui64);
 		fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseclosed.fmds_name,
 		    msp->ms_caseclosed.fmds_value.ui64);
 	}
 
 	/* dump module specific stats */
 	if (mp->mod_ustat != NULL) {
 		int i;
 
 		for (i = 0; i < mp->mod_ustat_cnt; i++) {
 			fmd_hdl_debug(hdl, "%s: %llu",
 			    mp->mod_ustat[i].fmds_name,
 			    mp->mod_ustat[i].fmds_value.ui64);
 		}
 	}
 
 	fmd_serd_hash_destroy(&mp->mod_serds);
 
 	fmd_hdl_debug(hdl, "unregister module");
 }
 
 /*
  * fmd_hdl_setspecific() is used to associate a data pointer with
  * the specified handle for the duration of the module's lifetime.
  * This pointer can be retrieved using fmd_hdl_getspecific().
  */
 void
 fmd_hdl_setspecific(fmd_hdl_t *hdl, void *spec)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	mp->mod_spec = spec;
 }
 
 /*
  * Return the module-specific data pointer previously associated
  * with the handle using fmd_hdl_setspecific().
  */
 void *
 fmd_hdl_getspecific(fmd_hdl_t *hdl)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	return (mp->mod_spec);
 }
 
 void *
 fmd_hdl_alloc(fmd_hdl_t *hdl, size_t size, int flags)
 {
 	(void) hdl;
 	return (umem_alloc(size, flags));
 }
 
 void *
 fmd_hdl_zalloc(fmd_hdl_t *hdl, size_t size, int flags)
 {
 	(void) hdl;
 	return (umem_zalloc(size, flags));
 }
 
 void
 fmd_hdl_free(fmd_hdl_t *hdl, void *data, size_t size)
 {
 	(void) hdl;
 	umem_free(data, size);
 }
 
 /*
  * Record a module debug message using the specified format.
  */
 void
 fmd_hdl_debug(fmd_hdl_t *hdl, const char *format, ...)
 {
 	char message[256];
 	va_list vargs;
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	va_start(vargs, format);
 	(void) vsnprintf(message, sizeof (message), format, vargs);
 	va_end(vargs);
 
 	/* prefix message with module name */
 	zed_log_msg(LOG_INFO, "%s: %s", mp->mod_name, message);
 }
 
 /* Property Retrieval */
 
 int32_t
 fmd_prop_get_int32(fmd_hdl_t *hdl, const char *name)
 {
 	(void) hdl;
 
 	/*
 	 * These can be looked up in mp->modinfo->fmdi_props
 	 * For now we just hard code for phase 2. In the
 	 * future, there can be a ZED based override.
 	 */
 	if (strcmp(name, "spare_on_remove") == 0)
 		return (1);
 
 	if (strcmp(name, "io_N") == 0 || strcmp(name, "checksum_N") == 0)
 		return (10);	/* N = 10 events */
 
 	return (0);
 }
 
 int64_t
 fmd_prop_get_int64(fmd_hdl_t *hdl, const char *name)
 {
 	(void) hdl;
 
 	/*
 	 * These can be looked up in mp->modinfo->fmdi_props
 	 * For now we just hard code for phase 2. In the
 	 * future, there can be a ZED based override.
 	 */
 	if (strcmp(name, "remove_timeout") == 0)
 		return (15ULL * 1000ULL * 1000ULL * 1000ULL);	/* 15 sec */
 
 	if (strcmp(name, "io_T") == 0 || strcmp(name, "checksum_T") == 0)
 		return (1000ULL * 1000ULL * 1000ULL * 600ULL);	/* 10 min */
 
 	return (0);
 }
 
 /* FMD Statistics */
 
 fmd_stat_t *
 fmd_stat_create(fmd_hdl_t *hdl, uint_t flags, uint_t nstats, fmd_stat_t *statv)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	if (flags == FMD_STAT_NOALLOC) {
 		mp->mod_ustat = statv;
 		mp->mod_ustat_cnt = nstats;
 	}
 
 	return (statv);
 }
 
 /* Case Management */
 
 fmd_case_t *
 fmd_case_open(fmd_hdl_t *hdl, void *data)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	uuid_t uuid;
 
 	fmd_case_t *cp;
 
 	cp = fmd_hdl_zalloc(hdl, sizeof (fmd_case_t), FMD_SLEEP);
 	cp->ci_mod = hdl;
 	cp->ci_state = FMD_CASE_UNSOLVED;
 	cp->ci_flags = FMD_CF_DIRTY;
 	cp->ci_data = data;
 	cp->ci_bufptr = NULL;
 	cp->ci_bufsiz = 0;
 
 	uuid_generate(uuid);
 	uuid_unparse(uuid, cp->ci_uuid);
 
 	fmd_hdl_debug(hdl, "case opened (%s)", cp->ci_uuid);
 	mp->mod_stats.ms_caseopen.fmds_value.ui64++;
 
 	return (cp);
 }
 
 void
 fmd_case_solve(fmd_hdl_t *hdl, fmd_case_t *cp)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	/*
 	 * For ZED, the event was already sent from fmd_case_add_suspect()
 	 */
 
 	if (cp->ci_state >= FMD_CASE_SOLVED)
 		fmd_hdl_debug(hdl, "case is already solved or closed");
 
 	cp->ci_state = FMD_CASE_SOLVED;
 
 	fmd_hdl_debug(hdl, "case solved (%s)", cp->ci_uuid);
 	mp->mod_stats.ms_casesolved.fmds_value.ui64++;
 }
 
 void
 fmd_case_close(fmd_hdl_t *hdl, fmd_case_t *cp)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
 
 	fmd_hdl_debug(hdl, "case closed (%s)", cp->ci_uuid);
 
 	if (ops->fmdo_close != NULL)
 		ops->fmdo_close(hdl, cp);
 
 	mp->mod_stats.ms_caseopen.fmds_value.ui64--;
 	mp->mod_stats.ms_caseclosed.fmds_value.ui64++;
 
 	if (cp->ci_bufptr != NULL && cp->ci_bufsiz > 0)
 		fmd_hdl_free(hdl, cp->ci_bufptr, cp->ci_bufsiz);
 
 	fmd_hdl_free(hdl, cp, sizeof (fmd_case_t));
 }
 
 void
 fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid)
 {
 	fmd_hdl_debug(hdl, "case resolved by uuid (%s)", uuid);
 }
 
 boolean_t
 fmd_case_solved(fmd_hdl_t *hdl, fmd_case_t *cp)
 {
 	(void) hdl;
 	return (cp->ci_state >= FMD_CASE_SOLVED);
 }
 
 void
 fmd_case_add_ereport(fmd_hdl_t *hdl, fmd_case_t *cp, fmd_event_t *ep)
 {
 	(void) hdl, (void) cp, (void) ep;
 }
 
 static void
 zed_log_fault(nvlist_t *nvl, const char *uuid, const char *code)
 {
 	nvlist_t *rsrc;
 	char *strval;
 	uint64_t guid;
 	uint8_t byte;
 
 	zed_log_msg(LOG_INFO, "\nzed_fault_event:");
 
 	if (uuid != NULL)
 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_UUID, uuid);
 	if (nvlist_lookup_string(nvl, FM_CLASS, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_CLASS, strval);
 	if (code != NULL)
 		zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_DIAG_CODE, code);
 	if (nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &byte) == 0)
-		zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FAULT_CERTAINTY, byte);
+		zed_log_msg(LOG_INFO, "\t%s: %hhu", FM_FAULT_CERTAINTY, byte);
 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
 		if (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &strval) == 0)
 			zed_log_msg(LOG_INFO, "\t%s: %s", FM_FMRI_SCHEME,
 			    strval);
 		if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_POOL, &guid) == 0)
 			zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FMRI_ZFS_POOL,
-			    guid);
+			    (unsigned long long)guid);
 		if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_VDEV, &guid) == 0)
 			zed_log_msg(LOG_INFO, "\t%s: %llu \n", FM_FMRI_ZFS_VDEV,
-			    guid);
+			    (unsigned long long)guid);
 	}
 }
 
 static const char *
 fmd_fault_mkcode(nvlist_t *fault)
 {
 	char *class;
 	const char *code = "-";
 
 	/*
 	 * Note: message codes come from: openzfs/usr/src/cmd/fm/dicts/ZFS.po
 	 */
 	if (nvlist_lookup_string(fault, FM_CLASS, &class) == 0) {
 		if (strcmp(class, "fault.fs.zfs.vdev.io") == 0)
 			code = "ZFS-8000-FD";
 		else if (strcmp(class, "fault.fs.zfs.vdev.checksum") == 0)
 			code = "ZFS-8000-GH";
 		else if (strcmp(class, "fault.fs.zfs.io_failure_wait") == 0)
 			code = "ZFS-8000-HC";
 		else if (strcmp(class, "fault.fs.zfs.io_failure_continue") == 0)
 			code = "ZFS-8000-JQ";
 		else if (strcmp(class, "fault.fs.zfs.log_replay") == 0)
 			code = "ZFS-8000-K4";
 		else if (strcmp(class, "fault.fs.zfs.pool") == 0)
 			code = "ZFS-8000-CS";
 		else if (strcmp(class, "fault.fs.zfs.device") == 0)
 			code = "ZFS-8000-D3";
 
 	}
 	return (code);
 }
 
 void
 fmd_case_add_suspect(fmd_hdl_t *hdl, fmd_case_t *cp, nvlist_t *fault)
 {
 	nvlist_t *nvl;
 	const char *code = fmd_fault_mkcode(fault);
 	int64_t tod[2];
 	int err = 0;
 
 	/*
 	 * payload derived from fmd_protocol_list()
 	 */
 
 	(void) gettimeofday(&cp->ci_tv, NULL);
 	tod[0] = cp->ci_tv.tv_sec;
 	tod[1] = cp->ci_tv.tv_usec;
 
 	nvl = fmd_nvl_alloc(hdl, FMD_SLEEP);
 
 	err |= nvlist_add_uint8(nvl, FM_VERSION, FM_SUSPECT_VERSION);
 	err |= nvlist_add_string(nvl, FM_CLASS, FM_LIST_SUSPECT_CLASS);
 	err |= nvlist_add_string(nvl, FM_SUSPECT_UUID, cp->ci_uuid);
 	err |= nvlist_add_string(nvl, FM_SUSPECT_DIAG_CODE, code);
 	err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
 	err |= nvlist_add_uint32(nvl, FM_SUSPECT_FAULT_SZ, 1);
 	err |= nvlist_add_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
 	    (const nvlist_t **)&fault, 1);
 
 	if (err)
 		zed_log_die("failed to populate nvlist");
 
 	zed_log_fault(fault, cp->ci_uuid, code);
 	zfs_agent_post_event(FM_LIST_SUSPECT_CLASS, NULL, nvl);
 
 	nvlist_free(nvl);
 	nvlist_free(fault);
 }
 
 void
 fmd_case_setspecific(fmd_hdl_t *hdl, fmd_case_t *cp, void *data)
 {
 	(void) hdl;
 	cp->ci_data = data;
 }
 
 void *
 fmd_case_getspecific(fmd_hdl_t *hdl, fmd_case_t *cp)
 {
 	(void) hdl;
 	return (cp->ci_data);
 }
 
 void
 fmd_buf_create(fmd_hdl_t *hdl, fmd_case_t *cp, const char *name, size_t size)
 {
 	assert(strcmp(name, "data") == 0), (void) name;
 	assert(cp->ci_bufptr == NULL);
 	assert(size < (1024 * 1024));
 
 	cp->ci_bufptr = fmd_hdl_alloc(hdl, size, FMD_SLEEP);
 	cp->ci_bufsiz = size;
 }
 
 void
 fmd_buf_read(fmd_hdl_t *hdl, fmd_case_t *cp,
     const char *name, void *buf, size_t size)
 {
 	(void) hdl;
 	assert(strcmp(name, "data") == 0), (void) name;
 	assert(cp->ci_bufptr != NULL);
 	assert(size <= cp->ci_bufsiz);
 
 	memcpy(buf, cp->ci_bufptr, size);
 }
 
 void
 fmd_buf_write(fmd_hdl_t *hdl, fmd_case_t *cp,
     const char *name, const void *buf, size_t size)
 {
 	(void) hdl;
 	assert(strcmp(name, "data") == 0), (void) name;
 	assert(cp->ci_bufptr != NULL);
 	assert(cp->ci_bufsiz >= size);
 
 	memcpy(cp->ci_bufptr, buf, size);
 }
 
 /* SERD Engines */
 
 void
 fmd_serd_create(fmd_hdl_t *hdl, const char *name, uint_t n, hrtime_t t)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	if (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL) {
 		zed_log_msg(LOG_ERR, "failed to create SERD engine '%s': "
 		    " name already exists", name);
 		return;
 	}
 
 	(void) fmd_serd_eng_insert(&mp->mod_serds, name, n, t);
 }
 
 void
 fmd_serd_destroy(fmd_hdl_t *hdl, const char *name)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	fmd_serd_eng_delete(&mp->mod_serds, name);
 
 	fmd_hdl_debug(hdl, "serd_destroy %s", name);
 }
 
 int
 fmd_serd_exists(fmd_hdl_t *hdl, const char *name)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	return (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL);
 }
 
 void
 fmd_serd_reset(fmd_hdl_t *hdl, const char *name)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	fmd_serd_eng_t *sgp;
 
 	if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
 		zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
 		return;
 	}
 
 	fmd_serd_eng_reset(sgp);
 
 	fmd_hdl_debug(hdl, "serd_reset %s", name);
 }
 
 int
 fmd_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	fmd_serd_eng_t *sgp;
 	int err;
 
 	if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
 		zed_log_msg(LOG_ERR, "failed to add record to SERD engine '%s'",
 		    name);
 		return (0);
 	}
 	err = fmd_serd_eng_record(sgp, ep->ev_hrt);
 
 	return (err);
 }
 
 /* FMD Timers */
 
 static void
 _timer_notify(union sigval sv)
 {
 	fmd_timer_t *ftp = sv.sival_ptr;
 	fmd_hdl_t *hdl = ftp->ft_hdl;
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
 	struct itimerspec its;
 
 	fmd_hdl_debug(hdl, "timer fired (%p)", ftp->ft_tid);
 
 	/* disarm the timer */
 	memset(&its, 0, sizeof (struct itimerspec));
 	timer_settime(ftp->ft_tid, 0, &its, NULL);
 
 	/* Note that the fmdo_timeout can remove this timer */
 	if (ops->fmdo_timeout != NULL)
 		ops->fmdo_timeout(hdl, ftp, ftp->ft_arg);
 }
 
 /*
  * Install a new timer which will fire at least delta nanoseconds after the
  * current time. After the timeout has expired, the module's fmdo_timeout
  * entry point is called.
  */
 fmd_timer_t *
 fmd_timer_install(fmd_hdl_t *hdl, void *arg, fmd_event_t *ep, hrtime_t delta)
 {
 	(void) ep;
 	struct sigevent sev;
 	struct itimerspec its;
 	fmd_timer_t *ftp;
 
 	ftp = fmd_hdl_alloc(hdl, sizeof (fmd_timer_t), FMD_SLEEP);
 	ftp->ft_arg = arg;
 	ftp->ft_hdl = hdl;
 
 	its.it_value.tv_sec = delta / 1000000000;
 	its.it_value.tv_nsec = delta % 1000000000;
 	its.it_interval.tv_sec = its.it_value.tv_sec;
 	its.it_interval.tv_nsec = its.it_value.tv_nsec;
 
 	sev.sigev_notify = SIGEV_THREAD;
 	sev.sigev_notify_function = _timer_notify;
 	sev.sigev_notify_attributes = NULL;
 	sev.sigev_value.sival_ptr = ftp;
 
 	timer_create(CLOCK_REALTIME, &sev, &ftp->ft_tid);
 	timer_settime(ftp->ft_tid, 0, &its, NULL);
 
 	fmd_hdl_debug(hdl, "installing timer for %d secs (%p)",
 	    (int)its.it_value.tv_sec, ftp->ft_tid);
 
 	return (ftp);
 }
 
 void
 fmd_timer_remove(fmd_hdl_t *hdl, fmd_timer_t *ftp)
 {
 	fmd_hdl_debug(hdl, "removing timer (%p)", ftp->ft_tid);
 
 	timer_delete(ftp->ft_tid);
 
 	fmd_hdl_free(hdl, ftp, sizeof (fmd_timer_t));
 }
 
 /* Name-Value Pair Lists */
 
 nvlist_t *
 fmd_nvl_create_fault(fmd_hdl_t *hdl, const char *class, uint8_t certainty,
     nvlist_t *asru, nvlist_t *fru, nvlist_t *resource)
 {
 	(void) hdl;
 	nvlist_t *nvl;
 	int err = 0;
 
 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
 		zed_log_die("failed to xalloc fault nvlist");
 
 	err |= nvlist_add_uint8(nvl, FM_VERSION, FM_FAULT_VERSION);
 	err |= nvlist_add_string(nvl, FM_CLASS, class);
 	err |= nvlist_add_uint8(nvl, FM_FAULT_CERTAINTY, certainty);
 
 	if (asru != NULL)
 		err |= nvlist_add_nvlist(nvl, FM_FAULT_ASRU, asru);
 	if (fru != NULL)
 		err |= nvlist_add_nvlist(nvl, FM_FAULT_FRU, fru);
 	if (resource != NULL)
 		err |= nvlist_add_nvlist(nvl, FM_FAULT_RESOURCE, resource);
 
 	if (err)
 		zed_log_die("failed to populate nvlist: %s\n", strerror(err));
 
 	return (nvl);
 }
 
 /*
  * sourced from fmd_string.c
  */
 static int
 fmd_strmatch(const char *s, const char *p)
 {
 	char c;
 
 	if (p == NULL)
 		return (0);
 
 	if (s == NULL)
 		s = ""; /* treat NULL string as the empty string */
 
 	do {
 		if ((c = *p++) == '\0')
 			return (*s == '\0');
 
 		if (c == '*') {
 			while (*p == '*')
 				p++; /* consecutive *'s can be collapsed */
 
 			if (*p == '\0')
 				return (1);
 
 			while (*s != '\0') {
 				if (fmd_strmatch(s++, p) != 0)
 					return (1);
 			}
 
 			return (0);
 		}
 	} while (c == *s++);
 
 	return (0);
 }
 
 int
 fmd_nvl_class_match(fmd_hdl_t *hdl, nvlist_t *nvl, const char *pattern)
 {
 	(void) hdl;
 	char *class;
 
 	return (nvl != NULL &&
 	    nvlist_lookup_string(nvl, FM_CLASS, &class) == 0 &&
 	    fmd_strmatch(class, pattern));
 }
 
 nvlist_t *
 fmd_nvl_alloc(fmd_hdl_t *hdl, int flags)
 {
 	(void) hdl, (void) flags;
 	nvlist_t *nvl = NULL;
 
 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
 		return (NULL);
 
 	return (nvl);
 }
 
 
 /*
  * ZED Agent specific APIs
  */
 
 fmd_hdl_t *
 fmd_module_hdl(const char *name)
 {
 	if (strcmp(name, "zfs-retire") == 0)
 		return ((fmd_hdl_t *)&zfs_retire_module);
 	if (strcmp(name, "zfs-diagnosis") == 0)
 		return ((fmd_hdl_t *)&zfs_diagnosis_module);
 
 	return (NULL);
 }
 
 boolean_t
 fmd_module_initialized(fmd_hdl_t *hdl)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 
 	return (mp->mod_info != NULL);
 }
 
 /*
  * fmd_module_recv is called for each event that is received by
  * the fault manager that has a class that matches one of the
  * module's subscriptions.
  */
 void
 fmd_module_recv(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
 {
 	fmd_module_t *mp = (fmd_module_t *)hdl;
 	const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
 	fmd_event_t faux_event = {0};
 	int64_t *tv;
 	uint_t n;
 
 	/*
 	 * Will need to normalized this if we persistently store the case data
 	 */
 	if (nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0)
 		faux_event.ev_hrt = tv[0] * NANOSEC + tv[1];
 	else
 		faux_event.ev_hrt = 0;
 
 	ops->fmdo_recv(hdl, &faux_event, nvl, class);
 
 	mp->mod_stats.ms_accepted.fmds_value.ui64++;
 
 	/* TBD - should we initiate fm_module_gc() periodically? */
 }
diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c
index 7364dd2c6286..af6de73a1cc1 100644
--- a/cmd/zed/agents/zfs_mod.c
+++ b/cmd/zed/agents/zfs_mod.c
@@ -1,1275 +1,1275 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2016, 2017, Intel Corporation.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  */
 
 /*
  * ZFS syseventd module.
  *
  * file origin: openzfs/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c
  *
  * The purpose of this module is to identify when devices are added to the
  * system, and appropriately online or replace the affected vdevs.
  *
  * When a device is added to the system:
  *
  * 	1. Search for any vdevs whose devid matches that of the newly added
  *	   device.
  *
  * 	2. If no vdevs are found, then search for any vdevs whose udev path
  *	   matches that of the new device.
  *
  *	3. If no vdevs match by either method, then ignore the event.
  *
  * 	4. Attempt to online the device with a flag to indicate that it should
  *	   be unspared when resilvering completes.  If this succeeds, then the
  *	   same device was inserted and we should continue normally.
  *
  *	5. If the pool does not have the 'autoreplace' property set, attempt to
  *	   online the device again without the unspare flag, which will
  *	   generate a FMA fault.
  *
  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
  *	   is a whole disk, then label the new disk and attempt a 'zpool
  *	   replace'.
  *
  * The module responds to EC_DEV_ADD events.  The special ESC_ZFS_VDEV_CHECK
  * event indicates that a device failed to open during pool load, but the
  * autoreplace property was set.  In this case, we deferred the associated
  * FMA fault until our module had a chance to process the autoreplace logic.
  * If the device could not be replaced, then the second online attempt will
  * trigger the FMA fault that we skipped earlier.
  *
  * On Linux udev provides a disk insert for both the disk and the partition.
  */
 
 #include <ctype.h>
 #include <fcntl.h>
 #include <libnvpair.h>
 #include <libzfs.h>
 #include <libzutil.h>
 #include <limits.h>
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
 #include <sys/list.h>
 #include <sys/sunddi.h>
 #include <sys/sysevent/eventdefs.h>
 #include <sys/sysevent/dev.h>
 #include <thread_pool.h>
 #include <pthread.h>
 #include <unistd.h>
 #include <errno.h>
 #include "zfs_agents.h"
 #include "../zed_log.h"
 
 #define	DEV_BYID_PATH	"/dev/disk/by-id/"
 #define	DEV_BYPATH_PATH	"/dev/disk/by-path/"
 #define	DEV_BYVDEV_PATH	"/dev/disk/by-vdev/"
 
 typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
 
 libzfs_handle_t *g_zfshdl;
 list_t g_pool_list;	/* list of unavailable pools at initialization */
 list_t g_device_list;	/* list of disks with asynchronous label request */
 tpool_t *g_tpool;
 boolean_t g_enumeration_done;
 pthread_t g_zfs_tid;	/* zfs_enum_pools() thread */
 
 typedef struct unavailpool {
 	zpool_handle_t	*uap_zhp;
 	list_node_t	uap_node;
 } unavailpool_t;
 
 typedef struct pendingdev {
 	char		pd_physpath[128];
 	list_node_t	pd_node;
 } pendingdev_t;
 
 static int
 zfs_toplevel_state(zpool_handle_t *zhp)
 {
 	nvlist_t *nvroot;
 	vdev_stat_t *vs;
 	unsigned int c;
 
 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 	return (vs->vs_state);
 }
 
 static int
 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
 {
 	zed_log_msg(LOG_INFO, "zfs_unavail_pool: examining '%s' (state %d)",
 	    zpool_get_name(zhp), (int)zfs_toplevel_state(zhp));
 
 	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
 		unavailpool_t *uap;
 		uap = malloc(sizeof (unavailpool_t));
 		uap->uap_zhp = zhp;
 		list_insert_tail((list_t *)data, uap);
 	} else {
 		zpool_close(zhp);
 	}
 	return (0);
 }
 
 /*
  * Two stage replace on Linux
  * since we get disk notifications
  * we can wait for partitioned disk slice to show up!
  *
  * First stage tags the disk, initiates async partitioning, and returns
  * Second stage finds the tag and proceeds to ZFS labeling/replace
  *
  * disk-add --> label-disk + tag-disk --> partition-add --> zpool_vdev_attach
  *
  * 1. physical match with no fs, no partition
  *	tag it top, partition disk
  *
  * 2. physical match again, see partition and tag
  *
  */
 
 /*
  * The device associated with the given vdev (either by devid or physical path)
  * has been added to the system.  If 'isdisk' is set, then we only attempt a
  * replacement if it's a whole disk.  This also implies that we should label the
  * disk first.
  *
  * First, we attempt to online the device (making sure to undo any spare
  * operation when finished).  If this succeeds, then we're done.  If it fails,
  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
  * but that the label was not what we expected.  If the 'autoreplace' property
  * is enabled, then we relabel the disk (if specified), and attempt a 'zpool
  * replace'.  If the online is successful, but the new state is something else
  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
  * race, and we should avoid attempting to relabel the disk.
  *
  * Also can arrive here from a ESC_ZFS_VDEV_CHECK event
  */
 static void
 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
 {
 	char *path;
 	vdev_state_t newstate;
 	nvlist_t *nvroot, *newvd;
 	pendingdev_t *device;
 	uint64_t wholedisk = 0ULL;
 	uint64_t offline = 0ULL, faulted = 0ULL;
 	uint64_t guid = 0ULL;
 	char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
 	char devpath[PATH_MAX];
 	int ret;
 	boolean_t is_sd = B_FALSE;
 	boolean_t is_mpath_wholedisk = B_FALSE;
 	uint_t c;
 	vdev_stat_t *vs;
 
 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
 		return;
 
 	/* Skip healthy disks */
 	verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&vs, &c) == 0);
 	if (vs->vs_state == VDEV_STATE_HEALTHY) {
 		zed_log_msg(LOG_INFO, "%s: %s is already healthy, skip it.",
 		    __func__, path);
 		return;
 	}
 
 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
 	    &enc_sysfs_path);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
 
 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
 
 	/*
 	 * Special case:
 	 *
 	 * We've seen times where a disk won't have a ZPOOL_CONFIG_PHYS_PATH
 	 * entry in their config. For example, on this force-faulted disk:
 	 *
 	 *	children[0]:
 	 *	   type: 'disk'
 	 *	   id: 0
 	 *	   guid: 14309659774640089719
 	 *        path: '/dev/disk/by-vdev/L28'
 	 *        whole_disk: 0
 	 *        DTL: 654
 	 *        create_txg: 4
 	 *        com.delphix:vdev_zap_leaf: 1161
 	 *        faulted: 1
 	 *        aux_state: 'external'
 	 *	children[1]:
 	 *        type: 'disk'
 	 *        id: 1
 	 *        guid: 16002508084177980912
 	 *        path: '/dev/disk/by-vdev/L29'
 	 *        devid: 'dm-uuid-mpath-35000c500a61d68a3'
 	 *        phys_path: 'L29'
 	 *        vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
 	 *        whole_disk: 0
 	 *        DTL: 1028
 	 *        create_txg: 4
 	 *        com.delphix:vdev_zap_leaf: 131
 	 *
 	 * If the disk's path is a /dev/disk/by-vdev/ path, then we can infer
 	 * the ZPOOL_CONFIG_PHYS_PATH from the by-vdev disk name.
 	 */
 	if (physpath == NULL && path != NULL) {
 		/* If path begins with "/dev/disk/by-vdev/" ... */
 		if (strncmp(path, DEV_BYVDEV_PATH,
 		    strlen(DEV_BYVDEV_PATH)) == 0) {
 			/* Set physpath to the char after "/dev/disk/by-vdev" */
 			physpath = &path[strlen(DEV_BYVDEV_PATH)];
 		}
 	}
 
 	/*
 	 * We don't want to autoreplace offlined disks.  However, we do want to
 	 * replace force-faulted disks (`zpool offline -f`).  Force-faulted
 	 * disks have both offline=1 and faulted=1 in the nvlist.
 	 */
 	if (offline && !faulted) {
 		zed_log_msg(LOG_INFO, "%s: %s is offline, skip autoreplace",
 		    __func__, path);
 		return;
 	}
 
 	is_mpath_wholedisk = is_mpath_whole_disk(path);
 	zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
 	    " %s blank disk, %s mpath blank disk, %s labeled, enc sysfs '%s', "
 	    "(guid %llu)",
 	    zpool_get_name(zhp), path,
 	    physpath ? physpath : "NULL",
 	    wholedisk ? "is" : "not",
 	    is_mpath_wholedisk? "is" : "not",
 	    labeled ? "is" : "not",
 	    enc_sysfs_path,
 	    (long long unsigned int)guid);
 
 	/*
 	 * The VDEV guid is preferred for identification (gets passed in path)
 	 */
 	if (guid != 0) {
 		(void) snprintf(fullpath, sizeof (fullpath), "%llu",
 		    (long long unsigned int)guid);
 	} else {
 		/*
 		 * otherwise use path sans partition suffix for whole disks
 		 */
 		(void) strlcpy(fullpath, path, sizeof (fullpath));
 		if (wholedisk) {
 			char *spath = zfs_strip_partition(fullpath);
 			if (!spath) {
 				zed_log_msg(LOG_INFO, "%s: Can't alloc",
 				    __func__);
 				return;
 			}
 
 			(void) strlcpy(fullpath, spath, sizeof (fullpath));
 			free(spath);
 		}
 	}
 
 	/*
 	 * Attempt to online the device.
 	 */
 	if (zpool_vdev_online(zhp, fullpath,
 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
 	    (newstate == VDEV_STATE_HEALTHY ||
 	    newstate == VDEV_STATE_DEGRADED)) {
 		zed_log_msg(LOG_INFO,
 		    "  zpool_vdev_online: vdev '%s' ('%s') is "
 		    "%s", fullpath, physpath, (newstate == VDEV_STATE_HEALTHY) ?
 		    "HEALTHY" : "DEGRADED");
 		return;
 	}
 
 	/*
 	 * vdev_id alias rule for using scsi_debug devices (FMA automated
 	 * testing)
 	 */
 	if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
 		is_sd = B_TRUE;
 
 	/*
 	 * If the pool doesn't have the autoreplace property set, then use
 	 * vdev online to trigger a FMA fault by posting an ereport.
 	 */
 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
 	    !(wholedisk || is_mpath_wholedisk) || (physpath == NULL)) {
 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
 		    &newstate);
 		zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
 		    "not a blank disk for '%s' ('%s')", fullpath,
 		    physpath);
 		return;
 	}
 
 	/*
 	 * Convert physical path into its current device node.  Rawpath
 	 * needs to be /dev/disk/by-vdev for a scsi_debug device since
 	 * /dev/disk/by-path will not be present.
 	 */
 	(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
 	    is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
 
 	if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
 		zed_log_msg(LOG_INFO, "  realpath: %s failed (%s)",
 		    rawpath, strerror(errno));
 
 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
 		    &newstate);
 
 		zed_log_msg(LOG_INFO, "  zpool_vdev_online: %s FORCEFAULT (%s)",
 		    fullpath, libzfs_error_description(g_zfshdl));
 		return;
 	}
 
 	/* Only autoreplace bad disks */
 	if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
 	    (vs->vs_state != VDEV_STATE_FAULTED) &&
 	    (vs->vs_state != VDEV_STATE_CANT_OPEN)) {
 		zed_log_msg(LOG_INFO, "  not autoreplacing since disk isn't in "
-		    "a bad state (currently %d)", vs->vs_state);
+		    "a bad state (currently %llu)", vs->vs_state);
 		return;
 	}
 
 	nvlist_lookup_string(vdev, "new_devid", &new_devid);
 
 	if (is_mpath_wholedisk) {
 		/* Don't label device mapper or multipath disks. */
 	} else if (!labeled) {
 		/*
 		 * we're auto-replacing a raw disk, so label it first
 		 */
 		char *leafname;
 
 		/*
 		 * If this is a request to label a whole disk, then attempt to
 		 * write out the label.  Before we can label the disk, we need
 		 * to map the physical string that was matched on to the under
 		 * lying device node.
 		 *
 		 * If any part of this process fails, then do a force online
 		 * to trigger a ZFS fault for the device (and any hot spare
 		 * replacement).
 		 */
 		leafname = strrchr(devpath, '/') + 1;
 
 		/*
 		 * If this is a request to label a whole disk, then attempt to
 		 * write out the label.
 		 */
 		if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
 			zed_log_msg(LOG_INFO, "  zpool_label_disk: could not "
 			    "label '%s' (%s)", leafname,
 			    libzfs_error_description(g_zfshdl));
 
 			(void) zpool_vdev_online(zhp, fullpath,
 			    ZFS_ONLINE_FORCEFAULT, &newstate);
 			return;
 		}
 
 		/*
 		 * The disk labeling is asynchronous on Linux. Just record
 		 * this label request and return as there will be another
 		 * disk add event for the partition after the labeling is
 		 * completed.
 		 */
 		device = malloc(sizeof (pendingdev_t));
 		(void) strlcpy(device->pd_physpath, physpath,
 		    sizeof (device->pd_physpath));
 		list_insert_tail(&g_device_list, device);
 
 		zed_log_msg(LOG_INFO, "  zpool_label_disk: async '%s' (%llu)",
 		    leafname, (u_longlong_t)guid);
 
 		return;	/* resumes at EC_DEV_ADD.ESC_DISK for partition */
 
 	} else /* labeled */ {
 		boolean_t found = B_FALSE;
 		/*
 		 * match up with request above to label the disk
 		 */
 		for (device = list_head(&g_device_list); device != NULL;
 		    device = list_next(&g_device_list, device)) {
 			if (strcmp(physpath, device->pd_physpath) == 0) {
 				list_remove(&g_device_list, device);
 				free(device);
 				found = B_TRUE;
 				break;
 			}
 			zed_log_msg(LOG_INFO, "zpool_label_disk: %s != %s",
 			    physpath, device->pd_physpath);
 		}
 		if (!found) {
 			/* unexpected partition slice encountered */
 			zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
 			    fullpath);
 			(void) zpool_vdev_online(zhp, fullpath,
 			    ZFS_ONLINE_FORCEFAULT, &newstate);
 			return;
 		}
 
 		zed_log_msg(LOG_INFO, "  zpool_label_disk: resume '%s' (%llu)",
 		    physpath, (u_longlong_t)guid);
 
 		(void) snprintf(devpath, sizeof (devpath), "%s%s",
 		    DEV_BYID_PATH, new_devid);
 	}
 
 	/*
 	 * Construct the root vdev to pass to zpool_vdev_attach().  While adding
 	 * the entire vdev structure is harmless, we construct a reduced set of
 	 * path/physpath/wholedisk to keep it simple.
 	 */
 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) {
 		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
 		return;
 	}
 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
 		zed_log_msg(LOG_WARNING, "zfs_mod: nvlist_alloc out of memory");
 		nvlist_free(nvroot);
 		return;
 	}
 
 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
 	    nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID, new_devid) != 0 ||
 	    (physpath != NULL && nvlist_add_string(newvd,
 	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
 	    (enc_sysfs_path != NULL && nvlist_add_string(newvd,
 	    ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH, enc_sysfs_path) != 0) ||
 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    (const nvlist_t **)&newvd, 1) != 0) {
 		zed_log_msg(LOG_WARNING, "zfs_mod: unable to add nvlist pairs");
 		nvlist_free(newvd);
 		nvlist_free(nvroot);
 		return;
 	}
 
 	nvlist_free(newvd);
 
 	/*
 	 * Wait for udev to verify the links exist, then auto-replace
 	 * the leaf disk at same physical location.
 	 */
 	if (zpool_label_disk_wait(path, 3000) != 0) {
 		zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
 		    "disk %s is missing", path);
 		nvlist_free(nvroot);
 		return;
 	}
 
 	/*
 	 * Prefer sequential resilvering when supported (mirrors and dRAID),
 	 * otherwise fallback to a traditional healing resilver.
 	 */
 	ret = zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE, B_TRUE);
 	if (ret != 0) {
 		ret = zpool_vdev_attach(zhp, fullpath, path, nvroot,
 		    B_TRUE, B_FALSE);
 	}
 
 	zed_log_msg(LOG_INFO, "  zpool_vdev_replace: %s with %s (%s)",
 	    fullpath, path, (ret == 0) ? "no errors" :
 	    libzfs_error_description(g_zfshdl));
 
 	nvlist_free(nvroot);
 }
 
 /*
  * Utility functions to find a vdev matching given criteria.
  *
  * dev_data_t carries both the search criteria and the result of a search
  * through zpool_iter() -> zfs_iter_pool() -> zfs_iter_vdev().
  */
 typedef struct dev_data {
 	const char		*dd_compare;	/* string to strcmp() against */
 	const char		*dd_prop;	/* nvlist key read for compare */
 	zfs_process_func_t	dd_func;	/* called on the matched vdev */
 	boolean_t		dd_found;	/* set once a vdev matched */
 	boolean_t		dd_islabeled;	/* passed through to dd_func */
 	uint64_t		dd_pool_guid;	/* 0 means any pool */
 	uint64_t		dd_vdev_guid;	/* non-zero: match by guid */
 	const char		*dd_new_devid;	/* added as "new_devid" */
 } dev_data_t;
 
 /*
  * Recursively walk the vdev config 'nvl' (children, spares and l2cache
  * devices first, then the node itself) looking for the vdev described by
  * the dev_data_t passed in 'data'.
  *
  * Matching is by vdev GUID when dd_vdev_guid is non-zero, otherwise by an
  * exact string comparison of the dd_prop nvpair against dd_compare.  When a
  * node is accepted, dd_func is invoked on it with dd_islabeled; nothing is
  * done for any node visited after dd_found has been set.
  */
 static void
 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
 {
 	dev_data_t *dp = data;
 	char *path = NULL;
 	uint_t c, children;
 	nvlist_t **child;
 
 	/*
 	 * First iterate over any children.
 	 */
 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
 			zfs_iter_vdev(zhp, child[c], data);
 	}
 
 	/*
 	 * Iterate over any spares and cache devices
 	 */
 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
 			zfs_iter_vdev(zhp, child[c], data);
 	}
 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++)
 			zfs_iter_vdev(zhp, child[c], data);
 	}
 
 	/* once a vdev was matched and processed there is nothing left to do */
 	if (dp->dd_found)
 		return;
 
 	/*
 	 * Match by GUID if available otherwise fallback to devid or physical
 	 */
 	if (dp->dd_vdev_guid != 0) {
 		uint64_t guid;
 
 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
 		    &guid) != 0 || guid != dp->dd_vdev_guid) {
 			return;
 		}
 		/* uint64_t is not necessarily unsigned long long: cast it */
 		zed_log_msg(LOG_INFO, "  zfs_iter_vdev: matched on %llu",
 		    (u_longlong_t)guid);
 		dp->dd_found = B_TRUE;
 
 	} else if (dp->dd_compare != NULL) {
 		/*
 		 * NOTE: On Linux there is an event for partition, so unlike
 		 * illumos, substring matching is not required to accommodate
 		 * the partition suffix. An exact match will be present in
 		 * the dp->dd_compare value.
 		 */
 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
 		    strcmp(dp->dd_compare, path) != 0) {
 			/*
 			 * 'path' is still NULL when the lookup itself failed;
 			 * never hand a NULL pointer to a %s conversion.
 			 */
 			zed_log_msg(LOG_INFO, "  %s: no match (%s != vdev %s)",
 			    __func__, dp->dd_compare,
 			    path != NULL ? path : "(null)");
 			return;
 		}
 
 		zed_log_msg(LOG_INFO, "  zfs_iter_vdev: matched %s on %s",
 		    dp->dd_prop, path);
 		dp->dd_found = B_TRUE;
 
 		/* pass the new devid for use by replacing code */
 		if (dp->dd_new_devid != NULL) {
 			(void) nvlist_add_string(nvl, "new_devid",
 			    dp->dd_new_devid);
 		}
 	}
 
 	(dp->dd_func)(zhp, nvl, dp->dd_islabeled);
 }
 
 /*
  * Thread pool task: enable the datasets of a formerly unavailable pool,
  * then release the pool handle and the list entry that carried it.
  */
 static void
 zfs_enable_ds(void *arg)
 {
 	unavailpool_t *uap = arg;
 
 	/* best effort; the result of enabling the datasets is not checked */
 	(void) zpool_enable_datasets(uap->uap_zhp, NULL, 0);
 
 	/* this task owns the entry now, so tear it down completely */
 	zpool_close(uap->uap_zhp);
 	free(uap);
 }
 
 /*
  * zpool_iter() callback: search one pool for the vdev described by the
  * dev_data_t in 'data' and, once initial enumeration has finished, kick off
  * dataset enabling for a pool on the unavailable list whose top-level state
  * is now at least DEGRADED.
  *
  * Always closes 'zhp'.  Returns dd_found, so the iteration ceases after a
  * match.
  */
 static int
 zfs_iter_pool(zpool_handle_t *zhp, void *data)
 {
 	nvlist_t *config, *nvl = NULL;
 	dev_data_t *dp = data;
 	uint64_t pool_guid;
 	unavailpool_t *pool;
 
 	zed_log_msg(LOG_INFO, "zfs_iter_pool: evaluating vdevs on %s (by %s)",
 	    zpool_get_name(zhp), dp->dd_vdev_guid ? "GUID" : dp->dd_prop);
 
 	/*
 	 * For each vdev in this pool, look for a match to apply dd_func
 	 */
 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
 		if (dp->dd_pool_guid == 0 ||
 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
 			/*
 			 * Only descend when the vdev tree was really found;
 			 * otherwise 'nvl' would not point at a valid nvlist.
 			 */
 			if (nvlist_lookup_nvlist(config,
 			    ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0) {
 				zfs_iter_vdev(zhp, nvl, data);
 			} else {
 				zed_log_msg(LOG_INFO, "%s: no vdev tree",
 				    __func__);
 			}
 		}
 	} else {
 		zed_log_msg(LOG_INFO, "%s: no config\n", __func__);
 	}
 
 	/*
 	 * if this pool was originally unavailable,
 	 * then enable its datasets asynchronously
 	 */
 	if (g_enumeration_done)  {
 		for (pool = list_head(&g_pool_list); pool != NULL;
 		    pool = list_next(&g_pool_list, pool)) {
 
 			if (strcmp(zpool_get_name(zhp),
 			    zpool_get_name(pool->uap_zhp)))
 				continue;
 			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
 				/* the dispatched task frees 'pool' */
 				list_remove(&g_pool_list, pool);
 				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
 				    pool);
 				break;
 			}
 		}
 	}
 
 	zpool_close(zhp);
 	return (dp->dd_found);	/* cease iteration after a match */
 }
 
 /*
  * Search every pool for a vdev whose ZPOOL_CONFIG_PHYS_PATH equals
  * 'physical' and apply 'func' to it.  'devid' is forwarded for the auto
  * replace code.  Returns B_TRUE when a vdev matched.
  */
 static boolean_t
 devphys_iter(const char *physical, const char *devid, zfs_process_func_t func,
     boolean_t is_slice)
 {
 	dev_data_t search = {
 		.dd_compare = physical,
 		.dd_prop = ZPOOL_CONFIG_PHYS_PATH,
 		.dd_func = func,
 		.dd_found = B_FALSE,
 		.dd_islabeled = is_slice,
 		.dd_new_devid = devid,	/* used by auto replace code */
 	};
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);
 
 	return (search.dd_found);
 }
 
 /*
  * Given a device identifier, find any vdevs with a matching by-vdev
  * path.  Normally we shouldn't need this as the comparison would be
  * made earlier in the devphys_iter().  For example, if we were replacing
  * /dev/disk/by-vdev/L28, normally devphys_iter() would match the
  * ZPOOL_CONFIG_PHYS_PATH of "L28" from the old disk config to "L28"
  * of the new disk config.  However, we've seen cases where
  * ZPOOL_CONFIG_PHYS_PATH was not in the config for the old disk.  Here's
  * an example of a real 2-disk mirror pool where one disk was force
  * faulted:
  *
  *       com.delphix:vdev_zap_top: 129
  *           children[0]:
  *               type: 'disk'
  *               id: 0
  *               guid: 14309659774640089719
  *               path: '/dev/disk/by-vdev/L28'
  *               whole_disk: 0
  *               DTL: 654
  *               create_txg: 4
  *               com.delphix:vdev_zap_leaf: 1161
  *               faulted: 1
  *               aux_state: 'external'
  *           children[1]:
  *               type: 'disk'
  *               id: 1
  *               guid: 16002508084177980912
  *               path: '/dev/disk/by-vdev/L29'
  *               devid: 'dm-uuid-mpath-35000c500a61d68a3'
  *               phys_path: 'L29'
  *               vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
  *               whole_disk: 0
  *               DTL: 1028
  *               create_txg: 4
  *               com.delphix:vdev_zap_leaf: 131
  *
  * So in the case above, the only thing we could compare is the path.
  *
  * We can do this because we assume by-vdev paths are authoritative as physical
  * paths.  We could not assume this for normal paths like /dev/sda since the
  * physical location /dev/sda points to could change over time.
  */
 static boolean_t
 by_vdev_path_iter(const char *by_vdev_path, const char *devid,
     zfs_process_func_t func, boolean_t is_slice)
 {
 	dev_data_t search = {
 		.dd_compare = by_vdev_path,
 		.dd_prop = ZPOOL_CONFIG_PATH,
 		.dd_func = func,
 		.dd_found = B_FALSE,
 		.dd_islabeled = is_slice,
 		.dd_new_devid = devid,
 	};
 	const size_t prefix_len = strlen(DEV_BYVDEV_PATH);
 
 	/* Only paths below "/dev/disk/by-vdev/" are eligible for matching. */
 	if (strncmp(by_vdev_path, DEV_BYVDEV_PATH, prefix_len) != 0)
 		return (B_FALSE);
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);
 
 	return (search.dd_found);
 }
 
 /*
  * Search every pool for a vdev whose ZPOOL_CONFIG_DEVID equals 'devid' and
  * apply 'func' to it.  On Linux the devid can be matched directly and always
  * names a whole disk.  Returns B_TRUE when a vdev matched.
  */
 static boolean_t
 devid_iter(const char *devid, zfs_process_func_t func, boolean_t is_slice)
 {
 	dev_data_t search = {
 		.dd_compare = devid,
 		.dd_prop = ZPOOL_CONFIG_DEVID,
 		.dd_func = func,
 		.dd_found = B_FALSE,
 		.dd_islabeled = is_slice,
 		.dd_new_devid = devid,
 	};
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);
 
 	return (search.dd_found);
 }
 
 /*
  * Search for a vdev by guid and apply 'func' to it.  A 'pool_guid' of zero
  * leaves the search unrestricted to any particular pool.  Returns B_TRUE
  * when a vdev matched.
  */
 static boolean_t
 guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
     zfs_process_func_t func, boolean_t is_slice)
 {
 	dev_data_t search = {
 		.dd_func = func,
 		.dd_found = B_FALSE,
 		.dd_islabeled = is_slice,
 		.dd_pool_guid = pool_guid,
 		.dd_vdev_guid = vdev_guid,
 		.dd_new_devid = devid,
 	};
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &search);
 
 	return (search.dd_found);
 }
 
 /*
  * Handle a EC_DEV_ADD.ESC_DISK event.
  *
  * illumos
  *	Expects: DEV_PHYS_PATH string in schema
  *	Matches: vdev's ZPOOL_CONFIG_PHYS_PATH or ZPOOL_CONFIG_DEVID
  *
  *      path: '/dev/dsk/c0t1d0s0' (persistent)
  *     devid: 'id1,sd@SATA_____Hitachi_HDS72101______JP2940HZ3H74MC/a'
  * phys_path: '/pci@0,0/pci103c,1609@11/disk@1,0:a'
  *
  * linux
  *	provides: DEV_PHYS_PATH and DEV_IDENTIFIER strings in schema
  *	Matches: vdev's ZPOOL_CONFIG_PHYS_PATH or ZPOOL_CONFIG_DEVID
  *
  *      path: '/dev/sdc1' (not persistent)
  *     devid: 'ata-SAMSUNG_HD204UI_S2HGJD2Z805891-part1'
  * phys_path: 'pci-0000:04:00.0-sas-0x4433221106000000-lun-0'
  */
 static int
 zfs_deliver_add(nvlist_t *nvl)
 {
 	char *ident = NULL, *physpath = NULL;
 	uint64_t pguid = 0, vguid = 0;
 	boolean_t is_slice;
 	char by_vdev_path[MAXPATHLEN];
 
 	/* The device identifier is mandatory; everything else is optional. */
 	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &ident) != 0) {
 		zed_log_msg(LOG_INFO, "%s: no dev identifier\n", __func__);
 		return (-1);
 	}
 
 	(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &physpath);
 	(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pguid);
 	(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vguid);
 
 	/* The mere presence of DEV_IS_PART marks the device as a partition. */
 	is_slice = (nvlist_lookup_boolean(nvl, DEV_IS_PART) == 0) ?
 	    B_TRUE : B_FALSE;
 
 	zed_log_msg(LOG_INFO, "zfs_deliver_add: adding %s (%s) (is_slice %d)",
 	    ident, physpath ? physpath : "NULL", is_slice);
 
 	/*
 	 * Try the matching strategies from most to least specific:
 	 * 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
 	 * 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
 	 * 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
 	 * 4. ZPOOL_CONFIG_PATH for /dev/disk/by-vdev devices only (since
 	 *    by-vdev paths represent physical paths).
 	 */
 	if (devid_iter(ident, zfs_process_add, is_slice))
 		return (0);
 
 	if (physpath != NULL &&
 	    devphys_iter(physpath, ident, zfs_process_add, is_slice))
 		return (0);
 
 	/* A guid match does not stop the by-vdev attempt below. */
 	if (vguid != 0) {
 		(void) guid_iter(pguid, vguid, ident, zfs_process_add,
 		    is_slice);
 	}
 
 	/* Without a physical path there is no by-vdev name to try. */
 	if (physpath == NULL)
 		return (0);
 
 	/* Can we match a /dev/disk/by-vdev/ path? */
 	snprintf(by_vdev_path, sizeof (by_vdev_path),
 	    "/dev/disk/by-vdev/%s", physpath);
 	(void) by_vdev_path_iter(by_vdev_path, ident, zfs_process_add,
 	    is_slice);
 
 	return (0);
 }
 
 /*
  * Called when we receive a VDEV_CHECK event, which indicates a device could not
  * be opened during initial pool open, but the autoreplace property was set on
  * the pool.  In this case, we treat it as if it were an add event.
  *
  * The event must carry both a pool guid and a non-zero vdev guid; otherwise
  * it is ignored.  Always returns 0.
  */
 static int
 zfs_deliver_check(nvlist_t *nvl)
 {
 	dev_data_t data = { 0 };
 
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
 	    &data.dd_pool_guid) != 0 ||
 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
 	    &data.dd_vdev_guid) != 0 ||
 	    data.dd_vdev_guid == 0)
 		return (0);
 
 	/* uint64_t is not necessarily unsigned long long: cast for %llu */
 	zed_log_msg(LOG_INFO, "zfs_deliver_check: pool '%llu', vdev %llu",
 	    (u_longlong_t)data.dd_pool_guid,
 	    (u_longlong_t)data.dd_vdev_guid);
 
 	data.dd_func = zfs_process_add;
 
 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
 
 	return (0);
 }
 
 /*
  * Look up the vdev named by 'vdev_path' in the pool and report the physical
  * size recorded in its vdev stats.
  *
  * Returns vs_pspace on success, or 0 when the vdev cannot be found or
  * carries no stats.
  */
 static uint64_t
 vdev_size_from_config(zpool_handle_t *zhp, const char *vdev_path)
 {
 	boolean_t is_spare, is_l2cache, is_log;
 	vdev_stat_t *stats = NULL;
 	nvlist_t *vdev;
 	uint_t nelem;
 
 	vdev = zpool_find_vdev(zhp, vdev_path, &is_spare, &is_l2cache,
 	    &is_log);
 	if (vdev == NULL)
 		return (0);
 
 	verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&stats, &nelem) == 0);
 	if (stats == NULL) {
 		zed_log_msg(LOG_INFO, "%s: no nvlist for '%s'", __func__,
 		    vdev_path);
 		return (0);
 	}
 
 	return (stats->vs_pspace);
 }
 
 /*
  * Report the "whole disk" flag stored in the config of the vdev named by
  * 'vdev_path'.  The flag is set when ZFS was handed an entire disk at pool
  * creation time and partitioned it itself; a partition made externally
  * (for instance with fdisk) and then given to ZFS does not carry it.
  *
  * Returns the whole disk value (either 0 or 1), or 0 when the vdev cannot
  * be found.
  */
 static uint64_t
 vdev_whole_disk_from_config(zpool_handle_t *zhp, const char *vdev_path)
 {
 	boolean_t is_spare, is_l2cache, is_log;
 	uint64_t whole;
 	nvlist_t *vdev;
 
 	vdev = zpool_find_vdev(zhp, vdev_path, &is_spare, &is_l2cache,
 	    &is_log);
 	if (vdev == NULL)
 		return (0);
 
 	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 	    &whole) == 0);
 
 	return (whole);
 }
 
 /*
  * If the device size grew more than 1% then return true.
  *
  * Every argument is parenthesized so that expressions such as "a + b" can
  * be passed safely.  Both arguments are evaluated more than once, so do not
  * pass expressions with side effects.  The division is only reached when
  * newsize > oldsize, so the divisor is never zero.
  */
 #define	DEVICE_GREW(oldsize, newsize) \
 		    (((newsize) > (oldsize)) && \
 		    (((newsize) / ((newsize) - (oldsize))) <= 100))
 
 /*
  * zpool_iter() callback used for device expansion (DLE) events.
  *
  * 'data' is the udev event nvlist.  The device is identified by its vdev
  * guid when present, otherwise by its physical path with the partition
  * suffix appended.  If the pool contains that vdev, the pool is reopened
  * for whole disks and, when the autoexpand property is set and either the
  * device or (for whole disks) its parent grew, the vdev is onlined so the
  * new size is picked up.
  *
  * Always closes 'zhp'.  Returns 1 when the vdev was found in this pool and
  * 0 otherwise.
  */
 static int
 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
 {
 	boolean_t avail_spare, l2cache;
 	nvlist_t *udev_nvl = data;
 	nvlist_t *tgt;
 	int error;
 
 	char *tmp_devname, devname[MAXPATHLEN];
 	char *path, fullpath[MAXPATHLEN];
 	uint64_t guid, wholedisk;
 
 	if (nvlist_lookup_uint64(udev_nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
 		(void) snprintf(devname, sizeof (devname), "%llu",
 		    (u_longlong_t)guid);
 	} else if (nvlist_lookup_string(udev_nvl, DEV_PHYS_PATH,
 	    &tmp_devname) == 0) {
 		(void) strlcpy(devname, tmp_devname, sizeof (devname));
 		zfs_append_partition(devname, sizeof (devname));
 	} else {
 		/*
 		 * Nothing identifies the device, so devname was never
 		 * filled in; searching with it would read garbage.
 		 */
 		zed_log_msg(LOG_INFO, "%s: no guid or physpath", __func__);
 		zpool_close(zhp);
 		return (0);
 	}
 
 	zed_log_msg(LOG_INFO, "zfsdle_vdev_online: searching for '%s' in '%s'",
 	    devname, zpool_get_name(zhp));
 
 	tgt = zpool_find_vdev_by_physpath(zhp, devname, &avail_spare,
 	    &l2cache, NULL);
 	if (tgt == NULL) {
 		zpool_close(zhp);
 		return (0);
 	}
 
 	if (nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH, &path) != 0) {
 		zpool_close(zhp);
 		return (0);
 	}
 
 	/* a vdev without the whole_disk key is treated as a partition */
 	if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
 	    &wholedisk) != 0)
 		wholedisk = 0;
 
 	if (wholedisk) {
 		char *leaf = strrchr(path, '/');
 		char *stripped = NULL;
 		boolean_t scrub_restart = B_FALSE;
 
 		/* use the last path component sans partition suffix */
 		if (leaf != NULL)
 			stripped = zfs_strip_partition(leaf + 1);
 		if (stripped == NULL) {
 			zpool_close(zhp);
 			return (0);
 		}
 
 		(void) strlcpy(fullpath, stripped, sizeof (fullpath));
 		free(stripped);
 
 		/*
 		 * We need to reopen the pool associated with this
 		 * device so that the kernel can update the size of
 		 * the expanded device.  When expanding there is no
 		 * need to restart the scrub from the beginning.
 		 */
 		(void) zpool_reopen_one(zhp, &scrub_restart);
 	} else {
 		(void) strlcpy(fullpath, path, sizeof (fullpath));
 	}
 
 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL) &&
 	    zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
 		vdev_state_t newstate;
 		uint64_t udev_size = 0, udev_parent_size = 0;
 		uint64_t conf_size, conf_wholedisk;
 
 		/*
 		 * If this disk size has not changed, then there's no need
 		 * to do an autoexpand.  To check we look at the disk's size
 		 * in its config, and compare it to the disk size that udev
 		 * is reporting.
 		 */
 
 		/* Get the size of our disk that udev is reporting. */
 		if (nvlist_lookup_uint64(udev_nvl, DEV_SIZE,
 		    &udev_size) != 0)
 			udev_size = 0;
 
 		/*
 		 * Get the size of our disk's parent device from udev
 		 * (where sda1's parent is sda).
 		 */
 		if (nvlist_lookup_uint64(udev_nvl, DEV_PARENT_SIZE,
 		    &udev_parent_size) != 0)
 			udev_parent_size = 0;
 
 		conf_size = vdev_size_from_config(zhp, fullpath);
 		conf_wholedisk = vdev_whole_disk_from_config(zhp, fullpath);
 
 		/*
 		 * Only attempt an autoexpand if the vdev size changed.
 		 * There are two different cases to consider.
 		 *
 		 * 1. wholedisk=1
 		 * If you do a 'zpool create' on a whole disk (like
 		 * /dev/sda), then zfs will create partitions on the disk
 		 * (like /dev/sda1).  In that case, wholedisk=1 will be set
 		 * in the partition's nvlist config.  So zed will need to
 		 * see if your parent device (/dev/sda) expanded in size,
 		 * and if so, then attempt the autoexpand.
 		 *
 		 * 2. wholedisk=0
 		 * If you do a 'zpool create' on an existing partition, or
 		 * a device that doesn't allow partitions, then wholedisk=0,
 		 * and you will simply need to check if the device itself
 		 * expanded in size.
 		 */
 		if (DEVICE_GREW(conf_size, udev_size) ||
 		    (conf_wholedisk &&
 		    DEVICE_GREW(conf_size, udev_parent_size))) {
 			error = zpool_vdev_online(zhp, fullpath, 0,
 			    &newstate);
 
 			/* cast: uint64_t need not be unsigned long long */
 			zed_log_msg(LOG_INFO,
 			    "%s: autoexpanding '%s' from %llu"
 			    " to %llu bytes in pool '%s': %d",
 			    __func__, fullpath, (u_longlong_t)conf_size,
 			    (u_longlong_t)MAX(udev_size, udev_parent_size),
 			    zpool_get_name(zhp), error);
 		}
 	}
 
 	zpool_close(zhp);
 	return (1);
 }
 
 /*
  * This function handles the ESC_DEV_DLE device change event.  Use the
  * provided vdev guid when looking up a disk or partition, when the guid
  * is not present assume the entire disk is owned by ZFS and append the
  * expected -part1 partition information then lookup by physical path.
  *
  * Returns 0 when some pool reported the device as found, 1 otherwise
  * (including when the event carries neither a guid nor a physical path).
  */
 static int
 zfs_deliver_dle(nvlist_t *nvl)
 {
 	char *devname, name[MAXPATHLEN];
 	uint64_t guid;
 
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &guid) == 0) {
 		(void) snprintf(name, sizeof (name), "%llu",
 		    (u_longlong_t)guid);
 	} else if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) == 0) {
 		(void) strlcpy(name, devname, sizeof (name));
 		zfs_append_partition(name, sizeof (name));
 	} else {
 		/*
 		 * No way to identify the device: 'name' was never filled
 		 * in, so there is nothing to search for or report on.
 		 */
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: no guid or physpath");
 		return (1);
 	}
 
 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, nvl) != 1) {
 		zed_log_msg(LOG_INFO, "zfs_deliver_dle: device '%s' not "
 		    "found", name);
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * syseventd daemon module event handler
  *
  * Routes the zfs device related events to their handlers and returns the
  * handler's result; every other class/subclass pair is ignored with 0:
  *
  *	EC_DEV_ADD.ESC_DISK		-> zfs_deliver_add()
  *	EC_DEV_ADD.ESC_LOFI		-> zfs_deliver_add()
  *	EC_ZFS.ESC_ZFS_VDEV_CHECK	-> zfs_deliver_check()
  *	EC_DEV_STATUS.ESC_DEV_DLE	-> zfs_deliver_dle()
  *
  * Note: assumes only one thread active at a time (not thread safe)
  */
 static int
 zfs_slm_deliver_event(const char *class, const char *subclass, nvlist_t *nvl)
 {
 	if (strcmp(class, EC_DEV_ADD) == 0) {
 		/*
 		 * Disk additions are the main interest; new loop devices
 		 * are accepted as well to allow for simplified testing.
 		 */
 		if (strcmp(subclass, ESC_DISK) == 0 ||
 		    strcmp(subclass, ESC_LOFI) == 0)
 			return (zfs_deliver_add(nvl));
 
 		return (0);
 	}
 
 	/*
 	 * A device failed to open during pool load while 'autoreplace' was
 	 * set, so pretend it has just been added.
 	 */
 	if (strcmp(class, EC_ZFS) == 0 &&
 	    strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0)
 		return (zfs_deliver_check(nvl));
 
 	if (strcmp(class, EC_DEV_STATUS) == 0 &&
 	    strcmp(subclass, ESC_DEV_DLE) == 0)
 		return (zfs_deliver_dle(nvl));
 
 	return (0);
 }
 
 /*
  * Thread entry point: run zfs_unavail_pool over every pool, handing it
  * g_pool_list, then flag the enumeration as finished.  The argument is
  * unused and the thread always returns NULL.
  */
 static void *
 zfs_enum_pools(void *arg)
 {
 	(void) arg;
 
 	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, &g_pool_list);
 
 	/*
 	 * zfs_iter_pool() only starts enabling datasets for listed pools
 	 * once this flag is set.
 	 */
 	g_enumeration_done = B_TRUE;
 
 	return (NULL);
 }
 
 /*
  * called from zed daemon at startup
  *
  * sent messages from zevents or udev monitor
  *
  * For now, each agent has its own libzfs instance
  *
  * Returns 0 on success, or -1 when libzfs cannot be initialised or the
  * pool enumeration thread cannot be started.
  */
 int
 zfs_slm_init(void)
 {
 	g_zfshdl = libzfs_init();
 	if (g_zfshdl == NULL)
 		return (-1);
 
 	/*
 	 * The list of unavailable pools is collected asynchronously, since
 	 * that can take a while; it has to exist before the thread starts.
 	 */
 	list_create(&g_pool_list, sizeof (struct unavailpool),
 	    offsetof(struct unavailpool, uap_node));
 
 	if (pthread_create(&g_zfs_tid, NULL, zfs_enum_pools, NULL) == 0) {
 		pthread_setname_np(g_zfs_tid, "enum-pools");
 		list_create(&g_device_list, sizeof (struct pendingdev),
 		    offsetof(struct pendingdev, pd_node));
 
 		return (0);
 	}
 
 	/* no enumeration thread: undo what was set up so far */
 	list_destroy(&g_pool_list);
 	libzfs_fini(g_zfshdl);
 
 	return (-1);
 }
 
 void
 zfs_slm_fini(void)
 {
 	unavailpool_t *pool;
 	pendingdev_t *device;
 
 	/* wait for zfs_enum_pools thread to complete */
 	(void) pthread_join(g_zfs_tid, NULL);
 	/* destroy the thread pool */
 	if (g_tpool != NULL) {
 		tpool_wait(g_tpool);
 		tpool_destroy(g_tpool);
 	}
 
 	while ((pool = (list_head(&g_pool_list))) != NULL) {
 		list_remove(&g_pool_list, pool);
 		zpool_close(pool->uap_zhp);
 		free(pool);
 	}
 	list_destroy(&g_pool_list);
 
 	while ((device = (list_head(&g_device_list))) != NULL) {
 		list_remove(&g_device_list, device);
 		free(device);
 	}
 	list_destroy(&g_device_list);
 
 	libzfs_fini(g_zfshdl);
 }
 
 /*
  * Public entry point for an incoming event: log the class and subclass,
  * then pass the event to zfs_slm_deliver_event().  The delivery result is
  * deliberately discarded.
  */
 void
 zfs_slm_event(const char *class, const char *subclass, nvlist_t *nvl)
 {
 	int rc;
 
 	zed_log_msg(LOG_INFO, "zfs_slm_event: %s.%s", class, subclass);
 
 	rc = zfs_slm_deliver_event(class, subclass, nvl);
 	(void) rc;
 }
diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c
index 9a39d1a80985..29de27c77c34 100644
--- a/cmd/zed/zed_conf.c
+++ b/cmd/zed/zed_conf.c
@@ -1,720 +1,720 @@
 /*
  * This file is part of the ZFS Event Daemon (ZED).
  *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
  * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
  * You can obtain a copy of the license from the top-level file
  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
  * You may not use this file except in compliance with the license.
  */
 
 #include <assert.h>
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libgen.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <unistd.h>
 #include "zed.h"
 #include "zed_conf.h"
 #include "zed_file.h"
 #include "zed_log.h"
 #include "zed_strings.h"
 
 /*
  * Initialise the configuration with default values.
  */
 void
 zed_conf_init(struct zed_conf *zcp)
 {
 	memset(zcp, 0, sizeof (*zcp));
 
 	/* zcp->zfs_hdl opened in zed_event_init() */
 	/* zcp->zedlets created in zed_conf_scan_dir() */
 
 	zcp->pid_fd = -1;		/* opened in zed_conf_write_pid() */
 	zcp->state_fd = -1;		/* opened in zed_conf_open_state() */
 	zcp->zevent_fd = -1;		/* opened in zed_event_init() */
 
 	zcp->max_jobs = 16;
 	zcp->max_zevent_buf_len = 1 << 20;
 
 	if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
 	    !(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
 	    !(zcp->state_file = strdup(ZED_STATE_FILE)))
 		zed_log_die("Failed to create conf: %s", strerror(errno));
 }
 
 /*
  * Destroy the configuration [zcp].
  *
  * Note: zfs_hdl & zevent_fd are destroyed via zed_event_fini().
  */
 void
 zed_conf_destroy(struct zed_conf *zcp)
 {
 	if (zcp->state_fd >= 0) {
 		if (close(zcp->state_fd) < 0)
 			zed_log_msg(LOG_WARNING,
 			    "Failed to close state file \"%s\": %s",
 			    zcp->state_file, strerror(errno));
 		zcp->state_fd = -1;
 	}
 	if (zcp->pid_file) {
 		if ((unlink(zcp->pid_file) < 0) && (errno != ENOENT))
 			zed_log_msg(LOG_WARNING,
 			    "Failed to remove PID file \"%s\": %s",
 			    zcp->pid_file, strerror(errno));
 	}
 	if (zcp->pid_fd >= 0) {
 		if (close(zcp->pid_fd) < 0)
 			zed_log_msg(LOG_WARNING,
 			    "Failed to close PID file \"%s\": %s",
 			    zcp->pid_file, strerror(errno));
 		zcp->pid_fd = -1;
 	}
 	if (zcp->pid_file) {
 		free(zcp->pid_file);
 		zcp->pid_file = NULL;
 	}
 	if (zcp->zedlet_dir) {
 		free(zcp->zedlet_dir);
 		zcp->zedlet_dir = NULL;
 	}
 	if (zcp->state_file) {
 		free(zcp->state_file);
 		zcp->state_file = NULL;
 	}
 	if (zcp->zedlets) {
 		zed_strings_destroy(zcp->zedlets);
 		zcp->zedlets = NULL;
 	}
 }
 
 /*
  * Display command-line help and exit.
  *
  * If [got_err] is 0, output to stdout and exit normally;
  * otherwise, output to stderr and exit with a failure status.
  */
 static void
 _zed_conf_display_help(const char *prog, boolean_t got_err)
 {
 	struct opt { const char *o, *d, *v; };
 
 	FILE *fp = got_err ? stderr : stdout;
 
 	struct opt *oo;
 	struct opt iopts[] = {
 		{ .o = "-h", .d = "Display help" },
 		{ .o = "-L", .d = "Display license information" },
 		{ .o = "-V", .d = "Display version information" },
 		{},
 	};
 	struct opt nopts[] = {
 		{ .o = "-v", .d = "Be verbose" },
 		{ .o = "-f", .d = "Force daemon to run" },
 		{ .o = "-F", .d = "Run daemon in the foreground" },
 		{ .o = "-I",
 		    .d = "Idle daemon until kernel module is (re)loaded" },
 		{ .o = "-M", .d = "Lock all pages in memory" },
 		{ .o = "-P", .d = "$PATH for ZED to use (only used by ZTS)" },
 		{ .o = "-Z", .d = "Zero state file" },
 		{},
 	};
 	struct opt vopts[] = {
 		{ .o = "-d DIR", .d = "Read enabled ZEDLETs from DIR.",
 		    .v = ZED_ZEDLET_DIR },
 		{ .o = "-p FILE", .d = "Write daemon's PID to FILE.",
 		    .v = ZED_PID_FILE },
 		{ .o = "-s FILE", .d = "Write daemon's state to FILE.",
 		    .v = ZED_STATE_FILE },
 		{ .o = "-j JOBS", .d = "Start at most JOBS at once.",
 		    .v = "16" },
 		{ .o = "-b LEN", .d = "Cap kernel event buffer at LEN entries.",
 		    .v = "1048576" },
 		{},
 	};
 
 	fprintf(fp, "Usage: %s [OPTION]...\n", (prog ? prog : "zed"));
 	fprintf(fp, "\n");
 	for (oo = iopts; oo->o; ++oo)
 		fprintf(fp, "    %*s %s\n", -8, oo->o, oo->d);
 	fprintf(fp, "\n");
 	for (oo = nopts; oo->o; ++oo)
 		fprintf(fp, "    %*s %s\n", -8, oo->o, oo->d);
 	fprintf(fp, "\n");
 	for (oo = vopts; oo->o; ++oo)
 		fprintf(fp, "    %*s %s [%s]\n", -8, oo->o, oo->d, oo->v);
 	fprintf(fp, "\n");
 
 	exit(got_err ? EXIT_FAILURE : EXIT_SUCCESS);
 }
 
 /*
  * Display license information to stdout and exit.
  */
 static void
 _zed_conf_display_license(void)
 {
 	printf(
 	    "The ZFS Event Daemon (ZED) is distributed under the terms of the\n"
 	    "  Common Development and Distribution License (CDDL-1.0)\n"
 	    "  <http://opensource.org/licenses/CDDL-1.0>.\n"
 	    "\n"
 	    "Developed at Lawrence Livermore National Laboratory"
 	    " (LLNL-CODE-403049).\n"
 	    "\n");
 
 	exit(EXIT_SUCCESS);
 }
 
 /*
  * Display version information to stdout and exit.
  */
 static void
 _zed_conf_display_version(void)
 {
 	printf("%s-%s-%s\n",
 	    ZFS_META_NAME, ZFS_META_VERSION, ZFS_META_RELEASE);
 
 	exit(EXIT_SUCCESS);
 }
 
 /*
  * Copy the [path] string to the [resultp] ptr.
  * If [path] is not an absolute path, prefix it with the current working dir.
  * If [resultp] is non-null, free its existing string before assignment.
  */
 static void
 _zed_conf_parse_path(char **resultp, const char *path)
 {
 	char buf[PATH_MAX];
 
 	assert(resultp != NULL);
 	assert(path != NULL);
 
 	if (*resultp)
 		free(*resultp);
 
 	if (path[0] == '/') {
 		*resultp = strdup(path);
 	} else {
 		if (!getcwd(buf, sizeof (buf)))
 			zed_log_die("Failed to get current working dir: %s",
 			    strerror(errno));
 
 		if (strlcat(buf, "/", sizeof (buf)) >= sizeof (buf) ||
 		    strlcat(buf, path, sizeof (buf)) >= sizeof (buf))
 			zed_log_die("Failed to copy path: %s",
 			    strerror(ENAMETOOLONG));
 
 		*resultp = strdup(buf);
 	}
 
 	if (!*resultp)
 		zed_log_die("Failed to copy path: %s", strerror(ENOMEM));
 }
 
 /*
  * Parse the command-line options into the configuration [zcp].
  */
 void
 zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
 {
 	const char * const opts = ":hLVd:p:P:s:vfFMZIj:b:";
 	int opt;
 	unsigned long raw;
 
 	if (!zcp || !argv || !argv[0])
 		zed_log_die("Failed to parse options: Internal error");
 
 	opterr = 0;			/* suppress default getopt err msgs */
 
 	while ((opt = getopt(argc, argv, opts)) != -1) {
 		switch (opt) {
 		case 'h':
 			_zed_conf_display_help(argv[0], B_FALSE);
 			break;
 		case 'L':
 			_zed_conf_display_license();
 			break;
 		case 'V':
 			_zed_conf_display_version();
 			break;
 		case 'd':
 			_zed_conf_parse_path(&zcp->zedlet_dir, optarg);
 			break;
 		case 'I':
 			zcp->do_idle = 1;
 			break;
 		case 'p':
 			_zed_conf_parse_path(&zcp->pid_file, optarg);
 			break;
 		case 'P':
 			_zed_conf_parse_path(&zcp->path, optarg);
 			break;
 		case 's':
 			_zed_conf_parse_path(&zcp->state_file, optarg);
 			break;
 		case 'v':
 			zcp->do_verbose = 1;
 			break;
 		case 'f':
 			zcp->do_force = 1;
 			break;
 		case 'F':
 			zcp->do_foreground = 1;
 			break;
 		case 'M':
 			zcp->do_memlock = 1;
 			break;
 		case 'Z':
 			zcp->do_zero = 1;
 			break;
 		case 'j':
 			errno = 0;
 			raw = strtoul(optarg, NULL, 0);
 			if (errno == ERANGE || raw > INT16_MAX) {
 				zed_log_die("%lu is too many jobs", raw);
 			} if (raw == 0) {
 				zed_log_die("0 jobs makes no sense");
 			} else {
 				zcp->max_jobs = raw;
 			}
 			break;
 		case 'b':
 			errno = 0;
 			raw = strtoul(optarg, NULL, 0);
 			if (errno == ERANGE || raw > INT32_MAX) {
 				zed_log_die("%lu is too large", raw);
 			} if (raw == 0) {
 				zcp->max_zevent_buf_len = INT32_MAX;
 			} else {
 				zcp->max_zevent_buf_len = raw;
 			}
 			break;
 		case '?':
 		default:
 			if (optopt == '?')
 				_zed_conf_display_help(argv[0], B_FALSE);
 
 			fprintf(stderr, "%s: Invalid option '-%c'\n\n",
 			    argv[0], optopt);
 			_zed_conf_display_help(argv[0], B_TRUE);
 			break;
 		}
 	}
 }
 
 /*
  * Scan the [zcp] zedlet_dir for files to exec based on the event class.
  * Files must be executable by user, but not writable by group or other.
  * Dotfiles are ignored.
  *
  * Return 0 on success with an updated set of zedlets,
  * or -1 on error with errno set.
  */
 int
 zed_conf_scan_dir(struct zed_conf *zcp)
 {
 	zed_strings_t *zedlets;
 	DIR *dirp;
 	struct dirent *direntp;
 	char pathname[PATH_MAX];
 	struct stat st;
 	int n;
 
 	if (!zcp) {
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR, "Failed to scan zedlet dir: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	zedlets = zed_strings_create();
 	if (!zedlets) {
 		errno = ENOMEM;
 		zed_log_msg(LOG_WARNING, "Failed to scan dir \"%s\": %s",
 		    zcp->zedlet_dir, strerror(errno));
 		return (-1);
 	}
 	dirp = opendir(zcp->zedlet_dir);
 	if (!dirp) {
 		int errno_bak = errno;
 		zed_log_msg(LOG_WARNING, "Failed to open dir \"%s\": %s",
 		    zcp->zedlet_dir, strerror(errno));
 		zed_strings_destroy(zedlets);
 		errno = errno_bak;
 		return (-1);
 	}
 	while ((direntp = readdir(dirp))) {
 		if (direntp->d_name[0] == '.')
 			continue;
 
 		n = snprintf(pathname, sizeof (pathname),
 		    "%s/%s", zcp->zedlet_dir, direntp->d_name);
 		if ((n < 0) || (n >= sizeof (pathname))) {
 			zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s",
 			    direntp->d_name, strerror(ENAMETOOLONG));
 			continue;
 		}
 		if (stat(pathname, &st) < 0) {
 			zed_log_msg(LOG_WARNING, "Failed to stat \"%s\": %s",
 			    pathname, strerror(errno));
 			continue;
 		}
 		if (!S_ISREG(st.st_mode)) {
 			zed_log_msg(LOG_INFO,
 			    "Ignoring \"%s\": not a regular file",
 			    direntp->d_name);
 			continue;
 		}
 		if ((st.st_uid != 0) && !zcp->do_force) {
 			zed_log_msg(LOG_NOTICE,
 			    "Ignoring \"%s\": not owned by root",
 			    direntp->d_name);
 			continue;
 		}
 		if (!(st.st_mode & S_IXUSR)) {
 			zed_log_msg(LOG_INFO,
 			    "Ignoring \"%s\": not executable by user",
 			    direntp->d_name);
 			continue;
 		}
 		if ((st.st_mode & S_IWGRP) && !zcp->do_force) {
 			zed_log_msg(LOG_NOTICE,
 			    "Ignoring \"%s\": writable by group",
 			    direntp->d_name);
 			continue;
 		}
 		if ((st.st_mode & S_IWOTH) && !zcp->do_force) {
 			zed_log_msg(LOG_NOTICE,
 			    "Ignoring \"%s\": writable by other",
 			    direntp->d_name);
 			continue;
 		}
 		if (zed_strings_add(zedlets, NULL, direntp->d_name) < 0) {
 			zed_log_msg(LOG_WARNING,
 			    "Failed to register \"%s\": %s",
 			    direntp->d_name, strerror(errno));
 			continue;
 		}
 		if (zcp->do_verbose)
 			zed_log_msg(LOG_INFO,
 			    "Registered zedlet \"%s\"", direntp->d_name);
 	}
 	if (closedir(dirp) < 0) {
 		int errno_bak = errno;
 		zed_log_msg(LOG_WARNING, "Failed to close dir \"%s\": %s",
 		    zcp->zedlet_dir, strerror(errno));
 		zed_strings_destroy(zedlets);
 		errno = errno_bak;
 		return (-1);
 	}
 	if (zcp->zedlets)
 		zed_strings_destroy(zcp->zedlets);
 
 	zcp->zedlets = zedlets;
 	return (0);
 }
 
 /*
  * Write the PID file specified in [zcp].
  * Return 0 on success, -1 on error.
  *
  * This must be called after fork()ing to become a daemon (so the correct PID
  * is recorded), but before daemonization is complete and the parent process
  * exits (for synchronization with systemd).
  */
 int
 zed_conf_write_pid(struct zed_conf *zcp)
 {
 	char buf[PATH_MAX];
 	int n;
 	char *p;
 	mode_t mask;
 	int rv;
 
 	if (!zcp || !zcp->pid_file) {
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR, "Failed to create PID file: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	assert(zcp->pid_fd == -1);
 	/*
 	 * Create PID file directory if needed.
 	 */
 	n = strlcpy(buf, zcp->pid_file, sizeof (buf));
 	if (n >= sizeof (buf)) {
 		errno = ENAMETOOLONG;
 		zed_log_msg(LOG_ERR, "Failed to create PID file: %s",
 		    strerror(errno));
 		goto err;
 	}
 	p = strrchr(buf, '/');
 	if (p)
 		*p = '\0';
 
 	if ((mkdirp(buf, 0755) < 0) && (errno != EEXIST)) {
 		zed_log_msg(LOG_ERR, "Failed to create directory \"%s\": %s",
 		    buf, strerror(errno));
 		goto err;
 	}
 	/*
 	 * Obtain PID file lock.
 	 */
 	mask = umask(0);
 	umask(mask | 022);
 	zcp->pid_fd = open(zcp->pid_file, O_RDWR | O_CREAT | O_CLOEXEC, 0644);
 	umask(mask);
 	if (zcp->pid_fd < 0) {
 		zed_log_msg(LOG_ERR, "Failed to open PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 		goto err;
 	}
 	rv = zed_file_lock(zcp->pid_fd);
 	if (rv < 0) {
 		zed_log_msg(LOG_ERR, "Failed to lock PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 		goto err;
 	} else if (rv > 0) {
 		pid_t pid = zed_file_is_locked(zcp->pid_fd);
 		if (pid < 0) {
 			zed_log_msg(LOG_ERR,
 			    "Failed to test lock on PID file \"%s\"",
 			    zcp->pid_file);
 		} else if (pid > 0) {
 			zed_log_msg(LOG_ERR,
 			    "Found PID %d bound to PID file \"%s\"",
 			    pid, zcp->pid_file);
 		} else {
 			zed_log_msg(LOG_ERR,
 			    "Inconsistent lock state on PID file \"%s\"",
 			    zcp->pid_file);
 		}
 		goto err;
 	}
 	/*
 	 * Write PID file.
 	 */
 	n = snprintf(buf, sizeof (buf), "%d\n", (int)getpid());
 	if ((n < 0) || (n >= sizeof (buf))) {
 		errno = ERANGE;
 		zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 	} else if (write(zcp->pid_fd, buf, n) != n) {
 		zed_log_msg(LOG_ERR, "Failed to write PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 	} else if (fdatasync(zcp->pid_fd) < 0) {
 		zed_log_msg(LOG_ERR, "Failed to sync PID file \"%s\": %s",
 		    zcp->pid_file, strerror(errno));
 	} else {
 		return (0);
 	}
 
 err:
 	if (zcp->pid_fd >= 0) {
 		(void) close(zcp->pid_fd);
 		zcp->pid_fd = -1;
 	}
 	return (-1);
 }
 
 /*
  * Open and lock the [zcp] state_file.
  * Return 0 on success, -1 on error.
  *
  * FIXME: Move state information into kernel.
  */
 int
 zed_conf_open_state(struct zed_conf *zcp)
 {
 	char dirbuf[PATH_MAX];
 	int n;
 	char *p;
 	int rv;
 
 	if (!zcp || !zcp->state_file) {
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR, "Failed to open state file: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	n = strlcpy(dirbuf, zcp->state_file, sizeof (dirbuf));
 	if (n >= sizeof (dirbuf)) {
 		errno = ENAMETOOLONG;
 		zed_log_msg(LOG_WARNING, "Failed to open state file: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	p = strrchr(dirbuf, '/');
 	if (p)
 		*p = '\0';
 
 	if ((mkdirp(dirbuf, 0755) < 0) && (errno != EEXIST)) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to create directory \"%s\": %s",
 		    dirbuf, strerror(errno));
 		return (-1);
 	}
 	if (zcp->state_fd >= 0) {
 		if (close(zcp->state_fd) < 0) {
 			zed_log_msg(LOG_WARNING,
 			    "Failed to close state file \"%s\": %s",
 			    zcp->state_file, strerror(errno));
 			return (-1);
 		}
 	}
 	if (zcp->do_zero)
 		(void) unlink(zcp->state_file);
 
 	zcp->state_fd = open(zcp->state_file,
 	    O_RDWR | O_CREAT | O_CLOEXEC, 0644);
 	if (zcp->state_fd < 0) {
 		zed_log_msg(LOG_WARNING, "Failed to open state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));
 		return (-1);
 	}
 	rv = zed_file_lock(zcp->state_fd);
 	if (rv < 0) {
 		zed_log_msg(LOG_WARNING, "Failed to lock state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));
 		return (-1);
 	}
 	if (rv > 0) {
 		pid_t pid = zed_file_is_locked(zcp->state_fd);
 		if (pid < 0) {
 			zed_log_msg(LOG_WARNING,
 			    "Failed to test lock on state file \"%s\"",
 			    zcp->state_file);
 		} else if (pid > 0) {
 			zed_log_msg(LOG_WARNING,
 			    "Found PID %d bound to state file \"%s\"",
 			    pid, zcp->state_file);
 		} else {
 			zed_log_msg(LOG_WARNING,
 			    "Inconsistent lock state on state file \"%s\"",
 			    zcp->state_file);
 		}
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Read the opened [zcp] state_file to obtain the eid & etime of the last event
  * processed.  Write the state from the last event to the [eidp] & [etime] args
  * passed by reference.  Note that etime[] is an array of size 2.
  * Return 0 on success, -1 on error.
  */
 int
 zed_conf_read_state(struct zed_conf *zcp, uint64_t *eidp, int64_t etime[])
 {
 	ssize_t len;
 	struct iovec iov[3];
 	ssize_t n;
 
 	if (!zcp || !eidp || !etime) {
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR,
 		    "Failed to read state file: %s", strerror(errno));
 		return (-1);
 	}
 	if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t)-1) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to reposition state file offset: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	len = 0;
 	iov[0].iov_base = eidp;
 	len += iov[0].iov_len = sizeof (*eidp);
 	iov[1].iov_base = &etime[0];
 	len += iov[1].iov_len = sizeof (etime[0]);
 	iov[2].iov_base = &etime[1];
 	len += iov[2].iov_len = sizeof (etime[1]);
 
 	n = readv(zcp->state_fd, iov, 3);
 	if (n == 0) {
 		*eidp = 0;
 	} else if (n < 0) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to read state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));
 		return (-1);
 	} else if (n != len) {
 		errno = EIO;
 		zed_log_msg(LOG_WARNING,
-		    "Failed to read state file \"%s\": Read %d of %d bytes",
+		    "Failed to read state file \"%s\": Read %zd of %zd bytes",
 		    zcp->state_file, n, len);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Write the [eid] & [etime] of the last processed event to the opened
  * [zcp] state_file.  Note that etime[] is an array of size 2.
  * Return 0 on success, -1 on error.
  */
 int
 zed_conf_write_state(struct zed_conf *zcp, uint64_t eid, int64_t etime[])
 {
 	ssize_t len;
 	struct iovec iov[3];
 	ssize_t n;
 
 	if (!zcp) {
 		errno = EINVAL;
 		zed_log_msg(LOG_ERR,
 		    "Failed to write state file: %s", strerror(errno));
 		return (-1);
 	}
 	if (lseek(zcp->state_fd, 0, SEEK_SET) == (off_t)-1) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to reposition state file offset: %s",
 		    strerror(errno));
 		return (-1);
 	}
 	len = 0;
 	iov[0].iov_base = &eid;
 	len += iov[0].iov_len = sizeof (eid);
 	iov[1].iov_base = &etime[0];
 	len += iov[1].iov_len = sizeof (etime[0]);
 	iov[2].iov_base = &etime[1];
 	len += iov[2].iov_len = sizeof (etime[1]);
 
 	n = writev(zcp->state_fd, iov, 3);
 	if (n < 0) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to write state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));
 		return (-1);
 	}
 	if (n != len) {
 		errno = EIO;
 		zed_log_msg(LOG_WARNING,
-		    "Failed to write state file \"%s\": Wrote %d of %d bytes",
+		    "Failed to write state file \"%s\": Wrote %zd of %zd bytes",
 		    zcp->state_file, n, len);
 		return (-1);
 	}
 	if (fdatasync(zcp->state_fd) < 0) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to sync state file \"%s\": %s",
 		    zcp->state_file, strerror(errno));
 		return (-1);
 	}
 	return (0);
 }
diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c
index 3c8e2fb38c15..db89ecc907bb 100644
--- a/cmd/zed/zed_disk_event.c
+++ b/cmd/zed/zed_disk_event.c
@@ -1,469 +1,469 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
  * You can obtain a copy of the license from the top-level file
  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
  * You may not use this file except in compliance with the license.
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2016, 2017, Intel Corporation.
  */
 
 #ifdef HAVE_LIBUDEV
 
 #include <errno.h>
 #include <fcntl.h>
 #include <libnvpair.h>
 #include <libudev.h>
 #include <libzfs.h>
 #include <libzutil.h>
 #include <pthread.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include <sys/sysevent/eventdefs.h>
 #include <sys/sysevent/dev.h>
 
 #include "zed_log.h"
 #include "zed_disk_event.h"
 #include "agents/zfs_agents.h"
 
 /*
  * Portions of ZED need to see disk events for disks belonging to ZFS pools.
  * A libudev monitor is established to monitor block device actions and pass
  * them on to internal ZED logic modules.  Initially, zfs_mod.c is the only
  * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM
  * module responsible for handling disk events for ZFS.
  */
 
 pthread_t g_mon_tid;
 struct udev *g_udev;
 struct udev_monitor *g_mon;
 
 
 #define	DEV_BYID_PATH	"/dev/disk/by-id/"
 
 /* 64MB is minimum usable disk for ZFS */
-#define	MINIMUM_SECTORS		131072
+#define	MINIMUM_SECTORS		131072ULL
 
 
 /*
  * Post disk event to SLM module
  *
  * occurs in the context of monitor thread
  */
 static void
 zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
 {
 	char *strval;
 	uint64_t numval;
 
 	zed_log_msg(LOG_INFO, "zed_disk_event:");
 	zed_log_msg(LOG_INFO, "\tclass: %s", class);
 	zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass);
 	if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval);
 	if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
 	if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
 	if (nvlist_lookup_boolean(nvl, DEV_IS_PART) == B_TRUE)
 		zed_log_msg(LOG_INFO, "\t%s: B_TRUE", DEV_IS_PART);
 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
 	if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
 	if (nvlist_lookup_uint64(nvl, DEV_PARENT_SIZE, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_PARENT_SIZE, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
 	if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
 		zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
 
 	(void) zfs_agent_post_event(class, subclass, nvl);
 }
 
 /*
  * dev_event_nvlist: place event schema into an nv pair list
  *
  * NAME			VALUE (example)
  * --------------	--------------------------------------------------------
  * DEV_NAME		/dev/sdl
  * DEV_PATH		/devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/...
  * DEV_IDENTIFIER	ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC
  * DEV_PHYS_PATH	pci-0000:04:00.0-sas-0x4433221101000000-lun-0
  * DEV_IS_PART		---
  * DEV_SIZE		500107862016
  * ZFS_EV_POOL_GUID	17523635698032189180
  * ZFS_EV_VDEV_GUID	14663607734290803088
  */
 static nvlist_t *
 dev_event_nvlist(struct udev_device *dev)
 {
 	nvlist_t *nvl;
 	char strval[128];
 	const char *value, *path;
 	uint64_t guid;
 
 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
 		return (NULL);
 
 	if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0)
 		(void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval);
 	if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0)
 		(void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval);
 	if ((path = udev_device_get_devnode(dev)) != NULL)
 		(void) nvlist_add_string(nvl, DEV_NAME, path);
 	if ((value = udev_device_get_devpath(dev)) != NULL)
 		(void) nvlist_add_string(nvl, DEV_PATH, value);
 	value = udev_device_get_devtype(dev);
 	if ((value != NULL && strcmp("partition", value) == 0) ||
 	    (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER")
 	    != NULL)) {
 		(void) nvlist_add_boolean(nvl, DEV_IS_PART);
 	}
 	if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) {
 		uint64_t numval = DEV_BSIZE;
 
 		numval *= strtoull(value, NULL, 10);
 		(void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
 
 		/*
 		 * If the device has a parent, then get the parent block
 		 * device's size as well.  For example, /dev/sda1's parent
 		 * is /dev/sda.
 		 */
 		struct udev_device *parent_dev = udev_device_get_parent(dev);
 		if ((value = udev_device_get_sysattr_value(parent_dev, "size"))
 		    != NULL) {
 			uint64_t numval = DEV_BSIZE;
 
 			numval *= strtoull(value, NULL, 10);
 			(void) nvlist_add_uint64(nvl, DEV_PARENT_SIZE, numval);
 		}
 	}
 
 	/*
 	 * Grab the pool and vdev guids from blkid cache
 	 */
 	value = udev_device_get_property_value(dev, "ID_FS_UUID");
 	if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
 		(void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid);
 
 	value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB");
 	if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
 		(void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid);
 
 	/*
 	 * Either a vdev guid or a devid must be present for matching
 	 */
 	if (!nvlist_exists(nvl, DEV_IDENTIFIER) &&
 	    !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) {
 		nvlist_free(nvl);
 		return (NULL);
 	}
 
 	return (nvl);
 }
 
 /*
  *  Listen for block device uevents
  */
 static void *
 zed_udev_monitor(void *arg)
 {
 	struct udev_monitor *mon = arg;
 	char *tmp, *tmp2;
 
 	zed_log_msg(LOG_INFO, "Waiting for new udev disk events...");
 
 	while (1) {
 		struct udev_device *dev;
 		const char *action, *type, *part, *sectors;
 		const char *bus, *uuid, *devpath;
 		const char *class, *subclass;
 		nvlist_t *nvl;
 		boolean_t is_zfs = B_FALSE;
 
 		/* allow a cancellation while blocked (recvmsg) */
 		pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
 
 		/* blocks at recvmsg until an event occurs */
 		if ((dev = udev_monitor_receive_device(mon)) == NULL) {
 			zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive "
 			    "device error %d", errno);
 			continue;
 		}
 
 		/* allow all steps to complete before a cancellation */
 		pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
 
 		/*
 		 * Strongly typed device is the preferred filter
 		 */
 		type = udev_device_get_property_value(dev, "ID_FS_TYPE");
 		if (type != NULL && type[0] != '\0') {
 			if (strcmp(type, "zfs_member") == 0) {
 				is_zfs = B_TRUE;
 			} else {
 				/* not ours, so skip */
 				zed_log_msg(LOG_INFO, "zed_udev_monitor: skip "
 				    "%s (in use by %s)",
 				    udev_device_get_devnode(dev), type);
 				udev_device_unref(dev);
 				continue;
 			}
 		}
 
 		/*
 		 * if this is a disk and it is partitioned, then the
 		 * zfs label will reside in a DEVTYPE=partition and
 		 * we can skip passing this event
 		 *
 		 * Special case: Blank disks are sometimes reported with
 		 * an erroneous 'atari' partition, and should not be
 		 * excluded from being used as an autoreplace disk:
 		 *
 		 * https://github.com/openzfs/zfs/issues/13497
 		 */
 		type = udev_device_get_property_value(dev, "DEVTYPE");
 		part = udev_device_get_property_value(dev,
 		    "ID_PART_TABLE_TYPE");
 		if (type != NULL && type[0] != '\0' &&
 		    strcmp(type, "disk") == 0 &&
 		    part != NULL && part[0] != '\0') {
 			const char *devname =
 			    udev_device_get_property_value(dev, "DEVNAME");
 
 			if (strcmp(part, "atari") == 0) {
 				zed_log_msg(LOG_INFO,
 				    "%s: %s is reporting an atari partition, "
 				    "but we're going to assume it's a false "
 				    "positive and still use it (issue #13497)",
 				    __func__, devname);
 			} else {
 				zed_log_msg(LOG_INFO,
 				    "%s: skip %s since it has a %s partition "
 				    "already", __func__, devname, part);
 				/* skip and wait for partition event */
 				udev_device_unref(dev);
 				continue;
 			}
 		}
 
 		/*
 		 * ignore small partitions
 		 */
 		sectors = udev_device_get_property_value(dev,
 		    "ID_PART_ENTRY_SIZE");
 		if (sectors == NULL)
 			sectors = udev_device_get_sysattr_value(dev, "size");
 		if (sectors != NULL &&
 		    strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
 			zed_log_msg(LOG_INFO,
 			    "%s: %s sectors %s < %llu (minimum)",
 			    __func__,
 			    udev_device_get_property_value(dev, "DEVNAME"),
 			    sectors, MINIMUM_SECTORS);
 			udev_device_unref(dev);
 			continue;
 		}
 
 		/*
 		 * If the blkid probe didn't find ZFS, then a persistent
 		 * device id string is required in the message schema
 		 * for matching with vdevs. Preflight here for expected
 		 * udev information.
 		 *
 		 * Special case:
 		 * NVMe devices don't have ID_BUS set (at least on RHEL 7-8),
 		 * but they are valid for autoreplace.  Add a special case for
 		 * them by searching for "/nvme/" in the udev DEVPATH:
 		 *
 		 * DEVPATH=/devices/pci0000:00/0000:00:1e.0/nvme/nvme2/nvme2n1
 		 */
 		bus = udev_device_get_property_value(dev, "ID_BUS");
 		uuid = udev_device_get_property_value(dev, "DM_UUID");
 		devpath = udev_device_get_devpath(dev);
 		if (!is_zfs && (bus == NULL && uuid == NULL &&
 		    strstr(devpath, "/nvme/") == NULL)) {
 			zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
 			    "source", udev_device_get_devnode(dev));
 			udev_device_unref(dev);
 			continue;
 		}
 
 		action = udev_device_get_action(dev);
 		if (strcmp(action, "add") == 0) {
 			class = EC_DEV_ADD;
 			subclass = ESC_DISK;
 		} else if (strcmp(action, "remove") == 0) {
 			class = EC_DEV_REMOVE;
 			subclass = ESC_DISK;
 		} else if (strcmp(action, "change") == 0) {
 			class = EC_DEV_STATUS;
 			subclass = ESC_DEV_DLE;
 		} else {
 			zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown",
 			    action);
 			udev_device_unref(dev);
 			continue;
 		}
 
 		/*
 		 * Special case an EC_DEV_ADD for multipath devices
 		 *
 		 * When a multipath device is created, udev reports the
 		 * following:
 		 *
 		 * 1.	"add" event of the dm device for the multipath device
 		 *	(like /dev/dm-3).
 		 * 2.	"change" event to create the actual multipath device
 		 *	symlink (like /dev/mapper/mpatha).  The event also
 		 *	passes back the relevant DM vars we care about, like
 		 *	DM_UUID.
 		 * 3.	Another "change" event identical to #2 (that we ignore).
 		 *
 		 * To get the behavior we want, we treat the "change" event
 		 * in #2 as a "add" event; as if "/dev/mapper/mpatha" was
 		 * a new disk being added.
 		 */
 		if (strcmp(class, EC_DEV_STATUS) == 0 &&
 		    udev_device_get_property_value(dev, "DM_UUID") &&
 		    udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
 			tmp = (char *)udev_device_get_devnode(dev);
 			tmp2 = zfs_get_underlying_path(tmp);
 			if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
 				/*
 				 * We have a real underlying device, which
 				 * means that this multipath "change" event is
 				 * an "add" event.
 				 *
 				 * If the multipath device and the underlying
 				 * dev are the same name (i.e. /dev/dm-5), then
 				 * there is no real underlying disk for this
 				 * multipath device, and so this "change" event
 				 * really is a multipath removal.
 				 */
 				class = EC_DEV_ADD;
 				subclass = ESC_DISK;
 			} else {
 				tmp = (char *)
 				    udev_device_get_property_value(dev,
 				    "DM_NR_VALID_PATHS");
 				/* treat as a multipath remove */
 				if (tmp != NULL && strcmp(tmp, "0") == 0) {
 					class = EC_DEV_REMOVE;
 					subclass = ESC_DISK;
 				}
 			}
 			free(tmp2);
 		}
 
 		/*
 		 * Special case an EC_DEV_ADD for scsi_debug devices
 		 *
 		 * These devices require a udevadm trigger command after
 		 * creation in order to register the vdev_id scsidebug alias
 		 * rule (adds a persistent path (phys_path) used for fault
 		 * management automated tests in the ZFS test suite.
 		 *
 		 * After udevadm trigger command, event registers as a "change"
 		 * event but needs to instead be handled as another "add" event
 		 * to allow for disk labeling and partitioning to occur.
 		 */
 		if (strcmp(class, EC_DEV_STATUS) == 0 &&
 		    udev_device_get_property_value(dev, "ID_VDEV") &&
 		    udev_device_get_property_value(dev, "ID_MODEL")) {
 			const char *id_model, *id_model_sd = "scsi_debug";
 
 			id_model = udev_device_get_property_value(dev,
 			    "ID_MODEL");
 			if (strcmp(id_model, id_model_sd) == 0) {
 				class = EC_DEV_ADD;
 				subclass = ESC_DISK;
 			}
 		}
 
 		if ((nvl = dev_event_nvlist(dev)) != NULL) {
 			zed_udev_event(class, subclass, nvl);
 			nvlist_free(nvl);
 		}
 
 		udev_device_unref(dev);
 	}
 
 	return (NULL);
 }
 
 int
 zed_disk_event_init(void)
 {
 	int fd, fflags;
 
 	if ((g_udev = udev_new()) == NULL) {
 		zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno);
 		return (-1);
 	}
 
 	/* Set up a udev monitor for block devices */
 	g_mon = udev_monitor_new_from_netlink(g_udev, "udev");
 	udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk");
 	udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block",
 	    "partition");
 	udev_monitor_enable_receiving(g_mon);
 
 	/* Make sure monitoring socket is blocking */
 	fd = udev_monitor_get_fd(g_mon);
 	if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK)
 		(void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK);
 
 	/* spawn a thread to monitor events */
 	if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) {
 		udev_monitor_unref(g_mon);
 		udev_unref(g_udev);
 		zed_log_msg(LOG_WARNING, "pthread_create failed");
 		return (-1);
 	}
 
 	pthread_setname_np(g_mon_tid, "udev monitor");
 	zed_log_msg(LOG_INFO, "zed_disk_event_init");
 
 	return (0);
 }
 
 void
 zed_disk_event_fini(void)
 {
 	/* cancel monitor thread at recvmsg() */
 	(void) pthread_cancel(g_mon_tid);
 	(void) pthread_join(g_mon_tid, NULL);
 
 	/* cleanup udev resources */
 	udev_monitor_unref(g_mon);
 	udev_unref(g_udev);
 
 	zed_log_msg(LOG_INFO, "zed_disk_event_fini");
 }
 
 #else
 
 #include "zed_disk_event.h"
 
 int
 zed_disk_event_init(void)
 {
 	return (0);
 }
 
 void
 zed_disk_event_fini(void)
 {
 }
 
 #endif /* HAVE_LIBUDEV */
diff --git a/cmd/zed/zed_exec.c b/cmd/zed/zed_exec.c
index 369c4b6950c5..51c292d41ccc 100644
--- a/cmd/zed/zed_exec.c
+++ b/cmd/zed/zed_exec.c
@@ -1,373 +1,373 @@
 /*
  * This file is part of the ZFS Event Daemon (ZED).
  *
  * Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
  * Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC.
  * Refer to the OpenZFS git commit log for authoritative copyright attribution.
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License Version 1.0 (CDDL-1.0).
  * You can obtain a copy of the license from the top-level file
  * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
  * You may not use this file except in compliance with the license.
  */
 
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stddef.h>
 #include <sys/avl.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <time.h>
 #include <unistd.h>
 #include <pthread.h>
 #include <signal.h>
 
 #include "zed_exec.h"
 #include "zed_log.h"
 #include "zed_strings.h"
 
 #define	ZEVENT_FILENO	3
 
 struct launched_process_node {
 	avl_node_t node;
 	pid_t pid;
 	uint64_t eid;
 	char *name;
 };
 
 static int
 _launched_process_node_compare(const void *x1, const void *x2)
 {
 	pid_t p1;
 	pid_t p2;
 
 	assert(x1 != NULL);
 	assert(x2 != NULL);
 
 	p1 = ((const struct launched_process_node *) x1)->pid;
 	p2 = ((const struct launched_process_node *) x2)->pid;
 
 	if (p1 < p2)
 		return (-1);
 	else if (p1 == p2)
 		return (0);
 	else
 		return (1);
 }
 
 static pthread_t _reap_children_tid = (pthread_t)-1;
 static volatile boolean_t _reap_children_stop;
 static avl_tree_t _launched_processes;
 static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER;
 static int16_t _launched_processes_limit;
 
 /*
  * Create an environment string array for passing to execve() using the
  * NAME=VALUE strings in container [zsp].
  * Return a newly-allocated environment, or NULL on error.
  */
 static char **
 _zed_exec_create_env(zed_strings_t *zsp)
 {
 	int num_ptrs;
 	int buflen;
 	char *buf;
 	char **pp;
 	char *p;
 	const char *q;
 	int i;
 	int len;
 
 	num_ptrs = zed_strings_count(zsp) + 1;
 	buflen = num_ptrs * sizeof (char *);
 	for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp))
 		buflen += strlen(q) + 1;
 
 	buf = calloc(1, buflen);
 	if (!buf)
 		return (NULL);
 
 	pp = (char **)buf;
 	p = buf + (num_ptrs * sizeof (char *));
 	i = 0;
 	for (q = zed_strings_first(zsp); q; q = zed_strings_next(zsp)) {
 		pp[i] = p;
 		len = strlen(q) + 1;
 		memcpy(p, q, len);
 		p += len;
 		i++;
 	}
 	pp[i] = NULL;
 	assert(buf + buflen == p);
 	return ((char **)buf);
 }
 
 /*
  * Fork a child process to handle event [eid].  The program [prog]
  * in directory [dir] is executed with the environment [env].
  *
  * The file descriptor [zfd] is the zevent_fd used to track the
  * current cursor location within the zevent nvlist.
  */
 static void
 _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog,
     char *env[], int zfd, boolean_t in_foreground)
 {
 	char path[PATH_MAX];
 	int n;
 	pid_t pid;
 	int fd;
 	struct launched_process_node *node;
 	sigset_t mask;
 	struct timespec launch_timeout =
 		{ .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000, };
 
 	assert(dir != NULL);
 	assert(prog != NULL);
 	assert(env != NULL);
 	assert(zfd >= 0);
 
 	while (__atomic_load_n(&_launched_processes_limit,
 	    __ATOMIC_SEQ_CST) <= 0)
 		(void) nanosleep(&launch_timeout, NULL);
 
 	n = snprintf(path, sizeof (path), "%s/%s", dir, prog);
 	if ((n < 0) || (n >= sizeof (path))) {
 		zed_log_msg(LOG_WARNING,
 		    "Failed to fork \"%s\" for eid=%llu: %s",
 		    prog, eid, strerror(ENAMETOOLONG));
 		return;
 	}
 	(void) pthread_mutex_lock(&_launched_processes_lock);
 	pid = fork();
 	if (pid < 0) {
 		(void) pthread_mutex_unlock(&_launched_processes_lock);
 		zed_log_msg(LOG_WARNING,
 		    "Failed to fork \"%s\" for eid=%llu: %s",
 		    prog, eid, strerror(errno));
 		return;
 	} else if (pid == 0) {
 		(void) sigemptyset(&mask);
 		(void) sigprocmask(SIG_SETMASK, &mask, NULL);
 
 		(void) umask(022);
 		if (in_foreground && /* we're already devnulled if daemonised */
 		    (fd = open("/dev/null", O_RDWR | O_CLOEXEC)) != -1) {
 			(void) dup2(fd, STDIN_FILENO);
 			(void) dup2(fd, STDOUT_FILENO);
 			(void) dup2(fd, STDERR_FILENO);
 		}
 		(void) dup2(zfd, ZEVENT_FILENO);
 		execle(path, prog, NULL, env);
 		_exit(127);
 	}
 
 	/* parent process */
 
 	node = calloc(1, sizeof (*node));
 	if (node) {
 		node->pid = pid;
 		node->eid = eid;
 		node->name = strdup(prog);
 
 		avl_add(&_launched_processes, node);
 	}
 	(void) pthread_mutex_unlock(&_launched_processes_lock);
 
 	__atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST);
 	zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d",
 	    prog, eid, pid);
 }
 
 static void
 _nop(int sig)
 {
 	(void) sig;
 }
 
 static void *
 _reap_children(void *arg)
 {
 	(void) arg;
 	struct launched_process_node node, *pnode;
 	pid_t pid;
 	int status;
 	struct rusage usage;
 	struct sigaction sa = {};
 
 	(void) sigfillset(&sa.sa_mask);
 	(void) sigdelset(&sa.sa_mask, SIGCHLD);
 	(void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL);
 
 	(void) sigemptyset(&sa.sa_mask);
 	sa.sa_handler = _nop;
 	sa.sa_flags = SA_NOCLDSTOP;
 	(void) sigaction(SIGCHLD, &sa, NULL);
 
 	for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) {
 		(void) pthread_mutex_lock(&_launched_processes_lock);
 		pid = wait4(0, &status, WNOHANG, &usage);
 
 		if (pid == 0 || pid == (pid_t)-1) {
 			(void) pthread_mutex_unlock(&_launched_processes_lock);
 			if (pid == 0 || errno == ECHILD)
 				pause();
 			else if (errno != EINTR)
 				zed_log_msg(LOG_WARNING,
 				    "Failed to wait for children: %s",
 				    strerror(errno));
 		} else {
 			memset(&node, 0, sizeof (node));
 			node.pid = pid;
 			pnode = avl_find(&_launched_processes, &node, NULL);
 			if (pnode) {
 				memcpy(&node, pnode, sizeof (node));
 
 				avl_remove(&_launched_processes, pnode);
 				free(pnode);
 			}
 			(void) pthread_mutex_unlock(&_launched_processes_lock);
 			__atomic_add_fetch(&_launched_processes_limit, 1,
 			    __ATOMIC_SEQ_CST);
 
 			usage.ru_utime.tv_sec += usage.ru_stime.tv_sec;
 			usage.ru_utime.tv_usec += usage.ru_stime.tv_usec;
 			usage.ru_utime.tv_sec +=
 			    usage.ru_utime.tv_usec / (1000 * 1000);
 			usage.ru_utime.tv_usec %= 1000 * 1000;
 
 			if (WIFEXITED(status)) {
 				zed_log_msg(LOG_INFO,
 				    "Finished \"%s\" eid=%llu pid=%d "
 				    "time=%llu.%06us exit=%d",
 				    node.name, node.eid, pid,
 				    (unsigned long long) usage.ru_utime.tv_sec,
 				    (unsigned int) usage.ru_utime.tv_usec,
 				    WEXITSTATUS(status));
 			} else if (WIFSIGNALED(status)) {
 				zed_log_msg(LOG_INFO,
 				    "Finished \"%s\" eid=%llu pid=%d "
 				    "time=%llu.%06us sig=%d/%s",
 				    node.name, node.eid, pid,
 				    (unsigned long long) usage.ru_utime.tv_sec,
 				    (unsigned int) usage.ru_utime.tv_usec,
 				    WTERMSIG(status),
 				    strsignal(WTERMSIG(status)));
 			} else {
 				zed_log_msg(LOG_INFO,
 				    "Finished \"%s\" eid=%llu pid=%d "
 				    "time=%llu.%06us status=0x%X",
-				    node.name, node.eid,
+				    node.name, node.eid, pid,
 				    (unsigned long long) usage.ru_utime.tv_sec,
 				    (unsigned int) usage.ru_utime.tv_usec,
 				    (unsigned int) status);
 			}
 
 			free(node.name);
 		}
 	}
 
 	return (NULL);
 }
 
 void
 zed_exec_fini(void)
 {
 	struct launched_process_node *node;
 	void *ck = NULL;
 
 	if (_reap_children_tid == (pthread_t)-1)
 		return;
 
 	_reap_children_stop = B_TRUE;
 	(void) pthread_kill(_reap_children_tid, SIGCHLD);
 	(void) pthread_join(_reap_children_tid, NULL);
 
 	while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) {
 		free(node->name);
 		free(node);
 	}
 	avl_destroy(&_launched_processes);
 
 	(void) pthread_mutex_destroy(&_launched_processes_lock);
 	(void) pthread_mutex_init(&_launched_processes_lock, NULL);
 
 	_reap_children_tid = (pthread_t)-1;
 }
 
 /*
  * Process the event [eid] by synchronously invoking all zedlets with a
  * matching class prefix.
  *
  * Each executable in [zcp->zedlets] from the directory [zcp->zedlet_dir]
  * is matched against the event's [class], [subclass], and the "all" class
  * (which matches all events).
  * Every zedlet with a matching class prefix is invoked.
  * The NAME=VALUE strings in [envs] will be passed to the zedlet as
  * environment variables.
  *
  * The file descriptor [zcp->zevent_fd] is the zevent_fd used to track the
  * current cursor location within the zevent nvlist.
  *
  * Return 0 on success, -1 on error.
  */
 int
 zed_exec_process(uint64_t eid, const char *class, const char *subclass,
     struct zed_conf *zcp, zed_strings_t *envs)
 {
 	const char *class_strings[4];
 	const char *allclass = "all";
 	const char **csp;
 	const char *z;
 	char **e;
 	int n;
 
 	if (!zcp->zedlet_dir || !zcp->zedlets || !envs || zcp->zevent_fd < 0)
 		return (-1);
 
 	if (_reap_children_tid == (pthread_t)-1) {
 		_launched_processes_limit = zcp->max_jobs;
 
 		if (pthread_create(&_reap_children_tid, NULL,
 		    _reap_children, NULL) != 0)
 			return (-1);
 		pthread_setname_np(_reap_children_tid, "reap ZEDLETs");
 
 		avl_create(&_launched_processes, _launched_process_node_compare,
 		    sizeof (struct launched_process_node),
 		    offsetof(struct launched_process_node, node));
 	}
 
 	csp = class_strings;
 
 	if (class)
 		*csp++ = class;
 
 	if (subclass)
 		*csp++ = subclass;
 
 	if (allclass)
 		*csp++ = allclass;
 
 	*csp = NULL;
 
 	e = _zed_exec_create_env(envs);
 
 	for (z = zed_strings_first(zcp->zedlets); z;
 	    z = zed_strings_next(zcp->zedlets)) {
 		for (csp = class_strings; *csp; csp++) {
 			n = strlen(*csp);
 			if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n]))
 				_zed_exec_fork_child(eid, zcp->zedlet_dir,
 				    z, e, zcp->zevent_fd, zcp->do_foreground);
 		}
 	}
 	free(e);
 	return (0);
 }
diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c
index b5b1dfa8a083..a837b1ce97ec 100644
--- a/module/zfs/spa_checkpoint.c
+++ b/module/zfs/spa_checkpoint.c
@@ -1,637 +1,637 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2017 by Delphix. All rights reserved.
  */
 
 /*
  * Storage Pool Checkpoint
  *
  * A storage pool checkpoint can be thought of as a pool-wide snapshot or
  * a stable version of extreme rewind that guarantees no blocks from the
  * checkpointed state will have been overwritten. It remembers the entire
  * state of the storage pool (e.g. snapshots, dataset names, etc..) from the
  * point that it was taken and the user can rewind back to that point even if
  * they applied destructive operations on their datasets or even enabled new
  * zpool on-disk features. If a pool has a checkpoint that is no longer
  * needed, the user can discard it.
  *
  * == On disk data structures used ==
  *
  * - The pool has a new feature flag and a new entry in the MOS. The feature
  *   flag is set to active when we create the checkpoint and remains active
  *   until the checkpoint is fully discarded. The entry in the MOS config
  *   (DMU_POOL_ZPOOL_CHECKPOINT) is populated with the uberblock that
  *   references the state of the pool when we take the checkpoint. The entry
  *   remains populated until we start discarding the checkpoint or we rewind
  *   back to it.
  *
  * - Each vdev contains a vdev-wide space map while the pool has a checkpoint,
  *   which persists until the checkpoint is fully discarded. The space map
  *   contains entries that have been freed in the current state of the pool
  *   but we want to keep around in case we decide to rewind to the checkpoint.
  *   [see vdev_checkpoint_sm]
  *
  * - Each metaslab's ms_sm space map behaves the same as without the
  *   checkpoint, with the only exception being the scenario when we free
  *   blocks that belong to the checkpoint. In this case, these blocks remain
  *   ALLOCATED in the metaslab's space map and they are added as FREE in the
  *   vdev's checkpoint space map.
  *
  * - Each uberblock has a field (ub_checkpoint_txg) which holds the txg that
  *   the uberblock was checkpointed. For normal uberblocks this field is 0.
  *
  * == Overview of operations ==
  *
  * - To create a checkpoint, we first wait for the current TXG to be synced,
  *   so we can use the most recently synced uberblock (spa_ubsync) as the
  *   checkpointed uberblock. Then we use an early synctask to place that
  *   uberblock in MOS config, increment the feature flag for the checkpoint
  *   (marking it active), and setting spa_checkpoint_txg (see its use below)
  *   to the TXG of the checkpointed uberblock. We use an early synctask for
  *   the aforementioned operations to ensure that no blocks were dirtied
  *   between the current TXG and the TXG of the checkpointed uberblock
  *   (e.g the previous txg).
  *
  * - When a checkpoint exists, we need to ensure that the blocks that
  *   belong to the checkpoint are freed but never reused. This means that
  *   these blocks should never end up in the ms_allocatable or the ms_freeing
  *   trees of a metaslab. Therefore, whenever there is a checkpoint the new
  *   ms_checkpointing tree is used in addition to the aforementioned ones.
  *
  *   Whenever a block is freed and we find out that it is referenced by the
  *   checkpoint (we find out by comparing its birth to spa_checkpoint_txg),
- *   we place it in the ms_checkpointing tree instead of the ms_freeingtree.
+ *   we place it in the ms_checkpointing tree instead of the ms_freeing tree.
  *   This way, we divide the blocks that are being freed into checkpointed
  *   and not-checkpointed blocks.
  *
  *   In order to persist these frees, we write the extents from the
- *   ms_freeingtree to the ms_sm as usual, and the extents from the
+ *   ms_freeing tree to the ms_sm as usual, and the extents from the
  *   ms_checkpointing tree to the vdev_checkpoint_sm. This way, these
  *   checkpointed extents will remain allocated in the metaslab's ms_sm space
  *   map, and therefore won't be reused [see metaslab_sync()]. In addition,
  *   when we discard the checkpoint, we can find the entries that have
  *   actually been freed in vdev_checkpoint_sm.
  *   [see spa_checkpoint_discard_thread_sync()]
  *
  * - To discard the checkpoint we use an early synctask to delete the
  *   checkpointed uberblock from the MOS config, set spa_checkpoint_txg to 0,
  *   and wakeup the discarding zthr thread (an open-context async thread).
  *   We use an early synctask to ensure that the operation happens before any
  *   new data end up in the checkpoint's data structures.
  *
  *   Once the synctask is done and the discarding zthr is awake, we discard
  *   the checkpointed data over multiple TXGs by having the zthr prefetching
  *   entries from vdev_checkpoint_sm and then starting a synctask that places
  *   them as free blocks into their respective ms_allocatable and ms_sm
  *   structures.
  *   [see spa_checkpoint_discard_thread()]
  *
  *   When there are no entries left in the vdev_checkpoint_sm of all
  *   top-level vdevs, a final synctask runs that decrements the feature flag.
  *
  * - To rewind to the checkpoint, we first use the current uberblock and
  *   open the MOS so we can access the checkpointed uberblock from the MOS
  *   config. After we retrieve the checkpointed uberblock, we use it as the
  *   current uberblock for the pool by writing it to disk with an updated
  *   TXG, opening its version of the MOS, and moving on as usual from there.
  *   [see spa_ld_checkpoint_rewind()]
  *
  *   An important note on rewinding to the checkpoint has to do with how we
  *   handle ZIL blocks. In the scenario of a rewind, we clear out any ZIL
  *   blocks that have not been claimed by the time we took the checkpoint
  *   as they should no longer be valid.
  *   [see comment in zil_claim()]
  *
  * == Miscellaneous information ==
  *
  * - In the hypothetical event that we take a checkpoint, remove a vdev,
  *   and attempt to rewind, the rewind would fail as the checkpointed
  *   uberblock would reference data in the removed device. For this reason
  *   and others of similar nature, we disallow the following operations that
  *   can change the config:
  *   	vdev removal and attach/detach, mirror splitting, and pool reguid.
  *
  * - As most of the checkpoint logic is implemented in the SPA and doesn't
  *   distinguish datasets when it comes to space accounting, having a
  *   checkpoint can potentially break the boundaries set by dataset
  *   reservations.
  */
 
 #include <sys/dmu_tx.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_synctask.h>
 #include <sys/metaslab_impl.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/spa_checkpoint.h>
 #include <sys/vdev_impl.h>
 #include <sys/zap.h>
 #include <sys/zfeature.h>
 
 /*
  * The following parameter limits the amount of memory to be used for the
  * prefetching of the checkpoint space map done on each vdev while
  * discarding the checkpoint.
  *
  * The reason it exists is because top-level vdevs with long checkpoint
  * space maps can potentially take up a lot of memory depending on the
  * amount of checkpointed data that has been freed within them while
  * the pool had a checkpoint.
  */
 static unsigned long zfs_spa_discard_memory_limit = 16 * 1024 * 1024;
 
 int
 spa_checkpoint_get_stats(spa_t *spa, pool_checkpoint_stat_t *pcs)
 {
 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
 		return (SET_ERROR(ZFS_ERR_NO_CHECKPOINT));
 
 	memset(pcs, 0, sizeof (pool_checkpoint_stat_t));
 
 	int error = zap_contains(spa_meta_objset(spa),
 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT);
 	ASSERT(error == 0 || error == ENOENT);
 
 	if (error == ENOENT)
 		pcs->pcs_state = CS_CHECKPOINT_DISCARDING;
 	else
 		pcs->pcs_state = CS_CHECKPOINT_EXISTS;
 
 	pcs->pcs_space = spa->spa_checkpoint_info.sci_dspace;
 	pcs->pcs_start_time = spa->spa_checkpoint_info.sci_timestamp;
 
 	return (0);
 }
 
 static void
 spa_checkpoint_discard_complete_sync(void *arg, dmu_tx_t *tx)
 {
 	spa_t *spa = arg;
 
 	spa->spa_checkpoint_info.sci_timestamp = 0;
 
 	spa_feature_decr(spa, SPA_FEATURE_POOL_CHECKPOINT, tx);
 	spa_notify_waiters(spa);
 
 	spa_history_log_internal(spa, "spa discard checkpoint", tx,
 	    "finished discarding checkpointed state from the pool");
 }
 
 typedef struct spa_checkpoint_discard_sync_callback_arg {
 	vdev_t *sdc_vd;
 	uint64_t sdc_txg;
 	uint64_t sdc_entry_limit;
 } spa_checkpoint_discard_sync_callback_arg_t;
 
 static int
 spa_checkpoint_discard_sync_callback(space_map_entry_t *sme, void *arg)
 {
 	spa_checkpoint_discard_sync_callback_arg_t *sdc = arg;
 	vdev_t *vd = sdc->sdc_vd;
 	metaslab_t *ms = vd->vdev_ms[sme->sme_offset >> vd->vdev_ms_shift];
 	uint64_t end = sme->sme_offset + sme->sme_run;
 
 	if (sdc->sdc_entry_limit == 0)
 		return (SET_ERROR(EINTR));
 
 	/*
 	 * Since the space map is not condensed, we know that
 	 * none of its entries is crossing the boundaries of
 	 * its respective metaslab.
 	 *
 	 * That said, there is no fundamental requirement that
 	 * the checkpoint's space map entries should not cross
 	 * metaslab boundaries. So if needed we could add code
 	 * that handles metaslab-crossing segments in the future.
 	 */
 	VERIFY3U(sme->sme_type, ==, SM_FREE);
 	VERIFY3U(sme->sme_offset, >=, ms->ms_start);
 	VERIFY3U(end, <=, ms->ms_start + ms->ms_size);
 
 	/*
 	 * At this point we should not be processing any
 	 * other frees concurrently, so the lock is technically
 	 * unnecessary. We use the lock anyway though to
 	 * potentially save ourselves from future headaches.
 	 */
 	mutex_enter(&ms->ms_lock);
 	if (range_tree_is_empty(ms->ms_freeing))
 		vdev_dirty(vd, VDD_METASLAB, ms, sdc->sdc_txg);
 	range_tree_add(ms->ms_freeing, sme->sme_offset, sme->sme_run);
 	mutex_exit(&ms->ms_lock);
 
 	ASSERT3U(vd->vdev_spa->spa_checkpoint_info.sci_dspace, >=,
 	    sme->sme_run);
 	ASSERT3U(vd->vdev_stat.vs_checkpoint_space, >=, sme->sme_run);
 
 	vd->vdev_spa->spa_checkpoint_info.sci_dspace -= sme->sme_run;
 	vd->vdev_stat.vs_checkpoint_space -= sme->sme_run;
 	sdc->sdc_entry_limit--;
 
 	return (0);
 }
 
 #ifdef ZFS_DEBUG
 static void
 spa_checkpoint_accounting_verify(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t ckpoint_sm_space_sum = 0;
 	uint64_t vs_ckpoint_space_sum = 0;
 
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
 
 		if (vd->vdev_checkpoint_sm != NULL) {
 			ckpoint_sm_space_sum +=
 			    -space_map_allocated(vd->vdev_checkpoint_sm);
 			vs_ckpoint_space_sum +=
 			    vd->vdev_stat.vs_checkpoint_space;
 			ASSERT3U(ckpoint_sm_space_sum, ==,
 			    vs_ckpoint_space_sum);
 		} else {
 			ASSERT0(vd->vdev_stat.vs_checkpoint_space);
 		}
 	}
 	ASSERT3U(spa->spa_checkpoint_info.sci_dspace, ==, ckpoint_sm_space_sum);
 }
 #endif
 
 static void
 spa_checkpoint_discard_thread_sync(void *arg, dmu_tx_t *tx)
 {
 	vdev_t *vd = arg;
 	int error;
 
 	/*
 	 * The space map callback is applied only to non-debug entries.
 	 * Because the number of debug entries is less or equal to the
 	 * number of non-debug entries, we want to ensure that we only
 	 * read what we prefetched from open-context.
 	 *
 	 * Thus, we set the maximum entries that the space map callback
 	 * will be applied to be half the entries that could fit in the
 	 * imposed memory limit.
 	 *
 	 * Note that since this is a conservative estimate we also
 	 * assume the worst case scenario in our computation where each
 	 * entry is two-word.
 	 */
 	uint64_t max_entry_limit =
 	    (zfs_spa_discard_memory_limit / (2 * sizeof (uint64_t))) >> 1;
 
 	/*
 	 * Iterate from the end of the space map towards the beginning,
 	 * placing its entries on ms_freeing and removing them from the
 	 * space map. The iteration stops if one of the following
 	 * conditions is true:
 	 *
 	 * 1] We reached the beginning of the space map. At this point
 	 *    the space map should be completely empty and
 	 *    space_map_incremental_destroy should have returned 0.
 	 *    The next step would be to free and close the space map
 	 *    and remove its entry from its vdev's top zap. This allows
 	 *    spa_checkpoint_discard_thread() to move on to the next vdev.
 	 *
 	 * 2] We reached the memory limit (amount of memory used to hold
 	 *    space map entries in memory) and space_map_incremental_destroy
 	 *    returned EINTR. This means that there are entries remaining
 	 *    in the space map that will be cleared in a future invocation
 	 *    of this function by spa_checkpoint_discard_thread().
 	 */
 	spa_checkpoint_discard_sync_callback_arg_t sdc;
 	sdc.sdc_vd = vd;
 	sdc.sdc_txg = tx->tx_txg;
 	sdc.sdc_entry_limit = max_entry_limit;
 
 	uint64_t words_before =
 	    space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t);
 
 	error = space_map_incremental_destroy(vd->vdev_checkpoint_sm,
 	    spa_checkpoint_discard_sync_callback, &sdc, tx);
 
 	uint64_t words_after =
 	    space_map_length(vd->vdev_checkpoint_sm) / sizeof (uint64_t);
 
 #ifdef ZFS_DEBUG
 	spa_checkpoint_accounting_verify(vd->vdev_spa);
 #endif
 
 	zfs_dbgmsg("discarding checkpoint: txg %llu, vdev id %lld, "
 	    "deleted %llu words - %llu words are left",
 	    (u_longlong_t)tx->tx_txg, (longlong_t)vd->vdev_id,
 	    (u_longlong_t)(words_before - words_after),
 	    (u_longlong_t)words_after);
 
 	if (error != EINTR) {
 		if (error != 0) {
 			zfs_panic_recover("zfs: error %lld was returned "
 			    "while incrementally destroying the checkpoint "
-			    "space map of vdev %u\n",
+			    "space map of vdev %llu\n",
-			    (longlong_t)error, vd->vdev_id);
+			    (longlong_t)error, (u_longlong_t)vd->vdev_id);
 		}
 		ASSERT0(words_after);
 		ASSERT0(space_map_allocated(vd->vdev_checkpoint_sm));
 		ASSERT0(space_map_length(vd->vdev_checkpoint_sm));
 
 		space_map_free(vd->vdev_checkpoint_sm, tx);
 		space_map_close(vd->vdev_checkpoint_sm);
 		vd->vdev_checkpoint_sm = NULL;
 
 		VERIFY0(zap_remove(spa_meta_objset(vd->vdev_spa),
 		    vd->vdev_top_zap, VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, tx));
 	}
 }
 
 static boolean_t
 spa_checkpoint_discard_is_done(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 
 	ASSERT(!spa_has_checkpoint(spa));
 	ASSERT(spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT));
 
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		if (rvd->vdev_child[c]->vdev_checkpoint_sm != NULL)
 			return (B_FALSE);
 		ASSERT0(rvd->vdev_child[c]->vdev_stat.vs_checkpoint_space);
 	}
 
 	return (B_TRUE);
 }
 
 boolean_t
 spa_checkpoint_discard_thread_check(void *arg, zthr_t *zthr)
 {
 	(void) zthr;
 	spa_t *spa = arg;
 
 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
 		return (B_FALSE);
 
 	if (spa_has_checkpoint(spa))
 		return (B_FALSE);
 
 	return (B_TRUE);
 }
 
 void
 spa_checkpoint_discard_thread(void *arg, zthr_t *zthr)
 {
 	spa_t *spa = arg;
 	vdev_t *rvd = spa->spa_root_vdev;
 
 	for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
 
 		while (vd->vdev_checkpoint_sm != NULL) {
 			space_map_t *checkpoint_sm = vd->vdev_checkpoint_sm;
 			int numbufs;
 			dmu_buf_t **dbp;
 
 			if (zthr_iscancelled(zthr))
 				return;
 
 			ASSERT3P(vd->vdev_ops, !=, &vdev_indirect_ops);
 
 			uint64_t size = MIN(space_map_length(checkpoint_sm),
 			    zfs_spa_discard_memory_limit);
 			uint64_t offset =
 			    space_map_length(checkpoint_sm) - size;
 
 			/*
 			 * Ensure that the part of the space map that will
 			 * be destroyed by the synctask, is prefetched in
 			 * memory before the synctask runs.
 			 */
 			int error = dmu_buf_hold_array_by_bonus(
 			    checkpoint_sm->sm_dbuf, offset, size,
 			    B_TRUE, FTAG, &numbufs, &dbp);
 			if (error != 0) {
 				zfs_panic_recover("zfs: error %d was returned "
 				    "while prefetching checkpoint space map "
 				    "entries of vdev %llu\n",
-				    error, vd->vdev_id);
+				    error, (u_longlong_t)vd->vdev_id);
 			}
 
 			VERIFY0(dsl_sync_task(spa->spa_name, NULL,
 			    spa_checkpoint_discard_thread_sync, vd,
 			    0, ZFS_SPACE_CHECK_NONE));
 
 			dmu_buf_rele_array(dbp, numbufs, FTAG);
 		}
 	}
 
 	VERIFY(spa_checkpoint_discard_is_done(spa));
 	VERIFY0(spa->spa_checkpoint_info.sci_dspace);
 	VERIFY0(dsl_sync_task(spa->spa_name, NULL,
 	    spa_checkpoint_discard_complete_sync, spa,
 	    0, ZFS_SPACE_CHECK_NONE));
 }
 
 
 static int
 spa_checkpoint_check(void *arg, dmu_tx_t *tx)
 {
 	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_POOL_CHECKPOINT))
 		return (SET_ERROR(ENOTSUP));
 
 	if (!spa_top_vdevs_spacemap_addressable(spa))
 		return (SET_ERROR(ZFS_ERR_VDEV_TOO_BIG));
 
 	if (spa->spa_removing_phys.sr_state == DSS_SCANNING)
 		return (SET_ERROR(ZFS_ERR_DEVRM_IN_PROGRESS));
 
 	if (spa->spa_checkpoint_txg != 0)
 		return (SET_ERROR(ZFS_ERR_CHECKPOINT_EXISTS));
 
 	if (spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
 		return (SET_ERROR(ZFS_ERR_DISCARDING_CHECKPOINT));
 
 	return (0);
 }
 
 static void
 spa_checkpoint_sync(void *arg, dmu_tx_t *tx)
 {
 	(void) arg;
 	dsl_pool_t *dp = dmu_tx_pool(tx);
 	spa_t *spa = dp->dp_spa;
 	uberblock_t checkpoint = spa->spa_ubsync;
 
 	/*
 	 * At this point, there should not be a checkpoint in the MOS.
 	 */
 	ASSERT3U(zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ZPOOL_CHECKPOINT), ==, ENOENT);
 
 	ASSERT0(spa->spa_checkpoint_info.sci_timestamp);
 	ASSERT0(spa->spa_checkpoint_info.sci_dspace);
 
 	/*
 	 * Since the checkpointed uberblock is the one that just got synced
 	 * (we use spa_ubsync), its txg must be equal to the txg number of
 	 * the txg we are syncing, minus 1.
 	 */
 	ASSERT3U(checkpoint.ub_txg, ==, spa->spa_syncing_txg - 1);
 
 	/*
 	 * Once the checkpoint is in place, we need to ensure that none of
 	 * its blocks will be marked for reuse after it has been freed.
 	 * When there is a checkpoint and a block is freed, we compare its
 	 * birth txg to the txg of the checkpointed uberblock to see if the
 	 * block is part of the checkpoint or not. Therefore, we have to set
 	 * spa_checkpoint_txg before any frees happen in this txg (which is
 	 * why this is done as an early_synctask as explained in the comment
 	 * in spa_checkpoint()).
 	 */
 	spa->spa_checkpoint_txg = checkpoint.ub_txg;
 	spa->spa_checkpoint_info.sci_timestamp = checkpoint.ub_timestamp;
 
 	checkpoint.ub_checkpoint_txg = checkpoint.ub_txg;
 	VERIFY0(zap_add(spa->spa_dsl_pool->dp_meta_objset,
 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT,
 	    sizeof (uint64_t), sizeof (uberblock_t) / sizeof (uint64_t),
 	    &checkpoint, tx));
 
 	/*
 	 * Increment the feature refcount and thus activate the feature.
 	 * Note that the feature will be deactivated when we've
 	 * completely discarded all checkpointed state (both vdev
 	 * space maps and uberblock).
 	 */
 	spa_feature_incr(spa, SPA_FEATURE_POOL_CHECKPOINT, tx);
 
 	spa_history_log_internal(spa, "spa checkpoint", tx,
 	    "checkpointed uberblock txg=%llu", (u_longlong_t)checkpoint.ub_txg);
 }
 
 /*
  * Create a checkpoint for the pool.
  */
 int
 spa_checkpoint(const char *pool)
 {
 	int error;
 	spa_t *spa;
 
 	error = spa_open(pool, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	mutex_enter(&spa->spa_vdev_top_lock);
 
 	/*
 	 * Wait for current syncing txg to finish so the latest synced
 	 * uberblock (spa_ubsync) has all the changes that we expect
 	 * to see if we were to revert later to the checkpoint. In other
 	 * words we want the checkpointed uberblock to include/reference
 	 * all the changes that were pending at the time that we issued
 	 * the checkpoint command.
 	 */
 	txg_wait_synced(spa_get_dsl(spa), 0);
 
 	/*
 	 * As the checkpointed uberblock references blocks from the previous
-	 * txg (spa_ubsync) we want to ensure that are not freeing any of
+	 * txg (spa_ubsync) we want to ensure that we are not freeing any of
 	 * these blocks in the same txg that the following synctask will
 	 * run. Thus, we run it as an early synctask, so the dirty changes
 	 * that are synced to disk afterwards during zios and other synctasks
 	 * do not reuse checkpointed blocks.
 	 */
 	error = dsl_early_sync_task(pool, spa_checkpoint_check,
 	    spa_checkpoint_sync, NULL, 0, ZFS_SPACE_CHECK_NORMAL);
 
 	mutex_exit(&spa->spa_vdev_top_lock);
 
 	spa_close(spa, FTAG);
 	return (error);
 }
 
 static int
 spa_checkpoint_discard_check(void *arg, dmu_tx_t *tx)
 {
 	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (!spa_feature_is_active(spa, SPA_FEATURE_POOL_CHECKPOINT))
 		return (SET_ERROR(ZFS_ERR_NO_CHECKPOINT));
 
 	if (spa->spa_checkpoint_txg == 0)
 		return (SET_ERROR(ZFS_ERR_DISCARDING_CHECKPOINT));
 
 	VERIFY0(zap_contains(spa_meta_objset(spa),
 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ZPOOL_CHECKPOINT));
 
 	return (0);
 }
 
 static void
 spa_checkpoint_discard_sync(void *arg, dmu_tx_t *tx)
 {
 	(void) arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	VERIFY0(zap_remove(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_ZPOOL_CHECKPOINT, tx));
 
 	spa->spa_checkpoint_txg = 0;
 
 	zthr_wakeup(spa->spa_checkpoint_discard_zthr);
 
 	spa_history_log_internal(spa, "spa discard checkpoint", tx,
 	    "started discarding checkpointed state from the pool");
 }
 
 /*
  * Discard the checkpoint from a pool.
  */
 int
 spa_checkpoint_discard(const char *pool)
 {
 	/*
 	 * Similarly to spa_checkpoint(), we want our synctask to run
 	 * before any pending dirty data are written to disk so they
 	 * won't end up in the checkpoint's data structures (e.g.
 	 * ms_checkpointing and vdev_checkpoint_sm) and re-create any
 	 * space maps that the discarding open-context thread has
 	 * deleted.
-	 * [see spa_discard_checkpoint_sync and spa_discard_checkpoint_thread]
+	 * [see spa_checkpoint_discard_sync and spa_checkpoint_discard_thread]
 	 */
 	return (dsl_early_sync_task(pool, spa_checkpoint_discard_check,
 	    spa_checkpoint_discard_sync, NULL, 0,
 	    ZFS_SPACE_CHECK_DISCARD_CHECKPOINT));
 }
 
 EXPORT_SYMBOL(spa_checkpoint_get_stats);
 EXPORT_SYMBOL(spa_checkpoint_discard_thread);
 EXPORT_SYMBOL(spa_checkpoint_discard_thread_check);
 
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, ULONG, ZMOD_RW,
 	"Limit for memory used in prefetching the checkpoint space map done "
 	"on each vdev while discarding the checkpoint");
 /* END CSTYLED */