Index: head/sys/cam/cam_xpt.c
===================================================================
--- head/sys/cam/cam_xpt.c	(revision 334199)
+++ head/sys/cam/cam_xpt.c	(revision 334200)
@@ -1,5616 +1,5618 @@
 /*-
  * Implementation of the Common Access Method Transport (XPT) layer.
  *
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997, 1998, 1999 Justin T. Gibbs.
  * Copyright (c) 1997, 1998, 1999 Kenneth D. Merry.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_printf.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/types.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/interrupt.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/taskqueue.h>
 
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/kthread.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_iosched.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_queue.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_xpt_periph.h>
 #include <cam/cam_xpt_internal.h>
 #include <cam/cam_debug.h>
 #include <cam/cam_compat.h>
 
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 #include <cam/scsi/scsi_pass.h>
 
 #include <machine/md_var.h>	/* geometry translation */
 #include <machine/stdarg.h>	/* for xpt_print below */
 
 #include "opt_cam.h"
 
 /* Wild guess based on not wanting to grow the stack too much */
 #define XPT_PRINT_MAXLEN	512
 #ifdef PRINTF_BUFR_SIZE
 #define XPT_PRINT_LEN	PRINTF_BUFR_SIZE
 #else
 #define XPT_PRINT_LEN	128
 #endif
 _Static_assert(XPT_PRINT_LEN <= XPT_PRINT_MAXLEN, "XPT_PRINT_LEN is too large");
 
 /*
  * This is the maximum number of high powered commands (e.g. start unit)
  * that can be outstanding at a particular time.
  */
 #ifndef CAM_MAX_HIGHPOWER
 #define CAM_MAX_HIGHPOWER  4
 #endif
 
 /* Datastructures internal to the xpt layer */
 MALLOC_DEFINE(M_CAMXPT, "CAM XPT", "CAM XPT buffers");
 MALLOC_DEFINE(M_CAMDEV, "CAM DEV", "CAM devices");
 MALLOC_DEFINE(M_CAMCCB, "CAM CCB", "CAM CCBs");
 MALLOC_DEFINE(M_CAMPATH, "CAM path", "CAM paths");
 
 /* Object for deferring XPT actions to a taskqueue */
 struct xpt_task {
 	struct task	task;		/* taskqueue task entry */
 	void		*data1;		/* handler-defined pointer argument */
 	uintptr_t	data2;		/* handler-defined integer argument */
 };
 
 /*
  * Global state of the transport layer.  A single static instance
  * (xsoftc) exists.
  */
 struct xpt_softc {
 	/* Bumped whenever a peripheral is added to or removed from a device. */
 	uint32_t		xpt_generation;
 
 	/* number of high powered commands that can go through right now */
 	struct mtx		xpt_highpower_lock;
 	STAILQ_HEAD(highpowerlist, cam_ed)	highpowerq;
 	int			num_highpower;
 
 	/* queue for handling async rescan requests. */
 	TAILQ_HEAD(, ccb_hdr) ccb_scanq;
 	/* Count of rescan requests queued by xpt_rescan(). */
 	int buses_to_config;
 	/* NOTE(review): presumably the count of completed scans -- confirm. */
 	int buses_config_done;
 	/* Non-zero forces the printf-based announcement path. */
 	int announce_nosbuf;
 
 	/*
 	 * Registered buses
 	 *
 	 * N.B., "busses" is an archaic spelling of "buses".  In new code
 	 * "buses" is preferred.
 	 */
 	TAILQ_HEAD(,cam_eb)	xpt_busses;
 	u_int			bus_generation;
 
 	/* Hook that runs xpt_config(); allocated in xpt_init(). */
 	struct intr_config_hook	*xpt_config_hook;
 
 	/* Exported as the kern.cam.boot_delay tunable. */
 	int			boot_delay;
 	struct callout 		boot_callout;
 
 	/* Held while walking or modifying ccb_scanq (see xpt_lock_buses()). */
 	struct mtx		xpt_topo_lock;
 	/* Mutex handed to the "xpt" SIM in xpt_init(). */
 	struct mtx		xpt_lock;
 	/* Taskqueue created in xpt_init() for deferred XPT work. */
 	struct taskqueue	*xpt_taskq;
 };
 
 /*
  * Result of matching a bus/device/peripheral against a pattern list.
  * The low nibble carries flag bits, the high nibble an action code.
  */
 typedef enum {
 	/* Action codes, selected with DM_RET_ACTION_MASK. */
 	DM_RET_NONE		= 0x00,
 	DM_RET_STOP		= 0x10,
 	DM_RET_DESCEND		= 0x20,
 	DM_RET_ERROR		= 0x30,
 	DM_RET_ACTION_MASK	= 0xf0,
 
 	/* Flag bits, selected with DM_RET_FLAG_MASK. */
 	DM_RET_COPY		= 0x01,
 	DM_RET_FLAG_MASK	= 0x0f
 } dev_match_ret;
 
 /* How far down the bus/target/device/peripheral tree a traversal goes. */
 typedef enum {
 	XPT_DEPTH_BUS		= 0,
 	XPT_DEPTH_TARGET	= 1,
 	XPT_DEPTH_DEVICE	= 2,
 	XPT_DEPTH_PERIPH	= 3
 } xpt_traverse_depth;
 
 /* Parameter bundle describing one traversal request. */
 struct xpt_traverse_config {
 	xpt_traverse_depth	depth;		/* deepest level to visit */
 	void			*tr_func;	/* callback, stored untyped */
 	void			*tr_arg;	/* opaque argument for tr_func */
 };
 
 /*
  * Callback signatures, one per object type, taken by the
  * xpt*traverse() routines declared below.  Each receives the object
  * being visited plus the caller's opaque argument.
  */
 typedef	int	xpt_busfunc_t (struct cam_eb *bus, void *arg);
 typedef	int	xpt_targetfunc_t (struct cam_et *target, void *arg);
 typedef	int	xpt_devicefunc_t (struct cam_ed *device, void *arg);
 typedef	int	xpt_periphfunc_t (struct cam_periph *periph, void *arg);
 typedef int	xpt_pdrvfunc_t (struct periph_driver **pdrv, void *arg);
 
 /* Transport layer configuration information */
 static struct xpt_softc xsoftc;
 
 MTX_SYSINIT(xpt_topo_init, &xsoftc.xpt_topo_lock, "XPT topology lock", MTX_DEF);
 
 SYSCTL_INT(_kern_cam, OID_AUTO, boot_delay, CTLFLAG_RDTUN,
            &xsoftc.boot_delay, 0, "Bus registration wait time");
 SYSCTL_UINT(_kern_cam, OID_AUTO, xpt_generation, CTLFLAG_RD,
 	    &xsoftc.xpt_generation, 0, "CAM peripheral generation count");
 SYSCTL_INT(_kern_cam, OID_AUTO, announce_nosbuf, CTLFLAG_RWTUN,
 	    &xsoftc.announce_nosbuf, 0, "Don't use sbuf for announcements");
 
 /* One completion queue; xpt_init() starts a kernel thread for each. */
 struct cam_doneq {
 	struct mtx_padalign	cam_doneq_mtx;	/* initialized as "CAM doneq" */
 	STAILQ_HEAD(, ccb_hdr)	cam_doneq;	/* list of CCB headers */
 	/* NOTE(review): presumably set while the thread sleeps -- confirm. */
 	int			cam_doneq_sleep;
 };
 
 static struct cam_doneq cam_doneqs[MAXCPU];
 static int cam_num_doneqs;
 static struct proc *cam_proc;
 
 SYSCTL_INT(_kern_cam, OID_AUTO, num_doneqs, CTLFLAG_RDTUN,
            &cam_num_doneqs, 0, "Number of completion queues/threads");
 
 struct cam_periph *xpt_periph;
 
 static periph_init_t xpt_periph_init;
 
 /*
  * Peripheral driver record for the XPT itself, registered through
  * PERIPHDRIVER_DECLARE() below.  The initializer is positional, so the
  * order of the values must follow struct periph_driver.
  */
 static struct periph_driver xpt_driver =
 {
 	xpt_periph_init, "xpt",
 	TAILQ_HEAD_INITIALIZER(xpt_driver.units), /* generation */ 0,
 	CAM_PERIPH_DRV_EARLY
 };
 
 PERIPHDRIVER_DECLARE(xpt, xpt_driver);
 
 static d_open_t xptopen;
 static d_close_t xptclose;
 static d_ioctl_t xptioctl;
 static d_ioctl_t xptdoioctl;
 
 /* Character device switch for the xpt control device ("xpt0"). */
 static struct cdevsw xpt_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	0,
 	.d_open =	xptopen,
 	.d_close =	xptclose,
 	.d_ioctl =	xptioctl,
 	.d_name =	"xpt",
 };
 
 /* Storage for debugging datastructures */
 struct cam_path *cam_dpath;
 u_int32_t cam_dflags = CAM_DEBUG_FLAGS;
 SYSCTL_UINT(_kern_cam, OID_AUTO, dflags, CTLFLAG_RWTUN,
 	&cam_dflags, 0, "Enabled debug flags");
 u_int32_t cam_debug_delay = CAM_DEBUG_DELAY;
 SYSCTL_UINT(_kern_cam, OID_AUTO, debug_delay, CTLFLAG_RWTUN,
 	&cam_debug_delay, 0, "Delay in us after each debug message");
 
 /* Our boot-time initialization hook */
 static int cam_module_event_handler(module_t, int /*modeventtype_t*/, void *);
 
 /* Module description handed to DECLARE_MODULE() for the "cam" module. */
 static moduledata_t cam_moduledata = {
 	"cam",				/* module name */
 	cam_module_event_handler,	/* event handler */
 	NULL				/* no extra argument for the handler */
 };
 
 static int	xpt_init(void *);
 
 DECLARE_MODULE(cam, cam_moduledata, SI_SUB_CONFIGURE, SI_ORDER_SECOND);
 MODULE_VERSION(cam, 1);
 
 
 static void		xpt_async_bcast(struct async_list *async_head,
 					u_int32_t async_code,
 					struct cam_path *path,
 					void *async_arg);
 static path_id_t xptnextfreepathid(void);
 static path_id_t xptpathid(const char *sim_name, int sim_unit, int sim_bus);
 static union ccb *xpt_get_ccb(struct cam_periph *periph);
 static union ccb *xpt_get_ccb_nowait(struct cam_periph *periph);
 static void	 xpt_run_allocq(struct cam_periph *periph, int sleep);
 static void	 xpt_run_allocq_task(void *context, int pending);
 static void	 xpt_run_devq(struct cam_devq *devq);
 static timeout_t xpt_release_devq_timeout;
 static void	 xpt_release_simq_timeout(void *arg) __unused;
 static void	 xpt_acquire_bus(struct cam_eb *bus);
 static void	 xpt_release_bus(struct cam_eb *bus);
 static uint32_t	 xpt_freeze_devq_device(struct cam_ed *dev, u_int count);
 static int	 xpt_release_devq_device(struct cam_ed *dev, u_int count,
 		    int run_queue);
 static struct cam_et*
 		 xpt_alloc_target(struct cam_eb *bus, target_id_t target_id);
 static void	 xpt_acquire_target(struct cam_et *target);
 static void	 xpt_release_target(struct cam_et *target);
 static struct cam_eb*
 		 xpt_find_bus(path_id_t path_id);
 static struct cam_et*
 		 xpt_find_target(struct cam_eb *bus, target_id_t target_id);
 static struct cam_ed*
 		 xpt_find_device(struct cam_et *target, lun_id_t lun_id);
 static void	 xpt_config(void *arg);
 static int	 xpt_schedule_dev(struct camq *queue, cam_pinfo *dev_pinfo,
 				 u_int32_t new_priority);
 static xpt_devicefunc_t xptpassannouncefunc;
 static void	 xptaction(struct cam_sim *sim, union ccb *work_ccb);
 static void	 xptpoll(struct cam_sim *sim);
 static void	 camisr_runqueue(void);
 static void	 xpt_done_process(struct ccb_hdr *ccb_h);
 static void	 xpt_done_td(void *);
 static dev_match_ret	xptbusmatch(struct dev_match_pattern *patterns,
 				    u_int num_patterns, struct cam_eb *bus);
 static dev_match_ret	xptdevicematch(struct dev_match_pattern *patterns,
 				       u_int num_patterns,
 				       struct cam_ed *device);
 static dev_match_ret	xptperiphmatch(struct dev_match_pattern *patterns,
 				       u_int num_patterns,
 				       struct cam_periph *periph);
 static xpt_busfunc_t	xptedtbusfunc;
 static xpt_targetfunc_t	xptedttargetfunc;
 static xpt_devicefunc_t	xptedtdevicefunc;
 static xpt_periphfunc_t	xptedtperiphfunc;
 static xpt_pdrvfunc_t	xptplistpdrvfunc;
 static xpt_periphfunc_t	xptplistperiphfunc;
 static int		xptedtmatch(struct ccb_dev_match *cdm);
 static int		xptperiphlistmatch(struct ccb_dev_match *cdm);
 static int		xptbustraverse(struct cam_eb *start_bus,
 				       xpt_busfunc_t *tr_func, void *arg);
 static int		xpttargettraverse(struct cam_eb *bus,
 					  struct cam_et *start_target,
 					  xpt_targetfunc_t *tr_func, void *arg);
 static int		xptdevicetraverse(struct cam_et *target,
 					  struct cam_ed *start_device,
 					  xpt_devicefunc_t *tr_func, void *arg);
 static int		xptperiphtraverse(struct cam_ed *device,
 					  struct cam_periph *start_periph,
 					  xpt_periphfunc_t *tr_func, void *arg);
 static int		xptpdrvtraverse(struct periph_driver **start_pdrv,
 					xpt_pdrvfunc_t *tr_func, void *arg);
 static int		xptpdperiphtraverse(struct periph_driver **pdrv,
 					    struct cam_periph *start_periph,
 					    xpt_periphfunc_t *tr_func,
 					    void *arg);
 static xpt_busfunc_t	xptdefbusfunc;
 static xpt_targetfunc_t	xptdeftargetfunc;
 static xpt_devicefunc_t	xptdefdevicefunc;
 static xpt_periphfunc_t	xptdefperiphfunc;
 static void		xpt_finishconfig_task(void *context, int pending);
 static void		xpt_dev_async_default(u_int32_t async_code,
 					      struct cam_eb *bus,
 					      struct cam_et *target,
 					      struct cam_ed *device,
 					      void *async_arg);
 static struct cam_ed *	xpt_alloc_device_default(struct cam_eb *bus,
 						 struct cam_et *target,
 						 lun_id_t lun_id);
 static xpt_devicefunc_t	xptsetasyncfunc;
 static xpt_busfunc_t	xptsetasyncbusfunc;
 static cam_status	xptregister(struct cam_periph *periph,
 				    void *arg);
 static __inline int device_is_queued(struct cam_ed *device);
 
 /*
  * Schedule a device on the devq send queue when it is able to issue
  * work: it must have queued CCBs, free device openings and an unfrozen
  * CCB queue.  The caller must hold devq->send_mtx.
  *
  * Returns the result of xpt_schedule_dev(), or 0 when the device is not
  * eligible.
  */
 static __inline int
 xpt_schedule_devq(struct cam_devq *devq, struct cam_ed *dev)
 {
 
 	mtx_assert(&devq->send_mtx, MA_OWNED);
 
 	if (dev->ccbq.queue.entries <= 0 ||
 	    dev->ccbq.dev_openings <= 0 ||
 	    dev->ccbq.queue.qfrozen_cnt != 0)
 		return (0);
 
 	/*
 	 * A device waiting for controller resources takes the priority
 	 * of the highest priority CCB it has enqueued.
 	 */
 	return (xpt_schedule_dev(&devq->send_queue, &dev->devq_entry,
 	    CAMQ_GET_PRIO(&dev->ccbq.queue)));
 }
 
 /* Return 1 when the device currently occupies a slot in a devq, else 0. */
 static __inline int
 device_is_queued(struct cam_ed *device)
 {
 
 	if (device->devq_entry.index == CAM_UNQUEUED_INDEX)
 		return (0);
 	return (1);
 }
 
 /*
  * Peripheral-driver init hook for the XPT: creates the "xpt0" control
  * device node (root:operator, mode 0600) backed by xpt_cdevsw.
  *
  * Declared with an explicit (void) parameter list so the definition is a
  * proper prototype rather than an old-style empty-parentheses definition.
  */
 static void
 xpt_periph_init(void)
 {
 
 	make_dev(&xpt_cdevsw, 0, UID_ROOT, GID_OPERATOR, 0600, "xpt0");
 }
 
 /*
  * Open entry point for the xpt control device.
  *
  * Returns EPERM unless the descriptor is opened for both reading and
  * writing, ENODEV for nonblocking opens, and 0 otherwise.
  */
 static int
 xptopen(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	const int rw_bits = FREAD | FWRITE;
 
 	/* Read-write access is mandatory. */
 	if ((flags & rw_bits) != rw_bits)
 		return (EPERM);
 
 	/* Nonblocking access is refused. */
 	if (flags & O_NONBLOCK) {
 		printf("%s: can't do nonblocking access\n", devtoname(dev));
 		return (ENODEV);
 	}
 
 	return (0);
 }
 
 /*
  * Close entry point for the xpt control device.  xptopen() sets up no
  * per-open state, so there is nothing to tear down; always returns 0.
  */
 static int
 xptclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
 
 	return (0);
 }
 
 /*
  * Don't automatically grab the xpt softc lock here even though this is going
  * through the xpt device.  The xpt device is really just a back door for
  * accessing other devices and SIMs, so the right thing to do is to grab
  * the appropriate SIM lock once the bus/SIM is located.
  */
 static int
 xptioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
 	int rc;
 
 	/* Try the native handler first. */
 	rc = xptdoioctl(dev, cmd, addr, flag, td);
 	if (rc != ENOTTY)
 		return (rc);
 
 	/* Unknown command: give the compatibility shim a chance. */
 	return (cam_compat_ioctl(dev, cmd, addr, flag, td, xptdoioctl));
 }
 
 /*
  * Native ioctl handler for the xpt control device.
  *
  * addr is interpreted as a union ccb for both supported commands:
  *   CAMIOCOMMAND   - run a restricted set of CCB function codes against
  *                    the bus named in the CCB header.
  *   CAMGETPASSTHRU - look up the "pass" peripheral attached to the same
  *                    device as the named peripheral instance.
  *
  * Returns 0 on success or an errno value; any other command yields
  * ENOTTY, which xptioctl() uses as the cue to try cam_compat_ioctl().
  */
 static int
 xptdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
 	int error;
 
 	error = 0;
 
 	switch(cmd) {
 	/*
 	 * For the transport layer CAMIOCOMMAND ioctl, we really only want
 	 * to accept CCB types that don't quite make sense to send through a
 	 * passthrough driver. XPT_PATH_INQ is an exception to this, as stated
 	 * in the CAM spec.
 	 */
 	case CAMIOCOMMAND: {
 		union ccb *ccb;
 		union ccb *inccb;
 		struct cam_eb *bus;
 
 		inccb = (union ccb *)addr;
 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
 		if (inccb->ccb_h.func_code == XPT_SCSI_IO)
 			inccb->csio.bio = NULL;
 #endif
 
 		if (inccb->ccb_h.flags & CAM_UNLOCKED)
 			return (EINVAL);
 
 		/*
 		 * The bus returned here is released with xpt_release_bus()
 		 * on every exit path below.
 		 */
 		bus = xpt_find_bus(inccb->ccb_h.path_id);
 		if (bus == NULL)
 			return (EINVAL);
 
 		/* Validate the wildcard usage required by the scan/reset codes. */
 		switch (inccb->ccb_h.func_code) {
 		case XPT_SCAN_BUS:
 		case XPT_RESET_BUS:
 			if (inccb->ccb_h.target_id != CAM_TARGET_WILDCARD ||
 			    inccb->ccb_h.target_lun != CAM_LUN_WILDCARD) {
 				xpt_release_bus(bus);
 				return (EINVAL);
 			}
 			break;
 		case XPT_SCAN_TGT:
 			if (inccb->ccb_h.target_id == CAM_TARGET_WILDCARD ||
 			    inccb->ccb_h.target_lun != CAM_LUN_WILDCARD) {
 				xpt_release_bus(bus);
 				return (EINVAL);
 			}
 			break;
 		default:
 			break;
 		}
 
 		switch(inccb->ccb_h.func_code) {
 		case XPT_SCAN_BUS:
 		case XPT_RESET_BUS:
 		case XPT_PATH_INQ:
 		case XPT_ENG_INQ:
 		case XPT_SCAN_LUN:
 		case XPT_SCAN_TGT:
 
 			ccb = xpt_alloc_ccb();
 
 			/*
 			 * Create a path using the bus, target, and lun the
 			 * user passed in.
 			 */
 			if (xpt_create_path(&ccb->ccb_h.path, NULL,
 					    inccb->ccb_h.path_id,
 					    inccb->ccb_h.target_id,
 					    inccb->ccb_h.target_lun) !=
 					    CAM_REQ_CMP){
 				error = EINVAL;
 				xpt_free_ccb(ccb);
 				break;
 			}
 			/* Ensure all of our fields are correct */
 			xpt_setup_ccb(&ccb->ccb_h, ccb->ccb_h.path,
 				      inccb->ccb_h.pinfo.priority);
 			xpt_merge_ccb(ccb, inccb);
 			xpt_path_lock(ccb->ccb_h.path);
 			cam_periph_runccb(ccb, NULL, 0, 0, NULL);
 			xpt_path_unlock(ccb->ccb_h.path);
 			/* Hand the completed CCB back through the ioctl buffer. */
 			bcopy(ccb, inccb, sizeof(union ccb));
 			xpt_free_path(ccb->ccb_h.path);
 			xpt_free_ccb(ccb);
 			break;
 
 		case XPT_DEBUG: {
 			union ccb ccb;
 
 			/*
 			 * This is an immediate CCB, so it's okay to
 			 * allocate it on the stack.
 			 */
 
 			/*
 			 * Create a path using the bus, target, and lun the
 			 * user passed in.
 			 */
 			if (xpt_create_path(&ccb.ccb_h.path, NULL,
 					    inccb->ccb_h.path_id,
 					    inccb->ccb_h.target_id,
 					    inccb->ccb_h.target_lun) !=
 					    CAM_REQ_CMP){
 				error = EINVAL;
 				break;
 			}
 			/* Ensure all of our fields are correct */
 			xpt_setup_ccb(&ccb.ccb_h, ccb.ccb_h.path,
 				      inccb->ccb_h.pinfo.priority);
 			xpt_merge_ccb(&ccb, inccb);
 			xpt_action(&ccb);
 			bcopy(&ccb, inccb, sizeof(union ccb));
 			xpt_free_path(ccb.ccb_h.path);
 			break;
 
 		}
 		case XPT_DEV_MATCH: {
 			struct cam_periph_map_info mapinfo;
 			struct cam_path *old_path;
 
 			/*
 			 * We can't deal with physical addresses for this
 			 * type of transaction.
 			 */
 			if ((inccb->ccb_h.flags & CAM_DATA_MASK) !=
 			    CAM_DATA_VADDR) {
 				error = EINVAL;
 				break;
 			}
 
 			/*
 			 * Save this in case the caller had it set to
 			 * something in particular.
 			 */
 			old_path = inccb->ccb_h.path;
 
 			/*
 			 * We really don't need a path for the matching
 			 * code.  The path is needed because of the
 			 * debugging statements in xpt_action().  They
 			 * assume that the CCB has a valid path.
 			 */
 			inccb->ccb_h.path = xpt_periph->path;
 
 			bzero(&mapinfo, sizeof(mapinfo));
 
 			/*
 			 * Map the pattern and match buffers into kernel
 			 * virtual address space.
 			 */
 			error = cam_periph_mapmem(inccb, &mapinfo, MAXPHYS);
 
 			if (error) {
 				inccb->ccb_h.path = old_path;
 				break;
 			}
 
 			/*
 			 * This is an immediate CCB, we can send it on directly.
 			 */
 			xpt_action(inccb);
 
 			/*
 			 * Map the buffers back into user space.
 			 */
 			cam_periph_unmapmem(inccb, &mapinfo);
 
 			inccb->ccb_h.path = old_path;
 
 			error = 0;
 			break;
 		}
 		default:
 			error = ENOTSUP;
 			break;
 		}
 		xpt_release_bus(bus);
 		break;
 	}
 	/*
 	 * This is the getpassthru ioctl. It takes a XPT_GDEVLIST ccb as input,
 	 * with the peripheral driver name and unit number filled in.  The
 	 * other fields don't really matter as input.  The passthrough driver
 	 * name ("pass"), and unit number are passed back in the ccb.  The
 	 * current device generation number, and the index into the device
 	 * peripheral driver list, and the status are also passed back.  Note
 	 * that since we do everything in one pass, unlike the XPT_GDEVLIST
 	 * ccb, we never return a status of CAM_GDEVLIST_LIST_CHANGED.  It is
 	 * (or rather should be) impossible for the device peripheral driver
 	 * list to change since we look at the whole thing in one pass, and
 	 * we do it with lock protection.
 	 *
 	 */
 	case CAMGETPASSTHRU: {
 		union ccb *ccb;
 		struct cam_periph *periph;
 		struct periph_driver **p_drv;
 		char   *name;
 		u_int unit;
 		int base_periph_found;
 
 		ccb = (union ccb *)addr;
 		unit = ccb->cgdl.unit_number;
 		name = ccb->cgdl.periph_name;
 		base_periph_found = 0;
 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
 		if (ccb->ccb_h.func_code == XPT_SCSI_IO)
 			ccb->csio.bio = NULL;
 #endif
 
 		/*
 		 * Sanity check -- make sure we don't get a null peripheral
 		 * driver name.
 		 */
 		if (*ccb->cgdl.periph_name == '\0') {
 			error = EINVAL;
 			break;
 		}
 
 		/* Keep the list from changing while we traverse it */
 		xpt_lock_buses();
 
 		/* first find our driver in the list of drivers */
 		for (p_drv = periph_drivers; *p_drv != NULL; p_drv++)
 			if (strcmp((*p_drv)->driver_name, name) == 0)
 				break;
 
 		if (*p_drv == NULL) {
 			xpt_unlock_buses();
 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
 			ccb->cgdl.status = CAM_GDEVLIST_ERROR;
 			*ccb->cgdl.periph_name = '\0';
 			ccb->cgdl.unit_number = 0;
 			error = ENOENT;
 			break;
 		}
 
 		/*
 		 * Run through every peripheral instance of this driver
 		 * and check to see whether it matches the unit passed
 		 * in by the user.  If it does, get out of the loops and
 		 * find the passthrough driver associated with that
 		 * peripheral driver.
 		 */
 		for (periph = TAILQ_FIRST(&(*p_drv)->units); periph != NULL;
 		     periph = TAILQ_NEXT(periph, unit_links)) {
 
 			if (periph->unit_number == unit)
 				break;
 		}
 		/*
 		 * If we found the peripheral driver that the user passed
 		 * in, go through all of the peripheral drivers for that
 		 * particular device and look for a passthrough driver.
 		 */
 		if (periph != NULL) {
 			struct cam_ed *device;
 			int i;
 
 			base_periph_found = 1;
 			device = periph->path->device;
 			/*
 			 * periph is reused as the cursor here; it ends up
 			 * NULL when the device has no "pass" peripheral.
 			 */
 			for (i = 0, periph = SLIST_FIRST(&device->periphs);
 			     periph != NULL;
 			     periph = SLIST_NEXT(periph, periph_links), i++) {
 				/*
 				 * Check to see whether we have a
 				 * passthrough device or not.
 				 */
 				if (strcmp(periph->periph_name, "pass") == 0) {
 					/*
 					 * Fill in the getdevlist fields.
 					 */
 					strlcpy(ccb->cgdl.periph_name,
 					       periph->periph_name,
 					       sizeof(ccb->cgdl.periph_name));
 					ccb->cgdl.unit_number =
 						periph->unit_number;
 					if (SLIST_NEXT(periph, periph_links))
 						ccb->cgdl.status =
 							CAM_GDEVLIST_MORE_DEVS;
 					else
 						ccb->cgdl.status =
 						       CAM_GDEVLIST_LAST_DEVICE;
 					ccb->cgdl.generation =
 						device->generation;
 					ccb->cgdl.index = i;
 					/*
 					 * Fill in some CCB header fields
 					 * that the user may want.
 					 */
 					ccb->ccb_h.path_id =
 						periph->path->bus->path_id;
 					ccb->ccb_h.target_id =
 						periph->path->target->target_id;
 					ccb->ccb_h.target_lun =
 						periph->path->device->lun_id;
 					ccb->ccb_h.status = CAM_REQ_CMP;
 					break;
 				}
 			}
 		}
 
 		/*
 		 * If the periph is null here, one of two things has
 		 * happened.  The first possibility is that we couldn't
 		 * find the unit number of the particular peripheral driver
 		 * that the user is asking about.  e.g. the user asks for
 		 * the passthrough driver for "da11".  We find the list of
 		 * "da" peripherals all right, but there is no unit 11.
 		 * The other possibility is that we went through the list
 		 * of peripheral drivers attached to the device structure,
 		 * but didn't find one with the name "pass".  Either way,
 		 * we return ENOENT, since we couldn't find something.
 		 */
 		if (periph == NULL) {
 			ccb->ccb_h.status = CAM_REQ_CMP_ERR;
 			ccb->cgdl.status = CAM_GDEVLIST_ERROR;
 			*ccb->cgdl.periph_name = '\0';
 			ccb->cgdl.unit_number = 0;
 			error = ENOENT;
 			/*
 			 * It is unfortunate that this is even necessary,
 			 * but there are many, many clueless users out there.
 			 * If this is true, the user is looking for the
 			 * passthrough driver, but doesn't have one in his
 			 * kernel.
 			 */
 			if (base_periph_found == 1) {
 				printf("xptioctl: pass driver is not in the "
 				       "kernel\n");
 				printf("xptioctl: put \"device pass\" in "
 				       "your kernel config file\n");
 			}
 		}
 		xpt_unlock_buses();
 		break;
 		}
 	default:
 		error = ENOTTY;
 		break;
 	}
 
 	return(error);
 }
 
 /*
  * Module event handler for "cam".  Loading runs xpt_init() and returns
  * its result, unloading is refused with EBUSY, and every other event
  * gets EOPNOTSUPP.
  */
 static int
 cam_module_event_handler(module_t mod, int what, void *arg)
 {
 
 	if (what == MOD_LOAD)
 		return (xpt_init(NULL));
 	if (what == MOD_UNLOAD)
 		return (EBUSY);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Look up the registered protocol descriptor for the given protocol in
  * the cam_xpt_proto_set linker set.  Returns NULL when none matches.
  */
 static struct xpt_proto *
 xpt_proto_find(cam_proto proto)
 {
 	struct xpt_proto **iter;
 	struct xpt_proto *match;
 
 	match = NULL;
 	SET_FOREACH(iter, cam_xpt_proto_set) {
 		if ((*iter)->proto != proto)
 			continue;
 		match = *iter;
 		break;
 	}
 
 	return (match);
 }
 
 /*
  * Completion callback installed by xpt_rescan().  If the requester
  * supplied its own callback (saved in ppriv_ptr1) it is restored and
  * invoked; otherwise the path and CCB are freed here.  In both cases
  * xpt_release_boot() is called afterwards.
  */
 static void
 xpt_rescan_done(struct cam_periph *periph, union ccb *done_ccb)
 {
 
 	if (done_ccb->ccb_h.ppriv_ptr1 != NULL) {
 		done_ccb->ccb_h.cbfcnp = done_ccb->ccb_h.ppriv_ptr1;
 		(*done_ccb->ccb_h.cbfcnp)(periph, done_ccb);
 	} else {
 		xpt_free_path(done_ccb->ccb_h.path);
 		xpt_free_ccb(done_ccb);
 	}
 	xpt_release_boot();
 }
 
 /* thread to handle bus rescans */
 static void
 xpt_scanner_thread(void *dummy)
 {
 	union ccb	*ccb;
 	struct cam_path	 path;
 
 	xpt_lock_buses();
 	for (;;) {
 		if (TAILQ_EMPTY(&xsoftc.ccb_scanq))
 			msleep(&xsoftc.ccb_scanq, &xsoftc.xpt_topo_lock, PRIBIO,
 			       "-", 0);
 		if ((ccb = (union ccb *)TAILQ_FIRST(&xsoftc.ccb_scanq)) != NULL) {
 			TAILQ_REMOVE(&xsoftc.ccb_scanq, &ccb->ccb_h, sim_links.tqe);
 			xpt_unlock_buses();
 
 			/*
 			 * Since lock can be dropped inside and path freed
 			 * by completion callback even before return here,
 			 * take our own path copy for reference.
 			 */
 			xpt_copy_path(&path, ccb->ccb_h.path);
 			xpt_path_lock(&path);
 			xpt_action(ccb);
 			xpt_path_unlock(&path);
 			xpt_release_path(&path);
 
 			xpt_lock_buses();
 		}
 	}
 }
 
 void
 xpt_rescan(union ccb *ccb)
 {
 	struct ccb_hdr *hdr;
 
 	/* Prepare request */
 	if (ccb->ccb_h.path->target->target_id == CAM_TARGET_WILDCARD &&
 	    ccb->ccb_h.path->device->lun_id == CAM_LUN_WILDCARD)
 		ccb->ccb_h.func_code = XPT_SCAN_BUS;
 	else if (ccb->ccb_h.path->target->target_id != CAM_TARGET_WILDCARD &&
 	    ccb->ccb_h.path->device->lun_id == CAM_LUN_WILDCARD)
 		ccb->ccb_h.func_code = XPT_SCAN_TGT;
 	else if (ccb->ccb_h.path->target->target_id != CAM_TARGET_WILDCARD &&
 	    ccb->ccb_h.path->device->lun_id != CAM_LUN_WILDCARD)
 		ccb->ccb_h.func_code = XPT_SCAN_LUN;
 	else {
 		xpt_print(ccb->ccb_h.path, "illegal scan path\n");
 		xpt_free_path(ccb->ccb_h.path);
 		xpt_free_ccb(ccb);
 		return;
 	}
 	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_rescan: func %#x %s\n", ccb->ccb_h.func_code,
  		xpt_action_name(ccb->ccb_h.func_code)));
 
 	ccb->ccb_h.ppriv_ptr1 = ccb->ccb_h.cbfcnp;
 	ccb->ccb_h.cbfcnp = xpt_rescan_done;
 	xpt_setup_ccb(&ccb->ccb_h, ccb->ccb_h.path, CAM_PRIORITY_XPT);
 	/* Don't make duplicate entries for the same paths. */
 	xpt_lock_buses();
 	if (ccb->ccb_h.ppriv_ptr1 == NULL) {
 		TAILQ_FOREACH(hdr, &xsoftc.ccb_scanq, sim_links.tqe) {
 			if (xpt_path_comp(hdr->path, ccb->ccb_h.path) == 0) {
 				wakeup(&xsoftc.ccb_scanq);
 				xpt_unlock_buses();
 				xpt_print(ccb->ccb_h.path, "rescan already queued\n");
 				xpt_free_path(ccb->ccb_h.path);
 				xpt_free_ccb(ccb);
 				return;
 			}
 		}
 	}
 	TAILQ_INSERT_TAIL(&xsoftc.ccb_scanq, &ccb->ccb_h, sim_links.tqe);
 	xsoftc.buses_to_config++;
 	wakeup(&xsoftc.ccb_scanq);
 	xpt_unlock_buses();
 }
 
 /* Functions accessed by the peripheral drivers */
 static int
 xpt_init(void *dummy)
 {
 	struct cam_sim *xpt_sim;
 	struct cam_path *path;
 	struct cam_devq *devq;
 	cam_status status;
 	int error, i;
 
 	TAILQ_INIT(&xsoftc.xpt_busses);
 	TAILQ_INIT(&xsoftc.ccb_scanq);
 	STAILQ_INIT(&xsoftc.highpowerq);
 	xsoftc.num_highpower = CAM_MAX_HIGHPOWER;
 
 	mtx_init(&xsoftc.xpt_lock, "XPT lock", NULL, MTX_DEF);
 	mtx_init(&xsoftc.xpt_highpower_lock, "XPT highpower lock", NULL, MTX_DEF);
 	xsoftc.xpt_taskq = taskqueue_create("CAM XPT task", M_WAITOK,
 	    taskqueue_thread_enqueue, /*context*/&xsoftc.xpt_taskq);
 
 #ifdef CAM_BOOT_DELAY
 	/*
 	 * Override this value at compile time to assist our users
 	 * who don't use loader to boot a kernel.
 	 */
 	xsoftc.boot_delay = CAM_BOOT_DELAY;
 #endif
 	/*
 	 * The xpt layer is, itself, the equivalent of a SIM.
 	 * Allow 16 ccbs in the ccb pool for it.  This should
 	 * give decent parallelism when we probe buses and
 	 * perform other XPT functions.
 	 */
 	devq = cam_simq_alloc(16);
 	xpt_sim = cam_sim_alloc(xptaction,
 				xptpoll,
 				"xpt",
 				/*softc*/NULL,
 				/*unit*/0,
 				/*mtx*/&xsoftc.xpt_lock,
 				/*max_dev_transactions*/0,
 				/*max_tagged_dev_transactions*/0,
 				devq);
 	if (xpt_sim == NULL)
 		return (ENOMEM);
 
 	mtx_lock(&xsoftc.xpt_lock);
 	if ((status = xpt_bus_register(xpt_sim, NULL, 0)) != CAM_SUCCESS) {
 		mtx_unlock(&xsoftc.xpt_lock);
 		printf("xpt_init: xpt_bus_register failed with status %#x,"
 		       " failing attach\n", status);
 		return (EINVAL);
 	}
 	mtx_unlock(&xsoftc.xpt_lock);
 
 	/*
 	 * Looking at the XPT from the SIM layer, the XPT is
 	 * the equivalent of a peripheral driver.  Allocate
 	 * a peripheral driver entry for us.
 	 */
 	if ((status = xpt_create_path(&path, NULL, CAM_XPT_PATH_ID,
 				      CAM_TARGET_WILDCARD,
 				      CAM_LUN_WILDCARD)) != CAM_REQ_CMP) {
 		printf("xpt_init: xpt_create_path failed with status %#x,"
 		       " failing attach\n", status);
 		return (EINVAL);
 	}
 	xpt_path_lock(path);
 	cam_periph_alloc(xptregister, NULL, NULL, NULL, "xpt", CAM_PERIPH_BIO,
 			 path, NULL, 0, xpt_sim);
 	xpt_path_unlock(path);
 	xpt_free_path(path);
 
 	if (cam_num_doneqs < 1)
 		cam_num_doneqs = 1 + mp_ncpus / 6;
 	else if (cam_num_doneqs > MAXCPU)
 		cam_num_doneqs = MAXCPU;
 	for (i = 0; i < cam_num_doneqs; i++) {
 		mtx_init(&cam_doneqs[i].cam_doneq_mtx, "CAM doneq", NULL,
 		    MTX_DEF);
 		STAILQ_INIT(&cam_doneqs[i].cam_doneq);
 		error = kproc_kthread_add(xpt_done_td, &cam_doneqs[i],
 		    &cam_proc, NULL, 0, 0, "cam", "doneq%d", i);
 		if (error != 0) {
 			cam_num_doneqs = i;
 			break;
 		}
 	}
 	if (cam_num_doneqs < 1) {
 		printf("xpt_init: Cannot init completion queues "
 		       "- failing attach\n");
 		return (ENOMEM);
 	}
 	/*
 	 * Register a callback for when interrupts are enabled.
 	 */
 	xsoftc.xpt_config_hook =
 	    (struct intr_config_hook *)malloc(sizeof(struct intr_config_hook),
 					      M_CAMXPT, M_NOWAIT | M_ZERO);
 	if (xsoftc.xpt_config_hook == NULL) {
 		printf("xpt_init: Cannot malloc config hook "
 		       "- failing attach\n");
 		return (ENOMEM);
 	}
 	xsoftc.xpt_config_hook->ich_func = xpt_config;
 	if (config_intrhook_establish(xsoftc.xpt_config_hook) != 0) {
 		free (xsoftc.xpt_config_hook, M_CAMXPT);
 		printf("xpt_init: config_intrhook_establish failed "
 		       "- failing attach\n");
 	}
 
 	return (0);
 }
 
 /*
  * Registration callback passed to cam_periph_alloc() by xpt_init().
  * arg is the XPT's own SIM; the new peripheral is recorded both as the
  * SIM's softc and in the global xpt_periph.
  *
  * Returns CAM_REQ_CMP, or CAM_REQ_CMP_ERR when periph is NULL.
  */
 static cam_status
 xptregister(struct cam_periph *periph, void *arg)
 {
 	struct cam_sim *sim = arg;
 
 	if (periph == NULL) {
 		printf("xptregister: periph was NULL!!\n");
 		return (CAM_REQ_CMP_ERR);
 	}
 
 	periph->softc = NULL;
 	xpt_periph = periph;
 	sim->softc = periph;
 
 	return (CAM_REQ_CMP);
 }
 
 /*
  * Attach a peripheral to the device named by its path.
  *
  * Initializes the peripheral's run task and, when the path has a
  * device, inserts the peripheral at the head of that device's list
  * under the bus mutex, bumping the device and global generation
  * counts.  Always returns CAM_REQ_CMP.
  */
 int32_t
 xpt_add_periph(struct cam_periph *periph)
 {
 	struct cam_ed *dev;
 
 	TASK_INIT(&periph->periph_run_task, 0, xpt_run_allocq_task, periph);
 
 	dev = periph->path->device;
 	if (dev == NULL)
 		return (CAM_REQ_CMP);
 
 	mtx_lock(&dev->target->bus->eb_mtx);
 	dev->generation++;
 	SLIST_INSERT_HEAD(&dev->periphs, periph, periph_links);
 	mtx_unlock(&dev->target->bus->eb_mtx);
 	atomic_add_32(&xsoftc.xpt_generation, 1);
 
 	return (CAM_REQ_CMP);
 }
 
 /*
  * Detach a peripheral from the device named by its path.  A no-op when
  * the path has no device; otherwise the peripheral is unlinked under
  * the bus mutex and the device and global generation counts are bumped.
  */
 void
 xpt_remove_periph(struct cam_periph *periph)
 {
 	struct cam_ed *dev;
 
 	dev = periph->path->device;
 	if (dev == NULL)
 		return;
 
 	mtx_lock(&dev->target->bus->eb_mtx);
 	dev->generation++;
 	SLIST_REMOVE(&dev->periphs, periph, cam_periph, periph_links);
 	mtx_unlock(&dev->target->bus->eb_mtx);
 	atomic_add_32(&xsoftc.xpt_generation, 1);
 }
 
 
 /*
  * Print the attach announcement for a peripheral on the console.
  *
  * Output consists of the location line, the protocol-specific device
  * description, the serial number (if any), transport details, a command
  * queueing note and, when non-NULL, the caller's announce_string.  The
  * peripheral is flagged CAM_PERIPH_ANNOUNCED.  The caller must hold the
  * peripheral's lock.
  */
 void
 xpt_announce_periph(struct cam_periph *periph, char *announce_string)
 {
 	struct	cam_path *path = periph->path;
 	struct  xpt_proto *proto;
 
 	cam_periph_assert(periph, MA_OWNED);
 	periph->flags |= CAM_PERIPH_ANNOUNCED;
 
 	printf("%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
 	       periph->periph_name, periph->unit_number,
 	       path->bus->sim->sim_name,
 	       path->bus->sim->unit_number,
 	       path->bus->sim->bus_id,
 	       path->bus->path_id,
 	       path->target->target_id,
 	       (uintmax_t)path->device->lun_id);
 	/* Prefix for the protocol description that completes this line. */
 	printf("%s%d: ", periph->periph_name, periph->unit_number);
 	proto = xpt_proto_find(path->device->protocol);
 	if (proto)
 		proto->ops->announce(path->device);
 	else
 		/*
 		 * The "name unit: " prefix is already on this line, so it
 		 * must not be printed a second time here.
 		 */
 		printf("Unknown protocol device %d\n",
 		    path->device->protocol);
 	if (path->device->serial_num_len > 0) {
 		/* Don't wrap the screen  - print only the first 60 chars */
 		printf("%s%d: Serial Number %.60s\n", periph->periph_name,
 		       periph->unit_number, path->device->serial_num);
 	}
 	/* Announce transport details. */
 	path->bus->xport->ops->announce(periph);
 	/* Announce command queueing. */
 	if (path->device->inq_flags & SID_CmdQue
 	 || path->device->flags & CAM_DEV_TAG_AFTER_COUNT) {
 		printf("%s%d: Command Queueing enabled\n",
 		       periph->periph_name, periph->unit_number);
 	}
 	/* Announce caller's details if they've passed in. */
 	if (announce_string != NULL)
 		printf("%s%d: %s\n", periph->periph_name,
 		       periph->unit_number, announce_string);
 }
 
 void
 xpt_announce_periph_sbuf(struct cam_periph *periph, struct sbuf *sb,
     char *announce_string)
 {
 	struct	cam_path *path = periph->path;
 	struct  xpt_proto *proto;
 
 	cam_periph_assert(periph, MA_OWNED);
 	periph->flags |= CAM_PERIPH_ANNOUNCED;
 
 	/* Fall back to the non-sbuf method if necessary */
 	if (xsoftc.announce_nosbuf != 0) {
 		xpt_announce_periph(periph, announce_string);
 		return;
 	}
 	proto = xpt_proto_find(path->device->protocol);
 	if (((proto != NULL) && (proto->ops->announce_sbuf == NULL)) ||
 	    (path->bus->xport->ops->announce_sbuf == NULL)) {
 		xpt_announce_periph(periph, announce_string);
 		return;
 	}
 
 	sbuf_printf(sb, "%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
 	    periph->periph_name, periph->unit_number,
 	    path->bus->sim->sim_name,
 	    path->bus->sim->unit_number,
 	    path->bus->sim->bus_id,
 	    path->bus->path_id,
 	    path->target->target_id,
 	    (uintmax_t)path->device->lun_id);
 	sbuf_printf(sb, "%s%d: ", periph->periph_name, periph->unit_number);
 
 	if (proto)
 		proto->ops->announce_sbuf(path->device, sb);
 	else
 		sbuf_printf(sb, "%s%d: Unknown protocol device %d\n",
 		    periph->periph_name, periph->unit_number,
 		    path->device->protocol);
 	if (path->device->serial_num_len > 0) {
 		/* Don't wrap the screen  - print only the first 60 chars */
 		sbuf_printf(sb, "%s%d: Serial Number %.60s\n",
 		    periph->periph_name, periph->unit_number,
 		    path->device->serial_num);
 	}
 	/* Announce transport details. */
 	path->bus->xport->ops->announce_sbuf(periph, sb);
 	/* Announce command queueing. */
 	if (path->device->inq_flags & SID_CmdQue
 	 || path->device->flags & CAM_DEV_TAG_AFTER_COUNT) {
 		sbuf_printf(sb, "%s%d: Command Queueing enabled\n",
 		    periph->periph_name, periph->unit_number);
 	}
 	/* Announce caller's details if they've passed in. */
 	if (announce_string != NULL)
 		sbuf_printf(sb, "%s%d: %s\n", periph->periph_name,
 		    periph->unit_number, announce_string);
 }
 
 /*
  * Print the quirk bits of a peripheral to the console.
  *
  * Nothing is printed when 'quirks' is zero.  'bit_string' is handed to the
  * kernel printf "%b" conversion together with 'quirks'.
  */
 void
 xpt_announce_quirks(struct cam_periph *periph, int quirks, char *bit_string)
 {
 	if (quirks == 0)
 		return;
 
 	printf("%s%d: quirks=0x%b\n", periph->periph_name,
 	    periph->unit_number, quirks, bit_string);
 }
 
 /*
  * Append the quirk bits of a peripheral to an sbuf.
  *
  * When xsoftc.announce_nosbuf is non-zero the printf()-based
  * xpt_announce_quirks() is used instead.  Nothing is emitted when 'quirks'
  * is zero.
  */
 void
 xpt_announce_quirks_sbuf(struct cam_periph *periph, struct sbuf *sb,
 			 int quirks, char *bit_string)
 {
 	if (xsoftc.announce_nosbuf != 0) {
 		xpt_announce_quirks(periph, quirks, bit_string);
 	} else if (quirks != 0) {
 		sbuf_printf(sb, "%s%d: quirks=0x%b\n", periph->periph_name,
 		    periph->unit_number, quirks, bit_string);
 	}
 }
 
 void
 xpt_denounce_periph(struct cam_periph *periph)
 {
 	struct	cam_path *path = periph->path;
 	struct  xpt_proto *proto;
 
 	cam_periph_assert(periph, MA_OWNED);
 	printf("%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
 	       periph->periph_name, periph->unit_number,
 	       path->bus->sim->sim_name,
 	       path->bus->sim->unit_number,
 	       path->bus->sim->bus_id,
 	       path->bus->path_id,
 	       path->target->target_id,
 	       (uintmax_t)path->device->lun_id);
 	printf("%s%d: ", periph->periph_name, periph->unit_number);
 	proto = xpt_proto_find(path->device->protocol);
 	if (proto)
 		proto->ops->denounce(path->device);
 	else
 		printf("%s%d: Unknown protocol device %d\n",
 		    periph->periph_name, periph->unit_number,
 		    path->device->protocol);
 	if (path->device->serial_num_len > 0)
 		printf(" s/n %.60s", path->device->serial_num);
 	printf(" detached\n");
 }
 
 void
 xpt_denounce_periph_sbuf(struct cam_periph *periph, struct sbuf *sb)
 {
 	struct cam_path *path = periph->path;
 	struct xpt_proto *proto;
 
 	cam_periph_assert(periph, MA_OWNED);
 
 	/* Fall back to the non-sbuf method if necessary */
 	if (xsoftc.announce_nosbuf != 0) {
 		xpt_denounce_periph(periph);
 		return;
 	}
 	proto = xpt_proto_find(path->device->protocol);
 	if ((proto != NULL) && (proto->ops->denounce_sbuf == NULL)) {
 		xpt_denounce_periph(periph);
 		return;
 	}
 
 	sbuf_printf(sb, "%s%d at %s%d bus %d scbus%d target %d lun %jx\n",
 	    periph->periph_name, periph->unit_number,
 	    path->bus->sim->sim_name,
 	    path->bus->sim->unit_number,
 	    path->bus->sim->bus_id,
 	    path->bus->path_id,
 	    path->target->target_id,
 	    (uintmax_t)path->device->lun_id);
 	sbuf_printf(sb, "%s%d: ", periph->periph_name, periph->unit_number);
 
 	if (proto)
 		proto->ops->denounce_sbuf(path->device, sb);
 	else
 		sbuf_printf(sb, "%s%d: Unknown protocol device %d\n",
 		    periph->periph_name, periph->unit_number,
 		    path->device->protocol);
 	if (path->device->serial_num_len > 0)
 		sbuf_printf(sb, " s/n %.60s", path->device->serial_num);
 	sbuf_printf(sb, " detached\n");
 }
 
 /*
  * Fetch a GEOM attribute of the device behind 'path' as a string.
  *
  * Recognized attributes are "GEOM::ident" (CDAI_TYPE_SERIAL_NUM),
  * "GEOM::physpath" (CDAI_TYPE_PHYS_PATH) and "GEOM::lunid" /
  * "GEOM::lunname" (both derived from CDAI_TYPE_SCSI_DEVID descriptors).
  * The raw data is requested with an XPT_DEV_ADVINFO CCB into a temporary
  * buffer and then formatted into 'buf', which holds 'len' bytes.
  *
  * xpt_path_assert(path, MA_OWNED) is checked on entry.
  *
  * Returns 0 on success, -1 when the attribute is not recognized or no
  * usable data is available, ENOMEM when the temporary buffer cannot be
  * allocated, and EFAULT when the result does not fit in 'buf'.
  */
 int
 xpt_getattr(char *buf, size_t len, const char *attr, struct cam_path *path)
 {
 	int ret = -1, l, o;
 	size_t n;
 	struct ccb_dev_advinfo cdai;
 	struct scsi_vpd_id_descriptor *idd;
 
 	xpt_path_assert(path, MA_OWNED);
 
 	memset(&cdai, 0, sizeof(cdai));
 	xpt_setup_ccb(&cdai.ccb_h, path, CAM_PRIORITY_NORMAL);
 	cdai.ccb_h.func_code = XPT_DEV_ADVINFO;
 	cdai.flags = CDAI_FLAG_NONE;
 	cdai.bufsiz = len;
 
 	if (!strcmp(attr, "GEOM::ident"))
 		cdai.buftype = CDAI_TYPE_SERIAL_NUM;
 	else if (!strcmp(attr, "GEOM::physpath"))
 		cdai.buftype = CDAI_TYPE_PHYS_PATH;
 	else if (strcmp(attr, "GEOM::lunid") == 0 ||
 		 strcmp(attr, "GEOM::lunname") == 0) {
 		cdai.buftype = CDAI_TYPE_SCSI_DEVID;
 		/* The device ID page is parsed here, so fetch all of it. */
 		cdai.bufsiz = CAM_SCSI_DEVID_MAXLEN;
 	} else
 		goto out;
 
 	cdai.buf = malloc(cdai.bufsiz, M_CAMXPT, M_NOWAIT|M_ZERO);
 	if (cdai.buf == NULL) {
 		ret = ENOMEM;
 		goto out;
 	}
 	xpt_action((union ccb *)&cdai); /* can only be synchronous */
 	if ((cdai.ccb_h.status & CAM_DEV_QFRZN) != 0)
 		cam_release_devq(cdai.ccb_h.path, 0, 0, 0, FALSE);
 	if (cdai.provsiz == 0)
 		goto out;
 	if (cdai.buftype == CDAI_TYPE_SCSI_DEVID) {
 		/*
 		 * "GEOM::lunid" prefers NAA, EUI-64, UUID and MD5 descriptors,
 		 * in that order; both attributes then fall back to T10 and
 		 * finally to name descriptors.
 		 */
 		if (strcmp(attr, "GEOM::lunid") == 0) {
 			idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 			    cdai.provsiz, scsi_devid_is_lun_naa);
 			if (idd == NULL)
 				idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 				    cdai.provsiz, scsi_devid_is_lun_eui64);
 			if (idd == NULL)
 				idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 				    cdai.provsiz, scsi_devid_is_lun_uuid);
 			if (idd == NULL)
 				idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 				    cdai.provsiz, scsi_devid_is_lun_md5);
 		} else
 			idd = NULL;
 		if (idd == NULL)
 			idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 			    cdai.provsiz, scsi_devid_is_lun_t10);
 		if (idd == NULL)
 			idd = scsi_get_devid((struct scsi_vpd_device_id *)cdai.buf,
 			    cdai.provsiz, scsi_devid_is_lun_name);
 		if (idd == NULL)
 			goto out;
 		ret = 0;
 		if ((idd->proto_codeset & SVPD_ID_CODESET_MASK) == SVPD_ID_CODESET_ASCII) {
 			/* ASCII: copy verbatim, turning embedded NULs into blanks. */
 			if (idd->length < len) {
 				for (l = 0; l < idd->length; l++)
 					buf[l] = idd->identifier[l] ?
 					    idd->identifier[l] : ' ';
 				buf[l] = 0;
 			} else
 				ret = EFAULT;
 		} else if ((idd->proto_codeset & SVPD_ID_CODESET_MASK) == SVPD_ID_CODESET_UTF8) {
 			/* UTF-8: copy up to the first NUL inside the descriptor. */
 			l = strnlen(idd->identifier, idd->length);
 			if (l < len) {
 				bcopy(idd->identifier, buf, l);
 				buf[l] = 0;
 			} else
 				ret = EFAULT;
 		} else if ((idd->id_type & SVPD_ID_TYPE_MASK) == SVPD_ID_TYPE_UUID
 		    && idd->identifier[0] == 0x10) {
 			/* UUID: hex digits with dashes, skipping the 2-byte header. */
 			if ((idd->length - 2) * 2 + 4 < len) {
 				/*
 				 * The size check guarantees len > 0.  Terminate
 				 * up front so that a descriptor yielding no
 				 * digits still returns an empty string.
 				 */
 				buf[0] = 0;
 				for (l = 2, o = 0; l < idd->length; l++) {
 					if (l == 6 || l == 8 || l == 10 || l == 12)
 					    o += sprintf(buf + o, "-");
 					o += sprintf(buf + o, "%02x",
 					    idd->identifier[l]);
 				}
 			} else
 				ret = EFAULT;
 		} else {
 			/* Anything else: plain hex dump of the identifier. */
 			if (idd->length * 2 < len) {
 				/* len > 0 here; see the UUID case above. */
 				buf[0] = 0;
 				for (l = 0; l < idd->length; l++)
 					sprintf(buf + l * 2, "%02x",
 					    idd->identifier[l]);
 			} else
 				ret = EFAULT;
 		}
 	} else {
 		/*
 		 * cdai.buf is exactly 'len' bytes long in this branch and
 		 * nothing visible here guarantees that the provider left a
 		 * NUL inside it, so bound the scan instead of relying on
 		 * strlcpy(), which requires a NUL-terminated source.
 		 */
 		n = strnlen((const char *)cdai.buf, len);
 		if (n < len) {
 			bcopy(cdai.buf, buf, n);
 			buf[n] = 0;
 			ret = 0;
 		} else {
 			/* Too long: hand back the truncated prefix, as before. */
 			if (len > 0) {
 				bcopy(cdai.buf, buf, len - 1);
 				buf[len - 1] = 0;
 			}
 			ret = EFAULT;
 		}
 	}
 
 out:
 	if (cdai.buf != NULL)
 		free(cdai.buf, M_CAMXPT);
 	return ret;
 }
 
 /*
  * Match a single bus against any number of match patterns.
  *
  * Returns DM_RET_ERROR when 'bus' is NULL and DM_RET_DESCEND | DM_RET_COPY
  * when no patterns are supplied.  Otherwise the result carries an action
  * (DM_RET_DESCEND once a non-bus pattern has been seen, DM_RET_STOP when
  * only bus patterns were present), with DM_RET_COPY or'ed in when a bus
  * pattern matched this bus.
  */
 static dev_match_ret
 xptbusmatch(struct dev_match_pattern *patterns, u_int num_patterns,
 	    struct cam_eb *bus)
 {
 	dev_match_ret retval;
 	u_int i;
 
 	retval = DM_RET_NONE;
 
 	/*
 	 * If we aren't given something to match against, that's an error.
 	 */
 	if (bus == NULL)
 		return(DM_RET_ERROR);
 
 	/*
 	 * If there are no match entries, then this bus matches no
 	 * matter what.
 	 */
 	if ((patterns == NULL) || (num_patterns == 0))
 		return(DM_RET_DESCEND | DM_RET_COPY);
 
 	for (i = 0; i < num_patterns; i++) {
 		struct bus_match_pattern *cur_pattern;
 
 		/*
 		 * If the pattern in question isn't for a bus node, we
 		 * aren't interested.  However, we do indicate to the
 		 * calling routine that we should continue descending the
 		 * tree, since the user wants to match against lower-level
 		 * EDT elements.
 		 */
 		if (patterns[i].type != DEV_MATCH_BUS) {
 			if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
 				retval |= DM_RET_DESCEND;
 			continue;
 		}
 
 		cur_pattern = &patterns[i].pattern.bus_pattern;
 
 		/*
 		 * If they want to match any bus node, we give them any
 		 * device node.
 		 */
 		if (cur_pattern->flags == BUS_MATCH_ANY) {
 			/* set the copy flag */
 			retval |= DM_RET_COPY;
 
 			/*
 			 * If we've already decided on an action, go ahead
 			 * and return.
 			 */
 			if ((retval & DM_RET_ACTION_MASK) != DM_RET_NONE)
 				return(retval);
 			/*
 			 * No action yet: control falls through to the
 			 * field checks below with the copy flag already set.
 			 */
 		}
 
 		/*
 		 * Not sure why someone would do this...
 		 */
 		if (cur_pattern->flags == BUS_MATCH_NONE)
 			continue;
 
 		/* Each requested field must agree, or the pattern is skipped. */
 		if (((cur_pattern->flags & BUS_MATCH_PATH) != 0)
 		 && (cur_pattern->path_id != bus->path_id))
 			continue;
 
 		if (((cur_pattern->flags & BUS_MATCH_BUS_ID) != 0)
 		 && (cur_pattern->bus_id != bus->sim->bus_id))
 			continue;
 
 		if (((cur_pattern->flags & BUS_MATCH_UNIT) != 0)
 		 && (cur_pattern->unit_number != bus->sim->unit_number))
 			continue;
 
 		if (((cur_pattern->flags & BUS_MATCH_NAME) != 0)
 		 && (strncmp(cur_pattern->dev_name, bus->sim->sim_name,
 			     DEV_IDLEN) != 0))
 			continue;
 
 		/*
 		 * If we get to this point, the user definitely wants
 		 * information on this bus.  So tell the caller to copy the
 		 * data out.
 		 */
 		retval |= DM_RET_COPY;
 
 		/*
 		 * If the return action has been set to descend, then we
 		 * know that we've already seen a non-bus matching
 		 * expression, therefore we need to further descend the tree.
 		 * This won't change by continuing around the loop, so we
 		 * go ahead and return.  If we haven't seen a non-bus
 		 * matching expression, we keep going around the loop until
 		 * we exhaust the matching expressions.  We'll set the stop
 		 * flag once we fall out of the loop.
 		 */
 		if ((retval & DM_RET_ACTION_MASK) == DM_RET_DESCEND)
 			return(retval);
 	}
 
 	/*
 	 * If the return action hasn't been set to descend yet, that means
 	 * we haven't seen anything other than bus matching patterns.  So
 	 * tell the caller to stop descending the tree -- the user doesn't
 	 * want to match against lower level tree elements.
 	 */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
 		retval |= DM_RET_STOP;
 
 	return(retval);
 }
 
 /*
  * Match a single device against any number of match patterns.
  *
  * Returns DM_RET_ERROR when 'device' is NULL or when a device pattern sets
  * both DEV_MATCH_INQUIRY and DEV_MATCH_DEVID, and
  * DM_RET_DESCEND | DM_RET_COPY when no patterns are supplied.  Otherwise
  * the result carries an action (DM_RET_DESCEND once a peripheral pattern
  * has been seen, DM_RET_STOP when none was), with DM_RET_COPY or'ed in
  * when a device pattern matched this device.
  */
 static dev_match_ret
 xptdevicematch(struct dev_match_pattern *patterns, u_int num_patterns,
 	       struct cam_ed *device)
 {
 	dev_match_ret retval;
 	u_int i;
 
 	retval = DM_RET_NONE;
 
 	/*
 	 * If we aren't given something to match against, that's an error.
 	 */
 	if (device == NULL)
 		return(DM_RET_ERROR);
 
 	/*
 	 * If there are no match entries, then this device matches no
 	 * matter what.
 	 */
 	if ((patterns == NULL) || (num_patterns == 0))
 		return(DM_RET_DESCEND | DM_RET_COPY);
 
 	for (i = 0; i < num_patterns; i++) {
 		struct device_match_pattern *cur_pattern;
 		struct scsi_vpd_device_id *device_id_page;
 
 		/*
 		 * If the pattern in question isn't for a device node, we
 		 * aren't interested.
 		 */
 		if (patterns[i].type != DEV_MATCH_DEVICE) {
 			/* Only peripheral patterns request a descent. */
 			if ((patterns[i].type == DEV_MATCH_PERIPH)
 			 && ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE))
 				retval |= DM_RET_DESCEND;
 			continue;
 		}
 
 		cur_pattern = &patterns[i].pattern.device_pattern;
 
 		/* Error out if mutually exclusive options are specified. */
 		if ((cur_pattern->flags & (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID))
 		 == (DEV_MATCH_INQUIRY|DEV_MATCH_DEVID))
 			return(DM_RET_ERROR);
 
 		/*
 		 * If they want to match any device node, we give them any
 		 * device node.
 		 */
 		if (cur_pattern->flags == DEV_MATCH_ANY)
 			goto copy_dev_node;
 
 		/*
 		 * Not sure why someone would do this...
 		 */
 		if (cur_pattern->flags == DEV_MATCH_NONE)
 			continue;
 
 		/* Each requested field must agree, or the pattern is skipped. */
 		if (((cur_pattern->flags & DEV_MATCH_PATH) != 0)
 		 && (cur_pattern->path_id != device->target->bus->path_id))
 			continue;
 
 		if (((cur_pattern->flags & DEV_MATCH_TARGET) != 0)
 		 && (cur_pattern->target_id != device->target->target_id))
 			continue;
 
 		if (((cur_pattern->flags & DEV_MATCH_LUN) != 0)
 		 && (cur_pattern->target_lun != device->lun_id))
 			continue;
 
 		if (((cur_pattern->flags & DEV_MATCH_INQUIRY) != 0)
 		 && (cam_quirkmatch((caddr_t)&device->inq_data,
 				    (caddr_t)&cur_pattern->data.inq_pat,
 				    1, sizeof(cur_pattern->data.inq_pat),
 				    scsi_static_inquiry_match) == NULL))
 			continue;
 
 		/*
 		 * A device ID shorter than the page header cannot match; the
 		 * length check also keeps the subtraction below from going
 		 * negative.
 		 */
 		device_id_page = (struct scsi_vpd_device_id *)device->device_id;
 		if (((cur_pattern->flags & DEV_MATCH_DEVID) != 0)
 		 && (device->device_id_len < SVPD_DEVICE_ID_HDR_LEN
 		  || scsi_devid_match((uint8_t *)device_id_page->desc_list,
 				      device->device_id_len
 				    - SVPD_DEVICE_ID_HDR_LEN,
 				      cur_pattern->data.devid_pat.id,
 				      cur_pattern->data.devid_pat.id_len) != 0))
 			continue;
 
 copy_dev_node:
 		/*
 		 * If we get to this point, the user definitely wants
 		 * information on this device.  So tell the caller to copy
 		 * the data out.
 		 */
 		retval |= DM_RET_COPY;
 
 		/*
 		 * If the return action has been set to descend, then we
 		 * know that we've already seen a peripheral matching
 		 * expression, therefore we need to further descend the tree.
 		 * This won't change by continuing around the loop, so we
 		 * go ahead and return.  If we haven't seen a peripheral
 		 * matching expression, we keep going around the loop until
 		 * we exhaust the matching expressions.  We'll set the stop
 		 * flag once we fall out of the loop.
 		 */
 		if ((retval & DM_RET_ACTION_MASK) == DM_RET_DESCEND)
 			return(retval);
 	}
 
 	/*
 	 * If the return action hasn't been set to descend yet, that means
 	 * we haven't seen any peripheral matching patterns.  So tell the
 	 * caller to stop descending the tree -- the user doesn't want to
 	 * match against lower level tree elements.
 	 */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_NONE)
 		retval |= DM_RET_STOP;
 
 	return(retval);
 }
 
 /*
  * Decide whether one peripheral satisfies any of the supplied patterns.
  *
  * Returns DM_RET_ERROR when 'periph' is NULL.  In every other case the
  * action is DM_RET_STOP, because nothing lives below a peripheral in the
  * tree; DM_RET_COPY is or'ed in when the pattern list is empty or when a
  * peripheral pattern matches.
  */
 static dev_match_ret
 xptperiphmatch(struct dev_match_pattern *patterns, u_int num_patterns,
 	       struct cam_periph *periph)
 {
 	struct periph_match_pattern *pat;
 	u_int idx;
 
 	/* Without a peripheral there is nothing to compare against. */
 	if (periph == NULL)
 		return (DM_RET_ERROR);
 
 	/* An empty pattern list matches every peripheral. */
 	if (patterns == NULL || num_patterns == 0)
 		return (DM_RET_STOP | DM_RET_COPY);
 
 	for (idx = 0; idx < num_patterns; idx++) {
 		/* Only peripheral patterns are of interest here. */
 		if (patterns[idx].type != DEV_MATCH_PERIPH)
 			continue;
 
 		pat = &patterns[idx].pattern.periph_pattern;
 
 		/* A wildcard pattern matches straight away. */
 		if (pat->flags == PERIPH_MATCH_ANY)
 			return (DM_RET_STOP | DM_RET_COPY);
 
 		/* A pattern that asks for nothing cannot match. */
 		if (pat->flags == PERIPH_MATCH_NONE)
 			continue;
 
 		if ((pat->flags & PERIPH_MATCH_PATH) != 0 &&
 		    pat->path_id != periph->path->bus->path_id)
 			continue;
 
 		/*
 		 * The xpt peripheral has a wildcard target and device, so
 		 * either pointer may be NULL; such a peripheral cannot
 		 * satisfy a target or lun constraint.
 		 */
 		if ((pat->flags & PERIPH_MATCH_TARGET) != 0 &&
 		    (periph->path->target == NULL ||
 		    pat->target_id != periph->path->target->target_id))
 			continue;
 
 		if ((pat->flags & PERIPH_MATCH_LUN) != 0 &&
 		    (periph->path->device == NULL ||
 		    pat->target_lun != periph->path->device->lun_id))
 			continue;
 
 		if ((pat->flags & PERIPH_MATCH_UNIT) != 0 &&
 		    pat->unit_number != periph->unit_number)
 			continue;
 
 		if ((pat->flags & PERIPH_MATCH_NAME) != 0 &&
 		    strncmp(pat->periph_name, periph->periph_name,
 		    DEV_IDLEN) != 0)
 			continue;
 
 		/* Every requested field agreed: have the caller copy it out. */
 		return (DM_RET_STOP | DM_RET_COPY);
 	}
 
 	/* None of the patterns matched this peripheral. */
 	return (DM_RET_STOP);
 }
 
 /*
  * Bus-level callback of the EDT match traversal; 'arg' is the
  * struct ccb_dev_match being serviced.
  *
  * Matches the bus against the request's patterns (unless the saved
  * position shows it was already handled), copies a bus result out when
  * asked to, and then continues with the bus's targets through
  * xpttargettraverse().
  *
  * Returns 0 after setting cdm->status when the search has to stop (match
  * error, result buffer full, or target list changed), 1 when the caller
  * is only interested in buses, and otherwise the value returned by
  * xpttargettraverse().
  */
 static int
 xptedtbusfunc(struct cam_eb *bus, void *arg)
 {
 	struct ccb_dev_match *cdm;
 	struct cam_et *target;
 	dev_match_ret retval;
 
 	cdm = (struct ccb_dev_match *)arg;
 
 	/*
 	 * If our position is for something deeper in the tree, that means
 	 * that we've already seen this node.  So, we keep going down.
 	 */
 	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
 	 && (cdm->pos.cookie.bus == bus)
 	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
 	 && (cdm->pos.cookie.target != NULL))
 		retval = DM_RET_DESCEND;
 	else
 		retval = xptbusmatch(cdm->patterns, cdm->num_patterns, bus);
 
 	/*
 	 * If we got an error, bail out of the search.
 	 */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
 		cdm->status = CAM_DEV_MATCH_ERROR;
 		return(0);
 	}
 
 	/*
 	 * If the copy flag is set, copy this bus out.
 	 */
 	if (retval & DM_RET_COPY) {
 		int spaceleft, j;
 
 		/* Bytes of the result buffer not yet used by earlier matches. */
 		spaceleft = cdm->match_buf_len - (cdm->num_matches *
 			sizeof(struct dev_match_result));
 
 		/*
 		 * If we don't have enough space to put in another
 		 * match result, save our position and tell the
 		 * user there are more devices to check.
 		 */
 		if (spaceleft < sizeof(struct dev_match_result)) {
 			bzero(&cdm->pos, sizeof(cdm->pos));
 			cdm->pos.position_type =
 				CAM_DEV_POS_EDT | CAM_DEV_POS_BUS;
 
 			cdm->pos.cookie.bus = bus;
 			cdm->pos.generations[CAM_BUS_GENERATION]=
 				xsoftc.bus_generation;
 			cdm->status = CAM_DEV_MATCH_MORE;
 			return(0);
 		}
 		j = cdm->num_matches;
 		cdm->num_matches++;
 		cdm->matches[j].type = DEV_MATCH_BUS;
 		cdm->matches[j].result.bus_result.path_id = bus->path_id;
 		cdm->matches[j].result.bus_result.bus_id = bus->sim->bus_id;
 		cdm->matches[j].result.bus_result.unit_number =
 			bus->sim->unit_number;
 		strlcpy(cdm->matches[j].result.bus_result.dev_name,
 			bus->sim->sim_name,
 			sizeof(cdm->matches[j].result.bus_result.dev_name));
 	}
 
 	/*
 	 * If the user is only interested in buses, there's no
 	 * reason to descend to the next level in the tree.
 	 */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_STOP)
 		return(1);
 
 	/*
 	 * If there is a target generation recorded, check it to
 	 * make sure the target list hasn't changed.
 	 */
 	mtx_lock(&bus->eb_mtx);
 	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
 	 && (cdm->pos.cookie.bus == bus)
 	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
 	 && (cdm->pos.cookie.target != NULL)) {
 		if ((cdm->pos.generations[CAM_TARGET_GENERATION] !=
 		    bus->generation)) {
 			mtx_unlock(&bus->eb_mtx);
 			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
 			return (0);
 		}
 		/*
 		 * Resume from the saved target; its reference count is
 		 * raised while the bus mutex is still held.
 		 */
 		target = (struct cam_et *)cdm->pos.cookie.target;
 		target->refcount++;
 	} else
 		target = NULL;
 	mtx_unlock(&bus->eb_mtx);
 
 	return (xpttargettraverse(bus, target, xptedttargetfunc, arg));
 }
 
 /*
  * Target-level callback of the EDT match traversal; 'arg' is the
  * struct ccb_dev_match being serviced.
  *
  * Targets are not matched themselves.  When the saved position names a
  * device under this target, the recorded device-list generation is
  * checked against the target's; on a mismatch the status is set to
  * CAM_DEV_MATCH_LIST_CHANGED and 0 is returned.  Otherwise the traversal
  * continues with the target's devices, resuming at the saved device when
  * there is one, and the value of xptdevicetraverse() is returned.
  */
 static int
 xptedttargetfunc(struct cam_et *target, void *arg)
 {
 	struct ccb_dev_match *cdm;
 	struct cam_eb *bus;
 	struct cam_ed *resume_dev;
 
 	cdm = (struct ccb_dev_match *)arg;
 	bus = target->bus;
 	resume_dev = NULL;
 
 	mtx_lock(&bus->eb_mtx);
 	if ((cdm->pos.position_type & CAM_DEV_POS_BUS) != 0 &&
 	    cdm->pos.cookie.bus == bus &&
 	    (cdm->pos.position_type & CAM_DEV_POS_TARGET) != 0 &&
 	    cdm->pos.cookie.target == target &&
 	    (cdm->pos.position_type & CAM_DEV_POS_DEVICE) != 0 &&
 	    cdm->pos.cookie.device != NULL) {
 		/* The device list must be unchanged since the position was saved. */
 		if (cdm->pos.generations[CAM_DEV_GENERATION] !=
 		    target->generation) {
 			mtx_unlock(&bus->eb_mtx);
 			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
 			return (0);
 		}
 		resume_dev = (struct cam_ed *)cdm->pos.cookie.device;
 		resume_dev->refcount++;
 	}
 	mtx_unlock(&bus->eb_mtx);
 
 	return (xptdevicetraverse(target, resume_dev, xptedtdevicefunc, arg));
 }
 
 /*
  * Device-level callback of the EDT match traversal; 'arg' is the
  * struct ccb_dev_match being serviced.
  *
  * Matches the device against the request's patterns (unless the saved
  * position shows it was already handled), copies a device result out when
  * asked to, and then continues with the device's peripherals through
  * xptperiphtraverse().
  *
  * Returns 0 after setting cdm->status when the search has to stop (match
  * error, result buffer full, or peripheral list changed), 1 when the
  * caller is not interested in peripherals, and otherwise the value
  * returned by xptperiphtraverse().
  */
 static int
 xptedtdevicefunc(struct cam_ed *device, void *arg)
 {
 	struct cam_eb *bus;
 	struct cam_periph *periph;
 	struct ccb_dev_match *cdm;
 	dev_match_ret retval;
 
 	cdm = (struct ccb_dev_match *)arg;
 	bus = device->target->bus;
 
 	/*
 	 * If our position is for something deeper in the tree, that means
 	 * that we've already seen this node.  So, we keep going down.
 	 */
 	if ((cdm->pos.position_type & CAM_DEV_POS_DEVICE)
 	 && (cdm->pos.cookie.device == device)
 	 && (cdm->pos.position_type & CAM_DEV_POS_PERIPH)
 	 && (cdm->pos.cookie.periph != NULL))
 		retval = DM_RET_DESCEND;
 	else
 		retval = xptdevicematch(cdm->patterns, cdm->num_patterns,
 					device);
 
 	/* A match error ends the whole search. */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
 		cdm->status = CAM_DEV_MATCH_ERROR;
 		return(0);
 	}
 
 	/*
 	 * If the copy flag is set, copy this device out.
 	 */
 	if (retval & DM_RET_COPY) {
 		int spaceleft, j;
 
 		/* Bytes of the result buffer not yet used by earlier matches. */
 		spaceleft = cdm->match_buf_len - (cdm->num_matches *
 			sizeof(struct dev_match_result));
 
 		/*
 		 * If we don't have enough space to put in another
 		 * match result, save our position and tell the
 		 * user there are more devices to check.
 		 */
 		if (spaceleft < sizeof(struct dev_match_result)) {
 			bzero(&cdm->pos, sizeof(cdm->pos));
 			cdm->pos.position_type =
 				CAM_DEV_POS_EDT | CAM_DEV_POS_BUS |
 				CAM_DEV_POS_TARGET | CAM_DEV_POS_DEVICE;
 
 			/*
 			 * Record each level's cookie together with the
 			 * generation of the list that contains it.
 			 */
 			cdm->pos.cookie.bus = device->target->bus;
 			cdm->pos.generations[CAM_BUS_GENERATION]=
 				xsoftc.bus_generation;
 			cdm->pos.cookie.target = device->target;
 			cdm->pos.generations[CAM_TARGET_GENERATION] =
 				device->target->bus->generation;
 			cdm->pos.cookie.device = device;
 			cdm->pos.generations[CAM_DEV_GENERATION] =
 				device->target->generation;
 			cdm->status = CAM_DEV_MATCH_MORE;
 			return(0);
 		}
 		j = cdm->num_matches;
 		cdm->num_matches++;
 		cdm->matches[j].type = DEV_MATCH_DEVICE;
 		cdm->matches[j].result.device_result.path_id =
 			device->target->bus->path_id;
 		cdm->matches[j].result.device_result.target_id =
 			device->target->target_id;
 		cdm->matches[j].result.device_result.target_lun =
 			device->lun_id;
 		cdm->matches[j].result.device_result.protocol =
 			device->protocol;
 		bcopy(&device->inq_data,
 		      &cdm->matches[j].result.device_result.inq_data,
 		      sizeof(struct scsi_inquiry_data));
 		bcopy(&device->ident_data,
 		      &cdm->matches[j].result.device_result.ident_data,
 		      sizeof(struct ata_params));
 
 		/* Let the user know whether this device is unconfigured */
 		if (device->flags & CAM_DEV_UNCONFIGURED)
 			cdm->matches[j].result.device_result.flags =
 				DEV_RESULT_UNCONFIGURED;
 		else
 			cdm->matches[j].result.device_result.flags =
 				DEV_RESULT_NOFLAG;
 	}
 
 	/*
 	 * If the user isn't interested in peripherals, don't descend
 	 * the tree any further.
 	 */
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_STOP)
 		return(1);
 
 	/*
 	 * If there is a peripheral list generation recorded, make sure
 	 * it hasn't changed.
 	 */
 	/* Lock order here: the bus-list lock first, then this bus's mutex. */
 	xpt_lock_buses();
 	mtx_lock(&bus->eb_mtx);
 	if ((cdm->pos.position_type & CAM_DEV_POS_BUS)
 	 && (cdm->pos.cookie.bus == bus)
 	 && (cdm->pos.position_type & CAM_DEV_POS_TARGET)
 	 && (cdm->pos.cookie.target == device->target)
 	 && (cdm->pos.position_type & CAM_DEV_POS_DEVICE)
 	 && (cdm->pos.cookie.device == device)
 	 && (cdm->pos.position_type & CAM_DEV_POS_PERIPH)
 	 && (cdm->pos.cookie.periph != NULL)) {
 		if (cdm->pos.generations[CAM_PERIPH_GENERATION] !=
 		    device->generation) {
 			mtx_unlock(&bus->eb_mtx);
 			xpt_unlock_buses();
 			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
 			return(0);
 		}
 		/*
 		 * Resume from the saved peripheral; its reference count is
 		 * raised while both locks are still held.
 		 */
 		periph = (struct cam_periph *)cdm->pos.cookie.periph;
 		periph->refcount++;
 	} else
 		periph = NULL;
 	mtx_unlock(&bus->eb_mtx);
 	xpt_unlock_buses();
 
 	return (xptperiphtraverse(device, periph, xptedtperiphfunc, arg));
 }
 
 static int
 xptedtperiphfunc(struct cam_periph *periph, void *arg)
 {
 	struct ccb_dev_match *cdm;
 	dev_match_ret retval;
 
 	cdm = (struct ccb_dev_match *)arg;
 
 	retval = xptperiphmatch(cdm->patterns, cdm->num_patterns, periph);
 
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
 		cdm->status = CAM_DEV_MATCH_ERROR;
 		return(0);
 	}
 
 	/*
 	 * If the copy flag is set, copy this peripheral out.
 	 */
 	if (retval & DM_RET_COPY) {
 		int spaceleft, j;
 		size_t l;
 
 		spaceleft = cdm->match_buf_len - (cdm->num_matches *
 			sizeof(struct dev_match_result));
 
 		/*
 		 * If we don't have enough space to put in another
 		 * match result, save our position and tell the
 		 * user there are more devices to check.
 		 */
 		if (spaceleft < sizeof(struct dev_match_result)) {
 			bzero(&cdm->pos, sizeof(cdm->pos));
 			cdm->pos.position_type =
 				CAM_DEV_POS_EDT | CAM_DEV_POS_BUS |
 				CAM_DEV_POS_TARGET | CAM_DEV_POS_DEVICE |
 				CAM_DEV_POS_PERIPH;
 
 			cdm->pos.cookie.bus = periph->path->bus;
 			cdm->pos.generations[CAM_BUS_GENERATION]=
 				xsoftc.bus_generation;
 			cdm->pos.cookie.target = periph->path->target;
 			cdm->pos.generations[CAM_TARGET_GENERATION] =
 				periph->path->bus->generation;
 			cdm->pos.cookie.device = periph->path->device;
 			cdm->pos.generations[CAM_DEV_GENERATION] =
 				periph->path->target->generation;
 			cdm->pos.cookie.periph = periph;
 			cdm->pos.generations[CAM_PERIPH_GENERATION] =
 				periph->path->device->generation;
 			cdm->status = CAM_DEV_MATCH_MORE;
 			return(0);
 		}
 
 		j = cdm->num_matches;
 		cdm->num_matches++;
 		cdm->matches[j].type = DEV_MATCH_PERIPH;
 		cdm->matches[j].result.periph_result.path_id =
 			periph->path->bus->path_id;
 		cdm->matches[j].result.periph_result.target_id =
 			periph->path->target->target_id;
 		cdm->matches[j].result.periph_result.target_lun =
 			periph->path->device->lun_id;
 		cdm->matches[j].result.periph_result.unit_number =
 			periph->unit_number;
 		l = sizeof(cdm->matches[j].result.periph_result.periph_name);
 		strlcpy(cdm->matches[j].result.periph_result.periph_name,
 			periph->periph_name, l);
 	}
 
 	return(1);
 }
 
 /*
  * Service an XPT_DEV_MATCH request by walking the EDT.
  *
  * The match count is reset.  When the request carries a saved bus
  * position, the recorded bus-list generation must still be current;
  * otherwise the status is set to CAM_DEV_MATCH_LIST_CHANGED and 0 is
  * returned so the user starts over.  The walk then begins at the saved
  * bus, or at the first bus when there is none.
  *
  * Returns the result of xptbustraverse(): 0 when the walk stopped early
  * (a callback has set cdm->status), 1 when the whole EDT was covered, in
  * which case the status is set to CAM_DEV_MATCH_LAST.
  */
 static int
 xptedtmatch(struct ccb_dev_match *cdm)
 {
 	struct cam_eb *start_bus;
 	int done;
 
 	start_bus = NULL;
 	cdm->num_matches = 0;
 
 	xpt_lock_buses();
 	if ((cdm->pos.position_type & CAM_DEV_POS_BUS) != 0 &&
 	    cdm->pos.cookie.bus != NULL) {
 		if (cdm->pos.generations[CAM_BUS_GENERATION] !=
 		    xsoftc.bus_generation) {
 			xpt_unlock_buses();
 			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
 			return (0);
 		}
 		start_bus = (struct cam_eb *)cdm->pos.cookie.bus;
 		start_bus->refcount++;
 	}
 	xpt_unlock_buses();
 
 	done = xptbustraverse(start_bus, xptedtbusfunc, cdm);
 	if (done == 1)
 		cdm->status = CAM_DEV_MATCH_LAST;
 
 	return (done);
 }
 
 /*
  * Peripheral-driver-level callback of the driver-list match traversal;
  * 'arg' is the struct ccb_dev_match being serviced.
  *
  * When the saved position names a peripheral of this driver, the recorded
  * generation is checked against the driver's; on a mismatch the status is
  * set to CAM_DEV_MATCH_LIST_CHANGED and 0 is returned.  Otherwise the
  * traversal continues with the driver's peripherals, resuming at the
  * saved peripheral when there is one, and the value of
  * xptpdperiphtraverse() is returned.
  */
 static int
 xptplistpdrvfunc(struct periph_driver **pdrv, void *arg)
 {
 	struct ccb_dev_match *cdm;
 	struct cam_periph *resume_periph;
 
 	cdm = (struct ccb_dev_match *)arg;
 	resume_periph = NULL;
 
 	xpt_lock_buses();
 	if ((cdm->pos.position_type & CAM_DEV_POS_PDPTR) != 0 &&
 	    cdm->pos.cookie.pdrv == pdrv &&
 	    (cdm->pos.position_type & CAM_DEV_POS_PERIPH) != 0 &&
 	    cdm->pos.cookie.periph != NULL) {
 		/* The driver's list must be unchanged since the position was saved. */
 		if (cdm->pos.generations[CAM_PERIPH_GENERATION] !=
 		    (*pdrv)->generation) {
 			xpt_unlock_buses();
 			cdm->status = CAM_DEV_MATCH_LIST_CHANGED;
 			return (0);
 		}
 		resume_periph = (struct cam_periph *)cdm->pos.cookie.periph;
 		resume_periph->refcount++;
 	}
 	xpt_unlock_buses();
 
 	return (xptpdperiphtraverse(pdrv, resume_periph, xptplistperiphfunc,
 	    arg));
 }
 
 static int
 xptplistperiphfunc(struct cam_periph *periph, void *arg)
 {
 	struct ccb_dev_match *cdm;
 	dev_match_ret retval;
 
 	cdm = (struct ccb_dev_match *)arg;
 
 	retval = xptperiphmatch(cdm->patterns, cdm->num_patterns, periph);
 
 	if ((retval & DM_RET_ACTION_MASK) == DM_RET_ERROR) {
 		cdm->status = CAM_DEV_MATCH_ERROR;
 		return(0);
 	}
 
 	/*
 	 * If the copy flag is set, copy this peripheral out.
 	 */
 	if (retval & DM_RET_COPY) {
 		int spaceleft, j;
 		size_t l;
 
 		spaceleft = cdm->match_buf_len - (cdm->num_matches *
 			sizeof(struct dev_match_result));
 
 		/*
 		 * If we don't have enough space to put in another
 		 * match result, save our position and tell the
 		 * user there are more devices to check.
 		 */
 		if (spaceleft < sizeof(struct dev_match_result)) {
 			struct periph_driver **pdrv;
 
 			pdrv = NULL;
 			bzero(&cdm->pos, sizeof(cdm->pos));
 			cdm->pos.position_type =
 				CAM_DEV_POS_PDRV | CAM_DEV_POS_PDPTR |
 				CAM_DEV_POS_PERIPH;
 
 			/*
 			 * This may look a bit non-sensical, but it is
 			 * actually quite logical.  There are very few
 			 * peripheral drivers, and bloating every peripheral
 			 * structure with a pointer back to its parent
 			 * peripheral driver linker set entry would cost
 			 * more in the long run than doing this quick lookup.
 			 */
 			for (pdrv = periph_drivers; *pdrv != NULL; pdrv++) {
 				if (strcmp((*pdrv)->driver_name,
 				    periph->periph_name) == 0)
 					break;
 			}
 
 			if (*pdrv == NULL) {
 				cdm->status = CAM_DEV_MATCH_ERROR;
 				return(0);
 			}
 
 			cdm->pos.cookie.pdrv = pdrv;
 			/*
 			 * The periph generation slot does double duty, as
 			 * does the periph pointer slot.  They are used for
 			 * both edt and pdrv lookups and positioning.
 			 */
 			cdm->pos.cookie.periph = periph;
 			cdm->pos.generations[CAM_PERIPH_GENERATION] =
 				(*pdrv)->generation;
 			cdm->status = CAM_DEV_MATCH_MORE;
 			return(0);
 		}
 
 		j = cdm->num_matches;
 		cdm->num_matches++;
 		cdm->matches[j].type = DEV_MATCH_PERIPH;
 		cdm->matches[j].result.periph_result.path_id =
 			periph->path->bus->path_id;
 
 		/*
 		 * The transport layer peripheral doesn't have a target or
 		 * lun.
 		 */
 		if (periph->path->target)
 			cdm->matches[j].result.periph_result.target_id =
 				periph->path->target->target_id;
 		else
 			cdm->matches[j].result.periph_result.target_id =
 				CAM_TARGET_WILDCARD;
 
 		if (periph->path->device)
 			cdm->matches[j].result.periph_result.target_lun =
 				periph->path->device->lun_id;
 		else
 			cdm->matches[j].result.periph_result.target_lun =
 				CAM_LUN_WILDCARD;
 
 		cdm->matches[j].result.periph_result.unit_number =
 			periph->unit_number;
 		l = sizeof(cdm->matches[j].result.periph_result.periph_name);
 		strlcpy(cdm->matches[j].result.periph_result.periph_name,
 			periph->periph_name, l);
 	}
 
 	return(1);
 }
 
 /*
  * Run a device-match request against the peripheral driver lists.
  *
  * The match count in the CCB is reset, then the peripheral driver array is
  * walked with xptplistpdrvfunc.  If the CCB carries a saved peripheral
  * driver position (CAM_DEV_POS_PDPTR set and a non-NULL cookie), the walk
  * resumes from that entry; otherwise it starts at the beginning.
  *
  * Returns the traversal result: 1 when the walk ran to completion (the
  * status is then set to CAM_DEV_MATCH_LAST here), 0 when it stopped early,
  * in which case cdm->status is left as it was set further down the
  * traversal.
  */
 static int
 xptperiphlistmatch(struct ccb_dev_match *cdm)
 {
 	struct periph_driver **resume_pdrv;
 	int traversed;
 
 	cdm->num_matches = 0;
 
 	/*
 	 * Unlike the EDT walk, no generation check is done on the driver
 	 * list itself before resuming; only the saved pointer is consulted.
 	 */
 	resume_pdrv = NULL;
 	if ((cdm->pos.position_type & CAM_DEV_POS_PDPTR) != 0 &&
 	    cdm->pos.cookie.pdrv != NULL)
 		resume_pdrv = (struct periph_driver **)cdm->pos.cookie.pdrv;
 
 	traversed = xptpdrvtraverse(resume_pdrv, xptplistpdrvfunc, cdm);
 
 	/* A complete walk means there is nothing further to report. */
 	if (traversed == 1)
 		cdm->status = CAM_DEV_MATCH_LAST;
 
 	return (traversed);
 }
 
 /*
  * Call tr_func(bus, arg) for each bus on the global bus list, beginning at
  * start_bus, or at the head of the list when start_bus is NULL.
  *
  * When starting from the head, a reference is taken on the first bus under
  * the bus-list lock.  A start_bus supplied by the caller is used as is and
  * is released like any other visited bus (presumably the caller already
  * holds a reference on it -- verify against callers).  Before the current
  * bus is released, a reference is taken on its successor so the successor
  * is still referenced when the walk moves on to it.
  *
  * Returns 1 if the list was empty or every callback returned non-zero;
  * returns 0 as soon as a callback returns 0.
  */
 static int
 xptbustraverse(struct cam_eb *start_bus, xpt_busfunc_t *tr_func, void *arg)
 {
 	struct cam_eb *cur, *following;
 	int rv;
 
 	rv = 1;
 	cur = start_bus;
 	if (cur == NULL) {
 		xpt_lock_buses();
 		cur = TAILQ_FIRST(&xsoftc.xpt_busses);
 		if (cur != NULL)
 			cur->refcount++;
 		xpt_unlock_buses();
 	}
 
 	while (cur != NULL) {
 		rv = tr_func(cur, arg);
 		if (rv == 0) {
 			/* Callback asked to stop; drop our reference. */
 			xpt_release_bus(cur);
 			break;
 		}
 
 		/* Pin the successor before letting go of this bus. */
 		xpt_lock_buses();
 		following = TAILQ_NEXT(cur, links);
 		if (following != NULL)
 			following->refcount++;
 		xpt_unlock_buses();
 
 		xpt_release_bus(cur);
 		cur = following;
 	}
 
 	return (rv);
 }
 
 /*
  * Call tr_func(target, arg) for each target on the given bus, beginning at
  * start_target, or at the first target on the bus when start_target is
  * NULL.
  *
  * List linkage and reference counts are manipulated under bus->eb_mtx.
  * A reference is taken on the successor before the current target is
  * released.  A caller-supplied start_target is not referenced here but is
  * released after its callback (presumably the caller holds a reference --
  * verify against callers).
  *
  * Returns 1 if there were no targets or every callback returned non-zero;
  * returns 0 as soon as a callback returns 0.
  */
 static int
 xpttargettraverse(struct cam_eb *bus, struct cam_et *start_target,
 		  xpt_targetfunc_t *tr_func, void *arg)
 {
 	struct cam_et *cur, *following;
 	int rv;
 
 	rv = 1;
 	cur = start_target;
 	if (cur == NULL) {
 		mtx_lock(&bus->eb_mtx);
 		cur = TAILQ_FIRST(&bus->et_entries);
 		if (cur != NULL)
 			cur->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 	}
 
 	while (cur != NULL) {
 		rv = tr_func(cur, arg);
 		if (rv == 0) {
 			xpt_release_target(cur);
 			break;
 		}
 
 		/* Pin the successor before letting go of this target. */
 		mtx_lock(&bus->eb_mtx);
 		following = TAILQ_NEXT(cur, links);
 		if (following != NULL)
 			following->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 
 		xpt_release_target(cur);
 		cur = following;
 	}
 
 	return (rv);
 }
 
 /*
  * Call tr_func(device, arg) for each device on the given target, beginning
  * at start_device, or at the first device of the target when start_device
  * is NULL.
  *
  * The callback runs with the device's device_mtx held.  List linkage and
  * reference counts are manipulated under the owning bus's eb_mtx.  A
  * reference is taken on the successor before the current device is
  * released.  A caller-supplied start_device is not referenced here but is
  * released after its callback (presumably the caller holds a reference --
  * verify against callers).
  *
  * Returns 1 if there were no devices or every callback returned non-zero;
  * returns 0 as soon as a callback returns 0.
  */
 static int
 xptdevicetraverse(struct cam_et *target, struct cam_ed *start_device,
 		  xpt_devicefunc_t *tr_func, void *arg)
 {
 	struct cam_eb *bus;
 	struct cam_ed *cur, *following;
 	int rv;
 
 	rv = 1;
 	bus = target->bus;
 	cur = start_device;
 	if (cur == NULL) {
 		mtx_lock(&bus->eb_mtx);
 		cur = TAILQ_FIRST(&target->ed_entries);
 		if (cur != NULL)
 			cur->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 	}
 
 	while (cur != NULL) {
 		mtx_lock(&cur->device_mtx);
 		rv = tr_func(cur, arg);
 		mtx_unlock(&cur->device_mtx);
 		if (rv == 0) {
 			xpt_release_device(cur);
 			break;
 		}
 
 		/* Pin the successor before letting go of this device. */
 		mtx_lock(&bus->eb_mtx);
 		following = TAILQ_NEXT(cur, links);
 		if (following != NULL)
 			following->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 
 		xpt_release_device(cur);
 		cur = following;
 	}
 
 	return (rv);
 }
 
 /*
  * Call tr_func(periph, arg) for each peripheral attached to the given
  * device, beginning at start_periph, or at the first usable peripheral on
  * the device when start_periph is NULL.
  *
  * Peripherals flagged CAM_PERIPH_FREE are skipped.  The list is examined
  * and reference counts are bumped with both the bus-list lock and the
  * owning bus's eb_mtx held.  A reference is taken on the successor before
  * the current peripheral is released with cam_periph_release_locked().
  * A caller-supplied start_periph is not referenced here but is released
  * after its callback (presumably the caller holds a reference -- verify).
  *
  * Returns 1 if there was nothing to visit or every callback returned
  * non-zero; returns 0 as soon as a callback returns 0.
  */
 static int
 xptperiphtraverse(struct cam_ed *device, struct cam_periph *start_periph,
 		  xpt_periphfunc_t *tr_func, void *arg)
 {
 	struct cam_eb *bus;
 	struct cam_periph *cur, *following;
 	int rv;
 
 	rv = 1;
 	bus = device->target->bus;
 	cur = start_periph;
 	if (cur == NULL) {
 		xpt_lock_buses();
 		mtx_lock(&bus->eb_mtx);
 		/* Find the first peripheral that is not being freed. */
 		for (cur = SLIST_FIRST(&device->periphs);
 		    cur != NULL && (cur->flags & CAM_PERIPH_FREE) != 0;
 		    cur = SLIST_NEXT(cur, periph_links))
 			continue;
 		if (cur != NULL)
 			cur->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 		xpt_unlock_buses();
 	}
 
 	while (cur != NULL) {
 		rv = tr_func(cur, arg);
 		if (rv == 0) {
 			cam_periph_release_locked(cur);
 			break;
 		}
 
 		xpt_lock_buses();
 		mtx_lock(&bus->eb_mtx);
 		/* Find the next peripheral that is not being freed. */
 		for (following = SLIST_NEXT(cur, periph_links);
 		    following != NULL &&
 		    (following->flags & CAM_PERIPH_FREE) != 0;
 		    following = SLIST_NEXT(following, periph_links))
 			continue;
 		if (following != NULL)
 			following->refcount++;
 		mtx_unlock(&bus->eb_mtx);
 		xpt_unlock_buses();
 
 		cam_periph_release_locked(cur);
 		cur = following;
 	}
 
 	return (rv);
 }
 
 /*
  * Call tr_func(pdrv, arg) for each entry of the NULL-terminated peripheral
  * driver array, beginning at start_pdrv, or at periph_drivers when
  * start_pdrv is NULL.
  *
  * No lock or reference is taken while stepping through the array, unlike
  * the other traversal routines in this file.  Should the driver array ever
  * become something that can change while it is being walked, this routine
  * would need the same protection the other traversals use.
  *
  * Returns 1 if the array was empty or every callback returned non-zero;
  * returns 0 as soon as a callback returns 0.
  */
 static int
 xptpdrvtraverse(struct periph_driver **start_pdrv,
 		xpt_pdrvfunc_t *tr_func, void *arg)
 {
 	struct periph_driver **cursor;
 	int rv;
 
 	rv = 1;
 	cursor = start_pdrv;
 	if (cursor == NULL)
 		cursor = periph_drivers;
 
 	while (*cursor != NULL) {
 		rv = tr_func(cursor, arg);
 		if (rv == 0)
 			break;
 		cursor++;
 	}
 
 	return (rv);
 }
 
 /*
  * Call tr_func(periph, arg) for each peripheral instance of one peripheral
  * driver, beginning at start_periph, or at the first usable unit of *pdrv
  * when start_periph is NULL.
  *
  * Peripherals flagged CAM_PERIPH_FREE are skipped.  The unit list is
  * examined and reference counts are bumped under the bus-list lock.  Each
  * callback runs between cam_periph_lock() and cam_periph_unlock() on the
  * peripheral being visited.  A reference is taken on the successor before
  * the current peripheral is released with cam_periph_release().  A
  * caller-supplied start_periph is not referenced here but is released
  * after its callback (presumably the caller holds a reference -- verify).
  *
  * Returns 1 if there was nothing to visit or every callback returned
  * non-zero; returns 0 as soon as a callback returns 0.
  */
 static int
 xptpdperiphtraverse(struct periph_driver **pdrv,
 		    struct cam_periph *start_periph,
 		    xpt_periphfunc_t *tr_func, void *arg)
 {
 	struct cam_periph *cur, *following;
 	int rv;
 
 	rv = 1;
 	cur = start_periph;
 	if (cur == NULL) {
 		xpt_lock_buses();
 		/* Find the first unit that is not being freed. */
 		for (cur = TAILQ_FIRST(&(*pdrv)->units);
 		    cur != NULL && (cur->flags & CAM_PERIPH_FREE) != 0;
 		    cur = TAILQ_NEXT(cur, unit_links))
 			continue;
 		if (cur != NULL)
 			cur->refcount++;
 		xpt_unlock_buses();
 	}
 
 	while (cur != NULL) {
 		cam_periph_lock(cur);
 		rv = tr_func(cur, arg);
 		cam_periph_unlock(cur);
 		if (rv == 0) {
 			cam_periph_release(cur);
 			break;
 		}
 
 		xpt_lock_buses();
 		/* Find the next unit that is not being freed. */
 		for (following = TAILQ_NEXT(cur, unit_links);
 		    following != NULL &&
 		    (following->flags & CAM_PERIPH_FREE) != 0;
 		    following = TAILQ_NEXT(following, unit_links))
 			continue;
 		if (following != NULL)
 			following->refcount++;
 		xpt_unlock_buses();
 
 		cam_periph_release(cur);
 		cur = following;
 	}
 
 	return (rv);
 }
 
 /*
  * Bus-level step of the generic EDT walk.  arg is the
  * struct xpt_traverse_config describing the walk.
  *
  * If the requested depth is XPT_DEPTH_BUS, the user's function is invoked
  * on this bus and its result returned.  Otherwise the walk descends into
  * the bus's targets and the result of that traversal is returned.
  */
 static int
 xptdefbusfunc(struct cam_eb *bus, void *arg)
 {
 	struct xpt_traverse_config *cfg = arg;
 
 	if (cfg->depth != XPT_DEPTH_BUS)
 		return (xpttargettraverse(bus, NULL, xptdeftargetfunc, arg));
 
 	return (((xpt_busfunc_t *)cfg->tr_func)(bus, cfg->tr_arg));
 }
 
 /*
  * Target-level step of the generic EDT walk.  arg is the
  * struct xpt_traverse_config describing the walk.
  *
  * If the requested depth is XPT_DEPTH_TARGET, the user's function is
  * invoked on this target and its result returned.  Otherwise the walk
  * descends into the target's devices and the result of that traversal is
  * returned.
  */
 static int
 xptdeftargetfunc(struct cam_et *target, void *arg)
 {
 	struct xpt_traverse_config *cfg = arg;
 
 	if (cfg->depth != XPT_DEPTH_TARGET)
 		return (xptdevicetraverse(target, NULL, xptdefdevicefunc, arg));
 
 	return (((xpt_targetfunc_t *)cfg->tr_func)(target, cfg->tr_arg));
 }
 
 /*
  * Device-level step of the generic EDT walk.  arg is the
  * struct xpt_traverse_config describing the walk.
  *
  * If the requested depth is XPT_DEPTH_DEVICE, the user's function is
  * invoked on this device and its result returned.  Otherwise the walk
  * descends into the device's peripherals and the result of that traversal
  * is returned.
  */
 static int
 xptdefdevicefunc(struct cam_ed *device, void *arg)
 {
 	struct xpt_traverse_config *cfg = arg;
 
 	if (cfg->depth != XPT_DEPTH_DEVICE)
 		return (xptperiphtraverse(device, NULL, xptdefperiphfunc, arg));
 
 	return (((xpt_devicefunc_t *)cfg->tr_func)(device, cfg->tr_arg));
 }
 
 static int
 xptdefperiphfunc(struct cam_periph *periph, void *arg)
 {
 	struct xpt_traverse_config *tr_config;
 	xpt_periphfunc_t *tr_func;
 
 	tr_config = (struct xpt_traverse_config *)arg;
 
 	tr_func = (xpt_periphfunc_t *)tr_config->tr_func;
 
 	/*
 	 * Unlike the other default functions, we don't check for depth
 	 * here.  The peripheral driver level is the last level in the EDT,
 	 * so if we're here, we should execute the function in question.
 	 */
 	return(tr_func(periph, tr_config->tr_arg));
 }
 
 /*
  * Invoke tr_func(bus, arg) on every bus in the EDT.
  *
  * Builds a traversal configuration limited to bus depth and hands it to
  * the bus traversal starting from the head of the bus list.  Returns the
  * traversal result (0 if a callback stopped the walk, 1 otherwise).
  */
 static int
 xpt_for_all_busses(xpt_busfunc_t *tr_func, void *arg)
 {
 	struct xpt_traverse_config cfg;
 
 	cfg.tr_arg = arg;
 	cfg.tr_func = tr_func;
 	cfg.depth = XPT_DEPTH_BUS;
 
 	return (xptbustraverse(NULL, xptdefbusfunc, &cfg));
 }
 
 /*
  * Invoke tr_func(device, arg) on every device in the EDT.
  *
  * Builds a traversal configuration that descends to device depth and hands
  * it to the bus traversal starting from the head of the bus list.  Returns
  * the traversal result (0 if a callback stopped the walk, 1 otherwise).
  */
 static int
 xpt_for_all_devices(xpt_devicefunc_t *tr_func, void *arg)
 {
 	struct xpt_traverse_config cfg;
 
 	cfg.tr_arg = arg;
 	cfg.tr_func = tr_func;
 	cfg.depth = XPT_DEPTH_DEVICE;
 
 	return (xptbustraverse(NULL, xptdefbusfunc, &cfg));
 }
 
 /*
  * Per-device callback used to report an already-existing device to a newly
  * registered async handler.  arg is the struct ccb_setasync that carries
  * the handler and its argument.
  *
  * For every configured device, a temporary path is compiled, the device's
  * data is fetched with an XPT_GDEV_TYPE CCB, and the handler is called
  * with AC_FOUND_DEVICE, the path and the filled-in CCB.  Always returns 1
  * so the enclosing traversal continues.
  */
 static int
 xptsetasyncfunc(struct cam_ed *device, void *arg)
 {
 	struct cam_path path;
 	struct ccb_getdev cgd;
 	struct ccb_setasync *csa = (struct ccb_setasync *)arg;
 
 	/*
 	 * Don't report unconfigured devices (Wildcard devs,
 	 * devices only for target mode, device instances
 	 * that have been invalidated but are waiting for
 	 * their last reference count to be released).
 	 */
 	if ((device->flags & CAM_DEV_UNCONFIGURED) != 0)
 		return (1);
 
 	/*
 	 * The CCB lives on the stack.  Clear it so that any field the
 	 * XPT_GDEV_TYPE handler leaves untouched (for instance the serial
 	 * number buffer when the device has none) reaches the callback as
 	 * zeroes rather than stale stack contents.
 	 */
 	bzero(&cgd, sizeof(cgd));
 
 	xpt_compile_path(&path,
 			 NULL,
 			 device->target->bus->path_id,
 			 device->target->target_id,
 			 device->lun_id);
 	xpt_setup_ccb(&cgd.ccb_h, &path, CAM_PRIORITY_NORMAL);
 	cgd.ccb_h.func_code = XPT_GDEV_TYPE;
 	xpt_action((union ccb *)&cgd);
 	csa->callback(csa->callback_arg,
 			    AC_FOUND_DEVICE,
 			    &path, &cgd);
 	xpt_release_path(&path);
 
 	return(1);
 }
 
 /*
  * Per-bus callback used to report an already-registered bus to a newly
  * registered async handler.  arg is the struct ccb_setasync that carries
  * the handler and its argument.
  *
  * A wildcard path for the bus is compiled and locked, a path inquiry is
  * performed on it, and the handler is called with AC_PATH_REGISTERED, the
  * path and the inquiry result.  Always returns 1 so the enclosing
  * traversal continues.
  */
 static int
 xptsetasyncbusfunc(struct cam_eb *bus, void *arg)
 {
 	struct ccb_setasync *csa;
 	struct ccb_pathinq cpi;
 	struct cam_path path;
 
 	csa = (struct ccb_setasync *)arg;
 
 	xpt_compile_path(&path, /*periph*/NULL, bus->path_id,
 	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 
 	/* The inquiry and the callback both run with the path locked. */
 	xpt_path_lock(&path);
 	xpt_path_inq(&cpi, &path);
 	csa->callback(csa->callback_arg, AC_PATH_REGISTERED, &path, &cpi);
 	xpt_path_unlock(&path);
 
 	xpt_release_path(&path);
 
 	return (1);
 }
 
 void
 xpt_action(union ccb *start_ccb)
 {
 
 	CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_action: func %#x %s\n", start_ccb->ccb_h.func_code,
 		xpt_action_name(start_ccb->ccb_h.func_code)));
 
 	start_ccb->ccb_h.status = CAM_REQ_INPROG;
 	(*(start_ccb->ccb_h.path->bus->xport->ops->action))(start_ccb);
 }
 
 /*
  * Default handling of a CCB, selected by its function code.
  *
  * Broadly, the function codes fall into three groups:
  *  - I/O style requests (XPT_SCSI_IO through XPT_SMP_IO) are inserted into
  *    the device's CCB queue and the device queue is run if the device was
  *    newly scheduled;
  *  - requests listed above the call_sim label are handed to the SIM's
  *    sim_action routine, taking the SIM mutex if there is one and it is
  *    not already owned;
  *  - the remaining requests are answered directly from XPT data
  *    structures.
  * Unsupported function codes are reported with CAM_PROVIDE_FAIL.
  */
 void
 xpt_action_default(union ccb *start_ccb)
 {
 	struct cam_path *path;
 	struct cam_sim *sim;
 	struct mtx *mtx;
 
 	path = start_ccb->ccb_h.path;
 	CAM_DEBUG(path, CAM_DEBUG_TRACE,
 	    ("xpt_action_default: func %#x %s\n", start_ccb->ccb_h.func_code,
 		xpt_action_name(start_ccb->ccb_h.func_code)));
 
 	switch (start_ccb->ccb_h.func_code) {
 	case XPT_SCSI_IO:
 	{
 		struct cam_ed *device;
 
 		/*
 		 * For the sake of compatibility with SCSI-1
 		 * devices that may not understand the identify
 		 * message, we include lun information in the
 		 * second byte of all commands.  SCSI-1 specifies
 		 * that luns are a 3 bit value and reserves only 3
 		 * bits for lun information in the CDB.  Later
 		 * revisions of the SCSI spec allow for more than 8
 		 * luns, but have deprecated lun information in the
 		 * CDB.  So, if the lun won't fit, we must omit.
 		 *
 		 * Also be aware that during initial probing for devices,
 		 * the inquiry information is unknown but initialized to 0.
 		 * This means that this code will be exercised while probing
 		 * devices with an ANSI revision greater than 2.
 		 */
 		device = path->device;
 		if (device->protocol_version <= SCSI_REV_2
 		 && start_ccb->ccb_h.target_lun < 8
 		 && (start_ccb->ccb_h.flags & CAM_CDB_POINTER) == 0) {
 
 			start_ccb->csio.cdb_io.cdb_bytes[1] |=
 			    start_ccb->ccb_h.target_lun << 5;
 		}
 		start_ccb->csio.scsi_status = SCSI_STATUS_OK;
 	}
 	/* FALLTHROUGH */
 	case XPT_TARGET_IO:
 	case XPT_CONT_TARGET_IO:
 		start_ccb->csio.sense_resid = 0;
 		start_ccb->csio.resid = 0;
 		/* FALLTHROUGH */
 	case XPT_ATA_IO:
 		/*
 		 * This point is also reached by fallthrough from the SCSI
 		 * cases above, hence the explicit function code test.
 		 */
 		if (start_ccb->ccb_h.func_code == XPT_ATA_IO)
 			start_ccb->ataio.resid = 0;
 		/* FALLTHROUGH */
 	case XPT_NVME_IO:
 		/* FALLTHROUGH */
 	case XPT_NVME_ADMIN:
 		/* FALLTHROUGH */
 	case XPT_MMC_IO:
 		/* XXX just like nvme_io? */
 	case XPT_RESET_DEV:
 	case XPT_ENG_EXEC:
 	case XPT_SMP_IO:
 	{
 		struct cam_devq *devq;
 
 		/*
 		 * Queue the CCB on its device and, if the device was newly
 		 * scheduled on the send queue, run the queue.  All of this
 		 * happens under the device queue's send mutex.
 		 */
 		devq = path->bus->sim->devq;
 		mtx_lock(&devq->send_mtx);
 		cam_ccbq_insert_ccb(&path->device->ccbq, start_ccb);
 		if (xpt_schedule_devq(devq, path->device) != 0)
 			xpt_run_devq(devq);
 		mtx_unlock(&devq->send_mtx);
 		break;
 	}
 	case XPT_CALC_GEOMETRY:
 		/* Filter out garbage */
 		if (start_ccb->ccg.block_size == 0
 		 || start_ccb->ccg.volume_size == 0) {
 			start_ccb->ccg.cylinders = 0;
 			start_ccb->ccg.heads = 0;
 			start_ccb->ccg.secs_per_track = 0;
 			start_ccb->ccb_h.status = CAM_REQ_CMP;
 			break;
 		}
 #if defined(__sparc64__)
 		/*
 		 * For sparc64, we may need adjust the geometry of large
 		 * disks in order to fit the limitations of the 16-bit
 		 * fields of the VTOC8 disk label.
 		 */
 		if (scsi_da_bios_params(&start_ccb->ccg) != 0) {
 			start_ccb->ccb_h.status = CAM_REQ_CMP;
 			break;
 		}
 #endif
 		goto call_sim;
 	case XPT_ABORT:
 	{
 		union ccb* abort_ccb;
 
 		abort_ccb = start_ccb->cab.abort_ccb;
 		if (XPT_FC_IS_DEV_QUEUED(abort_ccb)) {
 			struct cam_ed *device;
 			struct cam_devq *devq;
 
 			device = abort_ccb->ccb_h.path->device;
 			devq = device->sim->devq;
 
 			/*
 			 * A positive queue index means the CCB is still
 			 * sitting in the device's CCB queue; pull it out and
 			 * complete it as aborted.
 			 */
 			mtx_lock(&devq->send_mtx);
 			if (abort_ccb->ccb_h.pinfo.index > 0) {
 				cam_ccbq_remove_ccb(&device->ccbq, abort_ccb);
 				abort_ccb->ccb_h.status =
 				    CAM_REQ_ABORTED|CAM_DEV_QFRZN;
 				xpt_freeze_devq_device(device, 1);
 				mtx_unlock(&devq->send_mtx);
 				xpt_done(abort_ccb);
 				start_ccb->ccb_h.status = CAM_REQ_CMP;
 				break;
 			}
 			mtx_unlock(&devq->send_mtx);
 
 			if (abort_ccb->ccb_h.pinfo.index == CAM_UNQUEUED_INDEX
 			 && (abort_ccb->ccb_h.status & CAM_SIM_QUEUED) == 0) {
 				/*
 				 * We've caught this ccb en route to
 				 * the SIM.  Flag it for abort and the
 				 * SIM will do so just before starting
 				 * real work on the CCB.
 				 */
 				abort_ccb->ccb_h.status =
 				    CAM_REQ_ABORTED|CAM_DEV_QFRZN;
 				xpt_freeze_devq(abort_ccb->ccb_h.path, 1);
 				start_ccb->ccb_h.status = CAM_REQ_CMP;
 				break;
 			}
 		}
 		if (XPT_FC_IS_QUEUED(abort_ccb)
 		 && (abort_ccb->ccb_h.pinfo.index == CAM_DONEQ_INDEX)) {
 			/*
 			 * It's already completed but waiting
 			 * for our SWI to get to it.
 			 */
 			start_ccb->ccb_h.status = CAM_UA_ABORT;
 			break;
 		}
 		/*
 		 * If we weren't able to take care of the abort request
 		 * in the XPT, pass the request down to the SIM for processing.
 		 */
 	}
 	/* FALLTHROUGH */
 	case XPT_ACCEPT_TARGET_IO:
 	case XPT_EN_LUN:
 	case XPT_IMMED_NOTIFY:
 	case XPT_NOTIFY_ACK:
 	case XPT_RESET_BUS:
 	case XPT_IMMEDIATE_NOTIFY:
 	case XPT_NOTIFY_ACKNOWLEDGE:
 	case XPT_GET_SIM_KNOB_OLD:
 	case XPT_GET_SIM_KNOB:
 	case XPT_SET_SIM_KNOB:
 	case XPT_GET_TRAN_SETTINGS:
 	case XPT_SET_TRAN_SETTINGS:
 	case XPT_PATH_INQ:
 call_sim:
 		/*
 		 * Take the SIM mutex only if the SIM has one and it is not
 		 * already owned here.  mtx is left NULL otherwise, so that
 		 * only a mutex acquired here is dropped afterwards.
 		 */
 		sim = path->bus->sim;
 		mtx = sim->mtx;
 		if (mtx && !mtx_owned(mtx))
 			mtx_lock(mtx);
 		else
 			mtx = NULL;
 
 		CAM_DEBUG(path, CAM_DEBUG_TRACE,
 		    ("Calling sim->sim_action(): func=%#x\n", start_ccb->ccb_h.func_code));
 		(*(sim->sim_action))(sim, start_ccb);
 		CAM_DEBUG(path, CAM_DEBUG_TRACE,
 		    ("sim->sim_action returned: status=%#x\n", start_ccb->ccb_h.status));
 		if (mtx)
 			mtx_unlock(mtx);
 		break;
 	case XPT_PATH_STATS:
 		start_ccb->cpis.last_reset = path->bus->last_reset;
 		start_ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	case XPT_GDEV_TYPE:
 	{
 		struct cam_ed *dev;
 
 		dev = path->device;
 		if ((dev->flags & CAM_DEV_UNCONFIGURED) != 0) {
 			start_ccb->ccb_h.status = CAM_DEV_NOT_THERE;
 		} else {
 			struct ccb_getdev *cgd;
 
 			/* Copy the device's identity data into the CCB. */
 			cgd = &start_ccb->cgd;
 			cgd->protocol = dev->protocol;
 			cgd->inq_data = dev->inq_data;
 			cgd->ident_data = dev->ident_data;
 			cgd->inq_flags = dev->inq_flags;
 			cgd->ccb_h.status = CAM_REQ_CMP;
 			cgd->serial_num_len = dev->serial_num_len;
 			if ((dev->serial_num_len > 0)
 			 && (dev->serial_num != NULL))
 				bcopy(dev->serial_num, cgd->serial_num,
 				      dev->serial_num_len);
 		}
 		break;
 	}
 	case XPT_GDEV_STATS:
 	{
 		struct ccb_getdevstats *cgds = &start_ccb->cgds;
 		struct cam_ed *dev = path->device;
 		struct cam_eb *bus = path->bus;
 		struct cam_et *tar = path->target;
 		struct cam_devq *devq = bus->sim->devq;
 
 		/* Snapshot the queue counters under the send mutex. */
 		mtx_lock(&devq->send_mtx);
 		cgds->dev_openings = dev->ccbq.dev_openings;
 		cgds->dev_active = dev->ccbq.dev_active;
 		cgds->allocated = dev->ccbq.allocated;
 		cgds->queued = cam_ccbq_pending_ccb_count(&dev->ccbq);
 		cgds->held = cgds->allocated - cgds->dev_active - cgds->queued;
 		cgds->last_reset = tar->last_reset;
 		cgds->maxtags = dev->maxtags;
 		cgds->mintags = dev->mintags;
 		/* Report whichever of the target and bus resets is later. */
 		if (timevalcmp(&tar->last_reset, &bus->last_reset, <))
 			cgds->last_reset = bus->last_reset;
 		mtx_unlock(&devq->send_mtx);
 		cgds->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_GDEVLIST:
 	{
 		struct cam_periph	*nperiph;
 		struct periph_list	*periph_head;
 		struct ccb_getdevlist	*cgdl;
 		u_int			i;
 		struct cam_ed		*device;
 		int			found;
 
 
 		found = 0;
 
 		/*
 		 * Don't want anyone mucking with our data.
 		 */
 		device = path->device;
 		periph_head = &device->periphs;
 		cgdl = &start_ccb->cgdl;
 
 		/*
 		 * Check and see if the list has changed since the user
 		 * last requested a list member.  If so, tell them that the
 		 * list has changed, and therefore they need to start over
 		 * from the beginning.
 		 *
 		 * NOTE(review): only cgdl->status is set on this path;
 		 * ccb_h.status is not updated before leaving the switch.
 		 */
 		if ((cgdl->index != 0) &&
 		    (cgdl->generation != device->generation)) {
 			cgdl->status = CAM_GDEVLIST_LIST_CHANGED;
 			break;
 		}
 
 		/*
 		 * Traverse the list of peripherals and attempt to find
 		 * the requested peripheral.  The loop runs one step past
 		 * the match, so nperiph ends up pointing at the entry after
 		 * the requested one (or NULL if it was the last).
 		 */
 		for (nperiph = SLIST_FIRST(periph_head), i = 0;
 		     (nperiph != NULL) && (i <= cgdl->index);
 		     nperiph = SLIST_NEXT(nperiph, periph_links), i++) {
 			if (i == cgdl->index) {
 				strlcpy(cgdl->periph_name,
 					nperiph->periph_name,
 					sizeof(cgdl->periph_name));
 				cgdl->unit_number = nperiph->unit_number;
 				found = 1;
 			}
 		}
 		/*
 		 * NOTE(review): as above, ccb_h.status is not updated on
 		 * this error path.
 		 */
 		if (found == 0) {
 			cgdl->status = CAM_GDEVLIST_ERROR;
 			break;
 		}
 
 		if (nperiph == NULL)
 			cgdl->status = CAM_GDEVLIST_LAST_DEVICE;
 		else
 			cgdl->status = CAM_GDEVLIST_MORE_DEVS;
 
 		cgdl->index++;
 		cgdl->generation = device->generation;
 
 		cgdl->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_DEV_MATCH:
 	{
 		dev_pos_type position_type;
 		struct ccb_dev_match *cdm;
 
 		cdm = &start_ccb->cdm;
 
 		/*
 		 * There are two ways of getting at information in the EDT.
 		 * The first way is via the primary EDT tree.  It starts
 		 * with a list of buses, then a list of targets on a bus,
 		 * then devices/luns on a target, and then peripherals on a
 		 * device/lun.  The "other" way is by the peripheral driver
 		 * lists.  The peripheral driver lists are organized by
 		 * peripheral driver.  (obviously)  So it makes sense to
 		 * use the peripheral driver list if the user is looking
 		 * for something like "da1", or all "da" devices.  If the
 		 * user is looking for something on a particular bus/target
 		 * or lun, it's generally better to go through the EDT tree.
 		 */
 
 		if (cdm->pos.position_type != CAM_DEV_POS_NONE)
 			position_type = cdm->pos.position_type;
 		else {
 			u_int i;
 
 			position_type = CAM_DEV_POS_NONE;
 
 			/* Any bus or device pattern selects the EDT walk. */
 			for (i = 0; i < cdm->num_patterns; i++) {
 				if ((cdm->patterns[i].type == DEV_MATCH_BUS)
 				 ||(cdm->patterns[i].type == DEV_MATCH_DEVICE)){
 					position_type = CAM_DEV_POS_EDT;
 					break;
 				}
 			}
 
 			/*
 			 * No patterns at all also selects the EDT walk;
 			 * otherwise fall back to the peripheral driver lists.
 			 */
 			if (cdm->num_patterns == 0)
 				position_type = CAM_DEV_POS_EDT;
 			else if (position_type == CAM_DEV_POS_NONE)
 				position_type = CAM_DEV_POS_PDRV;
 		}
 
 		switch(position_type & CAM_DEV_POS_TYPEMASK) {
 		case CAM_DEV_POS_EDT:
 			xptedtmatch(cdm);
 			break;
 		case CAM_DEV_POS_PDRV:
 			xptperiphlistmatch(cdm);
 			break;
 		default:
 			cdm->status = CAM_DEV_MATCH_ERROR;
 			break;
 		}
 
 		if (cdm->status == CAM_DEV_MATCH_ERROR)
 			start_ccb->ccb_h.status = CAM_REQ_CMP_ERR;
 		else
 			start_ccb->ccb_h.status = CAM_REQ_CMP;
 
 		break;
 	}
 	case XPT_SASYNC_CB:
 	{
 		struct ccb_setasync *csa;
 		struct async_node *cur_entry;
 		struct async_list *async_head;
 		u_int32_t added;
 
 		csa = &start_ccb->csa;
 		added = csa->event_enable;
 		async_head = &path->device->asyncs;
 
 		/*
 		 * If there is already an entry for us, simply
 		 * update it.
 		 */
 		cur_entry = SLIST_FIRST(async_head);
 		while (cur_entry != NULL) {
 			if ((cur_entry->callback_arg == csa->callback_arg)
 			 && (cur_entry->callback == csa->callback))
 				break;
 			cur_entry = SLIST_NEXT(cur_entry, links);
 		}
 
 		if (cur_entry != NULL) {
 		 	/*
 			 * If the request has no flags set,
 			 * remove the entry.
 			 */
 			/* Keep only the events that were not enabled before. */
 			added &= ~cur_entry->event_enable;
 			if (csa->event_enable == 0) {
 				SLIST_REMOVE(async_head, cur_entry,
 					     async_node, links);
 				xpt_release_device(path->device);
 				free(cur_entry, M_CAMXPT);
 			} else {
 				cur_entry->event_enable = csa->event_enable;
 			}
 			/* Hand the newly enabled events back in the CCB. */
 			csa->event_enable = added;
 		} else {
 			cur_entry = malloc(sizeof(*cur_entry), M_CAMXPT,
 					   M_NOWAIT);
 			if (cur_entry == NULL) {
 				csa->ccb_h.status = CAM_RESRC_UNAVAIL;
 				break;
 			}
 			cur_entry->event_enable = csa->event_enable;
 			/* Record whether the SIM mutex is held right now. */
 			cur_entry->event_lock = (path->bus->sim->mtx &&
 			    mtx_owned(path->bus->sim->mtx)) ? 1 : 0;
 			cur_entry->callback_arg = csa->callback_arg;
 			cur_entry->callback = csa->callback;
 			SLIST_INSERT_HEAD(async_head, cur_entry, links);
 			xpt_acquire_device(path->device);
 		}
 		start_ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_REL_SIMQ:
 	{
 		struct ccb_relsim *crs;
 		struct cam_ed *dev;
 
 		crs = &start_ccb->crs;
 		dev = path->device;
 		if (dev == NULL) {
 
 			crs->ccb_h.status = CAM_DEV_NOT_THERE;
 			break;
 		}
 
 		if ((crs->release_flags & RELSIM_ADJUST_OPENINGS) != 0) {
 
 			/* Don't ever go below one opening */
 			if (crs->openings > 0) {
 				xpt_dev_ccbq_resize(path, crs->openings);
 				if (bootverbose) {
 					xpt_print(path,
 					    "number of openings is now %d\n",
 					    crs->openings);
 				}
 			}
 		}
 
 		/*
 		 * Each release condition below either sets CAM_DEV_QFREEZE
 		 * in the CCB flags (the release is deferred) or clears it
 		 * (the queue is released right away, after the mutex is
 		 * dropped).
 		 */
 		mtx_lock(&dev->sim->devq->send_mtx);
 		if ((crs->release_flags & RELSIM_RELEASE_AFTER_TIMEOUT) != 0) {
 
 			if ((dev->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0) {
 
 				/*
 				 * Just extend the old timeout and decrement
 				 * the freeze count so that a single timeout
 				 * is sufficient for releasing the queue.
 				 */
 				start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
 				callout_stop(&dev->callout);
 			} else {
 
 				start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
 			}
 
 			callout_reset_sbt(&dev->callout,
 			    SBT_1MS * crs->release_timeout, 0,
 			    xpt_release_devq_timeout, dev, 0);
 
 			dev->flags |= CAM_DEV_REL_TIMEOUT_PENDING;
 
 		}
 
 		if ((crs->release_flags & RELSIM_RELEASE_AFTER_CMDCMPLT) != 0) {
 
 			if ((dev->flags & CAM_DEV_REL_ON_COMPLETE) != 0) {
 				/*
 				 * Decrement the freeze count so that a single
 				 * completion is still sufficient to unfreeze
 				 * the queue.
 				 */
 				start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
 			} else {
 
 				dev->flags |= CAM_DEV_REL_ON_COMPLETE;
 				start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
 			}
 		}
 
 		if ((crs->release_flags & RELSIM_RELEASE_AFTER_QEMPTY) != 0) {
 
 			if ((dev->flags & CAM_DEV_REL_ON_QUEUE_EMPTY) != 0
 			 || (dev->ccbq.dev_active == 0)) {
 
 				start_ccb->ccb_h.flags &= ~CAM_DEV_QFREEZE;
 			} else {
 
 				dev->flags |= CAM_DEV_REL_ON_QUEUE_EMPTY;
 				start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
 			}
 		}
 		mtx_unlock(&dev->sim->devq->send_mtx);
 
 		if ((start_ccb->ccb_h.flags & CAM_DEV_QFREEZE) == 0)
 			xpt_release_devq(path, /*count*/1, /*run_queue*/TRUE);
 		start_ccb->crs.qfrozen_cnt = dev->ccbq.queue.qfrozen_cnt;
 		start_ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_DEBUG: {
 		struct cam_path *oldpath;
 
 		/* Check that all request bits are supported. */
 		if (start_ccb->cdbg.flags & ~(CAM_DEBUG_COMPILE)) {
 			start_ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
 			break;
 		}
 
 		/*
 		 * Turn debugging off and drop the previous debug path before
 		 * possibly installing a new one.
 		 */
 		cam_dflags = CAM_DEBUG_NONE;
 		if (cam_dpath != NULL) {
 			oldpath = cam_dpath;
 			cam_dpath = NULL;
 			xpt_free_path(oldpath);
 		}
 		if (start_ccb->cdbg.flags != CAM_DEBUG_NONE) {
 			if (xpt_create_path(&cam_dpath, NULL,
 					    start_ccb->ccb_h.path_id,
 					    start_ccb->ccb_h.target_id,
 					    start_ccb->ccb_h.target_lun) !=
 					    CAM_REQ_CMP) {
 				start_ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
 			} else {
 				cam_dflags = start_ccb->cdbg.flags;
 				start_ccb->ccb_h.status = CAM_REQ_CMP;
 				xpt_print(cam_dpath, "debugging flags now %x\n",
 				    cam_dflags);
 			}
 		} else
 			start_ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_NOOP:
 		if ((start_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0)
 			xpt_freeze_devq(path, 1);
 		start_ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	case XPT_REPROBE_LUN:
 		xpt_async(AC_INQ_CHANGED, path, NULL);
 		start_ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done(start_ccb);
 		break;
 	/*
 	 * The default label shares its body with the function codes listed
 	 * after it: all of them are reported as unsupported.
 	 */
 	default:
 	case XPT_SDEV_TYPE:
 	case XPT_TERM_IO:
 	case XPT_ENG_INQ:
 		/* XXX Implement */
 		xpt_print(start_ccb->ccb_h.path,
 		    "%s: CCB type %#x %s not supported\n", __func__,
 		    start_ccb->ccb_h.func_code,
 		    xpt_action_name(start_ccb->ccb_h.func_code));
 		start_ccb->ccb_h.status = CAM_PROVIDE_FAIL;
 		if (start_ccb->ccb_h.func_code & XPT_FC_DEV_QUEUED) {
 			xpt_done(start_ccb);
 		}
 		break;
 	}
 	CAM_DEBUG(path, CAM_DEBUG_TRACE,
 	    ("xpt_action_default: func= %#x %s status %#x\n",
 		start_ccb->ccb_h.func_code,
  		xpt_action_name(start_ccb->ccb_h.func_code),
 		start_ccb->ccb_h.status));
 }
 
 uint32_t
 xpt_poll_setup(union ccb *start_ccb)
 {
 	u_int32_t timeout;
 	struct	  cam_sim *sim;
 	struct	  cam_devq *devq;
 	struct	  cam_ed *dev;
 	struct mtx *mtx;
 
 	timeout = start_ccb->ccb_h.timeout * 10;
 	sim = start_ccb->ccb_h.path->bus->sim;
 	devq = sim->devq;
 	mtx = sim->mtx;
 	dev = start_ccb->ccb_h.path->device;
 
 	/*
 	 * Steal an opening so that no other queued requests
 	 * can get it before us while we simulate interrupts.
 	 */
 	mtx_lock(&devq->send_mtx);
 	dev->ccbq.dev_openings--;
 	while((devq->send_openings <= 0 || dev->ccbq.dev_openings < 0) &&
 	    (--timeout > 0)) {
 		mtx_unlock(&devq->send_mtx);
 		DELAY(100);
 		if (mtx)
 			mtx_lock(mtx);
 		(*(sim->sim_poll))(sim);
 		if (mtx)
 			mtx_unlock(mtx);
 		camisr_runqueue();
 		mtx_lock(&devq->send_mtx);
 	}
 	dev->ccbq.dev_openings++;
 	mtx_unlock(&devq->send_mtx);
 
 	return (timeout);
 }
 
 /*
  * Poll the SIM until start_ccb is no longer in progress or the poll budget
  * in timeout runs out.
  *
  * One unit of budget is spent per iteration; each iteration polls the SIM
  * (under the SIM mutex when it has one), runs the completion queue, and
  * then waits DELAY(100) if the CCB is still in progress.  When the budget
  * reaches zero the CCB status is overwritten with CAM_CMD_TIMEOUT.
  */
 void
 xpt_pollwait(union ccb *start_ccb, uint32_t timeout)
 {
 	struct cam_sim *sim;
 	struct mtx *sim_mtx;
 
 	sim = start_ccb->ccb_h.path->bus->sim;
 	sim_mtx = sim->mtx;
 
 	for (;;) {
 		if (--timeout == 0) {
 			/*
 			 * XXX Is it worth adding a sim_timeout entry
 			 * point so we can attempt recovery?  If
 			 * this is only used for dumps, I don't think
 			 * it is.
 			 */
 			start_ccb->ccb_h.status = CAM_CMD_TIMEOUT;
 			return;
 		}
 
 		if (sim_mtx != NULL)
 			mtx_lock(sim_mtx);
 		(*(sim->sim_poll))(sim);
 		if (sim_mtx != NULL)
 			mtx_unlock(sim_mtx);
 		camisr_runqueue();
 
 		/* Finished as soon as the CCB has left the in-progress state. */
 		if ((start_ccb->ccb_h.status & CAM_STATUS_MASK) !=
 		    CAM_REQ_INPROG)
 			return;
 
 		DELAY(100);
 	}
 }
 
 /*
  * Issue start_ccb and wait for it by polling rather than by interrupt.
  *
  * The device mutex is dropped for the duration of the polling and taken
  * again before returning (so the caller is presumably expected to hold it
  * on entry -- verify against callers).  If xpt_poll_setup() cannot obtain
  * an opening within its budget, the CCB is failed with CAM_RESRC_UNAVAIL
  * without being issued; otherwise it is submitted and polled for with
  * whatever budget remains.
  */
 void
 xpt_polled_action(union ccb *start_ccb)
 {
 	uint32_t	timeout;
 	struct cam_ed	*dev;
 
 	dev = start_ccb->ccb_h.path->device;
 
 	mtx_unlock(&dev->device_mtx);
 
 	/* xpt_poll_setup() derives the poll budget from the CCB itself. */
 	timeout = xpt_poll_setup(start_ccb);
 	if (timeout > 0) {
 		xpt_action(start_ccb);
 		xpt_pollwait(start_ccb, timeout);
 	} else {
 		start_ccb->ccb_h.status = CAM_RESRC_UNAVAIL;
 	}
 
 	mtx_lock(&dev->device_mtx);
 }
 
 /*
  * Schedule a peripheral driver to receive a ccb when its
  * target device has space for more transactions.
  *
  * The peripheral's lock must be held.  Nothing happens unless new_priority
  * is numerically lower than the priority already scheduled; in that case
  * the scheduled priority is updated and the allocation queue is run
  * without sleeping.
  */
 void
 xpt_schedule(struct cam_periph *periph, u_int32_t new_priority)
 {
 
 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("xpt_schedule\n"));
 	cam_periph_assert(periph, MA_OWNED);
 
 	if (new_priority >= periph->scheduled_priority)
 		return;
 
 	periph->scheduled_priority = new_priority;
 	xpt_run_allocq(periph, 0);
 }
 
 
 /*
  * Schedule a device to run on a given queue.
  *
  * If the entry is not yet on the queue it is inserted (after lowering its
  * stored priority to new_priority when that is numerically smaller) and 1
  * is returned, meaning the caller should run the queue.  If the entry is
  * already queued, it is repositioned and 1 is returned only when
  * new_priority is numerically smaller than its current priority;
  * otherwise nothing changes and 0 is returned so the caller does not
  * attempt to run the queue.
  */
 static int
 xpt_schedule_dev(struct camq *queue, cam_pinfo *pinfo,
 		 u_int32_t new_priority)
 {
 	int lowers;
 
 	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_schedule_dev\n"));
 
 	lowers = (new_priority < pinfo->priority);
 
 	if (pinfo->index == CAM_UNQUEUED_INDEX) {
 		/* New entry on the queue */
 		if (lowers)
 			pinfo->priority = new_priority;
 
 		CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
 				("Inserting onto queue\n"));
 		pinfo->generation = ++queue->generation;
 		camq_insert(queue, pinfo);
 		return (1);
 	}
 
 	/* Already queued: only a numerically lower priority matters. */
 	if (!lowers)
 		return (0);
 
 	camq_change_priority(queue, pinfo->index, new_priority);
 	CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
 			("changed priority to %d\n",
 			 new_priority));
 	return (1);
 }
 
 /*
  * Task handler that runs a peripheral's allocation queue from a context
  * where the allocation is allowed to sleep.  context is the peripheral;
  * pending is unused.
  *
  * The CAM_PERIPH_RUN_TASK flag is cleared under the peripheral lock before
  * the queue is run, and a peripheral reference is released at the end
  * (xpt_run_allocq() takes one when it enqueues this task).
  */
 static void
 xpt_run_allocq_task(void *context, int pending)
 {
 	struct cam_periph *periph;
 
 	periph = (struct cam_periph *)context;
 
 	cam_periph_lock(periph);
 	periph->flags &= ~CAM_PERIPH_RUN_TASK;
 	xpt_run_allocq(periph, /*sleep*/1);
 	cam_periph_unlock(periph);
 
 	cam_periph_release(periph);
 }
 
 /*
  * Allocate CCBs for a peripheral and hand them out while it has work
  * scheduled.
  *
  * Must be called with the peripheral lock held.  sleep selects what
  * happens when a CCB cannot be obtained without waiting: non-zero means
  * call xpt_get_ccb() and retry, zero means defer the work to
  * periph_run_task on the XPT taskqueue.
  *
  * The periph_allocating flag makes nested calls return immediately.
  */
 static void
 xpt_run_allocq(struct cam_periph *periph, int sleep)
 {
 	struct cam_ed	*device;
 	union ccb	*ccb;
 	uint32_t	 prio;
 
 	cam_periph_assert(periph, MA_OWNED);
 	if (periph->periph_allocating)
 		return;
 	/* Paired with cam_periph_release_locked() at the end. */
 	cam_periph_doacquire(periph);
 	periph->periph_allocating = 1;
 	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_run_allocq(%p)\n", periph));
 	device = periph->path->device;
 	ccb = NULL;
 restart:
 	/*
 	 * Keep going while either priority slot holds a request and either
 	 * the peripheral's allocated count (not counting a CCB already in
 	 * hand) is below the device's total openings, or the request
 	 * priority is CAM_PRIORITY_OOB or numerically lower.
 	 */
 	while ((prio = min(periph->scheduled_priority,
 	    periph->immediate_priority)) != CAM_PRIORITY_NONE &&
 	    (periph->periph_allocated - (ccb != NULL ? 1 : 0) <
 	     device->ccbq.total_openings || prio <= CAM_PRIORITY_OOB)) {
 
 		if (ccb == NULL &&
 		    (ccb = xpt_get_ccb_nowait(periph)) == NULL) {
 			if (sleep) {
 				/*
 				 * Obtain the CCB the blocking way, then
 				 * re-evaluate the loop condition from the
 				 * top with the CCB in hand.
 				 */
 				ccb = xpt_get_ccb(periph);
 				goto restart;
 			}
 			/* A deferred run is already queued; nothing to add. */
 			if (periph->flags & CAM_PERIPH_RUN_TASK)
 				break;
 			/*
 			 * Defer to the task.  The extra reference taken here
 			 * is dropped by xpt_run_allocq_task().
 			 */
 			cam_periph_doacquire(periph);
 			periph->flags |= CAM_PERIPH_RUN_TASK;
 			taskqueue_enqueue(xsoftc.xpt_taskq,
 			    &periph->periph_run_task);
 			break;
 		}
 		xpt_setup_ccb(&ccb->ccb_h, periph->path, prio);
 		if (prio == periph->immediate_priority) {
 			/*
 			 * Immediate request: park the CCB on the
 			 * peripheral's list and wake whoever sleeps on it.
 			 */
 			periph->immediate_priority = CAM_PRIORITY_NONE;
 			CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
 					("waking cam_periph_getccb()\n"));
 			SLIST_INSERT_HEAD(&periph->ccb_list, &ccb->ccb_h,
 					  periph_links.sle);
 			wakeup(&periph->ccb_list);
 		} else {
 			/* Scheduled request: pass the CCB to the start routine. */
 			periph->scheduled_priority = CAM_PRIORITY_NONE;
 			CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
 					("calling periph_start()\n"));
 			periph->periph_start(periph, ccb);
 		}
 		ccb = NULL;
 	}
 	/* A CCB obtained but not handed out is given back. */
 	if (ccb != NULL)
 		xpt_release_ccb(ccb);
 	periph->periph_allocating = 0;
 	cam_periph_release_locked(periph);
 }
 
 /*
  * Dispatch queued CCBs from the devices on a device queue's send queue to
  * their SIMs while the queue has entries, openings remain, and nobody
  * other than this function has frozen the queue.
  *
  * devq->send_mtx is dropped around each SIM action call and retaken
  * afterwards, so the caller is expected to hold it on entry.
  */
 static void
 xpt_run_devq(struct cam_devq *devq)
 {
 	struct mtx *mtx;
 
 	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_run_devq\n"));
 
 	/*
 	 * Bump the freeze count for the duration of the run.  The loop only
 	 * proceeds while the count is <= 1, i.e. while ours is the sole
 	 * freeze; a nested run would see a count above 1 and do nothing.
 	 */
 	devq->send_queue.qfrozen_cnt++;
 	while ((devq->send_queue.entries > 0)
 	    && (devq->send_openings > 0)
 	    && (devq->send_queue.qfrozen_cnt <= 1)) {
 		struct	cam_ed *device;
 		union ccb *work_ccb;
 		struct	cam_sim *sim;
 		struct xpt_proto *proto;
 
 		device = (struct cam_ed *)camq_remove(&devq->send_queue,
 							   CAMQ_HEAD);
 		CAM_DEBUG_PRINT(CAM_DEBUG_XPT,
 				("running device %p\n", device));
 
 		work_ccb = cam_ccbq_peek_ccb(&device->ccbq, CAMQ_HEAD);
 		if (work_ccb == NULL) {
 			printf("device on run queue with no ccbs???\n");
 			continue;
 		}
 
 		if ((work_ccb->ccb_h.flags & CAM_HIGH_POWER) != 0) {
 
 			mtx_lock(&xsoftc.xpt_highpower_lock);
 		 	if (xsoftc.num_highpower <= 0) {
 				/*
 				 * We got a high power command, but we
 				 * don't have any available slots.  Freeze
 				 * the device queue until we have a slot
 				 * available.
 				 */
 				xpt_freeze_devq_device(device, 1);
 				STAILQ_INSERT_TAIL(&xsoftc.highpowerq, device,
 						   highpowerq_entry);
 
 				mtx_unlock(&xsoftc.xpt_highpower_lock);
 				continue;
 			} else {
 				/*
 				 * Consume a high power slot while
 				 * this ccb runs.
 				 */
 				xsoftc.num_highpower--;
 			}
 			mtx_unlock(&xsoftc.xpt_highpower_lock);
 		}
 		/* Move the CCB from the device's pending queue to "sent". */
 		cam_ccbq_remove_ccb(&device->ccbq, work_ccb);
 		cam_ccbq_send_ccb(&device->ccbq, work_ccb);
 		devq->send_openings--;
 		devq->send_active++;
 		/* Requeue the device in case it has more work pending. */
 		xpt_schedule_devq(devq, device);
 		mtx_unlock(&devq->send_mtx);
 
 		if ((work_ccb->ccb_h.flags & CAM_DEV_QFREEZE) != 0) {
 			/*
 			 * The client wants to freeze the queue
 			 * after this CCB is sent.
 			 */
 			xpt_freeze_devq(work_ccb->ccb_h.path, 1);
 		}
 
 		/* In Target mode, the peripheral driver knows best... */
 		if (work_ccb->ccb_h.func_code == XPT_SCSI_IO) {
 			if ((device->inq_flags & SID_CmdQue) != 0
 			 && work_ccb->csio.tag_action != CAM_TAG_ACTION_NONE)
 				work_ccb->ccb_h.flags |= CAM_TAG_ACTION_VALID;
 			else
 				/*
 				 * Clear this in case of a retried CCB that
 				 * failed due to a rejected tag.
 				 */
 				work_ccb->ccb_h.flags &= ~CAM_TAG_ACTION_VALID;
 		}
 
 		KASSERT(device == work_ccb->ccb_h.path->device,
 		    ("device (%p) / path->device (%p) mismatch",
 			device, work_ccb->ccb_h.path->device));
 		proto = xpt_proto_find(device->protocol);
 		if (proto && proto->ops->debug_out)
 			proto->ops->debug_out(work_ccb);
 
 		/*
 		 * Device queues can be shared among multiple SIM instances
 		 * that reside on different buses.  Use the SIM from the
 		 * queued device, rather than the one from the calling bus.
 		 */
 		sim = device->sim;
 		mtx = sim->mtx;
 		/*
 		 * Take the SIM mutex only if the SIM has one and we do not
 		 * already own it; 'mtx' is left NULL when nothing needs to
 		 * be unlocked afterwards.
 		 */
 		if (mtx && !mtx_owned(mtx))
 			mtx_lock(mtx);
 		else
 			mtx = NULL;
 		/* Timestamp consumed by xpt_done()/xpt_done_direct(). */
 		work_ccb->ccb_h.qos.periph_data = cam_iosched_now();
 		(*(sim->sim_action))(sim, work_ccb);
 		if (mtx)
 			mtx_unlock(mtx);
 		mtx_lock(&devq->send_mtx);
 	}
 	devq->send_queue.qfrozen_cnt--;
 }
 
 /*
  * This function merges stuff from the slave ccb into the master ccb, while
  * keeping important fields in the master ccb constant.
  */
 void
 xpt_merge_ccb(union ccb *master_ccb, union ccb *slave_ccb)
 {
 
 	/*
 	 * Pull fields that are valid for peripheral drivers to set
 	 * into the master CCB along with the CCB "payload".
 	 */
 	master_ccb->ccb_h.retry_count = slave_ccb->ccb_h.retry_count;
 	master_ccb->ccb_h.func_code = slave_ccb->ccb_h.func_code;
 	master_ccb->ccb_h.timeout = slave_ccb->ccb_h.timeout;
 	master_ccb->ccb_h.flags = slave_ccb->ccb_h.flags;
 	bcopy(&(&slave_ccb->ccb_h)[1], &(&master_ccb->ccb_h)[1],
 	      sizeof(union ccb) - sizeof(struct ccb_hdr));
 }
 
 /*
  * Initialize a CCB header for use on 'path' at the given priority: record
  * the path and its bus/target/lun identifiers (wildcards where the path has
  * no target or device), mark the CCB as not queued, store 'flags' and clear
  * xflags.  When the path has a device, the CCB is also stamped with the next
  * generation number of that device's CCB queue.
  */
 void
 xpt_setup_ccb_flags(struct ccb_hdr *ccb_h, struct cam_path *path,
 		    u_int32_t priority, u_int32_t flags)
 {
 	struct cam_ed *dev;
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_setup_ccb\n"));
 
 	ccb_h->pinfo.priority = priority;
 	ccb_h->path = path;
 	ccb_h->path_id = path->bus->path_id;
 	ccb_h->target_id = (path->target != NULL) ?
 	    path->target->target_id : CAM_TARGET_WILDCARD;
 
 	dev = path->device;
 	if (dev == NULL) {
 		/*
 		 * NOTE(review): the LUN is set from CAM_TARGET_WILDCARD
 		 * rather than CAM_LUN_WILDCARD; kept as-is.
 		 */
 		ccb_h->target_lun = CAM_TARGET_WILDCARD;
 	} else {
 		ccb_h->target_lun = dev->lun_id;
 		ccb_h->pinfo.generation = ++dev->ccbq.queue.generation;
 	}
 
 	ccb_h->pinfo.index = CAM_UNQUEUED_INDEX;
 	ccb_h->flags = flags;
 	ccb_h->xflags = 0;
 }
 
 /*
  * Initialize a CCB header for 'path' at 'priority' with no flags set.
  * Convenience wrapper around xpt_setup_ccb_flags().
  */
 void
 xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
 {
 	xpt_setup_ccb_flags(ccb_h, path, priority, /*flags*/ 0);
 }
 
 /* Path manipulation functions */
 /*
  * Allocate a path and compile it for the given bus/target/lun.
  *
  * On success *new_path_ptr receives the new path and CAM_REQ_CMP is
  * returned.  If the path cannot be allocated, CAM_RESRC_UNAVAIL is returned
  * and *new_path_ptr is not written.  If xpt_compile_path() fails, its status
  * is returned and *new_path_ptr is set to NULL.
  */
 cam_status
 xpt_create_path(struct cam_path **new_path_ptr, struct cam_periph *perph,
 		path_id_t path_id, target_id_t target_id, lun_id_t lun_id)
 {
 	struct cam_path *new_path;
 	cam_status rc;
 
 	new_path = malloc(sizeof(*new_path), M_CAMPATH, M_NOWAIT);
 	if (new_path == NULL)
 		return (CAM_RESRC_UNAVAIL);
 
 	rc = xpt_compile_path(new_path, perph, path_id, target_id, lun_id);
 	if (rc == CAM_REQ_CMP) {
 		*new_path_ptr = new_path;
 	} else {
 		free(new_path, M_CAMPATH);
 		*new_path_ptr = NULL;
 	}
 	return (rc);
 }
 
 /*
  * Same as xpt_create_path(); this variant simply forwards all of its
  * arguments and returns that function's status.
  */
 cam_status
 xpt_create_path_unlocked(struct cam_path **new_path_ptr,
 			 struct cam_periph *periph, path_id_t path_id,
 			 target_id_t target_id, lun_id_t lun_id)
 {
 
 	return (xpt_create_path(new_path_ptr, periph, path_id, target_id,
 	    lun_id));
 }
 
 /*
  * Fill in a caller-provided path for the given bus/target/lun, creating the
  * target and device entries if they do not exist yet.
  *
  * Returns CAM_REQ_CMP on success, CAM_PATH_INVALID if no bus has the given
  * path_id, or CAM_RESRC_UNAVAIL if a target or device could not be
  * allocated.  On failure new_path is left untouched and any bus, target or
  * device obtained along the way is released again.
  */
 cam_status
 xpt_compile_path(struct cam_path *new_path, struct cam_periph *perph,
 		 path_id_t path_id, target_id_t target_id, lun_id_t lun_id)
 {
 	struct	     cam_eb *bus;
 	struct	     cam_et *target;
 	struct	     cam_ed *device;
 	cam_status   status;
 
 	status = CAM_REQ_CMP;	/* Completed without error */
 	target = NULL;		/* Wildcarded */
 	device = NULL;		/* Wildcarded */
 
 	/*
 	 * We will potentially modify the EDT, so once the bus is found the
 	 * target lookup/creation is done with the bus list lock and the bus
 	 * mutex held, and the device lookup/creation with the bus mutex
 	 * held.
 	 */
 	bus = xpt_find_bus(path_id);
 	if (bus == NULL) {
 		status = CAM_PATH_INVALID;
 	} else {
 		xpt_lock_buses();
 		mtx_lock(&bus->eb_mtx);
 		target = xpt_find_target(bus, target_id);
 		if (target == NULL) {
 			/* Create one */
 			struct cam_et *new_target;
 
 			new_target = xpt_alloc_target(bus, target_id);
 			if (new_target == NULL) {
 				status = CAM_RESRC_UNAVAIL;
 			} else {
 				target = new_target;
 			}
 		}
 		xpt_unlock_buses();
 		if (target != NULL) {
 			device = xpt_find_device(target, lun_id);
 			if (device == NULL) {
 				/* Create one */
 				struct cam_ed *new_device;
 
 				/* Device allocation is transport specific. */
 				new_device =
 				    (*(bus->xport->ops->alloc_device))(bus,
 								       target,
 								       lun_id);
 				if (new_device == NULL) {
 					status = CAM_RESRC_UNAVAIL;
 				} else {
 					device = new_device;
 				}
 			}
 		}
 		mtx_unlock(&bus->eb_mtx);
 	}
 
 	/*
 	 * Only touch the user's data if we are successful.
 	 */
 	if (status == CAM_REQ_CMP) {
 		new_path->periph = perph;
 		new_path->bus = bus;
 		new_path->target = target;
 		new_path->device = device;
 		CAM_DEBUG(new_path, CAM_DEBUG_TRACE, ("xpt_compile_path\n"));
 	} else {
 		/* Undo whatever was found or created before the failure. */
 		if (device != NULL)
 			xpt_release_device(device);
 		if (target != NULL)
 			xpt_release_target(target);
 		if (bus != NULL)
 			xpt_release_bus(bus);
 	}
 	return (status);
 }
 
 /*
  * Allocate a new path and make it a copy of 'path' via xpt_copy_path().
  * Returns CAM_REQ_CMP with the copy stored in *new_path_ptr, or
  * CAM_RESRC_UNAVAIL (leaving *new_path_ptr unwritten) if the allocation
  * fails.
  */
 cam_status
 xpt_clone_path(struct cam_path **new_path_ptr, struct cam_path *path)
 {
 	struct cam_path *copy;
 
 	copy = malloc(sizeof(*copy), M_CAMPATH, M_NOWAIT);
 	if (copy != NULL) {
 		xpt_copy_path(copy, path);
 		*new_path_ptr = copy;
 		return (CAM_REQ_CMP);
 	}
 	return (CAM_RESRC_UNAVAIL);
 }
 
 /*
  * Copy 'path' into 'new_path' and acquire a reference on each of the bus,
  * target and device the path points at (those that are non-NULL).
  */
 void
 xpt_copy_path(struct cam_path *new_path, struct cam_path *path)
 {
 	struct cam_eb *bus;
 	struct cam_et *target;
 	struct cam_ed *device;
 
 	*new_path = *path;
 
 	bus = path->bus;
 	if (bus != NULL)
 		xpt_acquire_bus(bus);
 
 	target = path->target;
 	if (target != NULL)
 		xpt_acquire_target(target);
 
 	device = path->device;
 	if (device != NULL)
 		xpt_acquire_device(device);
 }
 
 /*
  * Release the device, target and bus held by a path, in that order, and
  * clear the corresponding pointers.  The path structure itself is not
  * freed; see xpt_free_path() for that.
  */
 void
 xpt_release_path(struct cam_path *path)
 {
 	struct cam_ed *device;
 	struct cam_et *target;
 	struct cam_eb *bus;
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_release_path\n"));
 
 	device = path->device;
 	if (device != NULL) {
 		xpt_release_device(device);
 		path->device = NULL;
 	}
 
 	target = path->target;
 	if (target != NULL) {
 		xpt_release_target(target);
 		path->target = NULL;
 	}
 
 	bus = path->bus;
 	if (bus != NULL) {
 		xpt_release_bus(bus);
 		path->bus = NULL;
 	}
 }
 
 /*
  * Release everything a path holds (see xpt_release_path()) and free the
  * path structure itself.  The path must have been allocated with the
  * M_CAMPATH malloc type, as xpt_create_path() and xpt_clone_path() do.
  */
 void
 xpt_free_path(struct cam_path *path)
 {
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_free_path\n"));
 	xpt_release_path(path);
 	free(path, M_CAMPATH);
 }
 
 /*
  * Report the reference counts of the objects a path points at.  Each output
  * pointer may be NULL to skip that count; a count of 0 is reported for a
  * component the path does not have.  The bus and peripheral counts are read
  * with the bus list lock held.
  */
 void
 xpt_path_counts(struct cam_path *path, uint32_t *bus_ref,
     uint32_t *periph_ref, uint32_t *target_ref, uint32_t *device_ref)
 {
 
 	xpt_lock_buses();
 	if (bus_ref != NULL)
 		*bus_ref = (path->bus != NULL) ? path->bus->refcount : 0;
 	if (periph_ref != NULL)
 		*periph_ref = (path->periph != NULL) ?
 		    path->periph->refcount : 0;
 	xpt_unlock_buses();
 
 	if (target_ref != NULL)
 		*target_ref = (path->target != NULL) ?
 		    path->target->refcount : 0;
 	if (device_ref != NULL)
 		*device_ref = (path->device != NULL) ?
 		    path->device->refcount : 0;
 }
 
 /*
  * Compare two paths component by component (bus, target, device).
  *
  * Returns -1 if the paths do not match, 0 for an exact match, 1 for a match
  * that relies on wildcards in path1, and 2 for a match that relies on
  * wildcards in path2.  A wildcard in path2 at a later component overrides
  * an earlier result of 1.
  */
 int
 xpt_path_comp(struct cam_path *path1, struct cam_path *path2)
 {
 	int match;
 
 	match = 0;
 
 	if (path1->bus != path2->bus) {
 		if (path1->bus->path_id == CAM_BUS_WILDCARD)
 			match = 1;
 		else if (path2->bus->path_id != CAM_BUS_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	if (path1->target != path2->target) {
 		if (path1->target->target_id == CAM_TARGET_WILDCARD)
 			match = (match == 0) ? 1 : match;
 		else if (path2->target->target_id != CAM_TARGET_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	if (path1->device != path2->device) {
 		if (path1->device->lun_id == CAM_LUN_WILDCARD)
 			match = (match == 0) ? 1 : match;
 		else if (path2->device->lun_id != CAM_LUN_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	return (match);
 }
 
 /*
  * Compare a path against a device, using the device's target and that
  * target's bus for the upper components.
  *
  * Returns -1 for no match, 0 for an exact match, 1 for a match that relies
  * on wildcards in the path, and 2 for a match that relies on wildcards on
  * the device side.
  */
 int
 xpt_path_comp_dev(struct cam_path *path, struct cam_ed *dev)
 {
 	int match;
 
 	match = 0;
 
 	if (path->bus != dev->target->bus) {
 		if (path->bus->path_id == CAM_BUS_WILDCARD)
 			match = 1;
 		else if (dev->target->bus->path_id != CAM_BUS_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	if (path->target != dev->target) {
 		if (path->target->target_id == CAM_TARGET_WILDCARD)
 			match = (match == 0) ? 1 : match;
 		else if (dev->target->target_id != CAM_TARGET_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	if (path->device != dev) {
 		if (path->device->lun_id == CAM_LUN_WILDCARD)
 			match = (match == 0) ? 1 : match;
 		else if (dev->lun_id != CAM_LUN_WILDCARD)
 			return (-1);
 		else
 			match = 2;
 	}
 
 	return (match);
 }
 
 void
 xpt_print_path(struct cam_path *path)
 {
 	struct sbuf sb;
 	char buffer[XPT_PRINT_LEN];
 
 	sbuf_new(&sb, buffer, XPT_PRINT_LEN, SBUF_FIXEDLEN);
 	xpt_path_sbuf(path, &sb);
 	sbuf_finish(&sb);
 	printf("%s", sbuf_data(&sb));
 	sbuf_delete(&sb);
 }
 
 void
 xpt_print_device(struct cam_ed *device)
 {
 
 	if (device == NULL)
 		printf("(nopath): ");
 	else {
 		printf("(noperiph:%s%d:%d:%d:%jx): ", device->sim->sim_name,
 		       device->sim->unit_number,
 		       device->sim->bus_id,
 		       device->target->target_id,
 		       (uintmax_t)device->lun_id);
 	}
 }
 
 void
 xpt_print(struct cam_path *path, const char *fmt, ...)
 {
 	va_list ap;
 	struct sbuf sb;
 	char buffer[XPT_PRINT_LEN];
 
 	sbuf_new(&sb, buffer, XPT_PRINT_LEN, SBUF_FIXEDLEN);
 
 	xpt_path_sbuf(path, &sb);
 	va_start(ap, fmt);
 	sbuf_vprintf(&sb, fmt, ap);
 	va_end(ap);
 
 	sbuf_finish(&sb);
 	printf("%s", sbuf_data(&sb));
 	sbuf_delete(&sb);
 }
 
 /*
  * Format the textual form of 'path' into the caller's buffer 'str' of
  * 'str_len' bytes.  Returns the value reported by xpt_path_sbuf().
  */
 int
 xpt_path_string(struct cam_path *path, char *str, size_t str_len)
 {
 	struct sbuf out;
 	int written;
 
 	sbuf_new(&out, str, str_len, 0);
 	written = xpt_path_sbuf(path, &out);
 	sbuf_finish(&out);
 
 	return (written);
 }
 
 /*
  * Append the textual form of a path to an sbuf:
  * "(<periph><unit>:<sim><unit>:<bus>:<target>:<lun>): ", with "noperiph",
  * "nobus" and "X" standing in for missing components, or "(nopath): " for
  * a NULL path.  Returns sbuf_len() of the sbuf after appending.
  */
 int
 xpt_path_sbuf(struct cam_path *path, struct sbuf *sb)
 {
 
 	if (path == NULL) {
 		sbuf_printf(sb, "(nopath): ");
 		return(sbuf_len(sb));
 	}
 
 	if (path->periph == NULL)
 		sbuf_printf(sb, "(noperiph:");
 	else
 		sbuf_printf(sb, "(%s%d:", path->periph->periph_name,
 		    path->periph->unit_number);
 
 	if (path->bus == NULL)
 		sbuf_printf(sb, "nobus:");
 	else
 		sbuf_printf(sb, "%s%d:%d:", path->bus->sim->sim_name,
 		    path->bus->sim->unit_number, path->bus->sim->bus_id);
 
 	if (path->target == NULL)
 		sbuf_printf(sb, "X:");
 	else
 		sbuf_printf(sb, "%d:", path->target->target_id);
 
 	if (path->device == NULL)
 		sbuf_printf(sb, "X): ");
 	else
 		sbuf_printf(sb, "%jx): ", (uintmax_t)path->device->lun_id);
 
 	return(sbuf_len(sb));
 }
 
 /*
  * Return the path_id of the bus a path refers to.  The path's bus pointer
  * is dereferenced unconditionally, so it must not be NULL.
  */
 path_id_t
 xpt_path_path_id(struct cam_path *path)
 {
 	return(path->bus->path_id);
 }
 
 /*
  * Return the target id of a path, or CAM_TARGET_WILDCARD when the path has
  * no target.
  */
 target_id_t
 xpt_path_target_id(struct cam_path *path)
 {
 	if (path->target == NULL)
 		return (CAM_TARGET_WILDCARD);
 	return (path->target->target_id);
 }
 
 /*
  * Return the LUN of a path, or CAM_LUN_WILDCARD when the path has no
  * device.
  */
 lun_id_t
 xpt_path_lun_id(struct cam_path *path)
 {
 	if (path->device == NULL)
 		return (CAM_LUN_WILDCARD);
 	return (path->device->lun_id);
 }
 
 /*
  * Return the SIM of the bus a path refers to.  The path's bus pointer is
  * dereferenced unconditionally, so it must not be NULL.
  */
 struct cam_sim *
 xpt_path_sim(struct cam_path *path)
 {
 
 	return (path->bus->sim);
 }
 
 /*
  * Return the peripheral recorded in a path; this may be NULL for paths
  * created without one.
  */
 struct cam_periph*
 xpt_path_periph(struct cam_path *path)
 {
 
 	return (path->periph);
 }
 
 /*
  * Release a CAM control block for the caller.  Remit the cost of the structure
  * to the device referenced by the path: the peripheral's allocated count is
  * decremented and an opening is returned to the device's CCB queue.  The
  * peripheral's allocation queue is then run (without sleeping) so that a
  * request waiting for a CCB can make progress.
  *
  * The path lock must be held by the caller.
  */
 void
 xpt_release_ccb(union ccb *free_ccb)
 {
 	struct	 cam_ed *device;
 	struct	 cam_periph *periph;
 
 	CAM_DEBUG_PRINT(CAM_DEBUG_XPT, ("xpt_release_ccb\n"));
 	xpt_path_assert(free_ccb->ccb_h.path, MA_OWNED);
 	/* Fetch what we need from the CCB before it is freed. */
 	device = free_ccb->ccb_h.path->device;
 	periph = free_ccb->ccb_h.path->periph;
 
 	xpt_free_ccb(free_ccb);
 	periph->periph_allocated--;
 	cam_ccbq_release_opening(&device->ccbq);
 	xpt_run_allocq(periph, 0);
 }
 
 /* Functions accessed by SIM drivers */
 
 /* Operations of the fallback transport: the generic default handlers. */
 static struct xpt_xport_ops xport_default_ops = {
 	.alloc_device = xpt_alloc_device_default,
 	.action = xpt_action_default,
 	.async = xpt_dev_async_default,
 };
 /*
  * Fallback transport.  xpt_bus_register() installs it on every new bus
  * before issuing the path inquiry that may select a more specific one.
  */
 static struct xpt_xport xport_default = {
 	.xport = XPORT_UNKNOWN,
 	.name = "unknown",
 	.ops = &xport_default_ops,
 };
 
 /* NOTE(review): presumably adds xport_default to cam_xpt_xport_set. */
 CAM_XPT_XPORT(xport_default);
 
 /*
  * A sim structure, listing the SIM entry points and instance
  * identification info is passed to xpt_bus_register to hook the SIM
  * into the CAM framework.  xpt_bus_register creates a cam_eb entry
  * for this new bus, inserts it into the list of buses (kept sorted by
  * path_id) and assigns it a path_id.  The path_id may be influenced by
  * "hard wiring" information specified by the user.  Unless the SIM
  * reports PIM_NOSCAN, a bus rescan is requested.
  *
  * 'sim' is the SIM to register and 'bus' its bus number on the
  * controller; 'parent' is not used by this function.
  *
  * Returns CAM_SUCCESS on success or CAM_RESRC_UNAVAIL if the bus entry
  * or its wildcard path could not be set up.
  */
 int32_t
 xpt_bus_register(struct cam_sim *sim, device_t parent, u_int32_t bus)
 {
 	struct cam_eb *new_bus;
 	struct cam_eb *old_bus;
 	struct ccb_pathinq cpi;
 	struct cam_path *path;
 	cam_status status;
 
 	sim->bus_id = bus;
 	new_bus = (struct cam_eb *)malloc(sizeof(*new_bus),
 					  M_CAMXPT, M_NOWAIT|M_ZERO);
 	if (new_bus == NULL) {
 		/* Couldn't satisfy request */
 		return (CAM_RESRC_UNAVAIL);
 	}
 
 	mtx_init(&new_bus->eb_mtx, "CAM bus lock", NULL, MTX_DEF);
 	TAILQ_INIT(&new_bus->et_entries);
 	cam_sim_hold(sim);
 	new_bus->sim = sim;
 	timevalclear(&new_bus->last_reset);
 	new_bus->flags = 0;
 	new_bus->refcount = 1;	/* Held until a bus_deregister event */
 	new_bus->generation = 0;
 
 	/* Pick a path_id and insert the bus in path_id order. */
 	xpt_lock_buses();
 	sim->path_id = new_bus->path_id =
 	    xptpathid(sim->sim_name, sim->unit_number, sim->bus_id);
 	old_bus = TAILQ_FIRST(&xsoftc.xpt_busses);
 	while (old_bus != NULL
 	    && old_bus->path_id < new_bus->path_id)
 		old_bus = TAILQ_NEXT(old_bus, links);
 	if (old_bus != NULL)
 		TAILQ_INSERT_BEFORE(old_bus, new_bus, links);
 	else
 		TAILQ_INSERT_TAIL(&xsoftc.xpt_busses, new_bus, links);
 	xsoftc.bus_generation++;
 	xpt_unlock_buses();
 
 	/*
 	 * Set a default transport so that a PATH_INQ can be issued to
 	 * the SIM.  This will then allow for probing and attaching of
 	 * a more appropriate transport.
 	 */
 	new_bus->xport = &xport_default;
 
 	status = xpt_create_path(&path, /*periph*/NULL, sim->path_id,
 				  CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 	if (status != CAM_REQ_CMP) {
 		xpt_release_bus(new_bus);
 		return (CAM_RESRC_UNAVAIL);
 	}
 
 	xpt_path_inq(&cpi, path);
 
 	if (cpi.ccb_h.status == CAM_REQ_CMP) {
 		struct xpt_xport **xpt;
 
 		/* Switch to the transport matching what the SIM reported. */
 		SET_FOREACH(xpt, cam_xpt_xport_set) {
 			if ((*xpt)->xport == cpi.transport) {
 				new_bus->xport = *xpt;
 				break;
 			}
 		}
 		/*
 		 * NOTE(review): xport was set to &xport_default above, so
 		 * this NULL test does not look reachable -- confirm intent.
 		 */
 		if (new_bus->xport == NULL) {
 			xpt_print(path,
 			    "No transport found for %d\n", cpi.transport);
 			xpt_release_bus(new_bus);
 			/*
 			 * The path came from xpt_create_path(), so it is
 			 * M_CAMPATH memory holding bus/target/device
 			 * references.  xpt_free_path() drops those and
 			 * frees it with the matching malloc type.
 			 */
 			xpt_free_path(path);
 			return (CAM_RESRC_UNAVAIL);
 		}
 	}
 
 	/* Notify interested parties */
 	if (sim->path_id != CAM_XPT_PATH_ID) {
 
 		xpt_async(AC_PATH_REGISTERED, path, &cpi);
 		if ((cpi.hba_misc & PIM_NOSCAN) == 0) {
 			union	ccb *scan_ccb;
 
 			/* Initiate bus rescan. */
 			scan_ccb = xpt_alloc_ccb_nowait();
 			if (scan_ccb != NULL) {
 				/*
 				 * The path is handed over with the CCB and
 				 * is not freed here.
 				 */
 				scan_ccb->ccb_h.path = path;
 				scan_ccb->ccb_h.func_code = XPT_SCAN_BUS;
 				scan_ccb->crcn.flags = 0;
 				xpt_rescan(scan_ccb);
 			} else {
 				xpt_print(path,
 					  "Can't allocate CCB to scan bus\n");
 				xpt_free_path(path);
 			}
 		} else
 			xpt_free_path(path);
 	} else
 		xpt_free_path(path);
 	return (CAM_SUCCESS);
 }
 
 /*
  * Deregister the bus with the given path id: announce AC_LOST_DEVICE and
  * AC_PATH_DEREGISTERED on a fully wildcarded path for the bus, then drop
  * the reference held since registration.
  *
  * Returns CAM_REQ_CMP, or the status from xpt_compile_path() if the bus
  * path could not be set up.
  */
 int32_t
 xpt_bus_deregister(path_id_t pathid)
 {
 	struct cam_path bus_path;
 	cam_status status;
 
 	status = xpt_compile_path(&bus_path, NULL, pathid,
 				  CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 	if (status != CAM_REQ_CMP)
 		return (status);
 
 	xpt_async(AC_LOST_DEVICE, &bus_path, NULL);
 	xpt_async(AC_PATH_DEREGISTERED, &bus_path, NULL);
 
 	/* Release the reference count held while registered. */
 	xpt_release_bus(bus_path.bus);
 	/* Release the on-stack path compiled above. */
 	xpt_release_path(&bus_path);
 
 	return (CAM_REQ_CMP);
 }
 
 /*
  * Find the lowest path id that is neither used by a registered bus nor
  * reserved by an "scbus" hint carrying an "at" string.  The walk relies on
  * the bus list being ordered by path_id.  The topology lock must be held.
  */
 static path_id_t
 xptnextfreepathid(void)
 {
 	struct cam_eb *bus;
 	path_id_t candidate;
 	const char *strval;
 
 	mtx_assert(&xsoftc.xpt_topo_lock, MA_OWNED);
 	candidate = 0;
 	bus = TAILQ_FIRST(&xsoftc.xpt_busses);
 	for (;;) {
 		/* Find an unoccupied pathid */
 		while (bus != NULL && bus->path_id <= candidate) {
 			if (bus->path_id == candidate)
 				candidate++;
 			bus = TAILQ_NEXT(bus, links);
 		}
 
 		/*
 		 * Ensure that this pathid is not reserved for
 		 * a bus that may be registered in the future.
 		 */
 		if (resource_string_value("scbus", candidate, "at",
 		    &strval) != 0)
 			break;
 
 		/* Reserved: try the next id, continuing the list walk. */
 		++candidate;
 	}
 	return (candidate);
 }
 
 /*
  * Choose the path id for a SIM bus.  "xpt0" bus 0 always gets
  * CAM_XPT_PATH_ID.  Otherwise the "scbus" hints wired "at" this SIM are
  * searched for one whose "bus" value equals sim_bus (a hint without a "bus"
  * value matches bus 0).  If none applies, the next free path id is
  * assigned dynamically.
  */
 static path_id_t
 xptpathid(const char *sim_name, int sim_unit, int sim_bus)
 {
 	path_id_t pathid;
 	int i, dunit, val;
 	char buf[32];
 	const char *dname;
 
 	pathid = CAM_XPT_PATH_ID;
 	snprintf(buf, sizeof(buf), "%s%d", sim_name, sim_unit);
 	if (strcmp(buf, "xpt0") == 0 && sim_bus == 0)
 		return (pathid);
 
 	for (i = 0;
 	    resource_find_match(&i, &dname, &dunit, "at", buf) == 0; ) {
 		/*
 		 * Avoid a bit of foot shooting: only scbus hints count,
 		 * and unwired units are ignored.
 		 */
 		if (strcmp(dname, "scbus") != 0 || dunit < 0)
 			continue;
 
 		if (resource_int_value("scbus", dunit, "bus", &val) == 0) {
 			if (sim_bus != val)
 				continue;
 			pathid = dunit;
 			break;
 		}
 
 		if (sim_bus != 0) {
 			printf("Ambiguous scbus configuration for %s%d "
 			       "bus %d, cannot wire down.  The kernel "
 			       "config entry for scbus%d should "
 			       "specify a controller bus.\n"
 			       "Scbus will be assigned dynamically.\n",
 			       sim_name, sim_unit, sim_bus, dunit);
 			break;
 		}
 
 		/* Unspecified matches bus 0 */
 		pathid = dunit;
 		break;
 	}
 
 	if (pathid == CAM_XPT_PATH_ID)
 		pathid = xptnextfreepathid();
 	return (pathid);
 }
 
 /*
  * Map an async event code to its symbolic name for diagnostics.  Codes
  * that are not recognized yield "AC_UNKNOWN".
  */
 static const char *
 xpt_async_string(u_int32_t async_code)
 {
 	const char *name;
 
 	switch (async_code) {
 	case AC_BUS_RESET:
 		name = "AC_BUS_RESET";
 		break;
 	case AC_UNSOL_RESEL:
 		name = "AC_UNSOL_RESEL";
 		break;
 	case AC_SCSI_AEN:
 		name = "AC_SCSI_AEN";
 		break;
 	case AC_SENT_BDR:
 		name = "AC_SENT_BDR";
 		break;
 	case AC_PATH_REGISTERED:
 		name = "AC_PATH_REGISTERED";
 		break;
 	case AC_PATH_DEREGISTERED:
 		name = "AC_PATH_DEREGISTERED";
 		break;
 	case AC_FOUND_DEVICE:
 		name = "AC_FOUND_DEVICE";
 		break;
 	case AC_LOST_DEVICE:
 		name = "AC_LOST_DEVICE";
 		break;
 	case AC_TRANSFER_NEG:
 		name = "AC_TRANSFER_NEG";
 		break;
 	case AC_INQ_CHANGED:
 		name = "AC_INQ_CHANGED";
 		break;
 	case AC_GETDEV_CHANGED:
 		name = "AC_GETDEV_CHANGED";
 		break;
 	case AC_CONTRACT:
 		name = "AC_CONTRACT";
 		break;
 	case AC_ADVINFO_CHANGED:
 		name = "AC_ADVINFO_CHANGED";
 		break;
 	case AC_UNIT_ATTENTION:
 		name = "AC_UNIT_ATTENTION";
 		break;
 	default:
 		name = "AC_UNKNOWN";
 		break;
 	}
 	return (name);
 }
 
 /*
  * Return the size of the argument that accompanies an async event code.
  * A positive value is the number of bytes xpt_async() copies, 0 means the
  * event carries no copied argument, and -1 (AC_ADVINFO_CHANGED) makes
  * xpt_async() pass the caller's pointer through without copying.
  */
 static int
 xpt_async_size(u_int32_t async_code)
 {
 	int size;
 
 	switch (async_code) {
 	case AC_PATH_REGISTERED:
 		size = sizeof(struct ccb_pathinq);
 		break;
 	case AC_FOUND_DEVICE:
 		size = sizeof(struct ccb_getdev);
 		break;
 	case AC_TRANSFER_NEG:
 		size = sizeof(struct ccb_trans_settings);
 		break;
 	case AC_CONTRACT:
 		size = sizeof(struct ac_contract);
 		break;
 	case AC_UNIT_ATTENTION:
 		size = sizeof(struct ccb_scsiio);
 		break;
 	case AC_ADVINFO_CHANGED:
 		size = -1;
 		break;
 	case AC_BUS_RESET:
 	case AC_UNSOL_RESEL:
 	case AC_SCSI_AEN:
 	case AC_SENT_BDR:
 	case AC_PATH_DEREGISTERED:
 	case AC_LOST_DEVICE:
 	case AC_INQ_CHANGED:
 	case AC_GETDEV_CHANGED:
 	default:
 		size = 0;
 		break;
 	}
 	return (size);
 }
 
 /*
  * Deliver the async event described by an XPT_ASYNC CCB ('arg') to one
  * device: call the transport's async handler and then every matching
  * callback registered on the device.  Devices that neither equal the path's
  * device nor are matched through a LUN wildcard on either side are skipped.
  *
  * Always returns 1.  NOTE(review): presumably 1 tells the traversal to
  * continue -- confirm against xptdevicetraverse().  The caller appears to
  * be expected to hold device->device_mtx, since it is dropped and retaken
  * below.
  */
 static int
 xpt_async_process_dev(struct cam_ed *device, void *arg)
 {
 	union ccb *ccb = arg;
 	struct cam_path *path = ccb->ccb_h.path;
 	void *async_arg = ccb->casync.async_arg_ptr;
 	u_int32_t async_code = ccb->casync.async_code;
 	int relock;
 
 	if (path->device != device
 	 && path->device->lun_id != CAM_LUN_WILDCARD
 	 && device->lun_id != CAM_LUN_WILDCARD)
 		return (1);
 
 	/*
 	 * The async callback could free the device.
 	 * If it is a broadcast async, it doesn't hold
 	 * device reference, so take our own reference.
 	 */
 	xpt_acquire_device(device);
 
 	/*
 	 * If async for specific device is to be delivered to
 	 * the wildcard client, take the specific device lock.
 	 * XXX: We may need a way for client to specify it.
 	 */
 	if ((device->lun_id == CAM_LUN_WILDCARD &&
 	     path->device->lun_id != CAM_LUN_WILDCARD) ||
 	    (device->target->target_id == CAM_TARGET_WILDCARD &&
 	     path->target->target_id != CAM_TARGET_WILDCARD) ||
 	    (device->target->bus->path_id == CAM_BUS_WILDCARD &&
 	     path->target->bus->path_id != CAM_BUS_WILDCARD)) {
 		mtx_unlock(&device->device_mtx);
 		xpt_path_lock(path);
 		relock = 1;
 	} else
 		relock = 0;
 
 	/* Transport handler first, then the registered callbacks. */
 	(*(device->target->bus->xport->ops->async))(async_code,
 	    device->target->bus, device->target, device, async_arg);
 	xpt_async_bcast(&device->asyncs, async_code, path, async_arg);
 
 	/* Restore the locking state we were entered with. */
 	if (relock) {
 		xpt_path_unlock(path);
 		mtx_lock(&device->device_mtx);
 	}
 	xpt_release_device(device);
 	return (1);
 }
 
 /*
  * Per-target step of async delivery.  Targets that neither equal the
  * path's target nor are matched through a target wildcard on either side
  * are skipped with a return of 1.  For a matching target, AC_SENT_BDR
  * refreshes the target's last_reset time, and the event is then delivered
  * to the target's devices; the traversal's result is returned.
  */
 static int
 xpt_async_process_tgt(struct cam_et *target, void *arg)
 {
 	union ccb *ccb;
 	struct cam_path *path;
 
 	ccb = arg;
 	path = ccb->ccb_h.path;
 
 	if (path->target == target ||
 	    path->target->target_id == CAM_TARGET_WILDCARD ||
 	    target->target_id == CAM_TARGET_WILDCARD) {
 		/* Update our notion of when the last reset occurred */
 		if (ccb->casync.async_code == AC_SENT_BDR)
 			microtime(&target->last_reset);
 
 		return (xptdevicetraverse(target, NULL,
 		    xpt_async_process_dev, ccb));
 	}
 
 	return (1);
 }
 
 /*
  * Completion callback for the XPT_ASYNC CCBs built by xpt_async().
  * Delivers the event to every matching target/device on the path's bus and
  * to the clients registered on the XPT peripheral's wildcard device, then
  * undoes what xpt_async() set up: the device or SIM queue freeze is
  * released, and the copied argument, the cloned path and the CCB are freed.
  */
 static void
 xpt_async_process(struct cam_periph *periph, union ccb *ccb)
 {
 	struct cam_eb *bus;
 	struct cam_path *path;
 	void *async_arg;
 	u_int32_t async_code;
 
 	path = ccb->ccb_h.path;
 	async_code = ccb->casync.async_code;
 	async_arg = ccb->casync.async_arg_ptr;
 	CAM_DEBUG(path, CAM_DEBUG_TRACE | CAM_DEBUG_INFO,
 	    ("xpt_async(%s)\n", xpt_async_string(async_code)));
 	bus = path->bus;
 
 	if (async_code == AC_BUS_RESET) {
 		/* Update our notion of when the last reset occurred */
 		microtime(&bus->last_reset);
 	}
 
 	xpttargettraverse(bus, NULL, xpt_async_process_tgt, ccb);
 
 	/*
 	 * If this wasn't a fully wildcarded async, tell all
 	 * clients that want all async events.
 	 */
 	if (bus != xpt_periph->path->bus) {
 		xpt_path_lock(xpt_periph->path);
 		xpt_async_process_dev(xpt_periph->path->device, ccb);
 		xpt_path_unlock(xpt_periph->path);
 	}
 
 	/* Mirror of the freeze applied at the end of xpt_async(). */
 	if (path->device != NULL && path->device->lun_id != CAM_LUN_WILDCARD)
 		xpt_release_devq(path, 1, TRUE);
 	else
 		xpt_release_simq(path->bus->sim, TRUE);
 	/* Only arguments that xpt_async() copied (size > 0) are ours. */
 	if (ccb->casync.async_arg_size > 0)
 		free(async_arg, M_CAMXPT);
 	xpt_free_path(path);
 	xpt_free_ccb(ccb);
 }
 
 /*
  * Invoke every callback on an async list whose event mask includes
  * 'async_code'.  Entries that have event_lock set are called with the
  * mutex of the path device's SIM held (when that SIM has one).
  */
 static void
 xpt_async_bcast(struct async_list *async_head,
 		u_int32_t async_code,
 		struct cam_path *path, void *async_arg)
 {
 	struct async_node *node, *next;
 	struct mtx *mtx;
 
 	for (node = SLIST_FIRST(async_head); node != NULL; node = next) {
 		/*
 		 * Fetch the successor up front: the callback is allowed to
 		 * delete its own async entry.
 		 */
 		next = SLIST_NEXT(node, links);
 
 		if ((node->event_enable & async_code) == 0)
 			continue;
 
 		mtx = node->event_lock ? path->device->sim->mtx : NULL;
 		if (mtx)
 			mtx_lock(mtx);
 		node->callback(node->callback_arg, async_code, path,
 		    async_arg);
 		if (mtx)
 			mtx_unlock(mtx);
 	}
 }
 
 /*
  * Post an async event for 'path'.  The event is packaged into an XPT_ASYNC
  * CCB (with a private clone of the path and, for events with a fixed-size
  * argument, a private copy of 'async_arg') and queued through xpt_done();
  * delivery happens later in xpt_async_process().
  *
  * The device queue (or the whole SIM queue for LUN-wildcard paths) is
  * frozen here and released again by xpt_async_process().
  *
  * Allocation failures are reported with xpt_print() and the event is
  * dropped.
  */
 void
 xpt_async(u_int32_t async_code, struct cam_path *path, void *async_arg)
 {
 	union ccb *ccb;
 	int size;
 
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL) {
 		xpt_print(path, "Can't allocate CCB to send %s\n",
 		    xpt_async_string(async_code));
 		return;
 	}
 
 	if (xpt_clone_path(&ccb->ccb_h.path, path) != CAM_REQ_CMP) {
 		xpt_print(path, "Can't allocate path to send %s\n",
 		    xpt_async_string(async_code));
 		xpt_free_ccb(ccb);
 		return;
 	}
 	/* The cloned path is not associated with any peripheral. */
 	ccb->ccb_h.path->periph = NULL;
 	ccb->ccb_h.func_code = XPT_ASYNC;
 	ccb->ccb_h.cbfcnp = xpt_async_process;
 	ccb->ccb_h.flags |= CAM_UNLOCKED;
 	ccb->casync.async_code = async_code;
 	ccb->casync.async_arg_size = 0;
 	size = xpt_async_size(async_code);
 	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_async: func %#x %s aync_code %d %s\n",
 		ccb->ccb_h.func_code,
 		xpt_action_name(ccb->ccb_h.func_code),
 		async_code,
 		xpt_async_string(async_code)));
 	if (size > 0 && async_arg != NULL) {
 		/* Fixed-size argument: take a private copy. */
 		ccb->casync.async_arg_ptr = malloc(size, M_CAMXPT, M_NOWAIT);
 		if (ccb->casync.async_arg_ptr == NULL) {
 			xpt_print(path, "Can't allocate argument to send %s\n",
 			    xpt_async_string(async_code));
 			xpt_free_path(ccb->ccb_h.path);
 			xpt_free_ccb(ccb);
 			return;
 		}
 		memcpy(ccb->casync.async_arg_ptr, async_arg, size);
 		ccb->casync.async_arg_size = size;
 	} else if (size < 0) {
 		/*
 		 * Negative size: pass the caller's pointer through as-is;
 		 * xpt_async_process() will not free it.
 		 */
 		ccb->casync.async_arg_ptr = async_arg;
 		ccb->casync.async_arg_size = size;
 	}
 	if (path->device != NULL && path->device->lun_id != CAM_LUN_WILDCARD)
 		xpt_freeze_devq(path, 1);
 	else
 		xpt_freeze_simq(path->bus->sim, 1);
 	xpt_done(ccb);
 }
 
 /*
  * Async handler of the default transport.  Wildcard targets and devices
  * are ignored; for a real device it only logs that it was called.
  */
 static void
 xpt_dev_async_default(u_int32_t async_code, struct cam_eb *bus,
 		      struct cam_et *target, struct cam_ed *device,
 		      void *async_arg)
 {
 
 	/* We only need to handle events for real devices. */
 	if (target->target_id != CAM_TARGET_WILDCARD &&
 	    device->lun_id != CAM_LUN_WILDCARD)
 		printf("%s called\n", __func__);
 }
 
 /*
  * Add 'count' to a device's queue freeze count and, if the device is
  * currently on its devq's send queue, take it off.  The devq send mutex
  * must be held.  Returns the new freeze count.
  */
 static uint32_t
 xpt_freeze_devq_device(struct cam_ed *dev, u_int count)
 {
 	struct cam_devq *devq = dev->sim->devq;
 	uint32_t frozen;
 
 	mtx_assert(&devq->send_mtx, MA_OWNED);
 	CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE,
 	    ("xpt_freeze_devq_device(%d) %u->%u\n", count,
 	    dev->ccbq.queue.qfrozen_cnt, dev->ccbq.queue.qfrozen_cnt + count));
 
 	dev->ccbq.queue.qfrozen_cnt += count;
 	frozen = dev->ccbq.queue.qfrozen_cnt;
 
 	/* Remove frozen device from sendq. */
 	if (device_is_queued(dev))
 		camq_remove(&devq->send_queue, dev->devq_entry.index);
 
 	return (frozen);
 }
 
 /*
  * Freeze the queue of the device a path refers to 'count' more times.
  * Takes the devq send mutex around the update and returns the device's
  * new freeze count.
  */
 u_int32_t
 xpt_freeze_devq(struct cam_path *path, u_int count)
 {
 	struct cam_ed *dev;
 	struct cam_devq *devq;
 	uint32_t frozen;
 
 	dev = path->device;
 	devq = dev->sim->devq;
 
 	mtx_lock(&devq->send_mtx);
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_freeze_devq(%d)\n", count));
 	frozen = xpt_freeze_devq_device(dev, count);
 	mtx_unlock(&devq->send_mtx);
 
 	return (frozen);
 }
 
 /*
  * Freeze a SIM's send queue 'count' more times.  Takes the devq send mutex
  * around the update and returns the new freeze count.
  */
 u_int32_t
 xpt_freeze_simq(struct cam_sim *sim, u_int count)
 {
 	struct cam_devq *devq = sim->devq;
 	uint32_t frozen;
 
 	mtx_lock(&devq->send_mtx);
 	devq->send_queue.qfrozen_cnt += count;
 	frozen = devq->send_queue.qfrozen_cnt;
 	mtx_unlock(&devq->send_mtx);
 
 	return (frozen);
 }
 
 /*
  * Timeout handler that releases one freeze on a device queue ('arg' is the
  * struct cam_ed) and runs the devq if the release asks for it.  It asserts
  * that the devq send mutex is already held when it runs.
  */
 static void
 xpt_release_devq_timeout(void *arg)
 {
 	struct cam_ed *dev = arg;
 	struct cam_devq *devq;
 
 	CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE, ("xpt_release_devq_timeout\n"));
 	devq = dev->sim->devq;
 	mtx_assert(&devq->send_mtx, MA_OWNED);
 
 	if (xpt_release_devq_device(dev, /*count*/1, /*run_queue*/TRUE) == 0)
 		return;
 	xpt_run_devq(devq);
 }
 
 /*
  * Release 'count' freezes on the queue of the device a path refers to.
  * If that fully unfreezes the device and 'run_queue' is non-zero, the devq
  * is run.  The devq send mutex is taken for the duration.
  */
 void
 xpt_release_devq(struct cam_path *path, u_int count, int run_queue)
 {
 	struct cam_ed *dev;
 	struct cam_devq *devq;
 	int run;
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_release_devq(%d, %d)\n",
 	    count, run_queue));
 
 	dev = path->device;
 	devq = dev->sim->devq;
 
 	mtx_lock(&devq->send_mtx);
 	run = xpt_release_devq_device(dev, count, run_queue);
 	if (run)
 		xpt_run_devq(devq);
 	mtx_unlock(&devq->send_mtx);
 }
 
 /*
  * Drop 'count' freezes from a device's queue; a count larger than the
  * current freeze count is clamped to it.  When the count reaches zero the
  * release-on-completion flag is cleared, a pending release timeout is
  * cancelled, and the device is scheduled on its devq.
  *
  * The devq send mutex must be held.  Returns 'run_queue' if the device
  * became fully unfrozen, and 0 otherwise; callers use a non-zero result
  * as the signal to run the devq.
  */
 static int
 xpt_release_devq_device(struct cam_ed *dev, u_int count, int run_queue)
 {
 
 	mtx_assert(&dev->sim->devq->send_mtx, MA_OWNED);
 	CAM_DEBUG_DEV(dev, CAM_DEBUG_TRACE,
 	    ("xpt_release_devq_device(%d, %d) %u->%u\n", count, run_queue,
 	    dev->ccbq.queue.qfrozen_cnt, dev->ccbq.queue.qfrozen_cnt - count));
 	/* Never release more freezes than are outstanding. */
 	if (count > dev->ccbq.queue.qfrozen_cnt) {
 #ifdef INVARIANTS
 		printf("xpt_release_devq(): requested %u > present %u\n",
 		    count, dev->ccbq.queue.qfrozen_cnt);
 #endif
 		count = dev->ccbq.queue.qfrozen_cnt;
 	}
 	dev->ccbq.queue.qfrozen_cnt -= count;
 	if (dev->ccbq.queue.qfrozen_cnt == 0) {
 		/*
 		 * No longer need to wait for a successful
 		 * command completion.
 		 */
 		dev->flags &= ~CAM_DEV_REL_ON_COMPLETE;
 		/*
 		 * Remove any timeouts that might be scheduled
 		 * to release this queue.
 		 */
 		if ((dev->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0) {
 			callout_stop(&dev->callout);
 			dev->flags &= ~CAM_DEV_REL_TIMEOUT_PENDING;
 		}
 		/*
 		 * Now that we are unfrozen schedule the
 		 * device so any pending transactions are
 		 * run.
 		 */
 		xpt_schedule_devq(dev->sim->devq, dev);
 	} else
 		run_queue = 0;
 	return (run_queue);
 }
 
 /*
  * Release one freeze on a SIM's send queue.  If the queue is not frozen
  * the count is left alone (with a diagnostic under INVARIANTS).  Whenever
  * the count is zero afterwards, a pending SIM release timeout is cancelled
  * and, if 'run_queue' is non-zero, the send queue is run.
  */
 void
 xpt_release_simq(struct cam_sim *sim, int run_queue)
 {
 	struct cam_devq	*devq;
 
 	devq = sim->devq;
 	mtx_lock(&devq->send_mtx);
 	if (devq->send_queue.qfrozen_cnt <= 0) {
 #ifdef INVARIANTS
 		printf("xpt_release_simq: requested 1 > present %u\n",
 		    devq->send_queue.qfrozen_cnt);
 #endif
 	} else
 		devq->send_queue.qfrozen_cnt--;
 	if (devq->send_queue.qfrozen_cnt == 0) {
 		/*
 		 * If there is a timeout scheduled to release this
 		 * sim queue, remove it.  The queue frozen count is
 		 * already at 0.
 		 */
 		if ((sim->flags & CAM_SIM_REL_TIMEOUT_PENDING) != 0){
 			callout_stop(&sim->callout);
 			sim->flags &= ~CAM_SIM_REL_TIMEOUT_PENDING;
 		}
 		if (run_queue) {
 			/*
 			 * Now that we are unfrozen run the send queue.
 			 */
 			xpt_run_devq(sim->devq);
 		}
 	}
 	mtx_unlock(&devq->send_mtx);
 }
 
 /*
  * Handler that interprets its opaque argument as a SIM and releases one
  * freeze on that SIM's queue, asking for the queue to be run.
  *
  * XXX Appears to be unused.
  */
 static void
 xpt_release_simq_timeout(void *arg)
 {
 
 	xpt_release_simq((struct cam_sim *)arg, /* run_queue */ TRUE);
 }
 
 void
 xpt_done(union ccb *done_ccb)
 {
 	struct cam_doneq *queue;
 	int	run, hash;
 
 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
 	if (done_ccb->ccb_h.func_code == XPT_SCSI_IO &&
 	    done_ccb->csio.bio != NULL)
 		biotrack(done_ccb->csio.bio, __func__);
 #endif
 
 	CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_done: func= %#x %s status %#x\n",
 		done_ccb->ccb_h.func_code,
 		xpt_action_name(done_ccb->ccb_h.func_code),
 		done_ccb->ccb_h.status));
 	if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0)
 		return;
 
 	/* Store the time the ccb was in the sim */
 	done_ccb->ccb_h.qos.periph_data = cam_iosched_delta_t(done_ccb->ccb_h.qos.periph_data);
 	hash = (done_ccb->ccb_h.path_id + done_ccb->ccb_h.target_id +
 	    done_ccb->ccb_h.target_lun) % cam_num_doneqs;
 	queue = &cam_doneqs[hash];
 	mtx_lock(&queue->cam_doneq_mtx);
 	run = (queue->cam_doneq_sleep && STAILQ_EMPTY(&queue->cam_doneq));
 	STAILQ_INSERT_TAIL(&queue->cam_doneq, &done_ccb->ccb_h, sim_links.stqe);
 	done_ccb->ccb_h.pinfo.index = CAM_DONEQ_INDEX;
 	mtx_unlock(&queue->cam_doneq_mtx);
 	if (run)
 		wakeup(&queue->cam_doneq);
 }
 
 void
 xpt_done_direct(union ccb *done_ccb)
 {
 
 	CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_done_direct: status %#x\n", done_ccb->ccb_h.status));
 	if ((done_ccb->ccb_h.func_code & XPT_FC_QUEUED) == 0)
 		return;
 
 	/* Store the time the ccb was in the sim */
 	done_ccb->ccb_h.qos.periph_data = cam_iosched_delta_t(done_ccb->ccb_h.qos.periph_data);
 	xpt_done_process(&done_ccb->ccb_h);
 }
 
 /*
  * Allocate a zero-filled CCB of malloc type M_CAMCCB using M_WAITOK.
  * Release it with xpt_free_ccb().
  */
 union ccb *
 xpt_alloc_ccb(void)
 {
 	union ccb *new_ccb;
 
 	new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_WAITOK);
 	return (new_ccb);
 }
 
 /*
  * Allocate a zero-filled CCB of malloc type M_CAMCCB using M_NOWAIT.
  * The result of malloc() is returned as is, so callers must be prepared
  * for NULL.  Release it with xpt_free_ccb().
  */
 union ccb *
 xpt_alloc_ccb_nowait(void)
 {
 	union ccb *new_ccb;
 
 	new_ccb = malloc(sizeof(*new_ccb), M_CAMCCB, M_ZERO|M_NOWAIT);
 	return (new_ccb);
 }
 
 /*
  * Free a CCB back to the M_CAMCCB malloc type, the type used by the
  * xpt_alloc_ccb*() and xpt_get_ccb*() allocators in this file.
  */
 void
 xpt_free_ccb(union ccb *free_ccb)
 {
 	free(free_ccb, M_CAMCCB);
 }
 
 
 
 /* Private XPT functions */
 
 /*
  * Non-sleeping CCB allocation on behalf of a peripheral.  On success the
  * periph's allocation counter is incremented and one opening is taken
  * from the CCB queue of the device on the periph's path.  Returns NULL,
  * without touching any accounting, when the M_NOWAIT allocation fails.
  */
 static union ccb *
 xpt_get_ccb_nowait(struct cam_periph *periph)
 {
 	union ccb *ccb;
 
 	ccb = malloc(sizeof(*ccb), M_CAMCCB, M_ZERO | M_NOWAIT);
 	if (ccb != NULL) {
 		periph->periph_allocated++;
 		cam_ccbq_take_opening(&periph->path->device->ccbq);
 	}
 	return (ccb);
 }
 
 /*
  * M_WAITOK counterpart of xpt_get_ccb_nowait().  The periph lock is
  * dropped around the allocation and re-taken before the periph and
  * device accounting is updated.
  */
 static union ccb *
 xpt_get_ccb(struct cam_periph *periph)
 {
 	union ccb *ccb;
 
 	/* Allocate with the periph lock released. */
 	cam_periph_unlock(periph);
 	ccb = malloc(sizeof(*ccb), M_CAMCCB, M_ZERO | M_WAITOK);
 	cam_periph_lock(periph);
 
 	/* Charge the new CCB to the periph and to its device. */
 	periph->periph_allocated++;
 	cam_ccbq_take_opening(&periph->path->device->ccbq);
 	return (ccb);
 }
 
 /*
  * Wait until the head of the periph's ccb_list is a CCB of the requested
  * priority, then unlink and return it.  The periph lock must be held.
  *
  * While no such CCB is available: if "priority" is numerically lower than
  * the periph's immediate_priority, that field is lowered and the
  * allocation queue is run; otherwise the thread sleeps on ccb_list.
  */
 union ccb *
 cam_periph_getccb(struct cam_periph *periph, u_int32_t priority)
 {
 	struct ccb_hdr *ccb_h;
 
 	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("cam_periph_getccb\n"));
 	cam_periph_assert(periph, MA_OWNED);
 	for (;;) {
 		ccb_h = SLIST_FIRST(&periph->ccb_list);
 		if (ccb_h != NULL && ccb_h->pinfo.priority == priority)
 			break;
 
 		if (priority < periph->immediate_priority) {
 			periph->immediate_priority = priority;
 			xpt_run_allocq(periph, 0);
 		} else {
 			cam_periph_sleep(periph, &periph->ccb_list, PRIBIO,
 			    "cgticb", 0);
 		}
 	}
 	SLIST_REMOVE_HEAD(&periph->ccb_list, periph_links.sle);
 	return ((union ccb *)ccb_h);
 }
 
 /*
  * Take an additional reference on a bus.  The reference count is changed
  * under the lock taken by xpt_lock_buses().
  */
 static void
 xpt_acquire_bus(struct cam_eb *bus)
 {
 
 	xpt_lock_buses();
 	bus->refcount++;
 	xpt_unlock_buses();
 }
 
 /*
  * Drop one reference on a bus.  When the last reference goes away the bus
  * is unlinked from xsoftc.xpt_busses under the topology lock; then, with
  * the lock released, its SIM reference is released, its mutex destroyed
  * and the structure freed.
  */
 static void
 xpt_release_bus(struct cam_eb *bus)
 {
 	int last_ref;
 
 	xpt_lock_buses();
 	KASSERT(bus->refcount >= 1, ("bus->refcount >= 1"));
 	bus->refcount--;
 	last_ref = !(bus->refcount > 0);
 	if (last_ref) {
 		/* Unlink while the topology lock is still held. */
 		TAILQ_REMOVE(&xsoftc.xpt_busses, bus, links);
 		xsoftc.bus_generation++;
 	}
 	xpt_unlock_buses();
 	if (!last_ref)
 		return;
 
 	KASSERT(TAILQ_EMPTY(&bus->et_entries),
 	    ("destroying bus, but target list is not empty"));
 	cam_sim_release(bus->sim);
 	mtx_destroy(&bus->eb_mtx);
 	free(bus, M_CAMXPT);
 }
 
 /*
  * Allocate a target with id "target_id" and link it into the bus's target
  * list, which is kept sorted by ascending target id.  Both the topology
  * lock and the bus mutex must be held (asserted).  The new target starts
  * with one reference and takes a reference on its bus.  Returns NULL when
  * the M_NOWAIT allocation fails.
  */
 static struct cam_et *
 xpt_alloc_target(struct cam_eb *bus, target_id_t target_id)
 {
 	struct cam_et *successor, *target;
 
 	mtx_assert(&xsoftc.xpt_topo_lock, MA_OWNED);
 	mtx_assert(&bus->eb_mtx, MA_OWNED);
 
 	target = malloc(sizeof(*target), M_CAMXPT, M_NOWAIT | M_ZERO);
 	if (target == NULL)
 		return (NULL);
 
 	TAILQ_INIT(&target->ed_entries);
 	target->bus = bus;
 	target->target_id = target_id;
 	target->refcount = 1;
 	target->generation = 0;
 	target->luns = NULL;
 	mtx_init(&target->luns_mtx, "CAM LUNs lock", NULL, MTX_DEF);
 	timevalclear(&target->last_reset);
 
 	/* The target pins its parent bus for as long as it exists. */
 	bus->refcount++;
 
 	/* Find the first existing target whose id is not below ours. */
 	TAILQ_FOREACH(successor, &bus->et_entries, links) {
 		if (successor->target_id >= target_id)
 			break;
 	}
 	if (successor == NULL)
 		TAILQ_INSERT_TAIL(&bus->et_entries, target, links);
 	else
 		TAILQ_INSERT_BEFORE(successor, target, links);
 	bus->generation++;
 	return (target);
 }
 
 /*
  * Take an additional reference on a target.  The count is changed under
  * the mutex of the target's bus (eb_mtx).
  */
 static void
 xpt_acquire_target(struct cam_et *target)
 {
 	struct cam_eb *bus = target->bus;
 
 	mtx_lock(&bus->eb_mtx);
 	target->refcount++;
 	mtx_unlock(&bus->eb_mtx);
 }
 
 /*
  * Drop one reference on a target.  When the last reference goes away the
  * target is unlinked from its bus under eb_mtx; then, with the mutex
  * released, the bus reference is dropped, luns_mtx is destroyed and the
  * LUN data and the target itself are freed.
  */
 static void
 xpt_release_target(struct cam_et *target)
 {
 	struct cam_eb *bus = target->bus;
 
 	mtx_lock(&bus->eb_mtx);
 	if (--target->refcount > 0) {
 		mtx_unlock(&bus->eb_mtx);
 		return;
 	}
 	TAILQ_REMOVE(&bus->et_entries, target, links);
 	bus->generation++;
 	mtx_unlock(&bus->eb_mtx);
 	KASSERT(TAILQ_EMPTY(&target->ed_entries),
 	    ("destroying target, but device list is not empty"));
 	/* Give back the bus reference taken in xpt_alloc_target(). */
 	xpt_release_bus(bus);
 	mtx_destroy(&target->luns_mtx);
 	/* free(9) does nothing for a NULL pointer, so no guard is needed. */
 	free(target->luns, M_CAMXPT);
 	free(target, M_CAMXPT);
 }
 
 /*
  * Allocate a device through xpt_alloc_device() and set both its minimum
  * and maximum tag counts to 1.  Returns NULL if the allocation fails.
  */
 static struct cam_ed *
 xpt_alloc_device_default(struct cam_eb *bus, struct cam_et *target,
 			 lun_id_t lun_id)
 {
 	struct cam_ed *dev;
 
 	dev = xpt_alloc_device(bus, target, lun_id);
 	if (dev != NULL) {
 		dev->mintags = 1;
 		dev->maxtags = 1;
 	}
 	return (dev);
 }
 
 /*
  * Task handler doing the final teardown of a device: destroys device_mtx
  * and frees the cam_ed.  It is attached to device_destroy_task in
  * xpt_alloc_device() and enqueued by xpt_release_device().  "pending" is
  * unused.
  */
 static void
 xpt_destroy_device(void *context, int pending)
 {
 	struct cam_ed	*device = context;
 
 	/*
 	 * NOTE(review): the mutex is acquired right before it is destroyed,
 	 * presumably to wait out any thread still holding it -- confirm.
 	 */
 	mtx_lock(&device->device_mtx);
 	mtx_destroy(&device->device_mtx);
 	free(device, M_CAMDEV);
 }
 
 /*
  * Allocate and initialize a device (cam_ed) for "lun_id" and link it into
  * the target's device list, which is kept sorted by ascending LUN id.  The
  * bus mutex must be held (asserted).
  *
  * One slot is reserved in the SIM's device queue first; the slot is given
  * back if a later allocation step fails.  The new device starts
  * unconfigured with one reference and takes a reference on its target.
  *
  * Returns the new device, or NULL if the devq resize, the device
  * allocation or the CCB queue initialization fails.
  */
 struct cam_ed *
 xpt_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id)
 {
 	struct cam_ed	*cur_device, *device;
 	struct cam_devq	*devq;
 	cam_status status;
 
 	mtx_assert(&bus->eb_mtx, MA_OWNED);
 	/* Make space for us in the device queue on our bus */
 	devq = bus->sim->devq;
 	mtx_lock(&devq->send_mtx);
 	status = cam_devq_resize(devq, devq->send_queue.array_size + 1);
 	mtx_unlock(&devq->send_mtx);
 	if (status != CAM_REQ_CMP)
 		return (NULL);
 
 	device = malloc(sizeof(*device), M_CAMDEV, M_NOWAIT|M_ZERO);
 	if (device == NULL)
 		goto fail;
 
 	cam_init_pinfo(&device->devq_entry);
 	device->target = target;
 	device->lun_id = lun_id;
 	device->sim = bus->sim;
 	if (cam_ccbq_init(&device->ccbq,
 			  bus->sim->max_dev_openings) != 0) {
 		free(device, M_CAMDEV);
 		goto fail;
 	}
 	SLIST_INIT(&device->asyncs);
 	SLIST_INIT(&device->periphs);
 	device->generation = 0;
 	device->flags = CAM_DEV_UNCONFIGURED;
 	device->tag_delay_count = 0;
 	device->tag_saved_openings = 0;
 	device->refcount = 1;
 	mtx_init(&device->device_mtx, "CAM device lock", NULL, MTX_DEF);
 	/* The release callout is tied to the devq send_mtx. */
 	callout_init_mtx(&device->callout, &devq->send_mtx, 0);
 	TASK_INIT(&device->device_destroy_task, 0, xpt_destroy_device, device);
 	/*
 	 * Hold a reference to our parent target so it
 	 * will not go away before we do.
 	 */
 	target->refcount++;
 
 	/* Insertion sort into our target's device list */
 	cur_device = TAILQ_FIRST(&target->ed_entries);
 	while (cur_device != NULL && cur_device->lun_id < lun_id)
 		cur_device = TAILQ_NEXT(cur_device, links);
 	if (cur_device != NULL)
 		TAILQ_INSERT_BEFORE(cur_device, device, links);
 	else
 		TAILQ_INSERT_TAIL(&target->ed_entries, device, links);
 	target->generation++;
 	return (device);
 
 fail:
 	/*
 	 * Give back the devq slot reserved above, the same way
 	 * xpt_release_device() does for a device that is going away.
 	 */
 	mtx_lock(&devq->send_mtx);
 	cam_devq_resize(devq, devq->send_queue.array_size - 1);
 	mtx_unlock(&devq->send_mtx);
 	return (NULL);
 }
 
 /*
  * Take an additional reference on a device.  The count is changed under
  * the mutex (eb_mtx) of the bus that the device's target belongs to.
  */
 void
 xpt_acquire_device(struct cam_ed *device)
 {
 	struct cam_eb *bus = device->target->bus;
 
 	mtx_lock(&bus->eb_mtx);
 	device->refcount++;
 	mtx_unlock(&bus->eb_mtx);
 }
 
 /*
  * Drop one reference on a device.  When the last reference goes away the
  * device is unlinked from its target under eb_mtx, its devq slot is
  * returned, the target reference is dropped, the CCB queue and the
  * per-device data buffers are freed, and device_destroy_task is enqueued
  * on xsoftc.xpt_taskq to destroy the mutex and free the structure itself
  * (see xpt_destroy_device()).
  */
 void
 xpt_release_device(struct cam_ed *device)
 {
 	struct cam_eb *bus = device->target->bus;
 	struct cam_devq *devq;
 
 	mtx_lock(&bus->eb_mtx);
 	if (--device->refcount > 0) {
 		mtx_unlock(&bus->eb_mtx);
 		return;
 	}
 
 	TAILQ_REMOVE(&device->target->ed_entries, device,links);
 	device->target->generation++;
 	mtx_unlock(&bus->eb_mtx);
 
 	/* Release our slot in the devq */
 	devq = bus->sim->devq;
 	mtx_lock(&devq->send_mtx);
 	cam_devq_resize(devq, devq->send_queue.array_size - 1);
 	mtx_unlock(&devq->send_mtx);
 
 	KASSERT(SLIST_EMPTY(&device->periphs),
 	    ("destroying device, but periphs list is not empty"));
 	KASSERT(device->devq_entry.index == CAM_UNQUEUED_INDEX,
 	    ("destroying device while still queued for ccbs"));
 
 	/*
 	 * NOTE(review): the callout is set up in xpt_alloc_device() with
 	 * callout_init_mtx() on devq->send_mtx, but send_mtx has already
 	 * been dropped here -- confirm that callout_stop() is safe without
 	 * it.
 	 */
 	if ((device->flags & CAM_DEV_REL_TIMEOUT_PENDING) != 0)
 		callout_stop(&device->callout);
 
 	/* Drop the target reference taken in xpt_alloc_device(). */
 	xpt_release_target(device->target);
 
 	cam_ccbq_fini(&device->ccbq);
 	/*
 	 * Free allocated memory.  free(9) does nothing if the
 	 * supplied pointer is NULL, so it is safe to call without
 	 * checking.
 	 */
 	free(device->supported_vpds, M_CAMXPT);
 	free(device->device_id, M_CAMXPT);
 	free(device->ext_inq, M_CAMXPT);
 	free(device->physpath, M_CAMXPT);
 	free(device->rcap_buf, M_CAMXPT);
 	free(device->serial_num, M_CAMXPT);
+	free(device->nvme_data, M_CAMXPT);
+	free(device->nvme_cdata, M_CAMXPT);
 	taskqueue_enqueue(xsoftc.xpt_taskq, &device->device_destroy_task);
 }
 
 /*
  * Resize the CCB queue of the device on "path" to "newopenings" under the
  * devq send_mtx and return the result of cam_ccbq_resize().  The value is
  * also remembered in tag_saved_openings when the device has
  * CAM_DEV_TAG_AFTER_COUNT set or SID_CmdQue set in its inquiry flags.
  */
 u_int32_t
 xpt_dev_ccbq_resize(struct cam_path *path, int newopenings)
 {
 	struct	cam_ed *dev = path->device;
 	int	rc;
 
 	mtx_lock(&dev->sim->devq->send_mtx);
 	rc = cam_ccbq_resize(&dev->ccbq, newopenings);
 	mtx_unlock(&dev->sim->devq->send_mtx);
 
 	if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0 ||
 	    (dev->inq_flags & SID_CmdQue) != 0) {
 		dev->tag_saved_openings = newopenings;
 	}
 	return (rc);
 }
 
 /*
  * Look up a bus by path id.  On a match the bus's reference count is
  * incremented under the topology lock and the bus is returned; NULL is
  * returned when no bus has that id.
  */
 static struct cam_eb *
 xpt_find_bus(path_id_t path_id)
 {
 	struct cam_eb *bus;
 
 	xpt_lock_buses();
 	TAILQ_FOREACH(bus, &xsoftc.xpt_busses, links) {
 		if (bus->path_id != path_id)
 			continue;
 		bus->refcount++;
 		break;
 	}
 	xpt_unlock_buses();
 	/* bus is NULL here when the loop ran off the end of the list. */
 	return (bus);
 }
 
 /*
  * Look up a target on "bus" by target id; the bus mutex must be held
  * (asserted).  On a match the target's reference count is incremented and
  * the target is returned; otherwise NULL.
  */
 static struct cam_et *
 xpt_find_target(struct cam_eb *bus, target_id_t	target_id)
 {
 	struct cam_et *target;
 
 	mtx_assert(&bus->eb_mtx, MA_OWNED);
 	TAILQ_FOREACH(target, &bus->et_entries, links) {
 		if (target->target_id != target_id)
 			continue;
 		target->refcount++;
 		break;
 	}
 	/* target is NULL here when the loop ran off the end of the list. */
 	return (target);
 }
 
 /*
  * Look up a device on "target" by LUN id; the mutex of the target's bus
  * must be held (asserted).  On a match the device's reference count is
  * incremented and the device is returned; otherwise NULL.
  */
 static struct cam_ed *
 xpt_find_device(struct cam_et *target, lun_id_t lun_id)
 {
 	struct cam_ed *device;
 
 	mtx_assert(&target->bus->eb_mtx, MA_OWNED);
 	TAILQ_FOREACH(device, &target->ed_entries, links) {
 		if (device->lun_id != lun_id)
 			continue;
 		device->refcount++;
 		break;
 	}
 	/* device is NULL here when the loop ran off the end of the list. */
 	return (device);
 }
 
 /*
  * Turn SID_CmdQue on in the inquiry flags of the device on "path".  The
  * device queue is frozen once, the CCB queue is resized (to the saved
  * opening count if there is one, else to the smaller of the device's
  * maxtags and the SIM's max_tagged_dev_openings), AC_GETDEV_CHANGED is
  * posted, and an XPT_REL_SIMQ request with RELSIM_RELEASE_AFTER_QEMPTY is
  * issued.
  */
 void
 xpt_start_tags(struct cam_path *path)
 {
 	struct ccb_relsim crs;
 	struct cam_ed *device = path->device;
 	struct cam_sim *sim = path->bus->sim;
 	int    newopenings;
 
 	device->flags &= ~CAM_DEV_TAG_AFTER_COUNT;
 	xpt_freeze_devq(path, /*count*/1);
 	device->inq_flags |= SID_CmdQue;
 
 	newopenings = (device->tag_saved_openings != 0) ?
 	    device->tag_saved_openings :
 	    min(device->maxtags, sim->max_tagged_dev_openings);
 	xpt_dev_ccbq_resize(path, newopenings);
 	xpt_async(AC_GETDEV_CHANGED, path, NULL);
 
 	/* Ask for the freeze taken above to be released on queue empty. */
 	xpt_setup_ccb(&crs.ccb_h, path, CAM_PRIORITY_NORMAL);
 	crs.ccb_h.func_code = XPT_REL_SIMQ;
 	crs.release_flags = RELSIM_RELEASE_AFTER_QEMPTY;
 	crs.qfrozen_cnt = 0;
 	crs.release_timeout = 0;
 	crs.openings = 0;
 	xpt_action((union ccb *)&crs);
 }
 
 /*
  * Turn SID_CmdQue off in the inquiry flags of the device on "path".  The
  * tag delay count is cleared, the device queue is frozen once, the CCB
  * queue is resized to the SIM's max_dev_openings, AC_GETDEV_CHANGED is
  * posted, and an XPT_REL_SIMQ request with RELSIM_RELEASE_AFTER_QEMPTY is
  * issued.
  */
 void
 xpt_stop_tags(struct cam_path *path)
 {
 	struct ccb_relsim crs;
 	struct cam_ed *device = path->device;
 	struct cam_sim *sim = path->bus->sim;
 
 	device->flags &= ~CAM_DEV_TAG_AFTER_COUNT;
 	device->tag_delay_count = 0;
 	xpt_freeze_devq(path, /*count*/1);
 	device->inq_flags &= ~SID_CmdQue;
 	xpt_dev_ccbq_resize(path, sim->max_dev_openings);
 	xpt_async(AC_GETDEV_CHANGED, path, NULL);
 
 	/* Ask for the freeze taken above to be released on queue empty. */
 	xpt_setup_ccb(&crs.ccb_h, path, CAM_PRIORITY_NORMAL);
 	crs.ccb_h.func_code = XPT_REL_SIMQ;
 	crs.release_flags = RELSIM_RELEASE_AFTER_QEMPTY;
 	crs.qfrozen_cnt = 0;
 	crs.release_timeout = 0;
 	crs.openings = 0;
 	xpt_action((union ccb *)&crs);
 }
 
 /*
  * Callout handler armed by xpt_config() on xsoftc.boot_callout: drops the
  * boot hold that xpt_config() took with xpt_hold_boot().  "arg" is unused.
  */
 static void
 xpt_boot_delay(void *arg)
 {
 
 	xpt_release_boot();
 }
 
 /*
  * Bring-up routine: starts the XPT taskqueue thread, creates the debug
  * path when debugging flags are set, calls periphdriver_init(1), takes a
  * boot hold that the boot_callout releases after xsoftc.boot_delay
  * milliseconds, and starts the rescan thread.  "arg" is unused.
  */
 static void
 xpt_config(void *arg)
 {
 	/*
 	 * Now that interrupts are enabled, go find our devices
 	 */
 	if (taskqueue_start_threads(&xsoftc.xpt_taskq, 1, PRIBIO,
 	    "CAM taskq") != 0)
 		printf("xpt_config: failed to create taskqueue thread.\n");
 
 	/* Setup debugging path */
 	if (cam_dflags == CAM_DEBUG_NONE) {
 		cam_dpath = NULL;
 	} else if (xpt_create_path(&cam_dpath, NULL,
 				   CAM_DEBUG_BUS, CAM_DEBUG_TARGET,
 				   CAM_DEBUG_LUN) != CAM_REQ_CMP) {
 		/* Without a debug path, debugging is switched off. */
 		printf("xpt_config: xpt_create_path() failed for debug"
 		       " target %d:%d:%d, debugging disabled\n",
 		       CAM_DEBUG_BUS, CAM_DEBUG_TARGET, CAM_DEBUG_LUN);
 		cam_dflags = CAM_DEBUG_NONE;
 	}
 
 	periphdriver_init(1);
 
 	/* Hold the boot; xpt_boot_delay() drops the hold when it fires. */
 	xpt_hold_boot();
 	callout_init(&xsoftc.boot_callout, 1);
 	callout_reset_sbt(&xsoftc.boot_callout, SBT_1MS * xsoftc.boot_delay, 0,
 	    xpt_boot_delay, NULL, 0);
 
 	/* Fire up rescan thread. */
 	if (kproc_kthread_add(xpt_scanner_thread, NULL, &cam_proc, NULL, 0, 0,
 	    "cam", "scanner") != 0)
 		printf("xpt_config: failed to create rescan thread.\n");
 }
 
 /*
  * Take a boot hold: increments xsoftc.buses_to_config under the topology
  * lock.  Each hold is dropped again with xpt_release_boot().
  */
 void
 xpt_hold_boot(void)
 {
 	xpt_lock_buses();
 	xsoftc.buses_to_config++;
 	xpt_unlock_buses();
 }
 
 /*
  * Drop one boot hold.  When the hold count reaches zero and configuration
  * has not been finished yet, buses_config_done is set and a task running
  * xpt_finishconfig_task() is enqueued on taskqueue_thread.
  *
  * The task is allocated with M_NOWAIT.  If that allocation fails,
  * xpt_finishconfig_task() never runs, so the config hook it would
  * disestablish stays in place; the failure is reported instead of being
  * dropped silently.
  */
 void
 xpt_release_boot(void)
 {
 	struct	xpt_task *task;
 
 	xpt_lock_buses();
 	xsoftc.buses_to_config--;
 	if (xsoftc.buses_to_config != 0 || xsoftc.buses_config_done != 0) {
 		xpt_unlock_buses();
 		return;
 	}
 	xsoftc.buses_config_done = 1;
 	xpt_unlock_buses();
 
 	/* Call manually because we don't have any buses */
 	task = malloc(sizeof(struct xpt_task), M_CAMXPT, M_NOWAIT);
 	if (task == NULL) {
 		printf("xpt_release_boot: failed to allocate finishconfig "
 		    "task.\n");
 		return;
 	}
 	TASK_INIT(&task->task, 0, xpt_finishconfig_task, task);
 	taskqueue_enqueue(taskqueue_thread, &task->task);
 }
 
 /*
  * If the given device only has one peripheral attached to it, and if that
  * peripheral is the passthrough driver, announce it.  This insures that the
  * user sees some sort of announcement for every peripheral in their system.
  */
 static int
 xptpassannouncefunc(struct cam_ed *device, void *arg)
 {
 	struct cam_periph *periph;
 	int i;
 
 	for (periph = SLIST_FIRST(&device->periphs), i = 0; periph != NULL;
 	     periph = SLIST_NEXT(periph, periph_links), i++);
 
 	periph = SLIST_FIRST(&device->periphs);
 	if ((i == 1)
 	 && (strncmp(periph->periph_name, "pass", 4) == 0))
 		xpt_announce_periph(periph, NULL);
 
 	return(1);
 }
 
 /*
  * Task handler enqueued by xpt_release_boot(): calls periphdriver_init(2),
  * announces pass-only devices when not booting verbosely, disestablishes
  * and frees the XPT config hook, and finally frees "context", the task
  * memory that xpt_release_boot() allocated.  "pending" is unused.
  */
 static void
 xpt_finishconfig_task(void *context, int pending)
 {
 
 	periphdriver_init(2);
 	/*
 	 * Check for devices with no "standard" peripheral driver
 	 * attached.  For any devices like that, announce the
 	 * passthrough driver so the user will see something.
 	 */
 	if (!bootverbose)
 		xpt_for_all_devices(xptpassannouncefunc, NULL);
 
 	/* Release our hook so that the boot can continue. */
 	config_intrhook_disestablish(xsoftc.xpt_config_hook);
 	free(xsoftc.xpt_config_hook, M_CAMXPT);
 	xsoftc.xpt_config_hook = NULL;
 
 	free(context, M_CAMXPT);
 }
 
 /*
  * Register "cbfunc" (with argument "cbarg") for the async events in
  * "event" on "path".  When "path" is NULL a temporary wildcard XPT path is
  * created, locked for the duration of the request and freed afterwards.
  *
  * After a successful registration the callback is brought up to date:
  * with all existing devices if AC_FOUND_DEVICE is enabled, and with all
  * existing buses if AC_PATH_REGISTERED is enabled.
  *
  * Returns the status of the XPT_SASYNC_CB request, or the
  * xpt_create_path() status if the temporary path could not be created.
  */
 cam_status
 xpt_register_async(int event, ac_callback_t *cbfunc, void *cbarg,
 		   struct cam_path *path)
 {
 	struct ccb_setasync csa;
 	cam_status status;
 	int own_path;
 
 	own_path = (path == NULL);
 	if (own_path) {
 		status = xpt_create_path(&path, /*periph*/NULL, CAM_XPT_PATH_ID,
 					 CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD);
 		if (status != CAM_REQ_CMP)
 			return (status);
 		xpt_path_lock(path);
 	}
 
 	xpt_setup_ccb(&csa.ccb_h, path, CAM_PRIORITY_NORMAL);
 	csa.ccb_h.func_code = XPT_SASYNC_CB;
 	csa.event_enable = event;
 	csa.callback = cbfunc;
 	csa.callback_arg = cbarg;
 	xpt_action((union ccb *)&csa);
 	status = csa.ccb_h.status;
 
 	CAM_DEBUG(csa.ccb_h.path, CAM_DEBUG_TRACE,
 	    ("xpt_register_async: func %p\n", cbfunc));
 
 	/* The temporary path is needed for the request only. */
 	if (own_path) {
 		xpt_path_unlock(path);
 		xpt_free_path(path);
 	}
 
 	if (status != CAM_REQ_CMP)
 		return (status);
 
 	/*
 	 * Get this peripheral up to date with all
 	 * the currently existing devices.
 	 */
 	if (csa.event_enable & AC_FOUND_DEVICE)
 		xpt_for_all_devices(xptsetasyncfunc, &csa);
 
 	/*
 	 * Get this peripheral up to date with all
 	 * the currently existing buses.
 	 */
 	if (csa.event_enable & AC_PATH_REGISTERED)
 		xpt_for_all_busses(xptsetasyncbusfunc, &csa);
 
 	return (status);
 }
 
 /*
  * Action handler taking a SIM and a CCB.  Only XPT_PATH_INQ is answered:
  * the path inquiry is filled with fixed values plus the SIM's name, unit
  * number and bus id, and completed with CAM_REQ_CMP.  Every other
  * function code is completed with CAM_REQ_INVALID.
  */
 static void
 xptaction(struct cam_sim *sim, union ccb *work_ccb)
 {
 	struct ccb_pathinq *cpi;
 
 	CAM_DEBUG(work_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("xptaction\n"));
 
 	if (work_ccb->ccb_h.func_code != XPT_PATH_INQ) {
 		work_ccb->ccb_h.status = CAM_REQ_INVALID;
 		xpt_done(work_ccb);
 		return;
 	}
 
 	/* Path routing inquiry */
 	cpi = &work_ccb->cpi;
 	cpi->version_num = 1; /* XXX??? */
 	cpi->hba_inquiry = 0;
 	cpi->target_sprt = 0;
 	cpi->hba_misc = 0;
 	cpi->hba_eng_cnt = 0;
 	cpi->max_target = 0;
 	cpi->max_lun = 0;
 	cpi->initiator_id = 0;
 	strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 	strlcpy(cpi->hba_vid, "", HBA_IDLEN);
 	strlcpy(cpi->dev_name, sim->sim_name, DEV_IDLEN);
 	cpi->unit_number = sim->unit_number;
 	cpi->bus_id = sim->bus_id;
 	cpi->base_transfer_speed = 0;
 	cpi->protocol = PROTO_UNSPECIFIED;
 	cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
 	cpi->transport = XPORT_UNSPECIFIED;
 	cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
 	cpi->ccb_h.status = CAM_REQ_CMP;
 	xpt_done(work_ccb);
 }
 
 /*
  * The xpt as a "controller" has no interrupt sources, so polling
  * is a no-op.  The "sim" argument is unused.
  */
 static void
 xptpoll(struct cam_sim *sim)
 {
 }
 
 /* Acquire the XPT topology lock (xsoftc.xpt_topo_lock). */
 void
 xpt_lock_buses(void)
 {
 	mtx_lock(&xsoftc.xpt_topo_lock);
 }
 
 /* Release the XPT topology lock (xsoftc.xpt_topo_lock). */
 void
 xpt_unlock_buses(void)
 {
 	mtx_unlock(&xsoftc.xpt_topo_lock);
 }
 
 /*
  * Return the mutex associated with a path, which is the device_mtx of the
  * path's device.
  */
 struct mtx *
 xpt_path_mtx(struct cam_path *path)
 {
 
 	return (&path->device->device_mtx);
 }
 
 /*
  * Finish processing of a completed CCB and deliver it to its owner.
  *
  * In order: high-power accounting is undone for CAM_HIGH_POWER CCBs (which
  * may release a device waiting on the high-power queue); SIM and device
  * queue releases requested through the CCB status are performed; for CCBs
  * without XPT_FC_USER_CCB the devq and device CCB-queue accounting is
  * updated and the send queue is run; finally the CCB's completion callback
  * (cbfcnp) is called.  The callback runs with the path mutex held unless
  * the CCB has CAM_UNLOCKED set.
  */
 static void
 xpt_done_process(struct ccb_hdr *ccb_h)
 {
 	struct cam_sim *sim = NULL;
 	struct cam_devq *devq = NULL;
 	/* Path mutex currently held by this function, or NULL if none. */
 	struct mtx *mtx = NULL;
 
 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
 	struct ccb_scsiio *csio;
 
 	if (ccb_h->func_code == XPT_SCSI_IO) {
 		csio = &((union ccb *)ccb_h)->csio;
 		if (csio->bio != NULL)
 			biotrack(csio->bio, __func__);
 	}
 #endif
 
 	if (ccb_h->flags & CAM_HIGH_POWER) {
 		struct highpowerlist	*hphead;
 		struct cam_ed		*device;
 
 		mtx_lock(&xsoftc.xpt_highpower_lock);
 		hphead = &xsoftc.highpowerq;
 
 		device = STAILQ_FIRST(hphead);
 
 		/*
 		 * Increment the count since this command is done.
 		 */
 		xsoftc.num_highpower++;
 
 		/*
 		 * Any high powered commands queued up?
 		 */
 		if (device != NULL) {
 
 			STAILQ_REMOVE_HEAD(hphead, highpowerq_entry);
 			mtx_unlock(&xsoftc.xpt_highpower_lock);
 
 			/* Drop one freeze on the device that was waiting. */
 			mtx_lock(&device->sim->devq->send_mtx);
 			xpt_release_devq_device(device,
 					 /*count*/1, /*runqueue*/TRUE);
 			mtx_unlock(&device->sim->devq->send_mtx);
 		} else
 			mtx_unlock(&xsoftc.xpt_highpower_lock);
 	}
 
 	/*
 	 * Insulate against a race where the periph is destroyed
 	 * but CCBs are still not all processed.
 	 */
 	if (ccb_h->path->bus)
 		sim = ccb_h->path->bus->sim;
 
 	/* The CCB status asks for one SIM queue freeze to be released. */
 	if (ccb_h->status & CAM_RELEASE_SIMQ) {
 		KASSERT(sim, ("sim missing for CAM_RELEASE_SIMQ request"));
 		xpt_release_simq(sim, /*run_queue*/FALSE);
 		ccb_h->status &= ~CAM_RELEASE_SIMQ;
 	}
 
 	/*
 	 * The CCB was issued with CAM_DEV_QFRZDIS and completed with
 	 * CAM_DEV_QFRZN: release one device queue freeze here and clear the
 	 * status bit.
 	 */
 	if ((ccb_h->flags & CAM_DEV_QFRZDIS)
 	 && (ccb_h->status & CAM_DEV_QFRZN)) {
 		xpt_release_devq(ccb_h->path, /*count*/1, /*run_queue*/TRUE);
 		ccb_h->status &= ~CAM_DEV_QFRZN;
 	}
 
 	if ((ccb_h->func_code & XPT_FC_USER_CCB) == 0) {
 		struct cam_ed *dev = ccb_h->path->device;
 
 		if (sim)
 			devq = sim->devq;
 		KASSERT(devq, ("sim missing for XPT_FC_USER_CCB request"));
 
 		/* Return this CCB's slot in the devq and the device queue. */
 		mtx_lock(&devq->send_mtx);
 		devq->send_active--;
 		devq->send_openings++;
 		cam_ccbq_ccb_done(&dev->ccbq, (union ccb *)ccb_h);
 
 		/* Deferred release waiting for the device to go idle. */
 		if (((dev->flags & CAM_DEV_REL_ON_QUEUE_EMPTY) != 0
 		  && (dev->ccbq.dev_active == 0))) {
 			dev->flags &= ~CAM_DEV_REL_ON_QUEUE_EMPTY;
 			xpt_release_devq_device(dev, /*count*/1,
 					 /*run_queue*/FALSE);
 		}
 
 		/*
 		 * Deferred release waiting for a completion; a CCB that
 		 * completed with CAM_REQUEUE_REQ does not count.
 		 */
 		if (((dev->flags & CAM_DEV_REL_ON_COMPLETE) != 0
 		  && (ccb_h->status&CAM_STATUS_MASK) != CAM_REQUEUE_REQ)) {
 			dev->flags &= ~CAM_DEV_REL_ON_COMPLETE;
 			xpt_release_devq_device(dev, /*count*/1,
 					 /*run_queue*/FALSE);
 		}
 
 		if (!device_is_queued(dev))
 			(void)xpt_schedule_devq(devq, dev);
 		xpt_run_devq(devq);
 		mtx_unlock(&devq->send_mtx);
 
 		if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0) {
 			mtx = xpt_path_mtx(ccb_h->path);
 			mtx_lock(mtx);
 
 			/* Re-check the flag now that the path mutex is held. */
 			if ((dev->flags & CAM_DEV_TAG_AFTER_COUNT) != 0
 			 && (--dev->tag_delay_count == 0))
 				xpt_start_tags(ccb_h->path);
 		}
 	}
 
 	/*
 	 * Make the lock state match what the callback expects: the path
 	 * mutex held unless CAM_UNLOCKED is set, in which case a mutex
 	 * taken above is dropped again.
 	 */
 	if ((ccb_h->flags & CAM_UNLOCKED) == 0) {
 		if (mtx == NULL) {
 			mtx = xpt_path_mtx(ccb_h->path);
 			mtx_lock(mtx);
 		}
 	} else {
 		if (mtx != NULL) {
 			mtx_unlock(mtx);
 			mtx = NULL;
 		}
 	}
 
 	/* Call the peripheral driver's callback */
 	ccb_h->pinfo.index = CAM_UNQUEUED_INDEX;
 	(*ccb_h->cbfcnp)(ccb_h->path->periph, (union ccb *)ccb_h);
 	if (mtx != NULL)
 		mtx_unlock(mtx);
 }
 
 void
 xpt_done_td(void *arg)
 {
 	struct cam_doneq *queue = arg;
 	struct ccb_hdr *ccb_h;
 	STAILQ_HEAD(, ccb_hdr)	doneq;
 
 	STAILQ_INIT(&doneq);
 	mtx_lock(&queue->cam_doneq_mtx);
 	while (1) {
 		while (STAILQ_EMPTY(&queue->cam_doneq)) {
 			queue->cam_doneq_sleep = 1;
 			msleep(&queue->cam_doneq, &queue->cam_doneq_mtx,
 			    PRIBIO, "-", 0);
 			queue->cam_doneq_sleep = 0;
 		}
 		STAILQ_CONCAT(&doneq, &queue->cam_doneq);
 		mtx_unlock(&queue->cam_doneq_mtx);
 
 		THREAD_NO_SLEEPING();
 		while ((ccb_h = STAILQ_FIRST(&doneq)) != NULL) {
 			STAILQ_REMOVE_HEAD(&doneq, sim_links.stqe);
 			xpt_done_process(ccb_h);
 		}
 		THREAD_SLEEPING_OK();
 
 		mtx_lock(&queue->cam_doneq_mtx);
 	}
 }
 
 /*
  * Drain every done queue in the calling context.  CCBs are removed one at
  * a time, and the queue mutex is released around each xpt_done_process()
  * call.
  */
 static void
 camisr_runqueue(void)
 {
 	struct	ccb_hdr *ccb_h;
 	struct cam_doneq *dq;
 	int i;
 
 	/* Process global queues. */
 	for (i = 0; i < cam_num_doneqs; i++) {
 		dq = &cam_doneqs[i];
 		mtx_lock(&dq->cam_doneq_mtx);
 		for (;;) {
 			ccb_h = STAILQ_FIRST(&dq->cam_doneq);
 			if (ccb_h == NULL)
 				break;
 			STAILQ_REMOVE_HEAD(&dq->cam_doneq, sim_links.stqe);
 
 			/* Do not hold the queue mutex across processing. */
 			mtx_unlock(&dq->cam_doneq_mtx);
 			xpt_done_process(ccb_h);
 			mtx_lock(&dq->cam_doneq_mtx);
 		}
 		mtx_unlock(&dq->cam_doneq_mtx);
 	}
 }
 
 /* One value-to-name mapping, used by the XPT function code name table. */
 struct kv 
 {
 	uint32_t v;		/* Numeric value to match. */
 	const char *name;	/* Printable name; NULL ends a table. */
 };
 
 /*
  * XPT function codes and their printable names, searched linearly by
  * xpt_action_name().  The table ends at the entry whose name is NULL.
  */
 static struct kv map[] = {
 	{ XPT_NOOP, "XPT_NOOP" },
 	{ XPT_SCSI_IO, "XPT_SCSI_IO" },
 	{ XPT_GDEV_TYPE, "XPT_GDEV_TYPE" },
 	{ XPT_GDEVLIST, "XPT_GDEVLIST" },
 	{ XPT_PATH_INQ, "XPT_PATH_INQ" },
 	{ XPT_REL_SIMQ, "XPT_REL_SIMQ" },
 	{ XPT_SASYNC_CB, "XPT_SASYNC_CB" },
 	{ XPT_SDEV_TYPE, "XPT_SDEV_TYPE" },
 	{ XPT_SCAN_BUS, "XPT_SCAN_BUS" },
 	{ XPT_DEV_MATCH, "XPT_DEV_MATCH" },
 	{ XPT_DEBUG, "XPT_DEBUG" },
 	{ XPT_PATH_STATS, "XPT_PATH_STATS" },
 	{ XPT_GDEV_STATS, "XPT_GDEV_STATS" },
 	{ XPT_DEV_ADVINFO, "XPT_DEV_ADVINFO" },
 	{ XPT_ASYNC, "XPT_ASYNC" },
 	{ XPT_ABORT, "XPT_ABORT" },
 	{ XPT_RESET_BUS, "XPT_RESET_BUS" },
 	{ XPT_RESET_DEV, "XPT_RESET_DEV" },
 	{ XPT_TERM_IO, "XPT_TERM_IO" },
 	{ XPT_SCAN_LUN, "XPT_SCAN_LUN" },
 	{ XPT_GET_TRAN_SETTINGS, "XPT_GET_TRAN_SETTINGS" },
 	{ XPT_SET_TRAN_SETTINGS, "XPT_SET_TRAN_SETTINGS" },
 	{ XPT_CALC_GEOMETRY, "XPT_CALC_GEOMETRY" },
 	{ XPT_ATA_IO, "XPT_ATA_IO" },
 	{ XPT_GET_SIM_KNOB, "XPT_GET_SIM_KNOB" },
 	{ XPT_SET_SIM_KNOB, "XPT_SET_SIM_KNOB" },
 	{ XPT_NVME_IO, "XPT_NVME_IO" },
 	{ XPT_MMC_IO, "XPT_MMC_IO" },
 	{ XPT_SMP_IO, "XPT_SMP_IO" },
 	{ XPT_SCAN_TGT, "XPT_SCAN_TGT" },
 	{ XPT_NVME_ADMIN, "XPT_NVME_ADMIN" },
 	{ XPT_ENG_INQ, "XPT_ENG_INQ" },
 	{ XPT_ENG_EXEC, "XPT_ENG_EXEC" },
 	{ XPT_EN_LUN, "XPT_EN_LUN" },
 	{ XPT_TARGET_IO, "XPT_TARGET_IO" },
 	{ XPT_ACCEPT_TARGET_IO, "XPT_ACCEPT_TARGET_IO" },
 	{ XPT_CONT_TARGET_IO, "XPT_CONT_TARGET_IO" },
 	{ XPT_IMMED_NOTIFY, "XPT_IMMED_NOTIFY" },
 	{ XPT_NOTIFY_ACK, "XPT_NOTIFY_ACK" },
 	{ XPT_IMMEDIATE_NOTIFY, "XPT_IMMEDIATE_NOTIFY" },
 	{ XPT_NOTIFY_ACKNOWLEDGE, "XPT_NOTIFY_ACKNOWLEDGE" },
 	/* Terminator: xpt_action_name() stops at the NULL name. */
 	{ 0, 0 }
 };
 
 /*
  * Translate an XPT function code into its name from the map table.  A
  * code that is not in the table is formatted in hexadecimal into a static
  * buffer shared by all callers, so that fallback result is racy and is
  * overwritten by the next unknown code.
  */
 const char *
 xpt_action_name(uint32_t action) 
 {
 	static char buffer[32];	/* Only for unknown messages -- racy */
 	struct kv *entry;
 
 	for (entry = map; entry->name != NULL; entry++) {
 		if (entry->v == action)
 			return (entry->name);
 	}
 
 	snprintf(buffer, sizeof(buffer), "%#x", action);
 	return (buffer);
 }
Index: head/sys/cam/cam_xpt_internal.h
===================================================================
--- head/sys/cam/cam_xpt_internal.h	(revision 334199)
+++ head/sys/cam/cam_xpt_internal.h	(revision 334200)
@@ -1,219 +1,219 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright 2009 Scott Long
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _CAM_CAM_XPT_INTERNAL_H
 #define _CAM_CAM_XPT_INTERNAL_H 1
 
 #include <sys/taskqueue.h>
 
 /* Forward Declarations */
 struct cam_eb;
 struct cam_et;
 struct cam_ed;
 
 typedef struct cam_ed * (*xpt_alloc_device_func)(struct cam_eb *bus,
 					         struct cam_et *target,
 					         lun_id_t lun_id);
 typedef void (*xpt_release_device_func)(struct cam_ed *device);
 typedef void (*xpt_action_func)(union ccb *start_ccb);
 typedef void (*xpt_dev_async_func)(u_int32_t async_code,
 				   struct cam_eb *bus,
 				   struct cam_et *target,
 				   struct cam_ed *device,
 				   void *async_arg);
 typedef void (*xpt_announce_periph_func)(struct cam_periph *periph);
 typedef void (*xpt_announce_periph_sbuf_func)(struct cam_periph *periph, struct sbuf *sbuf);
 
 /*
  * Operations table of a transport; referenced from struct xpt_xport.
  * Each member is a function pointer of the matching typedef declared
  * above.
  */
 struct xpt_xport_ops {
 	xpt_alloc_device_func	alloc_device;
 	xpt_release_device_func	reldev;
 	xpt_action_func		action;
 	xpt_dev_async_func	async;
 	xpt_announce_periph_func announce;
 	xpt_announce_periph_sbuf_func announce_sbuf;
 };
 
 /*
  * Transport descriptor: a cam_xport value, a name and the operations
  * table.  Descriptors are collected in the cam_xpt_xport_set linker set
  * (see CAM_XPT_XPORT below).
  */
 struct xpt_xport {
 	cam_xport		xport;
 	const char		*name;
 	struct xpt_xport_ops	*ops;
 };
 
 SET_DECLARE(cam_xpt_xport_set, struct xpt_xport);
 #define CAM_XPT_XPORT(data) 				\
 	DATA_SET(cam_xpt_xport_set, data)
 
 typedef void (*xpt_proto_announce_func)(struct cam_ed *);
 typedef void (*xpt_proto_announce_sbuf_func)(struct cam_ed *, struct sbuf *);
 typedef void (*xpt_proto_debug_out_func)(union ccb *);
 
 /*
  * Operations table of a protocol; referenced from struct xpt_proto.
  * announce/denounce take the device only, the *_sbuf variants also take
  * an sbuf, and debug_out takes a CCB.
  */
 struct xpt_proto_ops {
 	xpt_proto_announce_func	announce;
 	xpt_proto_announce_sbuf_func	announce_sbuf;
 	xpt_proto_announce_func	denounce;
 	xpt_proto_announce_sbuf_func	denounce_sbuf;
 	xpt_proto_debug_out_func debug_out;
 };
 
 /*
  * Protocol descriptor: a cam_proto value, a name and the operations
  * table.  Descriptors are collected in the cam_xpt_proto_set linker set
  * (see CAM_XPT_PROTO below).
  */
 struct xpt_proto {
 	cam_proto		proto;
 	const char		*name;
 	struct xpt_proto_ops	*ops;
 };
 
 SET_DECLARE(cam_xpt_proto_set, struct xpt_proto);
 #define CAM_XPT_PROTO(data) 				\
 	DATA_SET(cam_xpt_proto_set, data)
 
 
 /*
  * The CAM EDT (Existing Device Table) contains the device information for
  * all devices for all buses in the system.  The table contains a
  * cam_ed structure for each device on the bus.
  */
 struct cam_ed {
 	cam_pinfo	 devq_entry;
 	TAILQ_ENTRY(cam_ed) links;
 	struct	cam_et	 *target;
 	struct	cam_sim  *sim;
 	lun_id_t	 lun_id;
 	struct	cam_ccbq ccbq;		/* Queue of pending ccbs */
 	struct	async_list asyncs;	/* Async callback info for this B/T/L */
 	struct	periph_list periphs;	/* All attached devices */
 	u_int	generation;		/* Generation number */
 	void		 *quirk;	/* Oddities about this device */
 	u_int		 maxtags;
 	u_int		 mintags;
 	cam_proto	 protocol;
 	u_int		 protocol_version;
 	cam_xport	 transport;
 	u_int		 transport_version;
 	struct		 scsi_inquiry_data inq_data;
 	uint8_t		 *supported_vpds;
 	uint8_t		 supported_vpds_len;
 	uint32_t	 device_id_len;
 	uint8_t		 *device_id;
 	uint32_t	 ext_inq_len;
 	uint8_t		 *ext_inq;
 	uint8_t		 physpath_len;
 	uint8_t		 *physpath;	/* physical path string form */
 	uint32_t	 rcap_len;
 	uint8_t		 *rcap_buf;
 	struct		 ata_params ident_data;
         struct		 mmc_params mmc_ident_data;
 	u_int8_t	 inq_flags;	/*
 					 * Current settings for inquiry flags.
 					 * This allows us to override settings
 					 * like disconnection and tagged
 					 * queuing for a device.
 					 */
 	u_int8_t	 queue_flags;	/* Queue flags from the control page */
 	u_int8_t	 serial_num_len;
 	u_int8_t	*serial_num;
 	u_int32_t	 flags;
 #define CAM_DEV_UNCONFIGURED	 	0x01
 #define CAM_DEV_REL_TIMEOUT_PENDING	0x02
 #define CAM_DEV_REL_ON_COMPLETE		0x04
 #define CAM_DEV_REL_ON_QUEUE_EMPTY	0x08
 #define CAM_DEV_TAG_AFTER_COUNT		0x20
 #define CAM_DEV_INQUIRY_DATA_VALID	0x40
 #define	CAM_DEV_IN_DV			0x80
 #define	CAM_DEV_DV_HIT_BOTTOM		0x100
 #define CAM_DEV_IDENTIFY_DATA_VALID	0x200
 	u_int32_t	 tag_delay_count;
 #define	CAM_TAG_DELAY_COUNT		5
 	u_int32_t	 tag_saved_openings;
 	/* Reference count; see xpt_acquire_device()/xpt_release_device(). */
 	u_int32_t	 refcount;
 	struct callout	 callout;
 	STAILQ_ENTRY(cam_ed) highpowerq_entry;
 	struct mtx	 device_mtx;
 	/* Enqueued by xpt_release_device() for the final teardown. */
 	struct task	 device_destroy_task;
-	const struct	 nvme_controller_data *nvme_cdata;
-	const struct	 nvme_namespace_data *nvme_data;
+	struct nvme_controller_data *nvme_cdata;
+	struct nvme_namespace_data *nvme_data;
 };
 
 /*
  * Each target is represented by an ET (Existing Target).  These
  * entries are created when a target is successfully probed with an
  * identify, and removed when a device fails to respond after a number
  * of retries, or a bus rescan finds the device missing.
  */
 struct cam_et {
 	TAILQ_HEAD(, cam_ed) ed_entries;
 	TAILQ_ENTRY(cam_et) links;
 	struct	cam_eb	*bus;
 	target_id_t	target_id;
 	/* Reference count; see xpt_acquire_target()/xpt_release_target(). */
 	u_int32_t	refcount;
 	u_int		generation;
 	struct		timeval last_reset;
 	u_int		rpl_size;
 	struct scsi_report_luns_data *luns;
 	struct mtx	luns_mtx;	/* Protection for luns field. */
 };
 
 /*
  * Each bus is represented by an EB (Existing Bus).  These entries
  * are created by calls to xpt_bus_register and deleted by calls to
  * xpt_bus_deregister.
  */
 struct cam_eb {
 	TAILQ_HEAD(, cam_et) et_entries;
 	TAILQ_ENTRY(cam_eb)  links;
 	path_id_t	     path_id;
 	struct cam_sim	     *sim;
 	struct timeval	     last_reset;
 	u_int32_t	     flags;
 #define	CAM_EB_RUNQ_SCHEDULED	0x01
 	/* Reference count; see xpt_acquire_bus()/xpt_release_bus(). */
 	u_int32_t	     refcount;
 	u_int		     generation;
 	device_t	     parent_dev;
 	struct xpt_xport     *xport;
 	struct mtx	     eb_mtx;	/* Bus topology mutex. */
 };
 
 /*
  * A path groups a peripheral pointer with pointers to a bus, a target and
  * a device.
  */
 struct cam_path {
 	struct cam_periph *periph;
 	struct cam_eb	  *bus;
 	struct cam_et	  *target;
 	struct cam_ed	  *device;
 };
 
 struct cam_ed *		xpt_alloc_device(struct cam_eb *bus,
 					 struct cam_et *target,
 					 lun_id_t lun_id);
 void			xpt_acquire_device(struct cam_ed *device);
 void			xpt_release_device(struct cam_ed *device);
 u_int32_t		xpt_dev_ccbq_resize(struct cam_path *path, int newopenings);
 void			xpt_start_tags(struct cam_path *path);
 void			xpt_stop_tags(struct cam_path *path);
 
 MALLOC_DECLARE(M_CAMXPT);
 
 #endif
Index: head/sys/cam/nvme/nvme_xpt.c
===================================================================
--- head/sys/cam/nvme/nvme_xpt.c	(revision 334199)
+++ head/sys/cam/nvme/nvme_xpt.c	(revision 334200)
@@ -1,671 +1,780 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2015 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * derived from ata_xpt.c: Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/systm.h>
 #include <sys/types.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/interrupt.h>
 #include <sys/sbuf.h>
 
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_queue.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_xpt_periph.h>
 #include <cam/cam_xpt_internal.h>
 #include <cam/cam_debug.h>
 
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 #include <cam/nvme/nvme_all.h>
 #include <machine/stdarg.h>	/* for xpt_print below */
 #include "opt_cam.h"
 
 struct nvme_quirk_entry {
 	u_int quirks;
 #define CAM_QUIRK_MAXTAGS 1
 	u_int mintags;
 	u_int maxtags;
 };
 
 /* Not even sure why we need this */
 static periph_init_t nvme_probe_periph_init;
 
 static struct periph_driver nvme_probe_driver =
 {
 	nvme_probe_periph_init, "nvme_probe",
 	TAILQ_HEAD_INITIALIZER(nvme_probe_driver.units), /* generation */ 0,
 	CAM_PERIPH_DRV_EARLY
 };
 
 PERIPHDRIVER_DECLARE(nvme_probe, nvme_probe_driver);
 
 typedef enum {
-	NVME_PROBE_IDENTIFY,
+	NVME_PROBE_IDENTIFY_CD,
+	NVME_PROBE_IDENTIFY_NS,
 	NVME_PROBE_DONE,
-	NVME_PROBE_INVALID,
-	NVME_PROBE_RESET
+	NVME_PROBE_INVALID
 } nvme_probe_action;
 
 static char *nvme_probe_action_text[] = {
-	"NVME_PROBE_IDENTIFY",
+	"NVME_PROBE_IDENTIFY_CD",
+	"NVME_PROBE_IDENTIFY_NS",
 	"NVME_PROBE_DONE",
-	"NVME_PROBE_INVALID",
-	"NVME_PROBE_RESET",
+	"NVME_PROBE_INVALID"
 };
 
 #define NVME_PROBE_SET_ACTION(softc, newaction)	\
 do {									\
 	char **text;							\
 	text = nvme_probe_action_text;					\
 	CAM_DEBUG((softc)->periph->path, CAM_DEBUG_PROBE,		\
 	    ("Probe %s to %s\n", text[(softc)->action],			\
 	    text[(newaction)]));					\
 	(softc)->action = (newaction);					\
 } while(0)
 
 typedef enum {
 	NVME_PROBE_NO_ANNOUNCE	= 0x04
 } nvme_probe_flags;
 
 typedef struct {
 	TAILQ_HEAD(, ccb_hdr) request_ccbs;
+	union {
+		struct nvme_controller_data	cd;
+		struct nvme_namespace_data	ns;
+	};
 	nvme_probe_action	action;
 	nvme_probe_flags	flags;
 	int		restart;
 	struct cam_periph *periph;
 } nvme_probe_softc;
 
 static struct nvme_quirk_entry nvme_quirk_table[] =
 {
 	{
 //		{
 //		  T_ANY, SIP_MEDIA_REMOVABLE|SIP_MEDIA_FIXED,
 //		  /*vendor*/"*", /*product*/"*", /*revision*/"*"
 //		},
 		.quirks = 0, .mintags = 0, .maxtags = 0
 	},
 };
 
 static const int nvme_quirk_table_size =
 	sizeof(nvme_quirk_table) / sizeof(*nvme_quirk_table);
 
 static cam_status	nvme_probe_register(struct cam_periph *periph,
 				      void *arg);
 static void	 nvme_probe_schedule(struct cam_periph *nvme_probe_periph);
 static void	 nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb);
+static void	 nvme_probe_done(struct cam_periph *periph, union ccb *done_ccb);
 static void	 nvme_probe_cleanup(struct cam_periph *periph);
 //static void	 nvme_find_quirk(struct cam_ed *device);
 static void	 nvme_scan_lun(struct cam_periph *periph,
 			       struct cam_path *path, cam_flags flags,
 			       union ccb *ccb);
 static struct cam_ed *
 		 nvme_alloc_device(struct cam_eb *bus, struct cam_et *target,
 				   lun_id_t lun_id);
 static void	 nvme_device_transport(struct cam_path *path);
 static void	 nvme_dev_async(u_int32_t async_code,
 				struct cam_eb *bus,
 				struct cam_et *target,
 				struct cam_ed *device,
 				void *async_arg);
 static void	 nvme_action(union ccb *start_ccb);
 static void	 nvme_announce_periph(struct cam_periph *periph);
 static void	 nvme_proto_announce(struct cam_ed *device);
 static void	 nvme_proto_denounce(struct cam_ed *device);
 static void	 nvme_proto_debug_out(union ccb *ccb);
 
 static struct xpt_xport_ops nvme_xport_ops = {
 	.alloc_device = nvme_alloc_device,
 	.action = nvme_action,
 	.async = nvme_dev_async,
 	.announce = nvme_announce_periph,
 };
 #define NVME_XPT_XPORT(x, X)			\
 static struct xpt_xport nvme_xport_ ## x = {	\
 	.xport = XPORT_ ## X,			\
 	.name = #x,				\
 	.ops = &nvme_xport_ops,			\
 };						\
 CAM_XPT_XPORT(nvme_xport_ ## x);
 
 NVME_XPT_XPORT(nvme, NVME);
 
 #undef NVME_XPT_XPORT
 
 static struct xpt_proto_ops nvme_proto_ops = {
 	.announce = nvme_proto_announce,
 	.denounce = nvme_proto_denounce,
 	.debug_out = nvme_proto_debug_out,
 };
 static struct xpt_proto nvme_proto = {
 	.proto = PROTO_NVME,
 	.name = "nvme",
 	.ops = &nvme_proto_ops,
 };
 CAM_XPT_PROTO(nvme_proto);
 
 static void
 nvme_probe_periph_init()
 {
 
 }
 
 static cam_status
 nvme_probe_register(struct cam_periph *periph, void *arg)
 {
 	union ccb *request_ccb;	/* CCB representing the probe request */
 	nvme_probe_softc *softc;
 
 	request_ccb = (union ccb *)arg;
 	if (request_ccb == NULL) {
 		printf("nvme_probe_register: no probe CCB, "
 		       "can't register device\n");
 		return(CAM_REQ_CMP_ERR);
 	}
 
 	softc = (nvme_probe_softc *)malloc(sizeof(*softc), M_CAMXPT, M_ZERO | M_NOWAIT);
 
 	if (softc == NULL) {
 		printf("nvme_probe_register: Unable to probe new device. "
 		       "Unable to allocate softc\n");
 		return(CAM_REQ_CMP_ERR);
 	}
 	TAILQ_INIT(&softc->request_ccbs);
 	TAILQ_INSERT_TAIL(&softc->request_ccbs, &request_ccb->ccb_h,
 			  periph_links.tqe);
 	softc->flags = 0;
 	periph->softc = softc;
 	softc->periph = periph;
 	softc->action = NVME_PROBE_INVALID;
 	if (cam_periph_acquire(periph) != 0)
 		return (CAM_REQ_CMP_ERR);
 
 	CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe started\n"));
 
 //	nvme_device_transport(periph->path);
 	nvme_probe_schedule(periph);
 
 	return(CAM_REQ_CMP);
 }
 
 static void
 nvme_probe_schedule(struct cam_periph *periph)
 {
 	union ccb *ccb;
 	nvme_probe_softc *softc;
 
 	softc = (nvme_probe_softc *)periph->softc;
 	ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs);
 
-	NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY);
+	NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY_CD);
 
 	if (ccb->crcn.flags & CAM_EXPECT_INQ_CHANGE)
 		softc->flags |= NVME_PROBE_NO_ANNOUNCE;
 	else
 		softc->flags &= ~NVME_PROBE_NO_ANNOUNCE;
 
 	xpt_schedule(periph, CAM_PRIORITY_XPT);
 }
 
 static void
 nvme_probe_start(struct cam_periph *periph, union ccb *start_ccb)
 {
 	struct ccb_nvmeio *nvmeio;
-	struct ccb_scsiio *csio;
 	nvme_probe_softc *softc;
 	struct cam_path *path;
-	const struct nvme_namespace_data *nvme_data;
 	lun_id_t lun;
 
 	CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("nvme_probe_start\n"));
 
 	softc = (nvme_probe_softc *)periph->softc;
 	path = start_ccb->ccb_h.path;
 	nvmeio = &start_ccb->nvmeio;
-	csio = &start_ccb->csio;
-	nvme_data = periph->path->device->nvme_data;
+	lun = xpt_path_lun_id(periph->path);	/* passed to IDENTIFY_NS below */
 
 	if (softc->restart) {
 		softc->restart = 0;
-		if (periph->path->device->flags & CAM_DEV_UNCONFIGURED)
-			NVME_PROBE_SET_ACTION(softc, NVME_PROBE_RESET);
-		else
-			NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY);
+		NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY_CD); /* start over */
 	}
 
-	/*
-	 * Other transports have to ask their SIM to do a lot of action.
-	 * NVMe doesn't, so don't do the dance. Just do things
-	 * directly.
-	 */
 	switch (softc->action) {
-	case NVME_PROBE_RESET:
-		/* FALLTHROUGH */
-	case NVME_PROBE_IDENTIFY:
-		nvme_device_transport(path);
+	case NVME_PROBE_IDENTIFY_CD:
+		cam_fill_nvmeadmin(nvmeio,
+		    0,			/* retries */
+		    nvme_probe_done,	/* cbfcnp */
+		    CAM_DIR_IN,		/* flags */
+		    (uint8_t *)&softc->cd,	/* data_ptr */
+		    sizeof(softc->cd),		/* dxfer_len */
+		    30 * 1000); /* timeout 30s */
+		nvme_ns_cmd(nvmeio, NVME_OPC_IDENTIFY, 0,
+		    1, 0, 0, 0, 0, 0);	/* 1: presumably CNS=controller; confirm */
+		break;
+	case NVME_PROBE_IDENTIFY_NS:
+		cam_fill_nvmeadmin(nvmeio,
+		    0,			/* retries */
+		    nvme_probe_done,	/* cbfcnp */
+		    CAM_DIR_IN,		/* flags */
+		    (uint8_t *)&softc->ns,	/* data_ptr */
+		    sizeof(softc->ns),		/* dxfer_len */
+		    30 * 1000); /* timeout 30s */
+		nvme_ns_cmd(nvmeio, NVME_OPC_IDENTIFY, lun,
+		    0, 0, 0, 0, 0, 0);	/* 0: presumably CNS=namespace; confirm */
+		break;
+	default:
+		panic("nvme_probe_start: invalid action state 0x%x\n", softc->action);
+	}
+	start_ccb->ccb_h.flags |= CAM_DEV_QFREEZE; /* dropped in nvme_probe_done */
+	xpt_action(start_ccb);
+}
+
+static void
+nvme_probe_done(struct cam_periph *periph, union ccb *done_ccb)
+{
+	struct nvme_namespace_data *nvme_data;
+	struct nvme_controller_data *nvme_cdata;
+	nvme_probe_softc *softc;
+	struct cam_path *path;
+	cam_status status;
+	u_int32_t  priority;
+	int found = 1;	/* cleared on failure; picks final CCB status */
+
+	CAM_DEBUG(done_ccb->ccb_h.path, CAM_DEBUG_TRACE, ("nvme_probe_done\n"));
+
+	softc = (nvme_probe_softc *)periph->softc;
+	path = done_ccb->ccb_h.path;
+	priority = done_ccb->ccb_h.pinfo.priority;
+
+	if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+		if (cam_periph_error(done_ccb,
+			0, softc->restart ? (SF_NO_RECOVERY | SF_NO_RETRY) : 0
+		    ) == ERESTART) {
+out:
+			/* Drop freeze taken due to CAM_DEV_QFREEZE flag set. */
+			cam_release_devq(path, 0, 0, 0, FALSE);
+			return;
+		}
+		if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) {
+			/* Don't wedge the queue */
+			xpt_release_devq(path, /*count*/1, /*run_queue*/TRUE);
+		}
+		status = done_ccb->ccb_h.status & CAM_STATUS_MASK; /* XXX unused */
+
 		/*
-		 * Test for lun == CAM_LUN_WILDCARD is lame, but
-		 * appears to be necessary here. XXX
+		 * If we get to this point, we got an error status back
+		 * from the identify and the error status doesn't require
+		 * automatically retrying the command.  Therefore, the
+		 * identify failed.  If we had identify data before
+		 * for this device, but this latest identify command failed,
+		 * the device has probably gone away.  If this device isn't
+		 * already marked unconfigured, notify the peripheral
+		 * drivers that this device is no more.
 		 */
-		lun = xpt_path_lun_id(periph->path);
-		if (lun == CAM_LUN_WILDCARD ||
-		    periph->path->device->flags & CAM_DEV_UNCONFIGURED) {
+device_fail:	if ((path->device->flags & CAM_DEV_UNCONFIGURED) == 0)
+			xpt_async(AC_LOST_DEVICE, path, NULL);
+		NVME_PROBE_SET_ACTION(softc, NVME_PROBE_INVALID);
+		found = 0;
+		goto done;
+	}
+	if (softc->restart)	/* rescan queued: drop this result, re-probe */
+		goto done;
+	switch (softc->action) {
+	case NVME_PROBE_IDENTIFY_CD:
+		nvme_controller_data_swapbytes(&softc->cd);
+
+		nvme_cdata = path->device->nvme_cdata;
+		if (nvme_cdata == NULL) {
+			nvme_cdata = malloc(sizeof(*nvme_cdata), M_CAMXPT,
+			    M_NOWAIT);
+			if (nvme_cdata == NULL) {
+				xpt_print(path, "Can't allocate memory\n");
+				goto device_fail;
+			}
+		}
+		bcopy(&softc->cd, nvme_cdata, sizeof(*nvme_cdata));
+		path->device->nvme_cdata = nvme_cdata;
+
+//		nvme_find_quirk(path->device);
+		nvme_device_transport(path);
+		NVME_PROBE_SET_ACTION(softc, NVME_PROBE_IDENTIFY_NS);
+		xpt_release_ccb(done_ccb);
+		xpt_schedule(periph, priority);
+		goto out;
+	case NVME_PROBE_IDENTIFY_NS:
+		nvme_namespace_data_swapbytes(&softc->ns);
+
+		/* Check that the namespace exists. */
+		if (softc->ns.nsze == 0)
+			goto device_fail;
+
+		nvme_data = path->device->nvme_data;
+		if (nvme_data == NULL) {
+			nvme_data = malloc(sizeof(*nvme_data), M_CAMXPT,
+			    M_NOWAIT);
+			if (nvme_data == NULL) {
+				xpt_print(path, "Can't allocate memory\n");
+				goto device_fail;
+			}
+		}
+		bcopy(&softc->ns, nvme_data, sizeof(*nvme_data));
+		path->device->nvme_data = nvme_data;
+
+		if (periph->path->device->flags & CAM_DEV_UNCONFIGURED) {
 			path->device->flags &= ~CAM_DEV_UNCONFIGURED;
 			xpt_acquire_device(path->device);
-			start_ccb->ccb_h.func_code = XPT_GDEV_TYPE;
-			xpt_action(start_ccb);
-			xpt_async(AC_FOUND_DEVICE, path, start_ccb);
+			done_ccb->ccb_h.func_code = XPT_GDEV_TYPE;
+			xpt_action(done_ccb);
+			xpt_async(AC_FOUND_DEVICE, path, done_ccb);
 		}
 		NVME_PROBE_SET_ACTION(softc, NVME_PROBE_DONE);
 		break;
 	default:
-		panic("nvme_probe_start: invalid action state 0x%x\n", softc->action);
+		panic("nvme_probe_done: invalid action state 0x%x\n", softc->action);
 	}
-	/*
-	 * Probing is now done. We need to complete any lingering items
-	 * in the queue, though there shouldn't be any.
-	 */
-	xpt_release_ccb(start_ccb);
+done:
+	if (softc->restart) {
+		softc->restart = 0;
+		xpt_release_ccb(done_ccb);
+		nvme_probe_schedule(periph);
+		goto out;
+	}
+	xpt_release_ccb(done_ccb);
 	CAM_DEBUG(periph->path, CAM_DEBUG_PROBE, ("Probe completed\n"));
-	while ((start_ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs))) {
+	while ((done_ccb = (union ccb *)TAILQ_FIRST(&softc->request_ccbs))) {
 		TAILQ_REMOVE(&softc->request_ccbs,
-		    &start_ccb->ccb_h, periph_links.tqe);
-		start_ccb->ccb_h.status = CAM_REQ_CMP;
-		xpt_done(start_ccb);
+		    &done_ccb->ccb_h, periph_links.tqe);
+		done_ccb->ccb_h.status = found ? CAM_REQ_CMP : CAM_REQ_CMP_ERR;
+		xpt_done(done_ccb);
 	}
+	/* Drop freeze taken due to CAM_DEV_QFREEZE flag set. */
+	cam_release_devq(path, 0, 0, 0, FALSE);
 	cam_periph_invalidate(periph);
 	cam_periph_release_locked(periph);
 }
 
 static void
 nvme_probe_cleanup(struct cam_periph *periph)
 {
 
 	free(periph->softc, M_CAMXPT);
 }
 
 #if 0
 /* XXX should be used, don't delete */
 static void
 nvme_find_quirk(struct cam_ed *device)
 {
 	struct nvme_quirk_entry *quirk;
 	caddr_t	match;
 
 	match = cam_quirkmatch((caddr_t)&device->nvme_data,
 			       (caddr_t)nvme_quirk_table,
 			       nvme_quirk_table_size,
 			       sizeof(*nvme_quirk_table), nvme_identify_match);
 
 	if (match == NULL)
 		panic("xpt_find_quirk: device didn't match wildcard entry!!");
 
 	quirk = (struct nvme_quirk_entry *)match;
 	device->quirk = quirk;
 	if (quirk->quirks & CAM_QUIRK_MAXTAGS) {
 		device->mintags = quirk->mintags;
 		device->maxtags = quirk->maxtags;
 	}
 }
 #endif
 
 static void
 nvme_scan_lun(struct cam_periph *periph, struct cam_path *path,
 	     cam_flags flags, union ccb *request_ccb)
 {
 	struct ccb_pathinq cpi;
 	cam_status status;
 	struct cam_periph *old_periph;
 	int lock;
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("nvme_scan_lun\n"));
 
 	xpt_path_inq(&cpi, path);
 
 	if (cpi.ccb_h.status != CAM_REQ_CMP) {
 		if (request_ccb != NULL) {
 			request_ccb->ccb_h.status = cpi.ccb_h.status;
 			xpt_done(request_ccb);
 		}
 		return;
 	}
 
 	if (xpt_path_lun_id(path) == CAM_LUN_WILDCARD) {
 		CAM_DEBUG(path, CAM_DEBUG_TRACE, ("nvme_scan_lun ignoring bus\n"));
 		request_ccb->ccb_h.status = CAM_REQ_CMP;	/* XXX signal error ? */
 		xpt_done(request_ccb);
 		return;
 	}
 
 	lock = (xpt_path_owned(path) == 0);
 	if (lock)
 		xpt_path_lock(path);
 	if ((old_periph = cam_periph_find(path, "nvme_probe")) != NULL) {
 		if ((old_periph->flags & CAM_PERIPH_INVALID) == 0) {
 			nvme_probe_softc *softc;
 
 			softc = (nvme_probe_softc *)old_periph->softc;
 			TAILQ_INSERT_TAIL(&softc->request_ccbs,
 				&request_ccb->ccb_h, periph_links.tqe);
 			softc->restart = 1;
 			CAM_DEBUG(path, CAM_DEBUG_TRACE,
 			    ("restarting nvme_probe device\n"));
 		} else {
 			request_ccb->ccb_h.status = CAM_REQ_CMP_ERR;
 			CAM_DEBUG(path, CAM_DEBUG_TRACE,
 			    ("Failing to restart nvme_probe device\n"));
 			xpt_done(request_ccb);
 		}
 	} else {
 		CAM_DEBUG(path, CAM_DEBUG_TRACE,
 		    ("Adding nvme_probe device\n"));
 		status = cam_periph_alloc(nvme_probe_register, NULL, nvme_probe_cleanup,
 					  nvme_probe_start, "nvme_probe",
 					  CAM_PERIPH_BIO,
 					  request_ccb->ccb_h.path, NULL, 0,
 					  request_ccb);
 
 		if (status != CAM_REQ_CMP) {
 			xpt_print(path, "xpt_scan_lun: cam_alloc_periph "
 			    "returned an error, can't continue probe\n");
 			request_ccb->ccb_h.status = status;
 			xpt_done(request_ccb);
 		}
 	}
 	if (lock)
 		xpt_path_unlock(path);
 }
 
 static struct cam_ed *
 nvme_alloc_device(struct cam_eb *bus, struct cam_et *target, lun_id_t lun_id)
 {
 	struct nvme_quirk_entry *quirk;
 	struct cam_ed *device;
 
 	device = xpt_alloc_device(bus, target, lun_id);
 	if (device == NULL)
 		return (NULL);
 
 	/*
 	 * Take the default quirk entry until we have inquiry
 	 * data from nvme and can determine a better quirk to use.
 	 */
 	quirk = &nvme_quirk_table[nvme_quirk_table_size - 1];
 	device->quirk = (void *)quirk;
 	device->mintags = 0;
 	device->maxtags = 0;
 	device->inq_flags = 0;
 	device->queue_flags = 0;
 	device->device_id = NULL;	/* XXX Need to set this somewhere */
 	device->device_id_len = 0;
 	device->serial_num = NULL;	/* XXX Need to set this somewhere */
 	device->serial_num_len = 0;
 	return (device);
 }
 
 static void
 nvme_device_transport(struct cam_path *path)
 {
 	struct ccb_pathinq cpi;
 	struct ccb_trans_settings cts;
 	/* XXX get data from nvme namespace and other info ??? */
 
 	/* Get transport information from the SIM */
 	xpt_path_inq(&cpi, path);
 
 	path->device->transport = cpi.transport;
 	path->device->transport_version = cpi.transport_version;
 
 	path->device->protocol = cpi.protocol;
 	path->device->protocol_version = cpi.protocol_version;
 
 	/* Tell the controller what we think */
 	xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NONE);
 	cts.ccb_h.func_code = XPT_SET_TRAN_SETTINGS;
 	cts.type = CTS_TYPE_CURRENT_SETTINGS;
 	cts.transport = path->device->transport;
 	cts.transport_version = path->device->transport_version;
 	cts.protocol = path->device->protocol;
 	cts.protocol_version = path->device->protocol_version;
 	cts.proto_specific.valid = 0;
 	cts.xport_specific.valid = 0;
 	xpt_action((union ccb *)&cts);
 }
 
 static void
 nvme_dev_advinfo(union ccb *start_ccb)
 {
 	struct cam_ed *device;
 	struct ccb_dev_advinfo *cdai;
 	off_t amt; 
 
 	start_ccb->ccb_h.status = CAM_REQ_INVALID;
 	device = start_ccb->ccb_h.path->device;
 	cdai = &start_ccb->cdai;
 	switch(cdai->buftype) {
 	case CDAI_TYPE_SCSI_DEVID:
 		if (cdai->flags & CDAI_FLAG_STORE)
 			return;
 		cdai->provsiz = device->device_id_len;
 		if (device->device_id_len == 0)
 			break;
 		amt = device->device_id_len;
 		if (cdai->provsiz > cdai->bufsiz)
 			amt = cdai->bufsiz;
 		memcpy(cdai->buf, device->device_id, amt);
 		break;
 	case CDAI_TYPE_SERIAL_NUM:
 		if (cdai->flags & CDAI_FLAG_STORE)
 			return;
 		cdai->provsiz = device->serial_num_len;
 		if (device->serial_num_len == 0)
 			break;
 		amt = device->serial_num_len;
 		if (cdai->provsiz > cdai->bufsiz)
 			amt = cdai->bufsiz;
 		memcpy(cdai->buf, device->serial_num, amt);
 		break;
 	case CDAI_TYPE_PHYS_PATH:
 		if (cdai->flags & CDAI_FLAG_STORE) {
 			if (device->physpath != NULL)
 				free(device->physpath, M_CAMXPT);
 			device->physpath_len = cdai->bufsiz;
 			/* Clear existing buffer if zero length */
 			if (cdai->bufsiz == 0)
 				break;
 			device->physpath = malloc(cdai->bufsiz, M_CAMXPT, M_NOWAIT);
 			if (device->physpath == NULL) {
 				start_ccb->ccb_h.status = CAM_REQ_ABORTED;
 				return;
 			}
 			memcpy(device->physpath, cdai->buf, cdai->bufsiz);
 		} else {
 			cdai->provsiz = device->physpath_len;
 			if (device->physpath_len == 0)
 				break;
 			amt = device->physpath_len;
 			if (cdai->provsiz > cdai->bufsiz)
 				amt = cdai->bufsiz;
 			memcpy(cdai->buf, device->physpath, amt);
 		}
 		break;
 	case CDAI_TYPE_NVME_CNTRL:
 		if (cdai->flags & CDAI_FLAG_STORE)
 			return;
 		amt = sizeof(struct nvme_controller_data);
 		cdai->provsiz = amt;
 		if (amt > cdai->bufsiz)
 			amt = cdai->bufsiz;
 		memcpy(cdai->buf, device->nvme_cdata, amt);
 		break;
 	case CDAI_TYPE_NVME_NS:
 		if (cdai->flags & CDAI_FLAG_STORE)
 			return;
 		amt = sizeof(struct nvme_namespace_data);
 		cdai->provsiz = amt;
 		if (amt > cdai->bufsiz)
 			amt = cdai->bufsiz;
 		memcpy(cdai->buf, device->nvme_data, amt);
 		break;
 	default:
 		return;
 	}
 	start_ccb->ccb_h.status = CAM_REQ_CMP;
 
 	if (cdai->flags & CDAI_FLAG_STORE) {
 		xpt_async(AC_ADVINFO_CHANGED, start_ccb->ccb_h.path,
 			  (void *)(uintptr_t)cdai->buftype);
 	}
 }
 
 static void
 nvme_action(union ccb *start_ccb)
 {
 	CAM_DEBUG(start_ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("nvme_action: func= %#x\n", start_ccb->ccb_h.func_code));
 
 	switch (start_ccb->ccb_h.func_code) {
 	case XPT_SCAN_BUS:
 	case XPT_SCAN_TGT:
 	case XPT_SCAN_LUN:
 		nvme_scan_lun(start_ccb->ccb_h.path->periph,
 			      start_ccb->ccb_h.path, start_ccb->crcn.flags,
 			      start_ccb);
 		break;
 	case XPT_DEV_ADVINFO:
 		nvme_dev_advinfo(start_ccb);
 		break;
 
 	default:
 		xpt_action_default(start_ccb);
 		break;
 	}
 }
 
 /*
  * Handle any per-device event notifications that require action by the XPT.
  */
 static void
 nvme_dev_async(u_int32_t async_code, struct cam_eb *bus, struct cam_et *target,
 	      struct cam_ed *device, void *async_arg)
 {
 
 	/*
 	 * We only need to handle events for real devices.
 	 */
 	if (target->target_id == CAM_TARGET_WILDCARD
 	 || device->lun_id == CAM_LUN_WILDCARD)
 		return;
 
 	if (async_code == AC_LOST_DEVICE &&
 	    (device->flags & CAM_DEV_UNCONFIGURED) == 0) {
 		device->flags |= CAM_DEV_UNCONFIGURED;
 		xpt_release_device(device);
 	}
 }
 
 static void
 nvme_announce_periph(struct cam_periph *periph)
 {
 	struct	ccb_pathinq cpi;
 	struct	ccb_trans_settings cts;
 	struct	cam_path *path = periph->path;
 	struct ccb_trans_settings_nvme	*nvmex;
 
 	cam_periph_assert(periph, MA_OWNED);
 
 	/* Ask the SIM for connection details */
 	xpt_setup_ccb(&cts.ccb_h, path, CAM_PRIORITY_NORMAL);
 	cts.ccb_h.func_code = XPT_GET_TRAN_SETTINGS;
 	cts.type = CTS_TYPE_CURRENT_SETTINGS;
 	xpt_action((union ccb*)&cts);
 	if ((cts.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
 		return;
 	nvmex = &cts.xport_specific.nvme;
 
 	/* Ask the SIM for its base transfer speed */
 	xpt_path_inq(&cpi, periph->path);
 	printf("%s%d: nvme version %d.%d x%d (max x%d) lanes PCIe Gen%d (max Gen%d) link",
 	    periph->periph_name, periph->unit_number,
 	    NVME_MAJOR(nvmex->spec),
 	    NVME_MINOR(nvmex->spec),
 	    nvmex->lanes, nvmex->max_lanes,
 	    nvmex->speed, nvmex->max_speed);
 	printf("\n");
 }
 
 static void
 nvme_proto_announce(struct cam_ed *device)
 {
 	struct sbuf	sb;
 	char		buffer[120];
 
 	sbuf_new(&sb, buffer, sizeof(buffer), SBUF_FIXEDLEN);
 	nvme_print_ident(device->nvme_cdata, device->nvme_data, &sb);
 	sbuf_finish(&sb);
 	sbuf_putbuf(&sb);
 }
 
 static void
 nvme_proto_denounce(struct cam_ed *device)
 {
 
 	nvme_proto_announce(device);
 }
 
 static void
 nvme_proto_debug_out(union ccb *ccb)
 {
 	char cdb_str[(sizeof(struct nvme_command) * 3) + 1];
 
 	if (ccb->ccb_h.func_code != XPT_NVME_IO)
 		return;
 
 	CAM_DEBUG(ccb->ccb_h.path,
 	    CAM_DEBUG_CDB,("%s. NCB: %s\n", nvme_op_string(&ccb->nvmeio.cmd),
 		nvme_cmd_string(&ccb->nvmeio.cmd, cdb_str, sizeof(cdb_str))));
 }
 
Index: head/sys/dev/nvme/nvme.c
===================================================================
--- head/sys/dev/nvme/nvme.c	(revision 334199)
+++ head/sys/dev/nvme/nvme.c	(revision 334200)
@@ -1,488 +1,506 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/module.h>
 
 #include <vm/uma.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
 struct nvme_consumer {
 	uint32_t		id;
 	nvme_cons_ns_fn_t	ns_fn;
 	nvme_cons_ctrlr_fn_t	ctrlr_fn;
 	nvme_cons_async_fn_t	async_fn;
 	nvme_cons_fail_fn_t	fail_fn;
 };
 
 struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
 #define	INVALID_CONSUMER_ID	0xFFFF
 
 uma_zone_t	nvme_request_zone;
 int32_t		nvme_retry_count;
 
 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
 
 static int    nvme_probe(device_t);
 static int    nvme_attach(device_t);
 static int    nvme_detach(device_t);
 static int    nvme_shutdown(device_t);
 static int    nvme_modevent(module_t mod, int type, void *arg);
 
 static devclass_t nvme_devclass;
 
 static device_method_t nvme_pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,     nvme_probe),
 	DEVMETHOD(device_attach,    nvme_attach),
 	DEVMETHOD(device_detach,    nvme_detach),
 	DEVMETHOD(device_shutdown,  nvme_shutdown),
 	{ 0, 0 }
 };
 
 static driver_t nvme_pci_driver = {
 	"nvme",
 	nvme_pci_methods,
 	sizeof(struct nvme_controller),
 };
 
 DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent, 0);
 MODULE_VERSION(nvme, 1);
 MODULE_DEPEND(nvme, cam, 1, 1, 1);
 
 static struct _pcsid
 {
 	uint32_t	devid;
 	int		match_subdevice;
 	uint16_t	subdevice;
 	const char	*desc;
 	uint32_t	quirks;
 } pci_ids[] = {
 	{ 0x01118086,		0, 0, "NVMe Controller"  },
 	{ IDT32_PCI_ID,		0, 0, "IDT NVMe Controller (32 channel)"  },
 	{ IDT8_PCI_ID,		0, 0, "IDT NVMe Controller (8 channel)" },
 	{ 0x09538086,		1, 0x3702, "DC P3700 SSD" },
 	{ 0x09538086,		1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
 	{ 0x09538086,		1, 0x3704, "DC P3500 SSD [Add-in Card]" },
 	{ 0x09538086,		1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
 	{ 0x09538086,		1, 0x3709, "DC P3600 SSD [Add-in Card]" },
 	{ 0x09538086,		1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
 	{ 0x00031c58,		0, 0, "HGST SN100",	QUIRK_DELAY_B4_CHK_RDY },
 	{ 0x00231c58,		0, 0, "WDC SN200",	QUIRK_DELAY_B4_CHK_RDY },
 	{ 0x05401c5f,		0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
 	{ 0xa821144d,		0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
 	{ 0xa822144d,		0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
 	{ 0x00000000,		0, 0, NULL  }
 };
 
 static int
 nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
 {
 	if (devid != ep->devid)
 		return 0;
 
 	if (!ep->match_subdevice)
 		return 1;
 
 	if (subdevice == ep->subdevice)
 		return 1;
 	else
 		return 0;
 }
 
 static int
 nvme_probe (device_t device)
 {
 	struct _pcsid	*ep;
 	uint32_t	devid;
 	uint16_t	subdevice;
 
 	devid = pci_get_devid(device);
 	subdevice = pci_get_subdevice(device);
 	ep = pci_ids;
 
 	while (ep->devid) {
 		if (nvme_match(devid, subdevice, ep))
 			break;
 		++ep;
 	}
 
 	if (ep->desc) {
 		device_set_desc(device, ep->desc);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 #if defined(PCIS_STORAGE_NVM)
 	if (pci_get_class(device)    == PCIC_STORAGE &&
 	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
 	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
 		device_set_desc(device, "Generic NVMe Device");
 		return (BUS_PROBE_GENERIC);
 	}
 #endif
 
 	return (ENXIO);
 }
 
 static void
 nvme_init(void)
 {
 	uint32_t	i;
 
 	nvme_request_zone = uma_zcreate("nvme_request",
 	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
 
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
 		nvme_consumer[i].id = INVALID_CONSUMER_ID;
 }
 
 SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
 
 static void
 nvme_uninit(void)
 {
 	uma_zdestroy(nvme_request_zone);
 }
 
 SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
 
 static void
 nvme_load(void)
 {
 }
 
 static void
 nvme_unload(void)
 {
 }
 
 static int
 nvme_shutdown(device_t dev)
 {
 	struct nvme_controller	*ctrlr;
 
 	ctrlr = DEVICE2SOFTC(dev);
 	nvme_ctrlr_shutdown(ctrlr);
 
 	return (0);
 }
 
 static int
 nvme_modevent(module_t mod, int type, void *arg)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		nvme_load();
 		break;
 	case MOD_UNLOAD:
 		nvme_unload();
 		break;
 	default:
 		break;
 	}
 
 	return (0);
 }
 
 void
 nvme_dump_command(struct nvme_command *cmd)
 {
 	uint8_t opc, fuse;
 
 	opc = (cmd->opc_fuse >> NVME_CMD_OPC_SHIFT) & NVME_CMD_OPC_MASK;
 	fuse = (cmd->opc_fuse >> NVME_CMD_FUSE_SHIFT) & NVME_CMD_FUSE_MASK;
 
 	printf(
 "opc:%x f:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
 	    opc, fuse, cmd->cid, le32toh(cmd->nsid),
 	    cmd->rsvd2, cmd->rsvd3,
 	    (uintmax_t)le64toh(cmd->mptr), (uintmax_t)le64toh(cmd->prp1), (uintmax_t)le64toh(cmd->prp2),
 	    le32toh(cmd->cdw10), le32toh(cmd->cdw11), le32toh(cmd->cdw12),
 	    le32toh(cmd->cdw13), le32toh(cmd->cdw14), le32toh(cmd->cdw15));
 }
 
 void
 nvme_dump_completion(struct nvme_completion *cpl)
 {
 	uint8_t p, sc, sct, m, dnr;
 	uint16_t status;
 
 	status = le16toh(cpl->status);
 
 	p = NVME_STATUS_GET_P(status);
 	sc = NVME_STATUS_GET_SC(status);
 	sct = NVME_STATUS_GET_SCT(status);
 	m = NVME_STATUS_GET_M(status);
 	dnr = NVME_STATUS_GET_DNR(status);
 
 	printf("cdw0:%08x sqhd:%04x sqid:%04x "
 	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
 	    le32toh(cpl->cdw0), le16toh(cpl->sqhd), le16toh(cpl->sqid),
 	    cpl->cid, p, sc, sct, m, dnr);
 }
 
 /*
  * Device attach method.
  *
  * Looks the device up in pci_ids to pick its quirks, constructs the
  * controller, enables PCI busmastering, resets the controller twice and
  * finally registers a config intrhook that runs
  * nvme_ctrlr_start_config_hook().  Returns 0 on success; on any failure the
  * controller is destructed and the failing status is returned.
  */
 static int
 nvme_attach(device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
 	struct _pcsid		*ep;
 	uint32_t		devid = pci_get_devid(dev);
 	uint16_t		subdevice = pci_get_subdevice(dev);
 	int			error;
 
 	/*
 	 * Stop at the first matching entry, or at the terminating entry
 	 *  (devid == 0) when nothing matches; either way take its quirks.
 	 */
 	for (ep = pci_ids; ep->devid != 0; ep++) {
 		if (nvme_match(devid, subdevice, ep))
 			break;
 	}
 	ctrlr->quirks = ep->quirks;
 
 	error = nvme_ctrlr_construct(ctrlr, dev);
 	if (error != 0)
 		goto fail;
 
 	/*
 	 * Completion status messages are busmastered back to the host,
 	 *  so busmastering has to be on before the controller is used.
 	 */
 	pci_enable_busmaster(dev);
 
 	/*
 	 * The state boot left the controller in is unknown.  Resetting
 	 *  twice guarantees a cc.en==1 to cc.en==0 transition is seen.
 	 */
 	error = nvme_ctrlr_hw_reset(ctrlr);
 	if (error != 0)
 		goto fail;
 
 	error = nvme_ctrlr_hw_reset(ctrlr);
 	if (error != 0)
 		goto fail;
 
 	ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook;
 	ctrlr->config_hook.ich_arg = ctrlr;
 
 	config_intrhook_establish(&ctrlr->config_hook);
 
 	return (0);
 
 fail:
 	nvme_ctrlr_destruct(ctrlr, dev);
 	return (error);
 }
 
 /*
  * Device detach method: destruct the controller, then turn PCI
  * busmastering back off.  Always returns 0.
  */
 static int
 nvme_detach(device_t dev)
 {
 	struct nvme_controller	*sc;
 
 	sc = DEVICE2SOFTC(dev);
 	nvme_ctrlr_destruct(sc, dev);
 	pci_disable_busmaster(dev);
 
 	return (0);
 }
 
 /*
  * Introduce one controller, and its populated namespaces, to one consumer.
  *
  * Nothing happens unless the controller is initialized and this call wins
  * the 0 -> 1 transition of ctrlr->notification_sent.  The cookie returned
  * by the consumer's controller callback (NULL when it has none) is saved
  * in ctrlr->cons_cookie[] and handed to the fail or namespace callbacks.
  */
 static void
 nvme_notify(struct nvme_consumer *cons,
 	    struct nvme_controller *ctrlr)
 {
 	struct nvme_namespace	*ns;
 	void			*cookie;
 	int			idx;
 
 	/*
 	 * A consumer can register after the nvme devices are known to
 	 *  the kernel but before the driver finished initializing them.
 	 *  Bail out in that case; the controller notifies the consumer
 	 *  itself once initialization completes.
 	 */
 	if (!ctrlr->is_initialized)
 		return;
 
 	/* Only the caller that flips notification_sent goes on. */
 	if (atomic_cmpset_32(&ctrlr->notification_sent, 0, 1) == 0)
 		return;
 
 	cookie = (cons->ctrlr_fn != NULL) ? (*cons->ctrlr_fn)(ctrlr) : NULL;
 	ctrlr->cons_cookie[cons->id] = cookie;
 
 	if (ctrlr->is_failed) {
 		/*
 		 * Report the failure, but never advertise the
 		 *  namespaces of a failed controller.
 		 */
 		if (cons->fail_fn != NULL)
 			(*cons->fail_fn)(cookie);
 		return;
 	}
 
 	for (idx = 0; idx < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); idx++) {
 		ns = &ctrlr->ns[idx];
 		/* Skip namespaces of size zero. */
 		if (ns->data.nsze != 0 && cons->ns_fn != NULL)
 			ns->cons_cookie[cons->id] =
 			    (*cons->ns_fn)(ns, cookie);
 	}
 }
 
 /*
  * Run nvme_notify() for the given controller against every consumer slot
  * that is currently in use.
  */
 void
 nvme_notify_new_controller(struct nvme_controller *ctrlr)
 {
 	int slot;
 
 	for (slot = 0; slot < NVME_MAX_CONSUMERS; slot++) {
 		/* Unused slots carry INVALID_CONSUMER_ID. */
 		if (nvme_consumer[slot].id == INVALID_CONSUMER_ID)
 			continue;
 		nvme_notify(&nvme_consumer[slot], ctrlr);
 	}
 }
 
 /*
  * Run nvme_notify() for the given consumer against every device in
  * nvme_devclass.  Does nothing if the device list cannot be obtained.
  */
 static void
 nvme_notify_new_consumer(struct nvme_consumer *cons)
 {
 	device_t	*devs;
 	int		ndevs, n;
 
 	if (devclass_get_devices(nvme_devclass, &devs, &ndevs) != 0)
 		return;
 
 	for (n = 0; n < ndevs; n++)
 		nvme_notify(cons, DEVICE2SOFTC(devs[n]));
 
 	/* The device list was handed to us and is released as M_TEMP. */
 	free(devs, M_TEMP);
 }
 
 /*
  * Forward an asynchronous event completion, together with the related log
  * page, to every registered consumer that supplied an async callback.
  * Each callback receives the cookie saved for its slot in
  * ctrlr->cons_cookie[].
  */
 void
 nvme_notify_async_consumers(struct nvme_controller *ctrlr,
 			    const struct nvme_completion *async_cpl,
 			    uint32_t log_page_id, void *log_page_buffer,
 			    uint32_t log_page_size)
 {
 	struct nvme_consumer	*cons;
 	uint32_t		slot;
 
 	for (slot = 0; slot < NVME_MAX_CONSUMERS; slot++) {
 		cons = &nvme_consumer[slot];
 		if (cons->id == INVALID_CONSUMER_ID || cons->async_fn == NULL)
 			continue;
 		(*cons->async_fn)(ctrlr->cons_cookie[slot], async_cpl,
 		    log_page_id, log_page_buffer, log_page_size);
 	}
 }
 
 /*
  * Tell every registered consumer that supplied a fail callback that the
  * controller has failed, passing the cookie saved for its slot.
  */
 void
 nvme_notify_fail_consumers(struct nvme_controller *ctrlr)
 {
 	struct nvme_consumer	*cons;
 	uint32_t		slot;
 
 	/*
 	 * A controller that failed during initialization (i.e. the
 	 *  IDENTIFY command failed or timed out) was never announced to
 	 *  any consumer, so there is nobody to tell about the failure.
 	 */
 	if (!ctrlr->is_initialized)
 		return;
 
 	for (slot = 0; slot < NVME_MAX_CONSUMERS; slot++) {
 		cons = &nvme_consumer[slot];
 		if (cons->id == INVALID_CONSUMER_ID || cons->fail_fn == NULL)
 			continue;
 		(*cons->fail_fn)(ctrlr->cons_cookie[slot]);
 	}
 }
 
+/*
+ * Announce a single namespace of an initialized controller to every
+ * registered consumer that supplied a namespace callback.  The value each
+ * callback returns is stored in the namespace's cons_cookie[] slot.
+ *
+ * nsid is 1-based: it selects ctrlr->ns[nsid - 1].  Identifiers outside
+ * 1..NVME_MAX_NAMESPACES are ignored rather than used as an index.
+ */
+void
+nvme_notify_ns(struct nvme_controller *ctrlr, int nsid)
+{
+	struct nvme_consumer	*cons;
+	struct nvme_namespace	*ns;
+	uint32_t		i;
+
+	if (!ctrlr->is_initialized)
+		return;
+
+	/*
+	 * nvme_notify() never indexes ns[] at or beyond
+	 *  NVME_MAX_NAMESPACES; apply the same limit here before the
+	 *  element address is formed.
+	 * NOTE(review): presumably NVME_MAX_NAMESPACES is the capacity of
+	 *  ctrlr->ns[] - confirm against the struct definition.
+	 */
+	if (nsid < 1 || nsid > NVME_MAX_NAMESPACES)
+		return;
+
+	ns = &ctrlr->ns[nsid - 1];
+
+	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
+		cons = &nvme_consumer[i];
+		if (cons->id != INVALID_CONSUMER_ID && cons->ns_fn != NULL)
+			ns->cons_cookie[cons->id] =
+			    (*cons->ns_fn)(ns, ctrlr->cons_cookie[cons->id]);
+	}
+}
+
 /*
  * Claim the first free slot in nvme_consumer[], record the four callbacks
  * in it and immediately notify the new consumer about the existing
  * devices.  Returns the claimed slot, or NULL (after printing a message)
  * when every slot is taken.
  */
 struct nvme_consumer *
 nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
 		       nvme_cons_async_fn_t async_fn,
 		       nvme_cons_fail_fn_t fail_fn)
 {
 	struct nvme_consumer	*cons;
 	int			slot;
 
 	/*
 	 * TODO: add locking around consumer registration.  Not an issue
 	 *  right now since we only have one nvme consumer - nvd(4).
 	 */
 	for (slot = 0; slot < NVME_MAX_CONSUMERS; slot++) {
 		cons = &nvme_consumer[slot];
 		if (cons->id != INVALID_CONSUMER_ID)
 			continue;
 
 		/* The slot index doubles as the consumer id. */
 		cons->id = slot;
 		cons->ns_fn = ns_fn;
 		cons->ctrlr_fn = ctrlr_fn;
 		cons->async_fn = async_fn;
 		cons->fail_fn = fail_fn;
 
 		nvme_notify_new_consumer(cons);
 		return (cons);
 	}
 
 	printf("nvme(4): consumer not registered - no slots available\n");
 	return (NULL);
 }
 
 /*
  * Release a consumer slot by marking its id invalid.  The callback
  * pointers stored in the slot are left as they are.
  */
 void
 nvme_unregister_consumer(struct nvme_consumer *consumer)
 {
 
 	/* INVALID_CONSUMER_ID is what the notify loops test for a free slot. */
 	consumer->id = INVALID_CONSUMER_ID;
 }
 
 /*
  * Completion callback for callers that poll for a result.  arg points to
  * the caller's struct nvme_completion_poll_status.
  */
 void
 nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_completion_poll_status	*poll;
 
 	poll = arg;
 
 	/*
 	 * Hand the completion entry to the caller so it can inspect the
 	 *  status and decide whether the request passed or failed.
 	 */
 	memcpy(&poll->cpl, cpl, sizeof(*cpl));
 
 	/* Set done last, with release semantics, after the copy above. */
 	atomic_store_rel_int(&poll->done, 1);
 }
Index: head/sys/dev/nvme/nvme.h
===================================================================
--- head/sys/dev/nvme/nvme.h	(revision 334199)
+++ head/sys/dev/nvme/nvme.h	(revision 334200)
@@ -1,1487 +1,1502 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2012-2013 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __NVME_H__
 #define __NVME_H__
 
 #ifdef _KERNEL
 #include <sys/types.h>
 #endif
 
 #include <sys/param.h>
 #include <sys/endian.h>
 
 #define	NVME_PASSTHROUGH_CMD		_IOWR('n', 0, struct nvme_pt_command)
 #define	NVME_RESET_CONTROLLER		_IO('n', 1)
 
 #define	NVME_IO_TEST			_IOWR('n', 100, struct nvme_io_test)
 #define	NVME_BIO_TEST			_IOWR('n', 101, struct nvme_io_test)
 
 /*
  * Macros to deal with NVME revisions, as defined by the VS register
  */
 #define NVME_REV(x, y)			(((x) << 16) | ((y) << 8))
 #define NVME_MAJOR(r)			(((r) >> 16) & 0xffff)
 #define NVME_MINOR(r)			(((r) >> 8) & 0xff)
 
 /*
  * Use to mark a command to apply to all namespaces, or to retrieve global
  *  log pages.
  */
 #define NVME_GLOBAL_NAMESPACE_TAG	((uint32_t)0xFFFFFFFF)
 
 /* Cap nvme to 1MB transfers; the driver explodes with larger sizes */
 #define NVME_MAX_XFER_SIZE		(MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
 
 /* Register field definitions */
 #define NVME_CAP_LO_REG_MQES_SHIFT			(0)
 #define NVME_CAP_LO_REG_MQES_MASK			(0xFFFF)
 #define NVME_CAP_LO_REG_CQR_SHIFT			(16)
 #define NVME_CAP_LO_REG_CQR_MASK			(0x1)
 #define NVME_CAP_LO_REG_AMS_SHIFT			(17)
 #define NVME_CAP_LO_REG_AMS_MASK			(0x3)
 #define NVME_CAP_LO_REG_TO_SHIFT			(24)
 #define NVME_CAP_LO_REG_TO_MASK				(0xFF)
 
 #define NVME_CAP_HI_REG_DSTRD_SHIFT			(0)
 #define NVME_CAP_HI_REG_DSTRD_MASK			(0xF)
 #define NVME_CAP_HI_REG_CSS_NVM_SHIFT			(5)
 #define NVME_CAP_HI_REG_CSS_NVM_MASK			(0x1)
 #define NVME_CAP_HI_REG_MPSMIN_SHIFT			(16)
 #define NVME_CAP_HI_REG_MPSMIN_MASK			(0xF)
 #define NVME_CAP_HI_REG_MPSMAX_SHIFT			(20)
 #define NVME_CAP_HI_REG_MPSMAX_MASK			(0xF)
 
 #define NVME_CC_REG_EN_SHIFT				(0)
 #define NVME_CC_REG_EN_MASK				(0x1)
 #define NVME_CC_REG_CSS_SHIFT				(4)
 #define NVME_CC_REG_CSS_MASK				(0x7)
 #define NVME_CC_REG_MPS_SHIFT				(7)
 #define NVME_CC_REG_MPS_MASK				(0xF)
 #define NVME_CC_REG_AMS_SHIFT				(11)
 #define NVME_CC_REG_AMS_MASK				(0x7)
 #define NVME_CC_REG_SHN_SHIFT				(14)
 #define NVME_CC_REG_SHN_MASK				(0x3)
 #define NVME_CC_REG_IOSQES_SHIFT			(16)
 #define NVME_CC_REG_IOSQES_MASK				(0xF)
 #define NVME_CC_REG_IOCQES_SHIFT			(20)
 #define NVME_CC_REG_IOCQES_MASK				(0xF)
 
 #define NVME_CSTS_REG_RDY_SHIFT				(0)
 #define NVME_CSTS_REG_RDY_MASK				(0x1)
 #define NVME_CSTS_REG_CFS_SHIFT				(1)
 #define NVME_CSTS_REG_CFS_MASK				(0x1)
 #define NVME_CSTS_REG_SHST_SHIFT			(2)
 #define NVME_CSTS_REG_SHST_MASK				(0x3)
 
 #define NVME_CSTS_GET_SHST(csts)			(((csts) >> NVME_CSTS_REG_SHST_SHIFT) & NVME_CSTS_REG_SHST_MASK)
 
 #define NVME_AQA_REG_ASQS_SHIFT				(0)
 #define NVME_AQA_REG_ASQS_MASK				(0xFFF)
 #define NVME_AQA_REG_ACQS_SHIFT				(16)
 #define NVME_AQA_REG_ACQS_MASK				(0xFFF)
 
 /* Command field definitions */
 
 #define NVME_CMD_OPC_SHIFT				(0)
 #define NVME_CMD_OPC_MASK				(0xFF)
 #define NVME_CMD_FUSE_SHIFT				(8)
 #define NVME_CMD_FUSE_MASK				(0x3)
 
-#define NVME_CMD_SET_OPC(opc)				(htole16(((opc) & NVME_CMD_OPC_MASK) << NVME_CMD_OPC_SHIFT))
+#define NVME_CMD_SET_OPC(opc)				(htole16(((uint16_t)(opc) & NVME_CMD_OPC_MASK) << NVME_CMD_OPC_SHIFT))
 
 #define NVME_STATUS_P_SHIFT				(0)
 #define NVME_STATUS_P_MASK				(0x1)
 #define NVME_STATUS_SC_SHIFT				(1)
 #define NVME_STATUS_SC_MASK				(0xFF)
 #define NVME_STATUS_SCT_SHIFT				(9)
 #define NVME_STATUS_SCT_MASK				(0x7)
 #define NVME_STATUS_M_SHIFT				(14)
 #define NVME_STATUS_M_MASK				(0x1)
 #define NVME_STATUS_DNR_SHIFT				(15)
 #define NVME_STATUS_DNR_MASK				(0x1)
 
 #define NVME_STATUS_GET_P(st)				(((st) >> NVME_STATUS_P_SHIFT) & NVME_STATUS_P_MASK)
 #define NVME_STATUS_GET_SC(st)				(((st) >> NVME_STATUS_SC_SHIFT) & NVME_STATUS_SC_MASK)
 #define NVME_STATUS_GET_SCT(st)				(((st) >> NVME_STATUS_SCT_SHIFT) & NVME_STATUS_SCT_MASK)
 #define NVME_STATUS_GET_M(st)				(((st) >> NVME_STATUS_M_SHIFT) & NVME_STATUS_M_MASK)
 #define NVME_STATUS_GET_DNR(st)				(((st) >> NVME_STATUS_DNR_SHIFT) & NVME_STATUS_DNR_MASK)
 
 #define NVME_PWR_ST_MPS_SHIFT				(0)
 #define NVME_PWR_ST_MPS_MASK				(0x1)
 #define NVME_PWR_ST_NOPS_SHIFT				(1)
 #define NVME_PWR_ST_NOPS_MASK				(0x1)
 #define NVME_PWR_ST_RRT_SHIFT				(0)
 #define NVME_PWR_ST_RRT_MASK				(0x1F)
 #define NVME_PWR_ST_RRL_SHIFT				(0)
 #define NVME_PWR_ST_RRL_MASK				(0x1F)
 #define NVME_PWR_ST_RWT_SHIFT				(0)
 #define NVME_PWR_ST_RWT_MASK				(0x1F)
 #define NVME_PWR_ST_RWL_SHIFT				(0)
 #define NVME_PWR_ST_RWL_MASK				(0x1F)
 #define NVME_PWR_ST_IPS_SHIFT				(6)
 #define NVME_PWR_ST_IPS_MASK				(0x3)
 #define NVME_PWR_ST_APW_SHIFT				(0)
 #define NVME_PWR_ST_APW_MASK				(0x7)
 #define NVME_PWR_ST_APS_SHIFT				(6)
 #define NVME_PWR_ST_APS_MASK				(0x3)
 
 /** Controller Multi-path I/O and Namespace Sharing Capabilities */
 /* More than one port */
 #define NVME_CTRLR_DATA_MIC_MPORTS_SHIFT		(0)
 #define NVME_CTRLR_DATA_MIC_MPORTS_MASK			(0x1)
 /* More than one controller */
 #define NVME_CTRLR_DATA_MIC_MCTRLRS_SHIFT		(1)
 #define NVME_CTRLR_DATA_MIC_MCTRLRS_MASK		(0x1)
 /* SR-IOV Virtual Function */
 #define NVME_CTRLR_DATA_MIC_SRIOVVF_SHIFT		(2)
 #define NVME_CTRLR_DATA_MIC_SRIOVVF_MASK		(0x1)
 
 /** OACS - optional admin command support */
 /* supports security send/receive commands */
 #define NVME_CTRLR_DATA_OACS_SECURITY_SHIFT		(0)
 #define NVME_CTRLR_DATA_OACS_SECURITY_MASK		(0x1)
 /* supports format nvm command */
 #define NVME_CTRLR_DATA_OACS_FORMAT_SHIFT		(1)
 #define NVME_CTRLR_DATA_OACS_FORMAT_MASK		(0x1)
 /* supports firmware activate/download commands */
 #define NVME_CTRLR_DATA_OACS_FIRMWARE_SHIFT		(2)
 #define NVME_CTRLR_DATA_OACS_FIRMWARE_MASK		(0x1)
 /* supports namespace management commands */
 #define NVME_CTRLR_DATA_OACS_NSMGMT_SHIFT		(3)
 #define NVME_CTRLR_DATA_OACS_NSMGMT_MASK		(0x1)
 /* supports Device Self-test command */
 #define NVME_CTRLR_DATA_OACS_SELFTEST_SHIFT		(4)
 #define NVME_CTRLR_DATA_OACS_SELFTEST_MASK		(0x1)
 /* supports Directives */
 #define NVME_CTRLR_DATA_OACS_DIRECTIVES_SHIFT		(5)
 #define NVME_CTRLR_DATA_OACS_DIRECTIVES_MASK		(0x1)
 /* supports NVMe-MI Send/Receive */
 #define NVME_CTRLR_DATA_OACS_NVMEMI_SHIFT		(6)
 #define NVME_CTRLR_DATA_OACS_NVMEMI_MASK		(0x1)
 /* supports Virtualization Management */
 #define NVME_CTRLR_DATA_OACS_VM_SHIFT			(7)
 #define NVME_CTRLR_DATA_OACS_VM_MASK			(0x1)
 /* supports Doorbell Buffer Config */
 #define NVME_CTRLR_DATA_OACS_DBBUFFER_SHIFT		(8)
 #define NVME_CTRLR_DATA_OACS_DBBUFFER_MASK		(0x1)
 
 /** firmware updates */
 /* first slot is read-only */
 #define NVME_CTRLR_DATA_FRMW_SLOT1_RO_SHIFT		(0)
 #define NVME_CTRLR_DATA_FRMW_SLOT1_RO_MASK		(0x1)
 /* number of firmware slots */
 #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_SHIFT		(1)
 #define NVME_CTRLR_DATA_FRMW_NUM_SLOTS_MASK		(0x7)
 
 /** log page attributes */
 /* per namespace smart/health log page */
 #define NVME_CTRLR_DATA_LPA_NS_SMART_SHIFT		(0)
 #define NVME_CTRLR_DATA_LPA_NS_SMART_MASK		(0x1)
 
 /** AVSCC - admin vendor specific command configuration */
 /* admin vendor specific commands use spec format */
 #define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_SHIFT		(0)
 #define NVME_CTRLR_DATA_AVSCC_SPEC_FORMAT_MASK		(0x1)
 
 /** Autonomous Power State Transition Attributes */
 /* Autonomous Power State Transitions supported */
 #define NVME_CTRLR_DATA_APSTA_APST_SUPP_SHIFT		(0)
 #define NVME_CTRLR_DATA_APSTA_APST_SUPP_MASK		(0x1)
 
 /** submission queue entry size */
 #define NVME_CTRLR_DATA_SQES_MIN_SHIFT			(0)
 #define NVME_CTRLR_DATA_SQES_MIN_MASK			(0xF)
 #define NVME_CTRLR_DATA_SQES_MAX_SHIFT			(4)
 #define NVME_CTRLR_DATA_SQES_MAX_MASK			(0xF)
 
 /** completion queue entry size */
 #define NVME_CTRLR_DATA_CQES_MIN_SHIFT			(0)
 #define NVME_CTRLR_DATA_CQES_MIN_MASK			(0xF)
 #define NVME_CTRLR_DATA_CQES_MAX_SHIFT			(4)
 #define NVME_CTRLR_DATA_CQES_MAX_MASK			(0xF)
 
 /** optional nvm command support */
 #define NVME_CTRLR_DATA_ONCS_COMPARE_SHIFT		(0)
 #define NVME_CTRLR_DATA_ONCS_COMPARE_MASK		(0x1)
 #define NVME_CTRLR_DATA_ONCS_WRITE_UNC_SHIFT		(1)
 #define NVME_CTRLR_DATA_ONCS_WRITE_UNC_MASK		(0x1)
 #define NVME_CTRLR_DATA_ONCS_DSM_SHIFT			(2)
 #define NVME_CTRLR_DATA_ONCS_DSM_MASK			(0x1)
 #define NVME_CTRLR_DATA_ONCS_WRZERO_SHIFT		(3)
 #define NVME_CTRLR_DATA_ONCS_WRZERO_MASK		(0x1)
 #define NVME_CTRLR_DATA_ONCS_SAVEFEAT_SHIFT		(4)
 #define NVME_CTRLR_DATA_ONCS_SAVEFEAT_MASK		(0x1)
 #define NVME_CTRLR_DATA_ONCS_RESERV_SHIFT		(5)
 #define NVME_CTRLR_DATA_ONCS_RESERV_MASK		(0x1)
 #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_SHIFT		(6)
 #define NVME_CTRLR_DATA_ONCS_TIMESTAMP_MASK		(0x1)
 
 /** Fused Operation Support */
 #define NVME_CTRLR_DATA_FUSES_CNW_SHIFT		(0)
 #define NVME_CTRLR_DATA_FUSES_CNW_MASK		(0x1)
 
 /** Format NVM Attributes */
 #define NVME_CTRLR_DATA_FNA_FORMAT_ALL_SHIFT		(0)
 #define NVME_CTRLR_DATA_FNA_FORMAT_ALL_MASK		(0x1)
 #define NVME_CTRLR_DATA_FNA_ERASE_ALL_SHIFT		(1)
 #define NVME_CTRLR_DATA_FNA_ERASE_ALL_MASK		(0x1)
 #define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_SHIFT		(2)
 #define NVME_CTRLR_DATA_FNA_CRYPTO_ERASE_MASK		(0x1)
 
 /** volatile write cache */
 #define NVME_CTRLR_DATA_VWC_PRESENT_SHIFT		(0)
 #define NVME_CTRLR_DATA_VWC_PRESENT_MASK		(0x1)
 
 /** namespace features */
 /* thin provisioning */
 #define NVME_NS_DATA_NSFEAT_THIN_PROV_SHIFT		(0)
 #define NVME_NS_DATA_NSFEAT_THIN_PROV_MASK		(0x1)
 /* NAWUN, NAWUPF, and NACWU fields are valid */
 #define NVME_NS_DATA_NSFEAT_NA_FIELDS_SHIFT		(1)
 #define NVME_NS_DATA_NSFEAT_NA_FIELDS_MASK		(0x1)
 /* Deallocated or Unwritten Logical Block errors supported */
 #define NVME_NS_DATA_NSFEAT_DEALLOC_SHIFT		(2)
 #define NVME_NS_DATA_NSFEAT_DEALLOC_MASK		(0x1)
 /* NGUID and EUI64 fields are not reusable */
 #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_SHIFT		(3)
 #define NVME_NS_DATA_NSFEAT_NO_ID_REUSE_MASK		(0x1)
 
 /** formatted lba size */
 #define NVME_NS_DATA_FLBAS_FORMAT_SHIFT			(0)
 #define NVME_NS_DATA_FLBAS_FORMAT_MASK			(0xF)
 #define NVME_NS_DATA_FLBAS_EXTENDED_SHIFT		(4)
 #define NVME_NS_DATA_FLBAS_EXTENDED_MASK		(0x1)
 
 /** metadata capabilities */
 /* metadata can be transferred as part of data prp list */
 #define NVME_NS_DATA_MC_EXTENDED_SHIFT			(0)
 #define NVME_NS_DATA_MC_EXTENDED_MASK			(0x1)
 /* metadata can be transferred with separate metadata pointer */
 #define NVME_NS_DATA_MC_POINTER_SHIFT			(1)
 #define NVME_NS_DATA_MC_POINTER_MASK			(0x1)
 
 /** end-to-end data protection capabilities */
 /* protection information type 1 */
 #define NVME_NS_DATA_DPC_PIT1_SHIFT			(0)
 #define NVME_NS_DATA_DPC_PIT1_MASK			(0x1)
 /* protection information type 2 */
 #define NVME_NS_DATA_DPC_PIT2_SHIFT			(1)
 #define NVME_NS_DATA_DPC_PIT2_MASK			(0x1)
 /* protection information type 3 */
 #define NVME_NS_DATA_DPC_PIT3_SHIFT			(2)
 #define NVME_NS_DATA_DPC_PIT3_MASK			(0x1)
 /* first eight bytes of metadata */
 #define NVME_NS_DATA_DPC_MD_START_SHIFT			(3)
 #define NVME_NS_DATA_DPC_MD_START_MASK			(0x1)
 /* last eight bytes of metadata */
 #define NVME_NS_DATA_DPC_MD_END_SHIFT			(4)
 #define NVME_NS_DATA_DPC_MD_END_MASK			(0x1)
 
 /** end-to-end data protection type settings */
 /* protection information type */
 #define NVME_NS_DATA_DPS_PIT_SHIFT			(0)
 #define NVME_NS_DATA_DPS_PIT_MASK			(0x7)
 /* 1 == protection info transferred at start of metadata */
 /* 0 == protection info transferred at end of metadata */
 #define NVME_NS_DATA_DPS_MD_START_SHIFT			(3)
 #define NVME_NS_DATA_DPS_MD_START_MASK			(0x1)
 
 /** Namespace Multi-path I/O and Namespace Sharing Capabilities */
 /* the namespace may be attached to two or more controllers */
 #define NVME_NS_DATA_NMIC_MAY_BE_SHARED_SHIFT		(0)
 #define NVME_NS_DATA_NMIC_MAY_BE_SHARED_MASK		(0x1)
 
 /** Reservation Capabilities */
 /* Persist Through Power Loss */
 #define NVME_NS_DATA_RESCAP_PTPL_SHIFT		(0)
 #define NVME_NS_DATA_RESCAP_PTPL_MASK		(0x1)
 /* supports the Write Exclusive */
 #define NVME_NS_DATA_RESCAP_WR_EX_SHIFT		(1)
 #define NVME_NS_DATA_RESCAP_WR_EX_MASK		(0x1)
 /* supports the Exclusive Access */
 #define NVME_NS_DATA_RESCAP_EX_AC_SHIFT		(2)
 #define NVME_NS_DATA_RESCAP_EX_AC_MASK		(0x1)
 /* supports the Write Exclusive – Registrants Only */
 #define NVME_NS_DATA_RESCAP_WR_EX_RO_SHIFT	(3)
 #define NVME_NS_DATA_RESCAP_WR_EX_RO_MASK	(0x1)
 /* supports the Exclusive Access - Registrants Only */
 #define NVME_NS_DATA_RESCAP_EX_AC_RO_SHIFT	(4)
 #define NVME_NS_DATA_RESCAP_EX_AC_RO_MASK	(0x1)
 /* supports the Write Exclusive – All Registrants */
 #define NVME_NS_DATA_RESCAP_WR_EX_AR_SHIFT	(5)
 #define NVME_NS_DATA_RESCAP_WR_EX_AR_MASK	(0x1)
 /* supports the Exclusive Access - All Registrants */
 #define NVME_NS_DATA_RESCAP_EX_AC_AR_SHIFT	(6)
 #define NVME_NS_DATA_RESCAP_EX_AC_AR_MASK	(0x1)
 /* Ignore Existing Key is used as defined in revision 1.3 or later */
 #define NVME_NS_DATA_RESCAP_IEKEY13_SHIFT	(7)
 #define NVME_NS_DATA_RESCAP_IEKEY13_MASK	(0x1)
 
 /** Format Progress Indicator */
 /* percentage of the Format NVM command that remains to be completed */
 #define NVME_NS_DATA_FPI_PERC_SHIFT		(0)
 #define NVME_NS_DATA_FPI_PERC_MASK		(0x7f)
 /* namespace supports the Format Progress Indicator */
 #define NVME_NS_DATA_FPI_SUPP_SHIFT		(7)
 #define NVME_NS_DATA_FPI_SUPP_MASK		(0x1)
 
 /** lba format support */
 /* metadata size */
 #define NVME_NS_DATA_LBAF_MS_SHIFT			(0)
 #define NVME_NS_DATA_LBAF_MS_MASK			(0xFFFF)
 /* lba data size */
 #define NVME_NS_DATA_LBAF_LBADS_SHIFT			(16)
 #define NVME_NS_DATA_LBAF_LBADS_MASK			(0xFF)
 /* relative performance */
 #define NVME_NS_DATA_LBAF_RP_SHIFT			(24)
 #define NVME_NS_DATA_LBAF_RP_MASK			(0x3)
 
 /*
  * Critical warning flags.  Each enumerator is a distinct single bit; the
  * five defined bits together cover 0x1F, and
  * NVME_CRIT_WARN_ST_RESERVED_MASK covers the remaining three bits (0xE0)
  * of an 8-bit value.
  */
 enum nvme_critical_warning_state {
 	NVME_CRIT_WARN_ST_AVAILABLE_SPARE		= 0x1,
 	NVME_CRIT_WARN_ST_TEMPERATURE			= 0x2,
 	NVME_CRIT_WARN_ST_DEVICE_RELIABILITY		= 0x4,
 	NVME_CRIT_WARN_ST_READ_ONLY			= 0x8,
 	NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP	= 0x10,
 };
 #define NVME_CRIT_WARN_ST_RESERVED_MASK			(0xE0)
 
 /* slot for current FW */
 #define NVME_FIRMWARE_PAGE_AFI_SLOT_SHIFT		(0)
 #define NVME_FIRMWARE_PAGE_AFI_SLOT_MASK		(0x7)
 
 /*
  * CC register SHN field values.  The field is located with
  * NVME_CC_REG_SHN_SHIFT (14) and NVME_CC_REG_SHN_MASK (0x3).
  */
 enum shn_value {
 	NVME_SHN_NORMAL		= 0x1,
 	NVME_SHN_ABRUPT		= 0x2,
 };
 
 /*
  * CSTS register SHST field values, as extracted by NVME_CSTS_GET_SHST()
  * (shift 2, mask 0x3).
  */
 enum shst_value {
 	NVME_SHST_NORMAL	= 0x0,
 	NVME_SHST_OCCURRING	= 0x1,
 	NVME_SHST_COMPLETE	= 0x2,
 };
 
 /*
  * Controller register layout.  The structure is packed and the
  * _Static_assert below pins its size at 0x1008 bytes: the named registers
  * end at offset 0x38, reserved3 (0x3f2 dwords) pads up to offset 0x1000,
  * and a single 8-byte doorbell pair follows.
  */
 struct nvme_registers
 {
 	/** controller capabilities */
 	uint32_t		cap_lo;
 	uint32_t		cap_hi;
 
 	uint32_t		vs;	/* version */
 	uint32_t		intms;	/* interrupt mask set */
 	uint32_t		intmc;	/* interrupt mask clear */
 
 	/** controller configuration */
 	uint32_t		cc;
 
 	uint32_t		reserved1;
 
 	/** controller status */
 	uint32_t		csts;
 
 	uint32_t		reserved2;
 
 	/** admin queue attributes */
 	uint32_t		aqa;
 
 	uint64_t		asq;	/* admin submission queue base addr */
 	uint64_t		acq;	/* admin completion queue base addr */
 	uint32_t		reserved3[0x3f2];
 
 	/*
 	 * Only the first doorbell pair is declared here.
 	 * NOTE(review): further queue doorbells presumably follow in the
 	 * device's register space - confirm how callers address them.
 	 */
 	struct {
 	    uint32_t		sq_tdbl; /* submission queue tail doorbell */
 	    uint32_t		cq_hdbl; /* completion queue head doorbell */
 	} doorbell[1] __packed;
 } __packed;
 
 _Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers");
 
 /*
  * Submission queue entry: 16 dwords (64 bytes, enforced by the
  * _Static_assert below).
  */
 struct nvme_command
 {
 	/* dword 0 */
 	/*
 	 * opc_fuse holds the opcode in bits 0-7 and the fused-operation
 	 * field in bits 8-9 (NVME_CMD_OPC_* / NVME_CMD_FUSE_* shift and mask
 	 * macros).  NVME_CMD_SET_OPC() produces the value with htole16().
 	 */
 	uint16_t opc_fuse;	/* opcode, fused operation */
 	uint16_t cid;		/* command identifier */
 
 	/* dword 1 */
 	uint32_t nsid;		/* namespace identifier */
 
 	/* dword 2-3 */
 	uint32_t rsvd2;
 	uint32_t rsvd3;
 
 	/* dword 4-5 */
 	uint64_t mptr;		/* metadata pointer */
 
 	/* dword 6-7 */
 	uint64_t prp1;		/* prp entry 1 */
 
 	/* dword 8-9 */
 	uint64_t prp2;		/* prp entry 2 */
 
 	/* dword 10-15 */
 	uint32_t cdw10;		/* command-specific */
 	uint32_t cdw11;		/* command-specific */
 	uint32_t cdw12;		/* command-specific */
 	uint32_t cdw13;		/* command-specific */
 	uint32_t cdw14;		/* command-specific */
 	uint32_t cdw15;		/* command-specific */
 } __packed;
 
 _Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command");
 
 /*
  * Completion queue entry: 4 dwords (16 bytes, enforced by the
  * _Static_assert below).
  */
 struct nvme_completion {
 
 	/* dword 0 */
 	uint32_t		cdw0;	/* command-specific */
 
 	/* dword 1 */
 	uint32_t		rsvd1;
 
 	/* dword 2 */
 	uint16_t		sqhd;	/* submission queue head pointer */
 	uint16_t		sqid;	/* submission queue identifier */
 
 	/* dword 3 */
 	uint16_t		cid;	/* command identifier */
 	/*
 	 * status is decoded with the NVME_STATUS_GET_*() macros:
 	 * P = bit 0, SC = bits 1-8, SCT = bits 9-11, M = bit 14,
 	 * DNR = bit 15.
 	 */
 	uint16_t		status;
 } __packed;
 
 _Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion");
 
 /*
  * One range descriptor for a dataset management command: 16 bytes
  * (enforced by the _Static_assert below).
  */
 struct nvme_dsm_range {
 	uint32_t attributes;
 	uint32_t length;
 	uint64_t starting_lba;
 } __packed;
 
 /*
  * Largest DSM Trim that can be done.
  * NOTE(review): the unit of this limit is not visible here - confirm
  * against the users of NVME_MAX_DSM_TRIM.
  */
 #define NVME_MAX_DSM_TRIM		4096
 
 _Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_range");
 
 /*
  * Status code types: the 3-bit SCT field of a completion status word, as
  * extracted by NVME_STATUS_GET_SCT().
  */
 enum nvme_status_code_type {
 	NVME_SCT_GENERIC		= 0x0,
 	NVME_SCT_COMMAND_SPECIFIC	= 0x1,
 	NVME_SCT_MEDIA_ERROR		= 0x2,
 	/* 0x3-0x6 - reserved */
 	NVME_SCT_VENDOR_SPECIFIC	= 0x7,
 };
 
 /*
  * Generic command status codes.
  *
  * The numeric values overlap with those in
  * nvme_command_specific_status_code and nvme_media_error_status_code, so
  * a status code is only meaningful together with its status code type
  * (presumably NVME_SCT_GENERIC for this table).
  *
  * NVME_SC_PRP_OFFET_INVALID is spelled "OFFET" (sic); the name is part of
  * the existing interface.
  */
 enum nvme_generic_command_status_code {
 	NVME_SC_SUCCESS				= 0x00,
 	NVME_SC_INVALID_OPCODE			= 0x01,
 	NVME_SC_INVALID_FIELD			= 0x02,
 	NVME_SC_COMMAND_ID_CONFLICT		= 0x03,
 	NVME_SC_DATA_TRANSFER_ERROR		= 0x04,
 	NVME_SC_ABORTED_POWER_LOSS		= 0x05,
 	NVME_SC_INTERNAL_DEVICE_ERROR		= 0x06,
 	NVME_SC_ABORTED_BY_REQUEST		= 0x07,
 	NVME_SC_ABORTED_SQ_DELETION		= 0x08,
 	NVME_SC_ABORTED_FAILED_FUSED		= 0x09,
 	NVME_SC_ABORTED_MISSING_FUSED		= 0x0a,
 	NVME_SC_INVALID_NAMESPACE_OR_FORMAT	= 0x0b,
 	NVME_SC_COMMAND_SEQUENCE_ERROR		= 0x0c,
 	NVME_SC_INVALID_SGL_SEGMENT_DESCR	= 0x0d,
 	NVME_SC_INVALID_NUMBER_OF_SGL_DESCR	= 0x0e,
 	NVME_SC_DATA_SGL_LENGTH_INVALID		= 0x0f,
 	NVME_SC_METADATA_SGL_LENGTH_INVALID	= 0x10,
 	NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID	= 0x11,
 	NVME_SC_INVALID_USE_OF_CMB		= 0x12,
 	NVME_SC_PRP_OFFET_INVALID		= 0x13,
 	NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED	= 0x14,
 	NVME_SC_OPERATION_DENIED		= 0x15,
 	NVME_SC_SGL_OFFSET_INVALID		= 0x16,
 	/* 0x17 - reserved */
 	NVME_SC_HOST_ID_INCONSISTENT_FORMAT	= 0x18,
 	NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED	= 0x19,
 	NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID	= 0x1a,
 	NVME_SC_ABORTED_DUE_TO_PREEMPT		= 0x1b,
 	NVME_SC_SANITIZE_FAILED			= 0x1c,
 	NVME_SC_SANITIZE_IN_PROGRESS		= 0x1d,
 	NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID	= 0x1e,
 	NVME_SC_NOT_SUPPORTED_IN_CMB		= 0x1f,
 
 	NVME_SC_LBA_OUT_OF_RANGE		= 0x80,
 	NVME_SC_CAPACITY_EXCEEDED		= 0x81,
 	NVME_SC_NAMESPACE_NOT_READY		= 0x82,
 	NVME_SC_RESERVATION_CONFLICT		= 0x83,
 	NVME_SC_FORMAT_IN_PROGRESS		= 0x84,
 };
 
 /*
  * Command specific status codes.
  *
  * The numeric values overlap with the generic and media error tables, so
  * they must be read together with the status code type (presumably
  * NVME_SCT_COMMAND_SPECIFIC for this table).
  *
  * NVME_SC_SELT_TEST_IN_PROGRESS is spelled "SELT" (sic); the name is part
  * of the existing interface.
  */
 enum nvme_command_specific_status_code {
 	NVME_SC_COMPLETION_QUEUE_INVALID	= 0x00,
 	NVME_SC_INVALID_QUEUE_IDENTIFIER	= 0x01,
 	NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED	= 0x02,
 	NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED	= 0x03,
 	/* 0x04 - reserved */
 	NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
 	NVME_SC_INVALID_FIRMWARE_SLOT		= 0x06,
 	NVME_SC_INVALID_FIRMWARE_IMAGE		= 0x07,
 	NVME_SC_INVALID_INTERRUPT_VECTOR	= 0x08,
 	NVME_SC_INVALID_LOG_PAGE		= 0x09,
 	NVME_SC_INVALID_FORMAT			= 0x0a,
 	NVME_SC_FIRMWARE_REQUIRES_RESET		= 0x0b,
 	NVME_SC_INVALID_QUEUE_DELETION		= 0x0c,
 	NVME_SC_FEATURE_NOT_SAVEABLE		= 0x0d,
 	NVME_SC_FEATURE_NOT_CHANGEABLE		= 0x0e,
 	NVME_SC_FEATURE_NOT_NS_SPECIFIC		= 0x0f,
 	NVME_SC_FW_ACT_REQUIRES_NVMS_RESET	= 0x10,
 	NVME_SC_FW_ACT_REQUIRES_RESET		= 0x11,
 	NVME_SC_FW_ACT_REQUIRES_TIME		= 0x12,
 	NVME_SC_FW_ACT_PROHIBITED		= 0x13,
 	NVME_SC_OVERLAPPING_RANGE		= 0x14,
 	NVME_SC_NS_INSUFFICIENT_CAPACITY	= 0x15,
 	NVME_SC_NS_ID_UNAVAILABLE		= 0x16,
 	/* 0x17 - reserved */
 	NVME_SC_NS_ALREADY_ATTACHED		= 0x18,
 	NVME_SC_NS_IS_PRIVATE			= 0x19,
 	NVME_SC_NS_NOT_ATTACHED			= 0x1a,
 	NVME_SC_THIN_PROV_NOT_SUPPORTED		= 0x1b,
 	NVME_SC_CTRLR_LIST_INVALID		= 0x1c,
 	NVME_SC_SELT_TEST_IN_PROGRESS		= 0x1d,
 	NVME_SC_BOOT_PART_WRITE_PROHIB		= 0x1e,
 	NVME_SC_INVALID_CTRLR_ID		= 0x1f,
 	NVME_SC_INVALID_SEC_CTRLR_STATE		= 0x20,
 	NVME_SC_INVALID_NUM_OF_CTRLR_RESRC	= 0x21,
 	NVME_SC_INVALID_RESOURCE_ID		= 0x22,
 
 	NVME_SC_CONFLICTING_ATTRIBUTES		= 0x80,
 	NVME_SC_INVALID_PROTECTION_INFO		= 0x81,
 	NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE	= 0x82,
 };
 
 /*
  * Media error status codes.  All values start at 0x80 and overlap with
  * the 0x80+ entries of the generic and command specific tables, so they
  * must be read together with the status code type (presumably
  * NVME_SCT_MEDIA_ERROR for this table).
  */
 enum nvme_media_error_status_code {
 	NVME_SC_WRITE_FAULTS			= 0x80,
 	NVME_SC_UNRECOVERED_READ_ERROR		= 0x81,
 	NVME_SC_GUARD_CHECK_ERROR		= 0x82,
 	NVME_SC_APPLICATION_TAG_CHECK_ERROR	= 0x83,
 	NVME_SC_REFERENCE_TAG_CHECK_ERROR	= 0x84,
 	NVME_SC_COMPARE_FAILURE			= 0x85,
 	NVME_SC_ACCESS_DENIED			= 0x86,
 	NVME_SC_DEALLOCATED_OR_UNWRITTEN	= 0x87,
 };
 
 /*
  * Admin opcodes.  The values share the numeric space of nvme_nvm_opcode
  * (e.g. 0x00, 0x01 and 0x02 appear in both tables), so an opcode is only
  * meaningful together with the kind of queue it is submitted on.
  * Values fit in the 8-bit opcode field (NVME_CMD_OPC_MASK).
  */
 enum nvme_admin_opcode {
 	NVME_OPC_DELETE_IO_SQ			= 0x00,
 	NVME_OPC_CREATE_IO_SQ			= 0x01,
 	NVME_OPC_GET_LOG_PAGE			= 0x02,
 	/* 0x03 - reserved */
 	NVME_OPC_DELETE_IO_CQ			= 0x04,
 	NVME_OPC_CREATE_IO_CQ			= 0x05,
 	NVME_OPC_IDENTIFY			= 0x06,
 	/* 0x07 - reserved */
 	NVME_OPC_ABORT				= 0x08,
 	NVME_OPC_SET_FEATURES			= 0x09,
 	NVME_OPC_GET_FEATURES			= 0x0a,
 	/* 0x0b - reserved */
 	NVME_OPC_ASYNC_EVENT_REQUEST		= 0x0c,
 	NVME_OPC_NAMESPACE_MANAGEMENT		= 0x0d,
 	/* 0x0e-0x0f - reserved */
 	NVME_OPC_FIRMWARE_ACTIVATE		= 0x10,
 	NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD	= 0x11,
 	NVME_OPC_DEVICE_SELF_TEST		= 0x14,
 	NVME_OPC_NAMESPACE_ATTACHMENT		= 0x15,
 	NVME_OPC_KEEP_ALIVE			= 0x18,
 	NVME_OPC_DIRECTIVE_SEND			= 0x19,
 	NVME_OPC_DIRECTIVE_RECEIVE		= 0x1a,
 	NVME_OPC_VIRTUALIZATION_MANAGEMENT	= 0x1c,
 	NVME_OPC_NVME_MI_SEND			= 0x1d,
 	NVME_OPC_NVME_MI_RECEIVE		= 0x1e,
 	NVME_OPC_DOORBELL_BUFFER_CONFIG		= 0x7c,
 
 	NVME_OPC_FORMAT_NVM			= 0x80,
 	NVME_OPC_SECURITY_SEND			= 0x81,
 	NVME_OPC_SECURITY_RECEIVE		= 0x82,
 	NVME_OPC_SANITIZE			= 0x84,
 };
 
 /*
  * nvme nvm opcodes.  The values share the numeric space of
  * nvme_admin_opcode, so an opcode is only meaningful together with the
  * kind of queue it is submitted on.
  */
 enum nvme_nvm_opcode {
 	NVME_OPC_FLUSH				= 0x00,
 	NVME_OPC_WRITE				= 0x01,
 	NVME_OPC_READ				= 0x02,
 	/* 0x03 - reserved */
 	NVME_OPC_WRITE_UNCORRECTABLE		= 0x04,
 	NVME_OPC_COMPARE			= 0x05,
 	/* 0x06-0x07 - reserved */
 	NVME_OPC_WRITE_ZEROES			= 0x08,
 	NVME_OPC_DATASET_MANAGEMENT		= 0x09,
 	/* 0x0a-0x0c - reserved */
 	NVME_OPC_RESERVATION_REGISTER		= 0x0d,
 	NVME_OPC_RESERVATION_REPORT		= 0x0e,
 	/* 0x0f-0x10 - reserved */
 	NVME_OPC_RESERVATION_ACQUIRE		= 0x11,
 	/* 0x12-0x14 - reserved */
 	NVME_OPC_RESERVATION_RELEASE		= 0x15,
 };
 
 /*
  * Feature identifiers.
  * NOTE(review): presumably these are the identifiers used with
  * NVME_OPC_SET_FEATURES / NVME_OPC_GET_FEATURES - confirm against callers.
  */
 enum nvme_feature {
 	/* 0x00 - reserved */
 	NVME_FEAT_ARBITRATION			= 0x01,
 	NVME_FEAT_POWER_MANAGEMENT		= 0x02,
 	NVME_FEAT_LBA_RANGE_TYPE		= 0x03,
 	NVME_FEAT_TEMPERATURE_THRESHOLD		= 0x04,
 	NVME_FEAT_ERROR_RECOVERY		= 0x05,
 	NVME_FEAT_VOLATILE_WRITE_CACHE		= 0x06,
 	NVME_FEAT_NUMBER_OF_QUEUES		= 0x07,
 	NVME_FEAT_INTERRUPT_COALESCING		= 0x08,
 	NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
 	NVME_FEAT_WRITE_ATOMICITY		= 0x0A,
 	NVME_FEAT_ASYNC_EVENT_CONFIGURATION	= 0x0B,
 	NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
 	NVME_FEAT_HOST_MEMORY_BUFFER		= 0x0D,
 	NVME_FEAT_TIMESTAMP			= 0x0E,
 	NVME_FEAT_KEEP_ALIVE_TIMER		= 0x0F,
 	NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT	= 0x10,
 	NVME_FEAT_NON_OP_POWER_STATE_CONFIG	= 0x11,
 	/* 0x12-0x77 - reserved */
 	/* 0x78-0x7f - NVMe Management Interface */
 	NVME_FEAT_SOFTWARE_PROGRESS_MARKER	= 0x80,
 	/* 0x81-0xBF - command set specific (reserved) */
 	/* 0xC0-0xFF - vendor specific */
 };
 
 /*
  * Dataset management attribute flags.  Each enumerator is a distinct
  * single bit, so the values can be OR-ed together.
  */
 enum nvme_dsm_attribute {
 	NVME_DSM_ATTR_INTEGRAL_READ		= 0x1,
 	NVME_DSM_ATTR_INTEGRAL_WRITE		= 0x2,
 	NVME_DSM_ATTR_DEALLOCATE		= 0x4,
 };
 
 /*
  * Activate action values.
  * NOTE(review): presumably the action argument of
  * NVME_OPC_FIRMWARE_ACTIVATE - confirm against callers.
  */
 enum nvme_activate_action {
 	NVME_AA_REPLACE_NO_ACTIVATE		= 0x0,
 	NVME_AA_REPLACE_ACTIVATE		= 0x1,
 	NVME_AA_ACTIVATE			= 0x2,
 };
 
 /*
  * Power state descriptor: 32 bytes (enforced by the _Static_assert
  * below).  The sub-fields of the combined bytes are located with the
  * NVME_PWR_ST_*_SHIFT / NVME_PWR_ST_*_MASK macros.
  */
 struct nvme_power_state {
 	/** Maximum Power */
 	uint16_t	mp;			/* Maximum Power */
 	uint8_t		ps_rsvd1;
 	/* MPS is bit 0 and NOPS is bit 1 (NVME_PWR_ST_MPS_*, NVME_PWR_ST_NOPS_*) */
 	uint8_t		mps_nops;		/* Max Power Scale, Non-Operational State */
 
 	uint32_t	enlat;			/* Entry Latency */
 	uint32_t	exlat;			/* Exit Latency */
 
 	/* rrt, rrl, rwt and rwl each use a 5-bit mask (0x1F) at shift 0 */
 	uint8_t		rrt;			/* Relative Read Throughput */
 	uint8_t		rrl;			/* Relative Read Latency */
 	uint8_t		rwt;			/* Relative Write Throughput */
 	uint8_t		rwl;			/* Relative Write Latency */
 
 	uint16_t	idlp;			/* Idle Power */
 	/* the scale occupies bits 6-7 (NVME_PWR_ST_IPS_*) */
 	uint8_t		ips;			/* Idle Power Scale */
 	uint8_t		ps_rsvd8;
 
 	uint16_t	actp;			/* Active Power */
 	/* APW is bits 0-2 and APS is bits 6-7 (NVME_PWR_ST_APW_*, NVME_PWR_ST_APS_*) */
 	uint8_t		apw_aps;		/* Active Power Workload, Active Power Scale */
 	uint8_t		ps_rsvd10[9];
 } __packed;
 
 _Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state");
 
 #define NVME_SERIAL_NUMBER_LENGTH	20
 #define NVME_MODEL_NUMBER_LENGTH	40
 #define NVME_FIRMWARE_REVISION_LENGTH	8
 
 /*
  * Controller identification data.  The layout is packed and must be exactly
  * 4096 bytes (enforced by the _Static_assert that follows the structure).
  * nvme_ctrlr_identify() fills it through nvme_ctrlr_cmd_identify_controller()
  * and then converts the multi-byte fields to host order with
  * nvme_controller_data_swapbytes(); byte arrays are left untouched.
  */
 struct nvme_controller_data {
 
 	/* bytes 0-255: controller capabilities and features */
 
 	/** pci vendor id */
 	uint16_t		vid;
 
 	/** pci subsystem vendor id */
 	uint16_t		ssvid;
 
 	/** serial number */
 	uint8_t			sn[NVME_SERIAL_NUMBER_LENGTH];
 
 	/** model number */
 	uint8_t			mn[NVME_MODEL_NUMBER_LENGTH];
 
 	/** firmware revision */
 	uint8_t			fr[NVME_FIRMWARE_REVISION_LENGTH];
 
 	/** recommended arbitration burst */
 	uint8_t			rab;
 
 	/** ieee oui identifier */
 	uint8_t			ieee[3];
 
 	/** multi-interface capabilities */
 	uint8_t			mic;
 
 	/** maximum data transfer size */
 	uint8_t			mdts;
 
 	/** Controller ID */
 	uint16_t		ctrlr_id;
 
 	/** Version */
 	uint32_t		ver;
 
 	/** RTD3 Resume Latency */
 	uint32_t		rtd3r;
 
 	/** RTD3 Enter Latency */
 	uint32_t		rtd3e;
 
 	/** Optional Asynchronous Events Supported */
 	uint32_t		oaes;	/* bitfield really */
 
 	/** Controller Attributes */
 	uint32_t		ctratt;	/* bitfield really */
 
 	/* bytes 100-111: reserved */
 	uint8_t			reserved1[12];
 
 	/** FRU Globally Unique Identifier */
 	uint8_t			fguid[16];
 
 	/* bytes 128-255: reserved */
 	uint8_t			reserved2[128];
 
 	/* bytes 256-511: admin command set attributes */
 
 	/** optional admin command support */
 	uint16_t		oacs;
 
 	/** abort command limit */
 	uint8_t			acl;
 
 	/** asynchronous event request limit */
 	uint8_t			aerl;
 
 	/** firmware updates */
 	uint8_t			frmw;
 
 	/** log page attributes */
 	uint8_t			lpa;
 
 	/** error log page entries */
 	uint8_t			elpe;
 
 	/** number of power states supported */
 	uint8_t			npss;
 
 	/** admin vendor specific command configuration */
 	uint8_t			avscc;
 
 	/** Autonomous Power State Transition Attributes */
 	uint8_t			apsta;
 
 	/** Warning Composite Temperature Threshold */
 	uint16_t		wctemp;
 
 	/** Critical Composite Temperature Threshold */
 	uint16_t		cctemp;
 
 	/** Maximum Time for Firmware Activation */
 	uint16_t		mtfa;
 
 	/** Host Memory Buffer Preferred Size */
 	uint32_t		hmpre;
 
 	/** Host Memory Buffer Minimum Size */
 	uint32_t		hmmin;
 
 	/** Name space capabilities  */
 	struct {
 		/* if nsmgmt, report tnvmcap and unvmcap */
 		uint8_t    tnvmcap[16];
 		uint8_t    unvmcap[16];
 	} __packed untncap;
 
 	/** Replay Protected Memory Block Support */
 	uint32_t		rpmbs; /* Really a bitfield */
 
 	/** Extended Device Self-test Time */
 	uint16_t		edstt;
 
 	/** Device Self-test Options */
 	uint8_t			dsto; /* Really a bitfield */
 
 	/** Firmware Update Granularity */
 	uint8_t			fwug;
 
 	/** Keep Alive Support */
 	uint16_t		kas;
 
 	/** Host Controlled Thermal Management Attributes */
 	uint16_t		hctma; /* Really a bitfield */
 
 	/** Minimum Thermal Management Temperature */
 	uint16_t		mntmt;
 
 	/** Maximum Thermal Management Temperature */
 	uint16_t		mxtmt;
 
 	/** Sanitize Capabilities */
 	uint32_t		sanicap; /* Really a bitfield */
 
 	/* bytes 332-511: reserved */
 	uint8_t			reserved3[180];
 	/* bytes 512-703: nvm command set attributes */
 
 	/** submission queue entry size */
 	uint8_t			sqes;
 
 	/** completion queue entry size */
 	uint8_t			cqes;
 
 	/** Maximum Outstanding Commands */
 	uint16_t		maxcmd;
 
 	/** number of namespaces */
 	uint32_t		nn;
 
 	/** optional nvm command support */
 	uint16_t		oncs;
 
 	/** fused operation support */
 	uint16_t		fuses;
 
 	/** format nvm attributes */
 	uint8_t			fna;
 
 	/** volatile write cache */
 	uint8_t			vwc;
 
 	/** Atomic Write Unit Normal */
 	uint16_t		awun;
 
 	/** Atomic Write Unit Power Fail */
 	uint16_t		awupf;
 
 	/** NVM Vendor Specific Command Configuration */
 	uint8_t			nvscc;
 	uint8_t			reserved5;
 
 	/** Atomic Compare & Write Unit */
 	uint16_t		acwu;
 	uint16_t		reserved6;
 
 	/** SGL Support */
 	uint32_t		sgls;
 
 	/* bytes 540-767: Reserved */
 	uint8_t			reserved7[228];
 
 	/** NVM Subsystem NVMe Qualified Name (bytes 768-1023) */
 	uint8_t			subnqn[256];
 
 	/* bytes 1024-1791: Reserved */
 	uint8_t			reserved8[768];
 
 	/* bytes 1792-2047: NVMe over Fabrics specification */
 	uint8_t			reserved9[256];
 
 	/* bytes 2048-3071: power state descriptors */
 	struct nvme_power_state power_state[32];
 
 	/* bytes 3072-4095: vendor specific */
 	uint8_t			vs[1024];
 } __packed __aligned(4);
 
 _Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data");
 
 /*
  * Per-namespace identification data.  The layout is packed and must be
  * exactly 4096 bytes.  nvme_namespace_data_swapbytes() converts the
  * multi-byte integer fields to host order; the byte arrays (nvmcap, nguid,
  * eui64, reserved and vendor specific areas) are left as received.
  */
 struct nvme_namespace_data {
 
 	/** namespace size */
 	uint64_t		nsze;
 
 	/** namespace capacity */
 	uint64_t		ncap;
 
 	/** namespace utilization */
 	uint64_t		nuse;
 
 	/** namespace features */
 	uint8_t			nsfeat;
 
 	/** number of lba formats */
 	uint8_t			nlbaf;
 
 	/** formatted lba size */
 	uint8_t			flbas;
 
 	/** metadata capabilities */
 	uint8_t			mc;
 
 	/** end-to-end data protection capabilities */
 	uint8_t			dpc;
 
 	/** end-to-end data protection type settings */
 	uint8_t			dps;
 
 	/** Namespace Multi-path I/O and Namespace Sharing Capabilities */
 	uint8_t			nmic;
 
 	/** Reservation Capabilities */
 	uint8_t			rescap;
 
 	/** Format Progress Indicator */
 	uint8_t			fpi;
 
 	/** Deallocate Logical Block Features */
 	uint8_t			dlfeat;
 
 	/** Namespace Atomic Write Unit Normal  */
 	uint16_t		nawun;
 
 	/** Namespace Atomic Write Unit Power Fail */
 	uint16_t		nawupf;
 
 	/** Namespace Atomic Compare & Write Unit */
 	uint16_t		nacwu;
 
 	/** Namespace Atomic Boundary Size Normal */
 	uint16_t		nabsn;
 
 	/** Namespace Atomic Boundary Offset */
 	uint16_t		nabo;
 
 	/** Namespace Atomic Boundary Size Power Fail */
 	uint16_t		nabspf;
 
 	/** Namespace Optimal IO Boundary */
 	uint16_t		noiob;
 
 	/** NVM Capacity (bytes 48-63) */
 	uint8_t			nvmcap[16];
 
 	/* bytes 64-103: Reserved */
 	uint8_t			reserved5[40];
 
 	/** Namespace Globally Unique Identifier */
 	uint8_t			nguid[16];
 
 	/** IEEE Extended Unique Identifier */
 	uint8_t			eui64[8];
 
 	/** lba format support (bytes 128-191, one 32-bit entry per format) */
 	uint32_t		lbaf[16];
 
 	/* bytes 192-383: Reserved */
 	uint8_t			reserved6[192];
 
 	/* bytes 384-4095: vendor specific */
 	uint8_t			vendor_specific[3712];
 } __packed __aligned(4);
 
 /* The namespace data structure must stay exactly 4096 bytes. */
 _Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namespace_data");
 
 /*
  * Log page identifiers.  Standard pages come first, followed by vendor
  * specific pages.  Note that HGST_INFO_LOG and INTEL_LOG_READ_LAT_LOG share
  * the value 0xc1, so the numeric ID alone does not identify the page; the
  * caller must know which vendor's device it is talking to.
  */
 enum nvme_log_page {
 
 	/* 0x00 - reserved */
 	NVME_LOG_ERROR			= 0x01,
 	NVME_LOG_HEALTH_INFORMATION	= 0x02,
 	NVME_LOG_FIRMWARE_SLOT		= 0x03,
 	NVME_LOG_CHANGED_NAMESPACE	= 0x04,
 	NVME_LOG_COMMAND_EFFECT		= 0x05,
 	/* 0x06-0x7F - reserved */
 	/* 0x80-0xBF - I/O command set specific */
 	NVME_LOG_RES_NOTIFICATION	= 0x80,
 	/* 0xC0-0xFF - vendor specific */
 
 	/*
 	 * The following are Intel Specific log pages, but they seem
 	 * to be widely implemented.
 	 */
 	INTEL_LOG_READ_LAT_LOG		= 0xc1,
 	INTEL_LOG_WRITE_LAT_LOG		= 0xc2,
 	INTEL_LOG_TEMP_STATS		= 0xc5,
 	INTEL_LOG_ADD_SMART		= 0xca,
 	INTEL_LOG_DRIVE_MKT_NAME	= 0xdd,
 
 	/*
 	 * HGST log page, with lots of sub pages.
 	 */
 	HGST_INFO_LOG			= 0xc1,
 };
 
 /*
  * One error information entry.  The layout is packed and must be exactly
  * 64 bytes.  nvme_error_information_entry_swapbytes() converts every
  * multi-byte field to host order.
  */
 struct nvme_error_information_entry {
 
 	uint64_t		error_count;
 	uint16_t		sqid;
 	uint16_t		cid;
 	uint16_t		status;
 	uint16_t		error_location;
 	uint64_t		lba;
 	uint32_t		nsid;
 	uint8_t			vendor_specific;
 	uint8_t			reserved[35];
 } __packed __aligned(4);
 
 _Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");
 
 /*
  * Health information log page.  The layout is packed and must be exactly
  * 512 bytes.  Because the structure is packed, the 16-bit temperature field
  * sits at byte offset 1, directly after critical_warning.
  * nvme_health_information_page_swapbytes() converts the multi-byte fields
  * to host order.
  */
 struct nvme_health_information_page {
 
 	uint8_t			critical_warning;
 	uint16_t		temperature;
 	uint8_t			available_spare;
 	uint8_t			available_spare_threshold;
 	uint8_t			percentage_used;
 
 	uint8_t			reserved[26];
 
 	/*
 	 * Note that the following are 128-bit values, but are
 	 *  defined as an array of 2 64-bit values.
 	 */
 	/* Data Units Read is always in 512-byte units. */
 	uint64_t		data_units_read[2];
 	/* Data Units Written is always in 512-byte units. */
 	uint64_t		data_units_written[2];
 	/* For NVM command set, this includes Compare commands. */
 	uint64_t		host_read_commands[2];
 	uint64_t		host_write_commands[2];
 	/* Controller Busy Time is reported in minutes. */
 	uint64_t		controller_busy_time[2];
 	uint64_t		power_cycles[2];
 	uint64_t		power_on_hours[2];
 	uint64_t		unsafe_shutdowns[2];
 	uint64_t		media_errors[2];
 	uint64_t		num_error_info_log_entries[2];
 	uint32_t		warning_temp_time;
 	uint32_t		error_temp_time;
 	uint16_t		temp_sensor[8];
 
 	/* Pads the page out to 512 bytes. */
 	uint8_t			reserved2[296];
 } __packed __aligned(4);
 
 _Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");
 
 /*
  * Firmware log page.  The layout is packed and must be exactly 512 bytes.
  * nvme_firmware_page_swapbytes() converts the seven revision entries to
  * host order.
  */
 struct nvme_firmware_page {
 
 	uint8_t			afi;
 	uint8_t			reserved[7];
 	uint64_t		revision[7]; /* revisions for 7 slots */
 	uint8_t			reserved2[448];
 } __packed __aligned(4);
 
 _Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page");
 
+struct nvme_ns_list {	/* packed list of 32-bit entries; must be exactly 4096 bytes */
+	uint32_t		ns[1024];	/* converted to host order by nvme_ns_list_swapbytes() */
+} __packed __aligned(4);
+
+_Static_assert(sizeof(struct nvme_ns_list) == 4096, "bad size for nvme_ns_list");
+
 /*
  * Payload of the INTEL_LOG_TEMP_STATS vendor log page: thirteen 64-bit
  * fields (104 bytes).  intel_log_temp_stats_swapbytes() converts every field
  * except the reserved block to host order.
  */
 struct intel_log_temp_stats
 {
 	uint64_t	current;
 	uint64_t	overtemp_flag_last;
 	uint64_t	overtemp_flag_life;
 	uint64_t	max_temp;
 	uint64_t	min_temp;
 	uint64_t	_rsvd[5];
 	uint64_t	max_oper_temp;
 	uint64_t	min_oper_temp;
 	uint64_t	est_offset;
 } __packed __aligned(4);
 
 _Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");
 
 #define NVME_TEST_MAX_THREADS	128
 
 /*
  * Parameter and result block for an I/O test.  io_completed has one slot
  * per thread, up to NVME_TEST_MAX_THREADS.
  * NOTE(review): presumably num_threads must not exceed
  * NVME_TEST_MAX_THREADS -- confirm where the request is validated.
  */
 struct nvme_io_test {
 
 	enum nvme_nvm_opcode	opc;
 	uint32_t		size;
 	uint32_t		time;	/* in seconds */
 	uint32_t		num_threads;
 	uint32_t		flags;	/* see enum nvme_io_test_flags */
 	uint64_t		io_completed[NVME_TEST_MAX_THREADS];
 };
 
 /* Flag bits for the flags field of struct nvme_io_test. */
 enum nvme_io_test_flags {
 
 	/*
 	 * Specifies whether dev_refthread/dev_relthread should be
 	 *  called during NVME_BIO_TEST.  Ignored for other test
 	 *  types.
 	 */
 	NVME_TEST_FLAG_REFTHREAD =	0x1,
 };
 
 /*
  * Descriptor for a passthrough command; this is the argument type of
  * nvme_ctrlr_passthrough_cmd().  The caller fills in cmd, buf, len and
  * is_read, and the driver reports the outcome in cpl.
  */
 struct nvme_pt_command {
 
 	/*
 	 * cmd is used to specify a passthrough command to a controller or
 	 *  namespace.
 	 *
 	 * The following fields from cmd may be specified by the caller:
 	 *	* opc  (opcode)
 	 *	* nsid (namespace id) - for admin commands only
 	 *	* cdw10-cdw15
 	 *
 	 * Remaining fields must be set to 0 by the caller.
 	 */
 	struct nvme_command	cmd;
 
 	/*
 	 * cpl returns completion status for the passthrough command
 	 *  specified by cmd.
 	 *
 	 * The following fields will be filled out by the driver, for
 	 *  consumption by the caller:
 	 *	* cdw0
 	 *	* status (except for phase)
 	 *
 	 * Remaining fields will be set to 0 by the driver.
 	 */
 	struct nvme_completion	cpl;
 
 	/* buf is the data buffer associated with this passthrough command. */
 	void *			buf;
 
 	/*
 	 * len is the length of the data buffer associated with this
 	 *  passthrough command.
 	 */
 	uint32_t		len;
 
 	/*
 	 * is_read = 1 if the passthrough command will read data into the
 	 *  supplied buffer from the controller.
 	 *
 	 * is_read = 0 if the passthrough command will write data from the
 	 *  supplied buffer to the controller.
 	 */
 	uint32_t		is_read;
 
 	/*
 	 * driver_lock is used by the driver only.  It must be set to 0
 	 *  by the caller.
 	 */
 	struct mtx *		driver_lock;
 };
 
 /*
  * Evaluates to nonzero when either the status code (SC) or the status code
  * type (SCT) extracted from (cpl)->status is nonzero.  The argument is
  * evaluated twice, so it must not have side effects.
  */
 #define nvme_completion_is_error(cpl)					\
 	(NVME_STATUS_GET_SC((cpl)->status) != 0 || NVME_STATUS_GET_SCT((cpl)->status) != 0)
 
 void	nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
 
 #ifdef _KERNEL
 
 struct bio;
 
 struct nvme_namespace;
 struct nvme_controller;
 struct nvme_consumer;
 
 typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);
 
 typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
 typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
 typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
 				     uint32_t, void *, uint32_t);
 typedef void (*nvme_cons_fail_fn_t)(void *);
 
 /*
  * Namespace capability flag bits.
  * NOTE(review): presumably the bits returned by nvme_ns_get_flags() --
  * confirm in the namespace code.
  */
 enum nvme_namespace_flags {
 	NVME_NS_DEALLOCATE_SUPPORTED	= 0x1,
 	NVME_NS_FLUSH_SUPPORTED		= 0x2,
 };
 
 int	nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
 				   struct nvme_pt_command *pt,
 				   uint32_t nsid, int is_user_buffer,
 				   int is_admin_cmd);
 
 /* Admin functions */
 void	nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
 				   uint8_t feature, uint32_t cdw11,
 				   void *payload, uint32_t payload_size,
 				   nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
 				   uint8_t feature, uint32_t cdw11,
 				   void *payload, uint32_t payload_size,
 				   nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
 				    uint8_t log_page, uint32_t nsid,
 				    void *payload, uint32_t payload_size,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 
 /* NVM I/O functions */
 int	nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
 			  uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
 			  void *cb_arg);
 int	nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
 			      nvme_cb_fn_t cb_fn, void *cb_arg);
 int	nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
 			 uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
 			 void *cb_arg);
 int	nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
 			      nvme_cb_fn_t cb_fn, void *cb_arg);
 int	nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
 			       uint8_t num_ranges, nvme_cb_fn_t cb_fn,
 			       void *cb_arg);
 int	nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
 			  void *cb_arg);
 int	nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
 		     size_t len);
 
 /* Registration functions */
 struct nvme_consumer *	nvme_register_consumer(nvme_cons_ns_fn_t    ns_fn,
 					       nvme_cons_ctrlr_fn_t ctrlr_fn,
 					       nvme_cons_async_fn_t async_fn,
 					       nvme_cons_fail_fn_t  fail_fn);
 void		nvme_unregister_consumer(struct nvme_consumer *consumer);
 
 /* Controller helper functions */
 device_t	nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
 const struct nvme_controller_data *
 		nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
 
 /* Namespace helper functions */
 uint32_t	nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
 uint32_t	nvme_ns_get_sector_size(struct nvme_namespace *ns);
 uint64_t	nvme_ns_get_num_sectors(struct nvme_namespace *ns);
 uint64_t	nvme_ns_get_size(struct nvme_namespace *ns);
 uint32_t	nvme_ns_get_flags(struct nvme_namespace *ns);
 const char *	nvme_ns_get_serial_number(struct nvme_namespace *ns);
 const char *	nvme_ns_get_model_number(struct nvme_namespace *ns);
 const struct nvme_namespace_data *
 		nvme_ns_get_data(struct nvme_namespace *ns);
 uint32_t	nvme_ns_get_stripesize(struct nvme_namespace *ns);
 
 int	nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
 			    nvme_cb_fn_t cb_fn);
 
 /*
  * Command building helper functions -- shared with CAM
  * These functions assume allocator zeros out cmd structure
  * CAM's xpt_get_ccb and the request allocator for nvme both
  * do zero'd allocations.
  */
 /*
  * Fill in cmd as a FLUSH for namespace nsid.  Only the opcode and the
  * little-endian namespace ID are written; every other field is expected to
  * be zero already (see the note on zeroed allocations above).
  */
 static inline
 void	nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
 {
 
 	cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_FLUSH);
 	cmd->nsid = htole32(nsid);
 }
 
 /*
  * Build a read/write style command in cmd.
  *
  * rwcmd is the opcode to encode, nsid the target namespace, lba the 64-bit
  * starting block (split across cdw10/cdw11) and count the number of blocks.
  * cdw12 receives count - 1.  All words are stored little endian.
  */
 static inline
 void	nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
     uint64_t lba, uint32_t count)
 {
 	const uint32_t lba_low = (uint32_t)(lba & 0xffffffffu);
 	const uint32_t lba_high = (uint32_t)(lba >> 32);
 
 	cmd->opc_fuse = NVME_CMD_SET_OPC(rwcmd);
 	cmd->nsid = htole32(nsid);
 
 	/* Starting LBA: low half in cdw10, high half in cdw11. */
 	cmd->cdw10 = htole32(lba_low);
 	cmd->cdw11 = htole32(lba_high);
 
 	/* The block count is stored as count minus one. */
 	cmd->cdw12 = htole32(count - 1);
 }
 
 /* Build a WRITE of count blocks starting at lba; wraps nvme_ns_rw_cmd(). */
 static inline
 void	nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid,
     uint64_t lba, uint32_t count)
 {
 	nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
 }
 
 /* Build a READ of count blocks starting at lba; wraps nvme_ns_rw_cmd(). */
 static inline
 void	nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid,
     uint64_t lba, uint32_t count)
 {
 	nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
 }
 
 /*
  * Build a Dataset Management command that deallocates num_ranges ranges on
  * namespace nsid.  cdw10 carries num_ranges - 1 and cdw11 the DEALLOCATE
  * attribute; the range list itself is not touched here.
  */
 static inline
 void	nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
     uint32_t num_ranges)
 {
 	cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DATASET_MANAGEMENT);
 	cmd->nsid = htole32(nsid);
 	cmd->cdw10 = htole32(num_ranges - 1);
 	cmd->cdw11 = htole32(NVME_DSM_ATTR_DEALLOCATE);
 }
 
 extern int nvme_use_nvd;
 
 #endif /* _KERNEL */
 
 /* Endianess conversion functions for NVMe structs */
 /*
  * Convert a completion entry from little endian to host order, in place.
  * rsvd1 and cid are deliberately left unconverted.
  */
 static inline
 void	nvme_completion_swapbytes(struct nvme_completion *s)
 {
 
 	s->cdw0 = le32toh(s->cdw0);
 	/* omit rsvd1 */
 	s->sqhd = le16toh(s->sqhd);
 	s->sqid = le16toh(s->sqid);
 	/* omit cid */
 	s->status = le16toh(s->status);
 }
 
 /*
  * Convert the multi-byte fields of one power state descriptor from little
  * endian to host order, in place.  Single-byte fields need no conversion.
  */
 static inline
 void	nvme_power_state_swapbytes(struct nvme_power_state *s)
 {
 
 	s->mp = le16toh(s->mp);
 	s->enlat = le32toh(s->enlat);
 	s->exlat = le32toh(s->exlat);
 	s->idlp = le16toh(s->idlp);
 	s->actp = le16toh(s->actp);
 }
 
 static inline
 void	nvme_controller_data_swapbytes(struct nvme_controller_data *s)
 {
 	int i;
 
 	s->vid = le16toh(s->vid);
 	s->ssvid = le16toh(s->ssvid);
 	s->ctrlr_id = le16toh(s->ctrlr_id);
 	s->ver = le32toh(s->ver);
 	s->rtd3r = le32toh(s->rtd3r);
 	s->rtd3e = le32toh(s->rtd3e);
 	s->oaes = le32toh(s->oaes);
 	s->ctratt = le32toh(s->ctratt);
 	s->oacs = le16toh(s->oacs);
 	s->wctemp = le16toh(s->wctemp);
 	s->cctemp = le16toh(s->cctemp);
 	s->mtfa = le16toh(s->mtfa);
 	s->hmpre = le32toh(s->hmpre);
 	s->hmmin = le32toh(s->hmmin);
 	s->rpmbs = le32toh(s->rpmbs);
 	s->edstt = le16toh(s->edstt);
 	s->kas = le16toh(s->kas);
 	s->hctma = le16toh(s->hctma);
 	s->mntmt = le16toh(s->mntmt);
 	s->mxtmt = le16toh(s->mxtmt);
 	s->sanicap = le32toh(s->sanicap);
 	s->maxcmd = le16toh(s->maxcmd);
 	s->nn = le32toh(s->nn);
 	s->oncs = le16toh(s->oncs);
 	s->fuses = le16toh(s->fuses);
 	s->awun = le16toh(s->awun);
 	s->awupf = le16toh(s->awupf);
 	s->acwu = le16toh(s->acwu);
 	s->sgls = le32toh(s->sgls);
 	for (i = 0; i < 32; i++)
 		nvme_power_state_swapbytes(&s->power_state[i]);
 }
 
 /*
  * Convert every multi-byte integer field of the namespace data from little
  * endian to host order, in place.  Byte arrays are left untouched.
  */
 static inline
 void	nvme_namespace_data_swapbytes(struct nvme_namespace_data *s)
 {
 	unsigned int fmt;
 
 	/* 64-bit size, capacity and utilization. */
 	s->nsze = le64toh(s->nsze);
 	s->ncap = le64toh(s->ncap);
 	s->nuse = le64toh(s->nuse);
 
 	/* 16-bit atomic write / boundary parameters. */
 	s->nawun = le16toh(s->nawun);
 	s->nawupf = le16toh(s->nawupf);
 	s->nacwu = le16toh(s->nacwu);
 	s->nabsn = le16toh(s->nabsn);
 	s->nabo = le16toh(s->nabo);
 	s->nabspf = le16toh(s->nabspf);
 	s->noiob = le16toh(s->noiob);
 
 	/* One 32-bit descriptor per LBA format slot. */
 	for (fmt = 0; fmt < sizeof(s->lbaf) / sizeof(s->lbaf[0]); fmt++)
 		s->lbaf[fmt] = le32toh(s->lbaf[fmt]);
 }
 
 /*
  * Convert one error information entry from little endian to host order, in
  * place.  The single-byte vendor_specific field and the reserved bytes need
  * no conversion.
  */
 static inline
 void	nvme_error_information_entry_swapbytes(struct nvme_error_information_entry *s)
 {
 
 	s->error_count = le64toh(s->error_count);
 	s->sqid = le16toh(s->sqid);
 	s->cid = le16toh(s->cid);
 	s->status = le16toh(s->status);
 	s->error_location = le16toh(s->error_location);
 	s->lba = le64toh(s->lba);
 	s->nsid = le32toh(s->nsid);
 }
 
 /*
  * Convert a 16-byte little-endian quantity at p to host byte order, in
  * place.  On a little-endian host this does nothing; otherwise the sixteen
  * bytes are reversed end for end.
  */
 static inline
 void	nvme_le128toh(void *p)
 {
 #if _BYTE_ORDER == _LITTLE_ENDIAN
 	/* Already in host order; nothing to do. */
 	(void)p;
 #else
 	char *bytes = (char *)p;
 	char saved;
 	int lo, hi;
 
 	/* Exchange byte pairs working inwards from both ends. */
 	for (lo = 0, hi = 15; lo < 8; lo++, hi--) {
 		saved = bytes[lo];
 		bytes[lo] = bytes[hi];
 		bytes[hi] = saved;
 	}
 #endif
 }
 
 /*
  * Convert the health information page from little endian to host order, in
  * place.  The 128-bit counters are converted with nvme_le128toh(); the
  * remaining multi-byte fields use the ordinary 16/32-bit conversions.
  */
 static inline
 void	nvme_health_information_page_swapbytes(struct nvme_health_information_page *s)
 {
 	unsigned int sensor;
 
 	s->temperature = le16toh(s->temperature);
 
 	/* 128-bit counters, each stored as two 64-bit words. */
 	nvme_le128toh(s->data_units_read);
 	nvme_le128toh(s->data_units_written);
 	nvme_le128toh(s->host_read_commands);
 	nvme_le128toh(s->host_write_commands);
 	nvme_le128toh(s->controller_busy_time);
 	nvme_le128toh(s->power_cycles);
 	nvme_le128toh(s->power_on_hours);
 	nvme_le128toh(s->unsafe_shutdowns);
 	nvme_le128toh(s->media_errors);
 	nvme_le128toh(s->num_error_info_log_entries);
 
 	s->warning_temp_time = le32toh(s->warning_temp_time);
 	s->error_temp_time = le32toh(s->error_temp_time);
 
 	for (sensor = 0;
 	    sensor < sizeof(s->temp_sensor) / sizeof(s->temp_sensor[0]);
 	    sensor++)
 		s->temp_sensor[sensor] = le16toh(s->temp_sensor[sensor]);
 }
 
 
 /*
  * Convert the seven 64-bit firmware revision entries from little endian to
  * host order, in place.  afi and the reserved bytes are left untouched.
  */
 static inline
 void	nvme_firmware_page_swapbytes(struct nvme_firmware_page *s)
 {
 	int i;
 
 	for (i = 0; i < 7; i++)
 		s->revision[i] = le64toh(s->revision[i]);
+}
+
+static inline
+void	nvme_ns_list_swapbytes(struct nvme_ns_list *s)	/* converts the list in place */
+{
+	int i;
+
+	for (i = 0; i < 1024; i++)	/* every one of the 1024 entries */
+		s->ns[i] = le32toh(s->ns[i]);	/* little endian to host order */
 }
 
 /*
  * Convert the Intel temperature statistics log page from little endian to
  * host order, in place.  The reserved words are skipped.
  */
 static inline
 void	intel_log_temp_stats_swapbytes(struct intel_log_temp_stats *s)
 {
 
 	s->current = le64toh(s->current);
 	s->overtemp_flag_last = le64toh(s->overtemp_flag_last);
 	s->overtemp_flag_life = le64toh(s->overtemp_flag_life);
 	s->max_temp = le64toh(s->max_temp);
 	s->min_temp = le64toh(s->min_temp);
 	/* omit _rsvd[] */
 	s->max_oper_temp = le64toh(s->max_oper_temp);
 	s->min_oper_temp = le64toh(s->min_oper_temp);
 	s->est_offset = le64toh(s->est_offset);
 }
 
 #endif /* __NVME_H__ */
Index: head/sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	(revision 334199)
+++ head/sys/dev/nvme/nvme_ctrlr.c	(revision 334200)
@@ -1,1390 +1,1414 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2012-2016 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cam.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/uio.h>
 #include <sys/endian.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
 #define B4_CHK_RDY_DELAY_MS	2300		/* work around controller bug */
 
 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 						struct nvme_async_event_request *aer);
 static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
 
 /*
  * Map the controller's PCI memory BARs.
  *
  * BAR 0 is required: its bus tag/handle are recorded and ctrlr->regs is
  * pointed at it.  BAR 4 is mapped opportunistically and its absence is not
  * an error.
  *
  * Returns 0 on success, or ENOMEM when BAR 0 cannot be allocated.
  */
 static int
 nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
 {
 
 	ctrlr->resource_id = PCIR_BAR(0);
 	ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
 	    &ctrlr->resource_id, RF_ACTIVE);
 	if (ctrlr->resource == NULL) {
 		nvme_printf(ctrlr, "unable to allocate pci resource\n");
 		return (ENOMEM);
 	}
 
 	/* Register accesses go through the BAR 0 mapping. */
 	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
 	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
 	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
 
 	/*
 	 * The MSI-X table may live behind BAR 4/5 instead of alongside the
 	 * control/doorbell registers, and it has to be mapped before
 	 * pci_alloc_msix() is called.  Always attempt the mapping; when the
 	 * table is not behind BAR 4/5 the allocation simply yields NULL,
 	 * which is acceptable.
 	 */
 	ctrlr->bar4_resource_id = PCIR_BAR(4);
 	ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev,
 	    SYS_RES_MEMORY, &ctrlr->bar4_resource_id, RF_ACTIVE);
 
 	return (0);
 }
 
 /*
  * Construct the admin queue pair (qpair ID 0, interrupt vector 0).
  *
  * The number of entries defaults to NVME_ADMIN_ENTRIES and may be overridden
  * with the hw.nvme.admin_entries tunable; values outside
  * [NVME_MIN_ADMIN_ENTRIES, NVME_MAX_ADMIN_ENTRIES] are reported and replaced
  * by the default.
  *
  * Returns whatever nvme_qpair_construct() returns.
  */
 static int
 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
 {
 	uint32_t	num_entries = NVME_ADMIN_ENTRIES;
 
 	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
 
 	/* Fall back to the default when the override is out of range. */
 	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
 	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
 		nvme_printf(ctrlr, "invalid hw.nvme.admin_entries=%d "
 		    "specified\n", num_entries);
 		num_entries = NVME_ADMIN_ENTRIES;
 	}
 
 	return (nvme_qpair_construct(&ctrlr->adminq,
 	    0,	/* qpair ID */
 	    0,	/* vector */
 	    num_entries,
 	    NVME_ADMIN_TRACKERS,
 	    ctrlr));
 }
 
 /*
  * Allocate and construct ctrlr->num_io_queues I/O queue pairs.
  *
  * Queue depth comes from hw.nvme.io_entries, capped by the MQES field of the
  * capabilities register; the tracker count comes from hw.nvme.io_trackers,
  * clamped to [NVME_MIN_IO_TRACKERS, NVME_MAX_IO_TRACKERS] and to one less
  * than the queue depth.  Also updates ctrlr->max_hw_pend_io and
  * ctrlr->num_cpus_per_ioq.
  *
  * Returns 0 on success, or the error from the first nvme_qpair_construct()
  * call that fails.  ctrlr->ioq is not freed here on that path.
  */
 static int
 nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_qpair	*qpair;
 	uint32_t		cap_lo;
 	uint16_t		mqes;
 	int			i, error, num_entries, num_trackers;
 
 	num_entries = NVME_IO_ENTRIES;
 	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
 
 	/*
 	 * NVMe spec sets a hard limit of 64K max entries, but
 	 *  devices may specify a smaller limit, so we need to check
 	 *  the MQES field in the capabilities register.
 	 */
 	cap_lo = nvme_mmio_read_4(ctrlr, cap_lo);
 	mqes = (cap_lo >> NVME_CAP_LO_REG_MQES_SHIFT) & NVME_CAP_LO_REG_MQES_MASK;
 	/* MQES is treated as 0-based here, hence the "+ 1". */
 	num_entries = min(num_entries, mqes + 1);
 
 	num_trackers = NVME_IO_TRACKERS;
 	TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);
 
 	num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
 	num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
 	/*
 	 * No need to have more trackers than entries in the submit queue.
 	 *  Note also that for a queue size of N, we can only have (N-1)
 	 *  commands outstanding, hence the "-1" here.
 	 */
 	num_trackers = min(num_trackers, (num_entries-1));
 
 	/*
 	 * Our best estimate for the maximum number of I/Os that we should
 	 * normally have in flight at one time. This should be viewed as a hint,
 	 * not a hard limit and will need to be revisited when the upper layers
 	 * of the storage system grow multi-queue support.
 	 */
 	ctrlr->max_hw_pend_io = num_trackers * ctrlr->num_io_queues * 3 / 4;
 
 	/*
 	 * This was calculated previously when setting up interrupts, but
 	 *  a controller could theoretically support fewer I/O queues than
 	 *  MSI-X vectors.  So calculate again here just to be safe.
 	 */
 	ctrlr->num_cpus_per_ioq = howmany(mp_ncpus, ctrlr->num_io_queues);
 
 	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
 	    M_NVME, M_ZERO | M_WAITOK);
 
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		qpair = &ctrlr->ioq[i];
 
 		/*
 		 * Admin queue has ID=0. IO queues start at ID=1 -
 		 *  hence the 'i+1' here.
 		 *
 		 * For I/O queues, use the controller-wide max_xfer_size
 		 *  calculated in nvme_attach().
 		 */
 		error = nvme_qpair_construct(qpair,
 				     i+1, /* qpair ID */
 				     ctrlr->msix_enabled ? i+1 : 0, /* vector */
 				     num_entries,
 				     num_trackers,
 				     ctrlr);
 		if (error)
 			return (error);
 
 		/*
 		 * Do not bother binding interrupts if we only have one I/O
 		 *  interrupt thread for this controller.
 		 */
 		if (ctrlr->num_io_queues > 1)
 			bus_bind_intr(ctrlr->dev, qpair->res,
 			    i * ctrlr->num_cpus_per_ioq);
 	}
 
 	return (0);
 }
 
 /*
  * Mark the controller as failed, fail the admin queue pair and every I/O
  * queue pair that has been allocated, then notify the registered consumers.
  */
 static void
 nvme_ctrlr_fail(struct nvme_controller *ctrlr)
 {
 	int i;
 
 	ctrlr->is_failed = TRUE;
 	nvme_qpair_fail(&ctrlr->adminq);
 	/* The I/O queue array may not have been allocated yet. */
 	if (ctrlr->ioq != NULL) {
 		for (i = 0; i < ctrlr->num_io_queues; i++)
 			nvme_qpair_fail(&ctrlr->ioq[i]);
 	}
 	nvme_notify_fail_consumers(ctrlr);
 }
 
 /*
  * Queue req on the controller's failed-request list (under ctrlr->lock) and
  * schedule fail_req_task on the controller taskqueue to complete it later.
  */
 void
 nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 
 	mtx_lock(&ctrlr->lock);
 	STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
 	mtx_unlock(&ctrlr->lock);
 	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
 }
 
 /*
  * Taskqueue handler: drain ctrlr->fail_req, completing each request with
  * status NVME_SCT_GENERIC / NVME_SC_ABORTED_BY_REQUEST.
  *
  * arg is the struct nvme_controller; pending is unused.
  */
 static void
 nvme_ctrlr_fail_req_task(void *arg, int pending)
 {
 	struct nvme_controller	*ctrlr = arg;
 	struct nvme_request	*req;
 
 	mtx_lock(&ctrlr->lock);
 	while ((req = STAILQ_FIRST(&ctrlr->fail_req)) != NULL) {
 		STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq);
 		/* The completion call is made with ctrlr->lock released. */
 		mtx_unlock(&ctrlr->lock);
 		nvme_qpair_manual_complete_request(req->qpair, req,
 		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
 		mtx_lock(&ctrlr->lock);
 	}
 	mtx_unlock(&ctrlr->lock);
 }
 
 /*
  * Poll CSTS.RDY until it equals desired_val.
  *
  * The register is sampled once per millisecond (DELAY(1000) between reads).
  * Returns 0 as soon as the bit matches, or ENXIO after logging a message
  * once more than ctrlr->ready_timeout_in_ms polls have failed.
  */
 static int
 nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val)
 {
 	uint32_t csts, rdy;
 	int elapsed_ms;
 
 	for (elapsed_ms = 0; ; elapsed_ms++) {
 		csts = nvme_mmio_read_4(ctrlr, csts);
 		rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;
 		if (rdy == desired_val)
 			return (0);
 
 		if (elapsed_ms > ctrlr->ready_timeout_in_ms) {
 			nvme_printf(ctrlr, "controller ready did not become %d "
 			    "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms);
 			return (ENXIO);
 		}
 		DELAY(1000);
 	}
 }
 
 /*
  * Disable the controller by clearing CC.EN and wait for CSTS.RDY to drop.
  *
  * Per 3.1.5 in the NVMe 1.3 spec, changing CC.EN from 0 to 1 while CSTS.RDY
  * is 1, or from 1 to 0 while CSTS.RDY is 0, "has undefined results".  The
  * ready bit is therefore brought in line with the current enable bit before
  * the enable bit is changed.
  *
  * Returns 0 on success or the error from nvme_ctrlr_wait_for_ready().
  */
 static int
 nvme_ctrlr_disable(struct nvme_controller *ctrlr)
 {
 	uint32_t cc, csts;
 	uint8_t enabled, ready;
 	int error;
 
 	cc = nvme_mmio_read_4(ctrlr, cc);
 	csts = nvme_mmio_read_4(ctrlr, csts);
 
 	enabled = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
 	ready = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;
 
 	if (enabled != 1) {
 		/* Already disabled: only make sure RDY has dropped too. */
 		if (ready == 0)
 			return (0);
 		return (nvme_ctrlr_wait_for_ready(ctrlr, 0));
 	}
 
 	if (ready == 0) {
 		/* Enabled but not yet ready: wait for RDY == 1 or fail. */
 		error = nvme_ctrlr_wait_for_ready(ctrlr, 1);
 		if (error != 0)
 			return (error);
 	}
 
 	cc &= ~NVME_CC_REG_EN_MASK;
 	nvme_mmio_write_4(ctrlr, cc, cc);
 
 	/*
 	 * Some drives misbehave when their registers are accessed right
 	 * after being disabled, so give quirky devices a pause before
 	 * polling the ready bit.
 	 */
 	if (ctrlr->quirks & QUIRK_DELAY_B4_CHK_RDY)
 		pause("nvmeR", B4_CHK_RDY_DELAY_MS * hz / 1000);
 
 	return (nvme_ctrlr_wait_for_ready(ctrlr, 0));
 }
 
 /*
  * Enable the controller: program the admin queue base addresses and sizes,
  * write the initial CC value with EN set, and wait for CSTS.RDY to become 1.
  *
  * If the controller is already enabled, only the ready bit is waited for.
  * Otherwise RDY must first read 0 (see the note in nvme_ctrlr_disable())
  * before any register is written.
  *
  * Returns 0 on success or the error from nvme_ctrlr_wait_for_ready().
  */
 static int
 nvme_ctrlr_enable(struct nvme_controller *ctrlr)
 {
 	uint32_t	cc;
 	uint32_t	csts;
 	uint32_t	aqa;
 	uint32_t	qsize;
 	uint8_t		en, rdy;
 	int		err;
 
 	cc = nvme_mmio_read_4(ctrlr, cc);
 	csts = nvme_mmio_read_4(ctrlr, csts);
 
 	en = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
 	rdy = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;
 
 	/*
 	 * See note in nvme_ctrlr_disable. Short circuit if we're already enabled.
 	 */
 	if (en == 1) {
 		if (rdy == 1)
 			return (0);
 		else
 			return (nvme_ctrlr_wait_for_ready(ctrlr, 1));
 	} else {
 		/* EN == 0 already wait for RDY == 0 or fail */
 		err = nvme_ctrlr_wait_for_ready(ctrlr, 0);
 		if (err != 0)
 			return (err);
 	}
 
 	/* Admin submission and completion queue base addresses. */
 	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
 	DELAY(5000);
 	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
 	DELAY(5000);
 
 	/* acqs and asqs are 0-based. */
 	qsize = ctrlr->adminq.num_entries - 1;
 
 	/* Both admin queues are given the same size. */
 	aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT;
 	aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT;
 	nvme_mmio_write_4(ctrlr, aqa, aqa);
 	DELAY(5000);
 
 	/*
 	 * Initialization values for CC.  The zero-valued terms have no effect
 	 * on the result; they are kept to spell out every field.
 	 */
 	cc = 0;
 	cc |= 1 << NVME_CC_REG_EN_SHIFT;
 	cc |= 0 << NVME_CC_REG_CSS_SHIFT;
 	cc |= 0 << NVME_CC_REG_AMS_SHIFT;
 	cc |= 0 << NVME_CC_REG_SHN_SHIFT;
 	cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */
 	cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */
 
 	/*
 	 * This evaluates to 0, which is according to spec.
 	 * NOTE(review): that holds for a 4 KiB PAGE_SIZE; confirm that
 	 * PAGE_SIZE >> 13 gives the intended MPS encoding on platforms with
 	 * larger pages.
 	 */
 	cc |= (PAGE_SIZE >> 13) << NVME_CC_REG_MPS_SHIFT;
 
 	nvme_mmio_write_4(ctrlr, cc, cc);
 
 	return (nvme_ctrlr_wait_for_ready(ctrlr, 1));
 }
 
 int
 nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
 {
 	int i, err;
 
 	nvme_admin_qpair_disable(&ctrlr->adminq);
 	/*
 	 * I/O queues are not allocated before the initial HW
 	 *  reset, so do not try to disable them.  Use is_initialized
 	 *  to determine if this is the initial HW reset.
 	 */
 	if (ctrlr->is_initialized) {
 		for (i = 0; i < ctrlr->num_io_queues; i++)
 			nvme_io_qpair_disable(&ctrlr->ioq[i]);
 	}
 
 	DELAY(100*1000);
 
 	err = nvme_ctrlr_disable(ctrlr);
 	if (err != 0)
 		return err;
 	return (nvme_ctrlr_enable(ctrlr));
 }
 
 /*
  * Request an asynchronous controller reset.
  *
  * is_resetting is atomically switched from 0 to 1; when that succeeds and
  * the controller has not failed, reset_task is queued on the controller
  * taskqueue.  Nothing is queued when a reset is already in progress or the
  * controller has failed (in the latter case is_resetting is left set).
  */
 void
 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
 {
 
 	/* Somebody else already owns the reset. */
 	if (atomic_cmpset_32(&ctrlr->is_resetting, 0, 1) == 0)
 		return;
 
 	/* A failed controller is never reset again. */
 	if (ctrlr->is_failed)
 		return;
 
 	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
 }
 
 /*
  * Issue IDENTIFY CONTROLLER, wait for it to complete, and store the
  * host-endian result in ctrlr->cdata.  On success max_xfer_size is clamped
  * to the limit advertised through MDTS.
  *
  * Returns 0 on success or ENXIO if the command completes with an error.
  */
 static int
 nvme_ctrlr_identify(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	uint64_t				mdts_bytes;
 
 	status.done = 0;
 	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
 	    nvme_completion_poll_cb, &status);
 	while (!atomic_load_acq_int(&status.done))
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_identify_controller failed!\n");
 		return (ENXIO);
 	}
 
 	/* Convert data to host endian */
 	nvme_controller_data_swapbytes(&ctrlr->cdata);
 
 	/*
 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
 	 *  controller supports.  The limit is min_page_size * 2^mdts; compute
 	 *  it in 64 bits so that a large MDTS cannot wrap the product (which
 	 *  could silently turn max_xfer_size into 0).  With mdts >= 32 the
 	 *  limit is far above any 32-bit transfer size, so there is nothing
 	 *  to clamp and the shift is skipped entirely.
 	 */
 	if (ctrlr->cdata.mdts > 0 && ctrlr->cdata.mdts < 32) {
 		mdts_bytes = (uint64_t)ctrlr->min_page_size <<
 		    ctrlr->cdata.mdts;
 		if (mdts_bytes < ctrlr->max_xfer_size)
 			ctrlr->max_xfer_size = mdts_bytes;
 	}
 
 	return (0);
 }
 
 static int
 nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	int					cq_allocated, sq_allocated;
 
 	status.done = 0;
 	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
 	    nvme_completion_poll_cb, &status);
 	while (!atomic_load_acq_int(&status.done))
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_ctrlr_set_num_qpairs failed!\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Data in cdw0 is 0-based.
 	 * Lower 16-bits indicate number of submission queues allocated.
 	 * Upper 16-bits indicate number of completion queues allocated.
 	 */
 	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
 	cq_allocated = (status.cpl.cdw0 >> 16) + 1;
 
 	/*
 	 * Controller may allocate more queues than we requested,
 	 *  so use the minimum of the number requested and what was
 	 *  actually allocated.
 	 */
 	ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated);
 	ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated);
 
 	return (0);
 }
 
 static int
 nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	struct nvme_qpair			*qpair;
 	int					i;
 
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		qpair = &ctrlr->ioq[i];
 
 		status.done = 0;
 		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
 		    nvme_completion_poll_cb, &status);
 		while (!atomic_load_acq_int(&status.done))
 			pause("nvme", 1);
 		if (nvme_completion_is_error(&status.cpl)) {
 			nvme_printf(ctrlr, "nvme_create_io_cq failed!\n");
 			return (ENXIO);
 		}
 
 		status.done = 0;
 		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
 		    nvme_completion_poll_cb, &status);
 		while (!atomic_load_acq_int(&status.done))
 			pause("nvme", 1);
 		if (nvme_completion_is_error(&status.cpl)) {
 			nvme_printf(ctrlr, "nvme_create_io_sq failed!\n");
 			return (ENXIO);
 		}
 	}
 
 	return (0);
 }
 
 static int
 nvme_ctrlr_destroy_qpair(struct nvme_controller *ctrlr, struct nvme_qpair *qpair)
 {
 	struct nvme_completion_poll_status	status;
 
 	status.done = 0;
 	nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair,
 	    nvme_completion_poll_cb, &status);
 	while (!atomic_load_acq_int(&status.done))
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_destroy_io_sq failed!\n");
 		return (ENXIO);
 	}
 
 	status.done = 0;
 	nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair,
 	    nvme_completion_poll_cb, &status);
 	while (!atomic_load_acq_int(&status.done))
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_destroy_io_cq failed!\n");
 		return (ENXIO);
 	}
 
 	return (0);
 }
 
 /*
  * Construct a namespace object for each namespace reported in cdata.nn,
  * up to NVME_MAX_NAMESPACES.  Namespace IDs are 1-based while the ns[]
  * array is 0-based.  Always returns 0.
  */
 static int
 nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
 {
 	uint32_t	nsid;
 
 	for (nsid = 1; nsid <= min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES);
 	    nsid++)
 		nvme_ns_construct(&ctrlr->ns[nsid - 1], nsid, ctrlr);
 
 	return (0);
 }
 
 /*
  * Report whether page_id names one of the log pages this file knows how to
  * size and fetch after an asynchronous event: error, health information,
  * firmware slot, or changed namespace list.
  *
  * Returns TRUE for those page IDs and FALSE for everything else.
  */
 static boolean_t
 is_log_page_id_valid(uint8_t page_id)
 {
 
 	switch (page_id) {
 	case NVME_LOG_ERROR:
 	case NVME_LOG_HEALTH_INFORMATION:
 	case NVME_LOG_FIRMWARE_SLOT:
+	case NVME_LOG_CHANGED_NAMESPACE:
 		return (TRUE);
 	}
 
 	return (FALSE);
 }
 
 /*
  * Return the number of bytes to request for the given log page, or 0 when
  * page_id is not one of the pages handled here.
  */
 static uint32_t
 nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id)
 {
 	uint32_t	log_page_size;
 
 	switch (page_id) {
 	case NVME_LOG_ERROR:
 		/*
 		 * One entry per error log slot (elpe + 1 entries), but never
 		 *  more than NVME_MAX_AER_LOG_SIZE bytes.
 		 */
 		log_page_size = min(
 		    sizeof(struct nvme_error_information_entry) *
 		    (ctrlr->cdata.elpe + 1), NVME_MAX_AER_LOG_SIZE);
 		break;
 	case NVME_LOG_HEALTH_INFORMATION:
 		log_page_size = sizeof(struct nvme_health_information_page);
 		break;
 	case NVME_LOG_FIRMWARE_SLOT:
 		log_page_size = sizeof(struct nvme_firmware_page);
 		break;
+	case NVME_LOG_CHANGED_NAMESPACE:
+		log_page_size = sizeof(struct nvme_ns_list);
+		break;
 	default:
 		/* Unknown page: nothing to fetch. */
 		log_page_size = 0;
 		break;
 	}
 
 	return (log_page_size);
 }
 
 /*
  * Print one message for every critical warning bit set in state.  A set
  * bit inside NVME_CRIT_WARN_ST_RESERVED_MASK is reported together with the
  * raw state value.
  */
 static void
 nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
     uint8_t state)
 {
 
 	if ((state & NVME_CRIT_WARN_ST_AVAILABLE_SPARE) != 0) {
 		nvme_printf(ctrlr, "available spare space below threshold\n");
 	}
 	if ((state & NVME_CRIT_WARN_ST_TEMPERATURE) != 0) {
 		nvme_printf(ctrlr, "temperature above threshold\n");
 	}
 	if ((state & NVME_CRIT_WARN_ST_DEVICE_RELIABILITY) != 0) {
 		nvme_printf(ctrlr, "device reliability degraded\n");
 	}
 	if ((state & NVME_CRIT_WARN_ST_READ_ONLY) != 0) {
 		nvme_printf(ctrlr, "media placed in read only mode\n");
 	}
 	if ((state & NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP) != 0) {
 		nvme_printf(ctrlr, "volatile memory backup device failed\n");
 	}
 	if ((state & NVME_CRIT_WARN_ST_RESERVED_MASK) != 0) {
 		nvme_printf(ctrlr,
 		    "unknown critical warning(s): state = 0x%02x\n", state);
 	}
 }
 
 /*
  * Completion callback for the GET LOG PAGE command issued in response to
  * an asynchronous event.
  *
  * arg is the nvme_async_event_request that triggered the fetch and cpl is
  * the completion of the log page fetch itself.  The fetched page is
  * converted to host endianness, acted on where this driver has its own
  * handling (health information, changed namespace list), and passed on to
  * the registered async consumers.  In every case a fresh asynchronous
  * event request is submitted at the end.
  */
 static void
 nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request		*aer = arg;
 	struct nvme_health_information_page	*health_info;
+	struct nvme_ns_list			*nsl;
 	struct nvme_error_information_entry	*err;
 	int i;
 
 	/*
 	 * If the log page fetch for some reason completed with an error,
 	 *  don't pass log page data to the consumers.  In practice, this case
 	 *  should never happen.
 	 */
 	if (nvme_completion_is_error(cpl))
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, NULL, 0);
 	else {
 		/* Convert data to host endian */
 		switch (aer->log_page_id) {
 		case NVME_LOG_ERROR:
 			/*
 			 * NOTE(review): this walks elpe + 1 entries, while
 			 *  nvme_ctrlr_get_log_page_size() caps the fetched
 			 *  size at NVME_MAX_AER_LOG_SIZE.  Confirm that
 			 *  log_page_buffer always holds elpe + 1 entries.
 			 */
 			err = (struct nvme_error_information_entry *)aer->log_page_buffer;
 			for (i = 0; i < (aer->ctrlr->cdata.elpe + 1); i++)
 				nvme_error_information_entry_swapbytes(err++);
 			break;
 		case NVME_LOG_HEALTH_INFORMATION:
 			nvme_health_information_page_swapbytes(
 			    (struct nvme_health_information_page *)aer->log_page_buffer);
 			break;
 		case NVME_LOG_FIRMWARE_SLOT:
 			nvme_firmware_page_swapbytes(
 			    (struct nvme_firmware_page *)aer->log_page_buffer);
 			break;
+		case NVME_LOG_CHANGED_NAMESPACE:
+			nvme_ns_list_swapbytes(
+			    (struct nvme_ns_list *)aer->log_page_buffer);
+			break;
 		case INTEL_LOG_TEMP_STATS:
 			intel_log_temp_stats_swapbytes(
 			    (struct intel_log_temp_stats *)aer->log_page_buffer);
 			break;
 		default:
 			break;
 		}
 
 		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
 			health_info = (struct nvme_health_information_page *)
 			    aer->log_page_buffer;
 			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
 			    health_info->critical_warning);
 			/*
 			 * Critical warnings reported through the
 			 *  SMART/health log page are persistent, so
 			 *  clear the associated bits in the async event
 			 *  config so that we do not receive repeated
 			 *  notifications for the same event.
 			 */
 			aer->ctrlr->async_event_config &=
 			    ~health_info->critical_warning;
 			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
 			    aer->ctrlr->async_event_config, NULL, NULL);
+		} else if (aer->log_page_id == NVME_LOG_CHANGED_NAMESPACE &&
+		    !nvme_use_nvd) {
 			/*
 			 * Walk the changed namespace list until the first
 			 *  zero entry and notify about each namespace ID.
 			 *  An ID above NVME_MAX_NAMESPACES ends the walk.
 			 */
+			nsl = (struct nvme_ns_list *)aer->log_page_buffer;
+			for (i = 0; i < nitems(nsl->ns) && nsl->ns[i] != 0; i++) {
+				if (nsl->ns[i] > NVME_MAX_NAMESPACES)
+					break;
+				nvme_notify_ns(aer->ctrlr, nsl->ns[i]);
+			}
 		}
 
 
 		/*
 		 * Pass the cpl data from the original async event completion,
 		 *  not the log page fetch.
 		 */
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
 	}
 
 	/*
 	 * Repost another asynchronous event request to replace the one
 	 *  that just completed.
 	 */
 	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
 }
 
 /*
  * Completion callback for an asynchronous event request.
  *
  * arg is the nvme_async_event_request that completed and cpl its
  * completion entry.  On error the request is simply dropped.  Otherwise
  * the event is logged; if the associated log page is one this file knows
  * how to fetch, the page is requested and the consumers are notified from
  * nvme_ctrlr_async_event_log_page_cb().  For any other page ID the
  * consumers are notified immediately without log data and a new request
  * is submitted.
  */
 static void
 nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request	*aer = arg;
 
 	if (nvme_completion_is_error(cpl)) {
 		/*
 		 *  Do not retry failed async event requests.  This avoids
 		 *  infinite loops where a new async event request is submitted
 		 *  to replace the one just failed, only to fail again and
 		 *  perpetuate the loop.
 		 */
 		return;
 	}
 
 	/* Associated log page is in bits 23:16 of completion entry dw0. */
 	aer->log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
 
 	/*
 	 * The added message also prints cdw0 bits 1:0 as "type" and bits
 	 *  15:8 as "info".
 	 */
-	nvme_printf(aer->ctrlr, "async event occurred (log page id=0x%x)\n",
+	nvme_printf(aer->ctrlr, "async event occurred (type 0x%x, info 0x%02x,"
+	    " page 0x%02x)\n", (cpl->cdw0 & 0x03), (cpl->cdw0 & 0xFF00) >> 8,
 	    aer->log_page_id);
 
 	if (is_log_page_id_valid(aer->log_page_id)) {
 		aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
 		    aer->log_page_id);
 		/* Keep the event completion; the log page callback needs it. */
 		memcpy(&aer->cpl, cpl, sizeof(*cpl));
 		nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
 		    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
 		    aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
 		    aer);
 		/* Wait to notify consumers until after log page is fetched. */
 	} else {
 		nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
 		    NULL, 0);
 
 		/*
 		 * Repost another asynchronous event request to replace the one
 		 *  that just completed.
 		 */
 		nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
 	}
 }
 
 /*
  * Bind aer to ctrlr, build a data-less ASYNC EVENT REQUEST command whose
  * completion is handled by nvme_ctrlr_async_event_cb(), and submit it on
  * the admin queue.
  */
 static void
 nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
     struct nvme_async_event_request *aer)
 {
 	struct nvme_request *aer_req;
 
 	aer->ctrlr = ctrlr;
 
 	aer_req = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer);
 	aer->req = aer_req;
 
 	/*
 	 * An asynchronous event request stays outstanding until an event
 	 *  happens, so it must never be timed out.
 	 */
 	aer_req->timeout = FALSE;
 	aer_req->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ASYNC_EVENT_REQUEST);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, aer_req);
 }
 
 /*
  * Program the asynchronous event configuration of the controller and
  * submit the initial batch of asynchronous event requests.
  *
  * The temperature warning is only enabled when a GET FEATURES query of
  * the temperature threshold succeeds and returns something other than
  * 0x0000 or 0xFFFF in the low 16 bits of cdw0.
  */
 static void
 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	struct nvme_async_event_request		*aer;
 	uint32_t				i;
 
-	ctrlr->async_event_config = 0xFF;
-	ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_RESERVED_MASK;
+	ctrlr->async_event_config = NVME_CRIT_WARN_ST_AVAILABLE_SPARE |
+	    NVME_CRIT_WARN_ST_DEVICE_RELIABILITY |
+	    NVME_CRIT_WARN_ST_READ_ONLY |
+	    NVME_CRIT_WARN_ST_VOLATILE_MEMORY_BACKUP;
 	/*
 	 * NOTE(review): 0x300 sets bits 8 and 9 of the configuration;
 	 *  presumably the namespace attribute and firmware activation
 	 *  notices introduced with NVMe 1.2.  Confirm against the spec.
 	 */
+	if (ctrlr->cdata.ver >= NVME_REV(1, 2))
+		ctrlr->async_event_config |= 0x300;
 
 	status.done = 0;
 	nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
 	    0, NULL, 0, nvme_completion_poll_cb, &status);
 	while (!atomic_load_acq_int(&status.done))
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl) ||
 	    (status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
 	    (status.cpl.cdw0 & 0xFFFF) == 0x0000) {
 		nvme_printf(ctrlr, "temperature threshold not supported\n");
-		ctrlr->async_event_config &= ~NVME_CRIT_WARN_ST_TEMPERATURE;
-	}
+	} else
+		ctrlr->async_event_config |= NVME_CRIT_WARN_ST_TEMPERATURE;
 
 	/* Fire and forget: no completion callback is registered. */
 	nvme_ctrlr_cmd_set_async_event_config(ctrlr,
 	    ctrlr->async_event_config, NULL, NULL);
 
 	/* aerl is a zero-based value, so we need to add 1 here. */
 	ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
 
 	for (i = 0; i < ctrlr->num_aers; i++) {
 		aer = &ctrlr->aer[i];
 		nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
 	}
 }
 
 /*
  * Load the interrupt coalescing time and threshold from the
  * hw.nvme.int_coal_time and hw.nvme.int_coal_threshold tunables (both
  * default to 0) and send them to the controller.
  */
 static void
 nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
 {
 
 	/* Defaults, used when a tunable is not set. */
 	ctrlr->int_coal_time = 0;
 	ctrlr->int_coal_threshold = 0;
 
 	TUNABLE_INT_FETCH("hw.nvme.int_coal_time", &ctrlr->int_coal_time);
 	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
 	    &ctrlr->int_coal_threshold);
 
 	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
 	    ctrlr->int_coal_threshold, NULL, NULL);
 }
 
 /*
  * Bring the controller into service: reset the queue pair state, identify
  * the controller, create the I/O queues, construct the namespaces,
  * configure asynchronous events and interrupt coalescing, and finally
  * enable the I/O queue pairs.  Any failing step marks the controller as
  * failed through nvme_ctrlr_fail().
  */
 static void
 nvme_ctrlr_start(void *ctrlr_arg)
 {
 	struct nvme_controller *ctrlr;
 	uint32_t prev_num_io_queues;
 	int qid;
 
 	ctrlr = ctrlr_arg;
 
 	/*
 	 * The adminq is reset here only on a restart after a controller
 	 *  reset.  During initialization admin commands have already been
 	 *  submitted to learn the number of I/O queues supported, so the
 	 *  adminq cannot be reset a second time.
 	 */
 	if (ctrlr->is_resetting)
 		nvme_qpair_reset(&ctrlr->adminq);
 
 	for (qid = 0; qid < ctrlr->num_io_queues; qid++)
 		nvme_qpair_reset(&ctrlr->ioq[qid]);
 
 	nvme_admin_qpair_enable(&ctrlr->adminq);
 
 	if (nvme_ctrlr_identify(ctrlr) != 0)
 		goto fail;
 
 	/*
 	 * The number of qpairs is determined during controller
 	 *  initialization, including using NVMe
 	 *  SET_FEATURES/NUMBER_OF_QUEUES to determine the HW limit.
 	 *  SET_FEATURES is issued again here so that it also runs after any
 	 *  reset, for controllers that depend on the driver to explicitly
 	 *  specify how many queues it will use.  The value should never
 	 *  change between resets, so panic if somehow it does.
 	 */
 	if (ctrlr->is_resetting) {
 		prev_num_io_queues = ctrlr->num_io_queues;
 		if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0)
 			goto fail;
 
 		if (prev_num_io_queues != ctrlr->num_io_queues) {
 			panic("num_io_queues changed from %u to %u",
 			      prev_num_io_queues, ctrlr->num_io_queues);
 		}
 	}
 
 	if (nvme_ctrlr_create_qpairs(ctrlr) != 0 ||
 	    nvme_ctrlr_construct_namespaces(ctrlr) != 0)
 		goto fail;
 
 	nvme_ctrlr_configure_aer(ctrlr);
 	nvme_ctrlr_configure_int_coalescing(ctrlr);
 
 	for (qid = 0; qid < ctrlr->num_io_queues; qid++)
 		nvme_io_qpair_enable(&ctrlr->ioq[qid]);
 
 	return;
 
 fail:
 	nvme_ctrlr_fail(ctrlr);
 }
 
 /*
  * Config intrhook entry point: perform the first-time start of the
  * controller, then set up its sysctls, remove the hook, mark the
  * controller initialized and announce it.  Those last steps run whether
  * or not the start succeeded.
  */
 void
 nvme_ctrlr_start_config_hook(void *arg)
 {
 	struct nvme_controller *ctrlr;
 
 	ctrlr = arg;
 
 	nvme_qpair_reset(&ctrlr->adminq);
 	nvme_admin_qpair_enable(&ctrlr->adminq);
 
 	/* Both setup steps have to succeed before the controller starts. */
 	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0 ||
 	    nvme_ctrlr_construct_io_qpairs(ctrlr) != 0)
 		nvme_ctrlr_fail(ctrlr);
 	else
 		nvme_ctrlr_start(ctrlr);
 
 	nvme_sysctl_initialize_ctrlr(ctrlr);
 	config_intrhook_disestablish(&ctrlr->config_hook);
 
 	ctrlr->is_initialized = 1;
 	nvme_notify_new_controller(ctrlr);
 }
 
 static void
 nvme_ctrlr_reset_task(void *arg, int pending)
 {
 	struct nvme_controller	*ctrlr = arg;
 	int			status;
 
 	nvme_printf(ctrlr, "resetting controller\n");
 	status = nvme_ctrlr_hw_reset(ctrlr);
 	/*
 	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
 	 *  handlers on this CPU that were blocked on a qpair lock. We want
 	 *  all nvme interrupts completed before proceeding with restarting the
 	 *  controller.
 	 *
 	 * XXX - any way to guarantee the interrupt handlers have quiesced?
 	 */
 	pause("nvmereset", hz / 10);
 	if (status == 0)
 		nvme_ctrlr_start(ctrlr);
 	else
 		nvme_ctrlr_fail(ctrlr);
 
 	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
 
 /*
  * Process completions on the admin queue and on every I/O queue that has
  * a completion ring.
  */
 void
 nvme_ctrlr_poll(struct nvme_controller *ctrlr)
 {
 	int qid;
 
 	nvme_qpair_process_completions(&ctrlr->adminq);
 
 	for (qid = 0; qid < ctrlr->num_io_queues; qid++) {
 		/* Skip queues that are not there yet. */
 		if (!ctrlr->ioq)
 			continue;
 		if (!ctrlr->ioq[qid].cpl)
 			continue;
 		nvme_qpair_process_completions(&ctrlr->ioq[qid]);
 	}
 }
 
 /*
  * Interrupt handler for the single-vector interrupt case: num_io_queues
  * will be 1 and there's only a single vector.  Further interrupts are
  * masked in the controller for the duration of the poll.
  */
 void
 nvme_ctrlr_intx_handler(void *arg)
 {
 	struct nvme_controller *ctrlr;
 
 	ctrlr = (struct nvme_controller *)arg;
 
 	/* Mask, drain every queue, unmask. */
 	nvme_mmio_write_4(ctrlr, intms, 1);
 	nvme_ctrlr_poll(ctrlr);
 	nvme_mmio_write_4(ctrlr, intmc, 1);
 }
 
 static int
 nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
 {
 
 	ctrlr->msix_enabled = 0;
 	ctrlr->num_io_queues = 1;
 	ctrlr->num_cpus_per_ioq = mp_ncpus;
 	ctrlr->rid = 0;
 	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
 	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
 
 	if (ctrlr->res == NULL) {
 		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
 		return (ENOMEM);
 	}
 
 	bus_setup_intr(ctrlr->dev, ctrlr->res,
 	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
 	    ctrlr, &ctrlr->tag);
 
 	if (ctrlr->tag == NULL) {
 		nvme_printf(ctrlr, "unable to setup intx handler\n");
 		return (ENOMEM);
 	}
 
 	return (0);
 }
 
 /*
  * Completion callback for passthrough commands.  Copies cdw0 and the
  * status (with the bits in NVME_STATUS_P_MASK cleared) into pt->cpl, then
  * clears pt->driver_lock under the mutex and wakes up the sleeper in
  * nvme_ctrlr_passthrough_cmd().
  */
 static void
 nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_pt_command *pt;
 	struct mtx *lock;
 
 	pt = arg;
 	/* Grab the mutex pointer now; the field is cleared below. */
 	lock = pt->driver_lock;
 
 	bzero(&pt->cpl, sizeof(pt->cpl));
 	pt->cpl.cdw0 = cpl->cdw0;
 	pt->cpl.status = cpl->status & ~NVME_STATUS_P_MASK;
 
 	mtx_lock(lock);
 	pt->driver_lock = NULL;
 	wakeup(pt);
 	mtx_unlock(lock);
 }
 
 /*
  * Submit a caller-built NVMe command and sleep until it completes.
  *
  * ctrlr          - controller the command is sent to
  * pt             - command description; pt->cmd supplies opc_fuse and
  *                  cdw10..cdw15, pt->buf/pt->len describe the data
  *                  buffer, and pt->cpl is filled in by nvme_pt_done()
  * nsid           - namespace ID placed in the command
  * is_user_buffer - nonzero when pt->buf must be mapped with vmapbuf()
  *                  before use
  * is_admin_cmd   - nonzero to use the admin queue, zero for an I/O queue
  *
  * Returns EIO when the buffer is too large (page-rounded span above
  * MAXPHYS, or pt->len above max_xfer_size), EFAULT when vmapbuf() fails,
  * and 0 otherwise.  A return of 0 only means the command was submitted
  * and completed; this function does not inspect pt->cpl.
  */
 int
 nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
     struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer,
     int is_admin_cmd)
 {
 	struct nvme_request	*req;
 	struct mtx		*mtx;
 	struct buf		*buf = NULL;
 	int			ret = 0;
 	vm_offset_t		addr, end;
 
 	if (pt->len > 0) {
 		/*
 		 * vmapbuf calls vm_fault_quick_hold_pages which only maps full
 		 * pages. Ensure this request has fewer than MAXPHYS bytes when
 		 * extended to full pages.
 		 */
 		addr = (vm_offset_t)pt->buf;
 		end = round_page(addr + pt->len);
 		addr = trunc_page(addr);
 		if (end - addr > MAXPHYS)
 			return EIO;
 
 		if (pt->len > ctrlr->max_xfer_size) {
 			nvme_printf(ctrlr, "pt->len (%d) "
 			    "exceeds max_xfer_size (%d)\n", pt->len,
 			    ctrlr->max_xfer_size);
 			return EIO;
 		}
 		if (is_user_buffer) {
 			/*
 			 * Ensure the user buffer is wired for the duration of
 			 *  this passthrough command.
 			 */
 			PHOLD(curproc);
 			buf = getpbuf(NULL);
 			buf->b_data = pt->buf;
 			buf->b_bufsize = pt->len;
 			buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
 			/* The two branches differ only in the vmapbuf() signature. */
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 			if (vmapbuf(buf, 1) < 0) {
 #else
 			if (vmapbuf(buf) < 0) {
 #endif
 				ret = EFAULT;
 				goto err;
 			}
 			req = nvme_allocate_request_vaddr(buf->b_data, pt->len, 
 			    nvme_pt_done, pt);
 		} else
 			req = nvme_allocate_request_vaddr(pt->buf, pt->len,
 			    nvme_pt_done, pt);
 	} else
 		req = nvme_allocate_request_null(nvme_pt_done, pt);
 
 	/* Assume userspace already converted to little-endian */
 	req->cmd.opc_fuse = pt->cmd.opc_fuse;
 	req->cmd.cdw10 = pt->cmd.cdw10;
 	req->cmd.cdw11 = pt->cmd.cdw11;
 	req->cmd.cdw12 = pt->cmd.cdw12;
 	req->cmd.cdw13 = pt->cmd.cdw13;
 	req->cmd.cdw14 = pt->cmd.cdw14;
 	req->cmd.cdw15 = pt->cmd.cdw15;
 
 	req->cmd.nsid = htole32(nsid);
 
 	/*
 	 * pt->driver_lock doubles as the "still in flight" marker:
 	 *  nvme_pt_done() sets it back to NULL under this mutex.
 	 */
 	mtx = mtx_pool_find(mtxpool_sleep, pt);
 	pt->driver_lock = mtx;
 
 	if (is_admin_cmd)
 		nvme_ctrlr_submit_admin_request(ctrlr, req);
 	else
 		nvme_ctrlr_submit_io_request(ctrlr, req);
 
 	/* Sleep until the completion callback has run. */
 	mtx_lock(mtx);
 	while (pt->driver_lock != NULL)
 		mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0);
 	mtx_unlock(mtx);
 
 err:
 	/* buf is only non-NULL on the user-buffer path. */
 	if (buf != NULL) {
 		relpbuf(buf, NULL);
 		PRELE(curproc);
 	}
 
 	return (ret);
 }
 
 /*
  * ioctl handler for the controller character device.
  *
  * NVME_RESET_CONTROLLER requests a controller reset and returns 0.
  * NVME_PASSTHROUGH_CMD forwards the nvme_pt_command in arg as an admin
  * command with a user buffer and returns the passthrough result.  Any
  * other command returns ENOTTY.
  */
 static int
 nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
 {
 	struct nvme_controller			*ctrlr = cdev->si_drv1;
 	struct nvme_pt_command			*pt;
 
 	if (cmd == NVME_RESET_CONTROLLER) {
 		nvme_ctrlr_reset(ctrlr);
 		return (0);
 	}
 
 	if (cmd == NVME_PASSTHROUGH_CMD) {
 		pt = (struct nvme_pt_command *)arg;
 		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt,
 		    le32toh(pt->cmd.nsid), 1 /* is_user_buffer */,
 		    1 /* is_admin_cmd */));
 	}
 
 	return (ENOTTY);
 }
 
 /* Character device switch for the controller node; only ioctl is provided. */
 static struct cdevsw nvme_ctrlr_cdevsw = {
 	.d_version	= D_VERSION,
 	.d_flags	= 0,
 	.d_ioctl	= nvme_ctrlr_ioctl,
 };
 
 /*
  * Decide between MSI-X and a single shared interrupt and size the I/O
  * queues accordingly.
  *
  * Tunables read here: hw.nvme.min_cpus_per_ioq, hw.nvme.per_cpu_io_queues
  * and hw.nvme.force_intx.  On the MSI-X path this sets num_cpus_per_ioq,
  * num_io_queues and msix_enabled; every other path hands over to
  * nvme_ctrlr_configure_intx().
  *
  * NOTE(review): the return value of nvme_ctrlr_configure_intx() is
  * dropped at every call site below, so a failure there is not reported to
  * the caller of this function.
  */
 static void
 nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
 {
 	device_t	dev;
 	int		per_cpu_io_queues;
 	int		min_cpus_per_ioq;
 	int		num_vectors_requested, num_vectors_allocated;
 	int		num_vectors_available;
 
 	dev = ctrlr->dev;
 	min_cpus_per_ioq = 1;
 	TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
 
 	/* Clamp the tunable to the range [1, mp_ncpus]. */
 	if (min_cpus_per_ioq < 1) {
 		min_cpus_per_ioq = 1;
 	} else if (min_cpus_per_ioq > mp_ncpus) {
 		min_cpus_per_ioq = mp_ncpus;
 	}
 
 	per_cpu_io_queues = 1;
 	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
 
 	/* Per-CPU queues disabled: every CPU shares one I/O queue. */
 	if (per_cpu_io_queues == 0) {
 		min_cpus_per_ioq = mp_ncpus;
 	}
 
 	ctrlr->force_intx = 0;
 	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
 
 	/*
 	 * FreeBSD currently cannot allocate more than about 190 vectors at
 	 *  boot, meaning that systems with high core count and many devices
 	 *  requesting per-CPU interrupt vectors will not get their full
 	 *  allotment.  So first, try to allocate as many as we may need to
 	 *  understand what is available, then immediately release them.
 	 *  Then figure out how many of those we will actually use, based on
 	 *  assigning an equal number of cores to each I/O queue.
 	 */
 
 	/* One vector per core I/O queue, plus one vector for admin queue. */
 	num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
 	if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
 		num_vectors_available = 0;
 	}
 	pci_release_msi(dev);
 
 	/* MSI-X needs at least two vectors: admin queue plus one I/O queue. */
 	if (ctrlr->force_intx || num_vectors_available < 2) {
 		nvme_ctrlr_configure_intx(ctrlr);
 		return;
 	}
 
 	/*
 	 * Do not use all vectors for I/O queues - one must be saved for the
 	 *  admin queue.
 	 */
 	ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
 	    howmany(mp_ncpus, num_vectors_available - 1));
 
 	ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
 	num_vectors_requested = ctrlr->num_io_queues + 1;
 	num_vectors_allocated = num_vectors_requested;
 
 	/*
 	 * Now just allocate the number of vectors we need.  This should
 	 *  succeed, since we previously called pci_alloc_msix()
 	 *  successfully returning at least this many vectors, but just to
 	 *  be safe, if something goes wrong just revert to INTx.
 	 */
 	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
 		nvme_ctrlr_configure_intx(ctrlr);
 		return;
 	}
 
 	/* A short allocation is given back before falling back to INTx. */
 	if (num_vectors_allocated < num_vectors_requested) {
 		pci_release_msi(dev);
 		nvme_ctrlr_configure_intx(ctrlr);
 		return;
 	}
 
 	ctrlr->msix_enabled = 1;
 }
 
 /*
  * Initialize the software state of a controller for the given device:
  * lock, register BAR, capability-derived limits, tunables, interrupts,
  * admin queue pair, taskqueue with its tasks, and the nvme%d control
  * device.
  *
  * Returns 0 on success.  Failures return the status of
  * nvme_ctrlr_allocate_bar(), or ENXIO for a nonzero doorbell stride, a
  * failed admin queue pair construction, or a failed make_dev_s().
  *
  * NOTE(review): the error returns below do not undo earlier steps (mutex,
  * BAR, taskqueue); presumably the caller cleans up through
  * nvme_ctrlr_destruct() - verify.
  */
 int
 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 {
 	struct make_dev_args	md_args;
 	uint32_t	cap_lo;
 	uint32_t	cap_hi;
 	uint8_t		to;
 	uint8_t		dstrd;
 	uint8_t		mpsmin;
 	int		status, timeout_period;
 
 	ctrlr->dev = dev;
 
 	mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
 
 	status = nvme_ctrlr_allocate_bar(ctrlr);
 
 	if (status != 0)
 		return (status);
 
 	/*
 	 * Software emulators may set the doorbell stride to something
 	 *  other than zero, but this driver is not set up to handle that.
 	 */
 	cap_hi = nvme_mmio_read_4(ctrlr, cap_hi);
 	dstrd = (cap_hi >> NVME_CAP_HI_REG_DSTRD_SHIFT) & NVME_CAP_HI_REG_DSTRD_MASK;
 	if (dstrd != 0)
 		return (ENXIO);
 
 	/* Minimum page size is 2^(12 + MPSMIN) bytes. */
 	mpsmin = (cap_hi >> NVME_CAP_HI_REG_MPSMIN_SHIFT) & NVME_CAP_HI_REG_MPSMIN_MASK;
 	ctrlr->min_page_size = 1 << (12 + mpsmin);
 
 	/* Get ready timeout value from controller, in units of 500ms. */
 	cap_lo = nvme_mmio_read_4(ctrlr, cap_lo);
 	to = (cap_lo >> NVME_CAP_LO_REG_TO_SHIFT) & NVME_CAP_LO_REG_TO_MASK;
 	ctrlr->ready_timeout_in_ms = to * 500;
 
 	/* The tunable is clamped to [NVME_MIN_TIMEOUT_PERIOD, NVME_MAX_TIMEOUT_PERIOD]. */
 	timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD;
 	TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period);
 	timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD);
 	timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD);
 	ctrlr->timeout_period = timeout_period;
 
 	/* nvme_retry_count is not a per-controller field, unlike the others. */
 	nvme_retry_count = NVME_DEFAULT_RETRY_COUNT;
 	TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count);
 
 	ctrlr->enable_aborts = 0;
 	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
 
 	nvme_ctrlr_setup_interrupts(ctrlr);
 
 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
 	if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0)
 		return (ENXIO);
 
 	/* One thread serves both the reset task and the fail_req task. */
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
 	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
 
 	ctrlr->is_resetting = 0;
 	ctrlr->is_initialized = 0;
 	ctrlr->notification_sent = 0;
 	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 	TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
 	STAILQ_INIT(&ctrlr->fail_req);
 	ctrlr->is_failed = FALSE;
 
 	/* Create the root-only (mode 0600) nvme%d control device. */
 	make_dev_args_init(&md_args);
 	md_args.mda_devsw = &nvme_ctrlr_cdevsw;
 	md_args.mda_uid = UID_ROOT;
 	md_args.mda_gid = GID_WHEEL;
 	md_args.mda_mode = 0600;
 	md_args.mda_unit = device_get_unit(dev);
 	md_args.mda_si_drv1 = (void *)ctrlr;
 	status = make_dev_s(&md_args, &ctrlr->cdev, "nvme%d",
 	    device_get_unit(dev));
 	if (status != 0)
 		return (ENXIO);
 
 	return (0);
 }
 
 /*
  * Tear down a controller: notify consumers of the failure, destroy the
  * namespaces, the control device and the queue pairs, shut down and
  * disable the controller, and release the taskqueue, interrupt and memory
  * resources.  If the register BAR was never allocated, only the lock is
  * destroyed.
  */
 void
 nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 {
 	int				i;
 
 	/* Without the register BAR there is nothing else to tear down. */
 	if (ctrlr->resource == NULL)
 		goto nores;
+
+	nvme_notify_fail_consumers(ctrlr);
 
 	for (i = 0; i < NVME_MAX_NAMESPACES; i++)
 		nvme_ns_destruct(&ctrlr->ns[i]);
 
 	if (ctrlr->cdev)
 		destroy_dev(ctrlr->cdev);
 
 	/*
 	 * Each I/O queue is first deleted on the controller, then its host
 	 *  side state is destroyed.  The result of the delete commands is
 	 *  not checked here.
 	 */
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		nvme_ctrlr_destroy_qpair(ctrlr, &ctrlr->ioq[i]);
 		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
 	}
 	free(ctrlr->ioq, M_NVME);
 
 	nvme_admin_qpair_destroy(&ctrlr->adminq);
 
 	/*
 	 *  Notify the controller of a shutdown, even though this is due to
 	 *   a driver unload, not a system shutdown (this path is not invoked
 	 *   during shutdown).  This ensures the controller receives a
 	 *   shutdown notification in case the system is shut down before
 	 *   reloading the driver.
 	 */
 	nvme_ctrlr_shutdown(ctrlr);
 
 	nvme_ctrlr_disable(ctrlr);
 
 	if (ctrlr->taskqueue)
 		taskqueue_free(ctrlr->taskqueue);
 
 	if (ctrlr->tag)
 		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
 
 	if (ctrlr->res)
 		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
 		    rman_get_rid(ctrlr->res), ctrlr->res);
 
 	if (ctrlr->msix_enabled)
 		pci_release_msi(dev);
 
 	if (ctrlr->bar4_resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
 	}
 
 	bus_release_resource(dev, SYS_RES_MEMORY,
 	    ctrlr->resource_id, ctrlr->resource);
 
 nores:
 	mtx_destroy(&ctrlr->lock);
 }
 
 void
 nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
 {
 	uint32_t	cc;
 	uint32_t	csts;
 	int		ticks = 0;
 
 	cc = nvme_mmio_read_4(ctrlr, cc);
 	cc &= ~(NVME_CC_REG_SHN_MASK << NVME_CC_REG_SHN_SHIFT);
 	cc |= NVME_SHN_NORMAL << NVME_CC_REG_SHN_SHIFT;
 	nvme_mmio_write_4(ctrlr, cc, cc);
 
 	csts = nvme_mmio_read_4(ctrlr, csts);
 	while ((NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) {
 		pause("nvme shn", 1);
 		csts = nvme_mmio_read_4(ctrlr, csts);
 	}
 	if (NVME_CSTS_GET_SHST(csts) != NVME_SHST_COMPLETE)
 		nvme_printf(ctrlr, "did not complete shutdown within 5 seconds "
 		    "of notification\n");
 }
 
 /* Queue req on the controller's admin queue pair. */
 void
 nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 	struct nvme_qpair	*adminq;
 
 	adminq = &ctrlr->adminq;
 	nvme_qpair_submit_request(adminq, req);
 }
 
 /*
  * Queue req on an I/O queue pair.  The queue is picked from the current
  * CPU: index curcpu / num_cpus_per_ioq.
  */
 void
 nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 
 	nvme_qpair_submit_request(
 	    &ctrlr->ioq[curcpu / ctrlr->num_cpus_per_ioq], req);
 }
 
 /* Accessor: return the device_t stored in the controller. */
 device_t
 nvme_ctrlr_get_device(struct nvme_controller *ctrlr)
 {
 	device_t	dev;
 
 	dev = ctrlr->dev;
 	return (dev);
 }
 
 /*
  * Accessor: return a read-only pointer to the controller data stored in
  * ctrlr->cdata.
  */
 const struct nvme_controller_data *
 nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
 {
 	const struct nvme_controller_data	*cdata;
 
 	cdata = &ctrlr->cdata;
 	return (cdata);
 }
Index: head/sys/dev/nvme/nvme_ctrlr_cmd.c
===================================================================
--- head/sys/dev/nvme/nvme_ctrlr_cmd.c	(revision 334199)
+++ head/sys/dev/nvme/nvme_ctrlr_cmd.c	(revision 334200)
@@ -1,327 +1,327 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2012-2013 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "nvme_private.h"
 
 /*
  * Build an IDENTIFY command with cdw10 set to 1 and submit it on the admin
  * queue.
  *
  * 'payload' is attached to the request as a buffer of
  * sizeof(struct nvme_controller_data) bytes; cb_fn/cb_arg are stored in the
  * request by the allocator.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_vaddr() is
  * used without a NULL check.
  */
 void
 nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload,
 	nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_vaddr(payload,
 	    sizeof(struct nvme_controller_data), cb_fn, cb_arg);
 
 	/*
 	 * TODO: create an identify command data structure, which
 	 *  includes this CNS bit in cdw10.
 	 */
 	request->cmd.cdw10 = htole32(1);
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build an IDENTIFY command addressed to namespace 'nsid' and submit it on
  * the admin queue.
  *
  * 'payload' is attached to the request as a buffer of
  * sizeof(struct nvme_namespace_data) bytes.  Only the opcode and nsid are
  * written here; cdw10 is left as the allocator returned it.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_vaddr() is
  * used without a NULL check.
  */
 void
 nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint32_t nsid,
 	void *payload, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_vaddr(payload,
 	    sizeof(struct nvme_namespace_data), cb_fn, cb_arg);
 
 	/*
 	 * TODO: create an identify command data structure
 	 */
 	request->cmd.nsid = htole32(nsid);
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_IDENTIFY);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build a CREATE I/O COMPLETION QUEUE command for 'io_que' and submit it on
  * the admin queue.
  *
  * cdw10 carries (num_entries - 1) in its upper 16 bits and the queue id in
  * its lower bits; cdw11 carries 'vector' in its upper 16 bits plus the 0x3
  * flags; prp1 is the bus address of the completion queue memory.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
     struct nvme_qpair *io_que, uint16_t vector, nvme_cb_fn_t cb_fn,
     void *cb_arg)
 {
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
 	req = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_CQ);
 
 	/*
 	 * TODO: create a create io completion queue command data
 	 *  structure.
 	 */
 	cmd->cdw10 = htole32(((io_que->num_entries-1) << 16) | io_que->id);
 	/*
 	 * 0x3 = interrupts enabled | physically contiguous
 	 *
 	 * 'vector' is widened to uint32_t before shifting: a uint16_t would
 	 *  be promoted to signed int, and shifting a value >= 0x8000 left by
 	 *  16 overflows it.
 	 */
 	cmd->cdw11 = htole32(((uint32_t)vector << 16) | 0x3);
 	cmd->prp1 = htole64(io_que->cpl_bus_addr);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
 
 /*
  * Build a CREATE I/O SUBMISSION QUEUE command for 'io_que' and submit it on
  * the admin queue.
  *
  * cdw10 carries (num_entries - 1) in its upper 16 bits and the queue id in
  * its lower bits; cdw11 carries the queue id again in its upper 16 bits plus
  * the 0x1 flag; prp1 is the bus address of the submission queue memory.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
     struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_CREATE_IO_SQ);
 	request->cmd.prp1 = htole64(io_que->cmd_bus_addr);
 
 	/*
 	 * TODO: create a create io submission queue command data
 	 *  structure.
 	 */
 	/* 0x1 = physically contiguous */
 	request->cmd.cdw11 = htole32((io_que->id << 16) | 0x1);
 	request->cmd.cdw10 =
 	    htole32(((io_que->num_entries-1) << 16) | io_que->id);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build a DELETE I/O COMPLETION QUEUE command naming io_que->id in cdw10 and
  * submit it on the admin queue.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr,
     struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	/*
 	 * TODO: create a delete io completion queue command data
 	 *  structure.
 	 */
 	request->cmd.cdw10 = htole32(io_que->id);
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_CQ);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build a DELETE I/O SUBMISSION QUEUE command naming io_que->id in cdw10 and
  * submit it on the admin queue.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
     struct nvme_qpair *io_que, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	/*
 	 * TODO: create a delete io submission queue command data
 	 *  structure.
 	 */
 	request->cmd.cdw10 = htole32(io_que->id);
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_DELETE_IO_SQ);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build a SET FEATURES command and submit it on the admin queue.
  *
  * 'feature' goes into cdw10 and 'cdw11' into cdw11, both converted with
  * htole32().  The request is allocated without a payload: 'payload' and
  * 'payload_size' are accepted but not used by this function.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
     uint32_t cdw11, void *payload, uint32_t payload_size,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_SET_FEATURES);
 	request->cmd.cdw11 = htole32(cdw11);
 	request->cmd.cdw10 = htole32(feature);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Build a GET FEATURES command and submit it on the admin queue.
  *
  * 'feature' goes into cdw10 and 'cdw11' into cdw11, both converted with
  * htole32().  The request is allocated without a payload: 'payload' and
  * 'payload_size' are accepted but not used by this function.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature,
     uint32_t cdw11, void *payload, uint32_t payload_size,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *request;
 
 	request = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_FEATURES);
 	request->cmd.cdw11 = htole32(cdw11);
 	request->cmd.cdw10 = htole32(feature);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Issue SET FEATURES for NVME_FEAT_NUMBER_OF_QUEUES.
  *
  * (num_queues - 1) is placed in both the upper and lower 16 bits of cdw11.
  * No payload is passed.
  */
 void
 nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
     uint32_t num_queues, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t zero_based;
 
 	zero_based = num_queues - 1;
 	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_NUMBER_OF_QUEUES,
 	    (zero_based << 16) | zero_based, NULL, 0, cb_fn, cb_arg);
 }
 
 void
 nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
-    uint8_t state, nvme_cb_fn_t cb_fn, void *cb_arg)
+    uint32_t state, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t cdw11;
 
 	cdw11 = state;
 	nvme_ctrlr_cmd_set_feature(ctrlr,
 	    NVME_FEAT_ASYNC_EVENT_CONFIGURATION, cdw11, NULL, 0, cb_fn,
 	    cb_arg);
 }
 
 /*
  * Issue SET FEATURES for NVME_FEAT_INTERRUPT_COALESCING.
  *
  * cdw11 is built as ((microseconds / 100) << 8) | threshold.  If either
  * (microseconds / 100) or threshold is 0x100 or larger, a message is logged
  * and both values are forced to 0 before cdw11 is built.  No payload is
  * passed.
  */
 void
 nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
     uint32_t microseconds, uint32_t threshold, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t cdw11;
 
 	/* Both values are uint32_t, so they are printed with %u. */
 	if ((microseconds/100) >= 0x100) {
 		nvme_printf(ctrlr, "invalid coal time %u, disabling\n",
 		    microseconds);
 		microseconds = 0;
 		threshold = 0;
 	}
 
 	if (threshold >= 0x100) {
 		nvme_printf(ctrlr, "invalid threshold %u, disabling\n",
 		    threshold);
 		threshold = 0;
 		microseconds = 0;
 	}
 
 	cdw11 = ((microseconds/100) << 8) | threshold;
 	nvme_ctrlr_cmd_set_feature(ctrlr, NVME_FEAT_INTERRUPT_COALESCING, cdw11,
 	    NULL, 0, cb_fn, cb_arg);
 }
 
 /*
  * Build a GET LOG PAGE command and submit it on the admin queue.
  *
  * 'payload'/'payload_size' are attached to the request.  cdw10 is built as
  * ((payload_size / sizeof(uint32_t)) - 1) << 16, OR-ed with 'log_page', and
  * converted with htole32() once complete.  'nsid' is stored in the command's
  * nsid field.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_vaddr() is
  * used without a NULL check.
  */
 void
 nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page,
     uint32_t nsid, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn,
     void *cb_arg)
 {
 	struct nvme_request *request;
 	uint32_t dword10;
 
 	request = nvme_allocate_request_vaddr(payload, payload_size, cb_fn,
 	    cb_arg);
 
 	/* Assemble cdw10 in host byte order, then convert it once. */
 	dword10 = ((payload_size/sizeof(uint32_t)) - 1) << 16;
 	dword10 |= log_page;
 
 	request->cmd.opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_GET_LOG_PAGE);
 	request->cmd.nsid = htole32(nsid);
 	request->cmd.cdw10 = htole32(dword10);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, request);
 }
 
 /*
  * Fetch 'num_entries' error information entries (log page NVME_LOG_ERROR,
  * namespace NVME_GLOBAL_NAMESPACE_TAG) into 'payload'.
  *
  * Two KASSERTs require 0 < num_entries <= cdata.elpe + 1.  After the
  * assertions, num_entries is additionally clamped to cdata.elpe + 1 before
  * the transfer size is computed.
  *
  * NOTE(review): the prototype in nvme_private.h annotates num_entries with
  * "0 = max", but this function asserts num_entries > 0 - confirm which
  * contract is intended.
  */
 void
 nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
     struct nvme_error_information_entry *payload, uint32_t num_entries,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t max_entries;
 
 	KASSERT(num_entries > 0, ("%s called with num_entries==0\n", __func__));
 
 	/* Controller's error log page entries is 0-based. */
 	max_entries = ctrlr->cdata.elpe + 1;
 	KASSERT(num_entries <= max_entries,
 	    ("%s called with num_entries=%d but (elpe+1)=%d\n", __func__,
 	    num_entries, max_entries));
 
 	if (num_entries > max_entries)
 		num_entries = max_entries;
 
 	nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_ERROR,
 	    NVME_GLOBAL_NAMESPACE_TAG, payload, sizeof(*payload) * num_entries,
 	    cb_fn, cb_arg);
 }
 
 /*
  * Fetch log page NVME_LOG_HEALTH_INFORMATION for namespace 'nsid' into
  * 'payload'.  The transfer size is the size of the structure 'payload'
  * points to.
  */
 void
 nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
     uint32_t nsid, struct nvme_health_information_page *payload,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t page_bytes;
 
 	page_bytes = sizeof(*payload);
 	nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_HEALTH_INFORMATION,
 	    nsid, payload, page_bytes, cb_fn, cb_arg);
 }
 
 /*
  * Fetch log page NVME_LOG_FIRMWARE_SLOT (namespace
  * NVME_GLOBAL_NAMESPACE_TAG) into 'payload'.  The transfer size is the size
  * of the structure 'payload' points to.
  */
 void
 nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
     struct nvme_firmware_page *payload, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	uint32_t page_bytes;
 
 	page_bytes = sizeof(*payload);
 	nvme_ctrlr_cmd_get_log_page(ctrlr, NVME_LOG_FIRMWARE_SLOT,
 	    NVME_GLOBAL_NAMESPACE_TAG, payload, page_bytes, cb_fn, cb_arg);
 }
 
 /*
  * Build an ABORT command and submit it on the admin queue.
  *
  * cdw10 carries 'cid' in its upper 16 bits and 'sqid' in its lower 16 bits.
  *
  * NOTE(review): the pointer returned by nvme_allocate_request_null() is used
  * without a NULL check.
  */
 void
 nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
 	req = nvme_allocate_request_null(cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc_fuse = NVME_CMD_SET_OPC(NVME_OPC_ABORT);
 	/*
 	 * Widen 'cid' to uint32_t before shifting: a uint16_t would be
 	 *  promoted to signed int, and shifting a value >= 0x8000 left by 16
 	 *  overflows it.
 	 */
 	cmd->cdw10 = htole32(((uint32_t)cid << 16) | sqid);
 
 	nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
Index: head/sys/dev/nvme/nvme_private.h
===================================================================
--- head/sys/dev/nvme/nvme_private.h	(revision 334199)
+++ head/sys/dev/nvme/nvme_private.h	(revision 334200)
@@ -1,551 +1,552 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __NVME_PRIVATE_H__
 #define __NVME_PRIVATE_H__
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 
 #include <vm/uma.h>
 
 #include <machine/bus.h>
 
 #include "nvme.h"
 
 #define DEVICE2SOFTC(dev) ((struct nvme_controller *) device_get_softc(dev))
 
 MALLOC_DECLARE(M_NVME);
 
 #define IDT32_PCI_ID		0x80d0111d /* 32 channel board */
 #define IDT8_PCI_ID		0x80d2111d /* 8 channel board */
 
 /*
  * For commands requiring more than 2 PRP entries, one PRP will be
  *  embedded in the command (prp1), and the rest of the PRP entries
  *  will be in a list pointed to by the command (prp2).  This means
  *  that real max number of PRP entries we support is 32+1, which
  *  results in a max xfer size of 32*PAGE_SIZE.
  */
 #define NVME_MAX_PRP_LIST_ENTRIES	(NVME_MAX_XFER_SIZE / PAGE_SIZE)
 
 #define NVME_ADMIN_TRACKERS	(16)
 #define NVME_ADMIN_ENTRIES	(128)
 /* min and max are defined in admin queue attributes section of spec */
 #define NVME_MIN_ADMIN_ENTRIES	(2)
 #define NVME_MAX_ADMIN_ENTRIES	(4096)
 
 /*
  * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
  *  queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we
  *  will allow outstanding on an I/O qpair at any time.  The only advantage in
  *  having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping
  *  the contents of the submission and completion queues, it will show a longer
  *  history of data.
  */
 #define NVME_IO_ENTRIES		(256)
 #define NVME_IO_TRACKERS	(128)
 #define NVME_MIN_IO_TRACKERS	(4)
 #define NVME_MAX_IO_TRACKERS	(1024)
 
 /*
  * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CAP.MQES
  *  for each controller.
  */
 
 #define NVME_INT_COAL_TIME	(0)	/* disabled */
 #define NVME_INT_COAL_THRESHOLD (0)	/* 0-based */
 
 #define NVME_MAX_NAMESPACES	(16)
 #define NVME_MAX_CONSUMERS	(2)
 #define NVME_MAX_ASYNC_EVENTS	(8)
 
 #define NVME_DEFAULT_TIMEOUT_PERIOD	(30)    /* in seconds */
 #define NVME_MIN_TIMEOUT_PERIOD		(5)
 #define NVME_MAX_TIMEOUT_PERIOD		(120)
 
 #define NVME_DEFAULT_RETRY_COUNT	(4)
 
 /* Maximum log page size to fetch for AERs. */
 #define NVME_MAX_AER_LOG_SIZE		(4096)
 
 /*
  * Define CACHE_LINE_SIZE here for older FreeBSD versions that do not define
  *  it.
  */
 #ifndef CACHE_LINE_SIZE
 #define CACHE_LINE_SIZE		(64)
 #endif
 
 /*
  * Use presence of the BIO_UNMAPPED flag to determine whether unmapped I/O
  *  support and the bus_dmamap_load_bio API are available on the target
  *  kernel.  This will ease porting back to earlier stable branches at a
  *  later point.
  */
 #ifdef BIO_UNMAPPED
 #define NVME_UNMAPPED_BIO_SUPPORT
 #endif
 
 extern uma_zone_t	nvme_request_zone;
 extern int32_t		nvme_retry_count;
 
 /*
  * A completion record paired with a 'done' flag.
  *
  * NOTE(review): presumably filled in by nvme_completion_poll_cb() for callers
  *  that wait for a command by polling 'done' - confirm against that
  *  function's definition.
  */
 struct nvme_completion_poll_status {
 
 	struct nvme_completion	cpl;
 	int			done;
 };
 
 #define NVME_REQUEST_VADDR	1
 #define NVME_REQUEST_NULL	2 /* For requests with no payload. */
 #define NVME_REQUEST_UIO	3
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 #define NVME_REQUEST_BIO	4
 #endif
 #define NVME_REQUEST_CCB        5
 
 /*
  * One command to be submitted to a queue pair, together with its payload
  *  description and completion callback.
  *
  * Allocated from nvme_request_zone by the nvme_allocate_request_*() inline
  *  helpers in this header, which zero the structure, store cb_fn/cb_arg, set
  *  'timeout' to TRUE and set 'type' to one of the NVME_REQUEST_* values.
  */
 struct nvme_request {
 
 	struct nvme_command		cmd;
 	struct nvme_qpair		*qpair;
 	/*
 	 * The helpers in this header set u.payload for NVME_REQUEST_VADDR
 	 *  and NVME_REQUEST_CCB requests and u.bio for NVME_REQUEST_BIO
 	 *  requests.
 	 */
 	union {
 		void			*payload;
 		struct bio		*bio;
 	} u;
 	/* One of the NVME_REQUEST_* values. */
 	uint32_t			type;
 	/* Set together with u.payload for NVME_REQUEST_VADDR requests. */
 	uint32_t			payload_size;
 	boolean_t			timeout;
 	nvme_cb_fn_t			cb_fn;
 	void				*cb_arg;
 	int32_t				retries;
 	STAILQ_ENTRY(nvme_request)	stailq;
 };
 
 /*
  * State for one asynchronous event request.  struct nvme_controller embeds
  *  an array of NVME_MAX_ASYNC_EVENTS of these.
  *
  * log_page_buffer is sized by NVME_MAX_AER_LOG_SIZE, the maximum log page
  *  size fetched for AERs.
  */
 struct nvme_async_event_request {
 
 	struct nvme_controller		*ctrlr;
 	struct nvme_request		*req;
 	struct nvme_completion		cpl;
 	uint32_t			log_page_id;
 	uint32_t			log_page_size;
 	uint8_t				log_page_buffer[NVME_MAX_AER_LOG_SIZE];
 };
 
 /*
  * Per-command tracking state.  struct nvme_qpair keeps trackers on its
  *  free_tr and outstanding_tr lists and in its act_tr array.
  *
  * NOTE(review): 'prp' and 'prp_bus_addr' presumably describe the PRP list
  *  that a command's prp2 points to when more than 2 PRP entries are needed
  *  - confirm against the qpair code.
  */
 struct nvme_tracker {
 
 	TAILQ_ENTRY(nvme_tracker)	tailq;
 	struct nvme_request		*req;
 	struct nvme_qpair		*qpair;
 	struct callout			timer;
 	bus_dmamap_t			payload_dma_map;
 	uint16_t			cid;
 
 	uint64_t			*prp;
 	bus_addr_t			prp_bus_addr;
 };
 
 /*
  * A submission/completion queue pair.  struct nvme_controller embeds one as
  *  'adminq' and points to an array of them through 'ioq'.
  *
  * The structure and its 'lock' member are both aligned to CACHE_LINE_SIZE.
  */
 struct nvme_qpair {
 
 	struct nvme_controller	*ctrlr;
 	/* Queue id placed in the create/delete I/O queue commands. */
 	uint32_t		id;
 	uint32_t		phase;
 
 	uint16_t		vector;
 	int			rid;
 	struct resource		*res;
 	void 			*tag;
 
 	/* (num_entries - 1) is the size sent in the create queue commands. */
 	uint32_t		num_entries;
 	uint32_t		num_trackers;
 	uint32_t		sq_tdbl_off;
 	uint32_t		cq_hdbl_off;
 
 	uint32_t		sq_head;
 	uint32_t		sq_tail;
 	uint32_t		cq_head;
 
 	int64_t			num_cmds;
 	int64_t			num_intr_handler_calls;
 
 	struct nvme_command	*cmd;
 	struct nvme_completion	*cpl;
 
 	bus_dma_tag_t		dma_tag;
 	bus_dma_tag_t		dma_tag_payload;
 
 	bus_dmamap_t		queuemem_map;
 	/* Sent as prp1 of the create I/O submission queue command. */
 	uint64_t		cmd_bus_addr;
 	/* Sent as prp1 of the create I/O completion queue command. */
 	uint64_t		cpl_bus_addr;
 
 	TAILQ_HEAD(, nvme_tracker)	free_tr;
 	TAILQ_HEAD(, nvme_tracker)	outstanding_tr;
 	STAILQ_HEAD(, nvme_request)	queued_req;
 
 	struct nvme_tracker	**act_tr;
 
 	boolean_t		is_enabled;
 
 	struct mtx		lock __aligned(CACHE_LINE_SIZE);
 
 } __aligned(CACHE_LINE_SIZE);
 
 /*
  * Per-namespace state.  struct nvme_controller embeds an array of
  *  NVME_MAX_NAMESPACES of these; cons_cookie holds one slot per possible
  *  consumer (NVME_MAX_CONSUMERS).
  */
 struct nvme_namespace {
 
 	struct nvme_controller		*ctrlr;
 	struct nvme_namespace_data	data;
 	uint32_t			id;
 	uint32_t			flags;
 	struct cdev			*cdev;
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 	uint32_t			stripesize;
 	struct mtx			lock;
 };
 
 /*
  * One of these per allocated PCI device.
  */
 struct nvme_controller {
 
 	device_t		dev;
 
 	struct mtx		lock;
 
 	uint32_t		ready_timeout_in_ms;
 	uint32_t		quirks;
 #define QUIRK_DELAY_B4_CHK_RDY 1		/* Can't touch MMIO on disable */
 
 	bus_space_tag_t		bus_tag;
 	bus_space_handle_t	bus_handle;
 	int			resource_id;
 	struct resource		*resource;
 
 	/*
 	 * The NVMe spec allows for the MSI-X table to be placed in BAR 4/5,
 	 *  separate from the control registers which are in BAR 0/1.  These
 	 *  members track the mapping of BAR 4/5 for that reason.
 	 */
 	int			bar4_resource_id;
 	struct resource		*bar4_resource;
 
 	uint32_t		msix_enabled;
 	uint32_t		force_intx;
 	uint32_t		enable_aborts;
 
 	uint32_t		num_io_queues;
 	uint32_t		num_cpus_per_ioq;
 	uint32_t		max_hw_pend_io;
 
 	/* Fields for tracking progress during controller initialization. */
 	struct intr_config_hook	config_hook;
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
 
 	struct task		reset_task;
 	struct task		fail_req_task;
 	struct taskqueue	*taskqueue;
 
 	/* For shared legacy interrupt. */
 	int			rid;
 	struct resource		*res;
 	void			*tag;
 
 	bus_dma_tag_t		hw_desc_tag;
 	bus_dmamap_t		hw_desc_map;
 
 	/** maximum i/o size in bytes */
 	uint32_t		max_xfer_size;
 
 	/** minimum page size supported by this controller in bytes */
 	uint32_t		min_page_size;
 
 	/** interrupt coalescing time period (in microseconds) */
 	uint32_t		int_coal_time;
 
 	/** interrupt coalescing threshold */
 	uint32_t		int_coal_threshold;
 
 	/** timeout period in seconds */
 	uint32_t		timeout_period;
 
 	struct nvme_qpair	adminq;
 	struct nvme_qpair	*ioq;
 
 	struct nvme_registers		*regs;
 
 	struct nvme_controller_data	cdata;
 	struct nvme_namespace		ns[NVME_MAX_NAMESPACES];
 
 	struct cdev			*cdev;
 
-	/** bit mask of critical warning types currently enabled for async events */
-	uint8_t				async_event_config;
+	/** bit mask of event types currently enabled for async events */
+	uint32_t			async_event_config;
 
 	uint32_t			num_aers;
 	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
 
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 
 	uint32_t			is_resetting;
 	uint32_t			is_initialized;
 	uint32_t			notification_sent;
 
 	boolean_t			is_failed;
 	STAILQ_HEAD(, nvme_request)	fail_req;
 };
 
 /*
  * Register access helpers.  'reg' is the name of a member of
  *  struct nvme_registers; its byte offset within that structure is used as
  *  the bus_space offset.  'sc' must provide bus_tag and bus_handle members.
  */
 #define nvme_mmio_offsetof(reg)						       \
 	offsetof(struct nvme_registers, reg)
 
 /* Read the 32-bit register 'reg'. */
 #define nvme_mmio_read_4(sc, reg)					       \
 	bus_space_read_4((sc)->bus_tag, (sc)->bus_handle,		       \
 	    nvme_mmio_offsetof(reg))
 
 /* Write 'val' to the 32-bit register 'reg'. */
 #define nvme_mmio_write_4(sc, reg, val)					       \
 	bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,		       \
 	    nvme_mmio_offsetof(reg), val)
 
 /*
  * Write a 64-bit value to register 'reg' as two 32-bit writes: the low
  *  32 bits at the register's offset first, then the high 32 bits at
  *  offset + 4.
  *
  * 'val' is evaluated exactly once and may be any expression.  The macro
  *  expands to a single statement without a trailing semicolon, so it can be
  *  used as the body of an if/else.
  */
 #define nvme_mmio_write_8(sc, reg, val)					       \
 	do {								       \
 		uint64_t nvme_mmio_val_ = (val);			       \
 		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
 		    nvme_mmio_offsetof(reg),				       \
 		    (uint32_t)(nvme_mmio_val_ & 0xFFFFFFFF));		       \
 		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
 		    nvme_mmio_offsetof(reg)+4,				       \
 		    (uint32_t)(nvme_mmio_val_ >> 32));			       \
 	} while (0)
 
 /*
  * Fallback wmb()/mb() definitions, used only when __FreeBSD_version is below
  *  800054.  They issue the x86 sfence/mfence instructions with a "memory"
  *  clobber.
  */
 #if __FreeBSD_version < 800054
 #define wmb()	__asm volatile("sfence" ::: "memory")
 #define mb()	__asm volatile("mfence" ::: "memory")
 #endif
 
 /*
  * device_printf() on the controller's device.  'ctrlr' is parenthesized so
  *  that any pointer expression can be passed.
  */
 #define nvme_printf(ctrlr, fmt, args...)	\
     device_printf((ctrlr)->dev, fmt, ##args)
 
 void	nvme_ns_test(struct nvme_namespace *ns, u_long cmd, caddr_t arg);
 
 void	nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr,
 					   void *payload,
 					   nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr,
 					  uint32_t nsid, void *payload,
 					  nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
 						uint32_t microseconds,
 						uint32_t threshold,
 						nvme_cb_fn_t cb_fn,
 						void *cb_arg);
 void	nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
 				      struct nvme_error_information_entry *payload,
 				      uint32_t num_entries, /* 0 = max */
 				      nvme_cb_fn_t cb_fn,
 				      void *cb_arg);
 void	nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
 						   uint32_t nsid,
 						   struct nvme_health_information_page *payload,
 						   nvme_cb_fn_t cb_fn,
 						   void *cb_arg);
 void	nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
 					 struct nvme_firmware_page *payload,
 					 nvme_cb_fn_t cb_fn,
 					 void *cb_arg);
 void	nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que, uint16_t vector,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
 				      uint32_t num_queues, nvme_cb_fn_t cb_fn,
 				      void *cb_arg);
 void	nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
-					      uint8_t state,
+					      uint32_t state,
 					      nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
 			     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg);
 
 void	nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl);
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_shutdown(struct nvme_controller *ctrlr);
 int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
 void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
 /* ctrlr defined as void * to allow use with config_intrhook. */
 void	nvme_ctrlr_start_config_hook(void *ctrlr_arg);
 void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
 					struct nvme_request *req);
 void	nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
 				     struct nvme_request *req);
 void	nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
 				       struct nvme_request *req);
 
 int	nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 			     uint16_t vector, uint32_t num_entries,
 			     uint32_t num_trackers,
 			     struct nvme_controller *ctrlr);
 void	nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
 				  struct nvme_tracker *tr);
 bool	nvme_qpair_process_completions(struct nvme_qpair *qpair);
 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
 				  struct nvme_request *req);
 void	nvme_qpair_reset(struct nvme_qpair *qpair);
 void	nvme_qpair_fail(struct nvme_qpair *qpair);
 void	nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
 					   struct nvme_request *req,
 					   uint32_t sct, uint32_t sc,
 					   boolean_t print_on_error);
 
 void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_destroy(struct nvme_qpair *qpair);
 
 void	nvme_io_qpair_enable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
 
 int	nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
 			  struct nvme_controller *ctrlr);
 void	nvme_ns_destruct(struct nvme_namespace *ns);
 
 void	nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
 
 void	nvme_dump_command(struct nvme_command *cmd);
 void	nvme_dump_completion(struct nvme_completion *cpl);
 
 /*
  * DMA load callback that stores the bus address of the first segment.
  *
  * 'arg' points to the uint64_t that receives seg[0].ds_addr.  When 'error'
  *  is nonzero the error is reported and the function returns without reading
  *  'seg' or writing through 'arg', since the segment array is not assumed to
  *  be valid in that case.
  */
 static __inline void
 nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
 	uint64_t *bus_addr = (uint64_t *)arg;
 
 	if (error != 0) {
 		printf("nvme_single_map err %d\n", error);
 		return;
 	}
 	*bus_addr = seg[0].ds_addr;
 }
 
 /*
  * Allocate a zeroed request from nvme_request_zone without sleeping
  *  (M_NOWAIT | M_ZERO).
  *
  * On success cb_fn and cb_arg are stored in the request and 'timeout' is set
  *  to TRUE.  Returns NULL if the zone allocation fails.
  */
 static __inline struct nvme_request *
 _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *new_req;
 
 	new_req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
 	if (new_req == NULL)
 		return (NULL);
 
 	new_req->timeout = TRUE;
 	new_req->cb_arg = cb_arg;
 	new_req->cb_fn = cb_fn;
 	return (new_req);
 }
 
 /*
  * Allocate a request of type NVME_REQUEST_VADDR whose payload is the buffer
  *  'payload' of 'payload_size' bytes.  Returns NULL if allocation fails.
  */
 static __inline struct nvme_request *
 nvme_allocate_request_vaddr(void *payload, uint32_t payload_size,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *new_req;
 
 	new_req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (new_req == NULL)
 		return (NULL);
 
 	new_req->type = NVME_REQUEST_VADDR;
 	new_req->payload_size = payload_size;
 	new_req->u.payload = payload;
 	return (new_req);
 }
 
 /*
  * Allocate a request of type NVME_REQUEST_NULL, i.e. one with no payload.
  *  Returns NULL if allocation fails.
  */
 static __inline struct nvme_request *
 nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *new_req;
 
 	new_req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (new_req == NULL)
 		return (NULL);
 
 	new_req->type = NVME_REQUEST_NULL;
 	return (new_req);
 }
 
 /*
  * Allocate a request for 'bio'.  Returns NULL if allocation fails.
  *
  * With NVME_UNMAPPED_BIO_SUPPORT the request has type NVME_REQUEST_BIO and
  *  keeps the bio pointer itself.  Without it the request has type
  *  NVME_REQUEST_VADDR and uses bio->bio_data / bio->bio_bcount as its
  *  payload and payload size.
  */
 static __inline struct nvme_request *
 nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *new_req;
 
 	new_req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (new_req == NULL)
 		return (NULL);
 
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 	new_req->u.bio = bio;
 	new_req->type = NVME_REQUEST_BIO;
 #else
 	new_req->payload_size = bio->bio_bcount;
 	new_req->u.payload = bio->bio_data;
 	new_req->type = NVME_REQUEST_VADDR;
 #endif
 	return (new_req);
 }
 
 /*
  * Allocate a request of type NVME_REQUEST_CCB that stores 'ccb' in
  *  u.payload.  Returns NULL if allocation fails.
  */
 static __inline struct nvme_request *
 nvme_allocate_request_ccb(union ccb *ccb, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *new_req;
 
 	new_req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (new_req == NULL)
 		return (NULL);
 
 	new_req->u.payload = ccb;
 	new_req->type = NVME_REQUEST_CCB;
 	return (new_req);
 }
 
 #define nvme_free_request(req)	uma_zfree(nvme_request_zone, req)
 
 void	nvme_notify_async_consumers(struct nvme_controller *ctrlr,
 				    const struct nvme_completion *async_cpl,
 				    uint32_t log_page_id, void *log_page_buffer,
 				    uint32_t log_page_size);
 void	nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
 void	nvme_notify_new_controller(struct nvme_controller *ctrlr);
+void	nvme_notify_ns(struct nvme_controller *ctrlr, int nsid);
 
 void	nvme_ctrlr_intx_handler(void *arg);
 void	nvme_ctrlr_poll(struct nvme_controller *ctrlr);
 
 #endif /* __NVME_PRIVATE_H__ */
Index: head/sys/dev/nvme/nvme_sim.c
===================================================================
--- head/sys/dev/nvme/nvme_sim.c	(revision 334199)
+++ head/sys/dev/nvme/nvme_sim.c	(revision 334200)
@@ -1,432 +1,389 @@
 /*-
  * Copyright (c) 2016 Netflix, Inc
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt_sim.h>
-#include <cam/cam_xpt_internal.h>	// Yes, this is wrong.
 #include <cam/cam_debug.h>
 
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include "nvme_private.h"
 
 #define ccb_accb_ptr spriv_ptr0
 #define ccb_ctrlr_ptr spriv_ptr1
 static void	nvme_sim_action(struct cam_sim *sim, union ccb *ccb);
 static void	nvme_sim_poll(struct cam_sim *sim);
 
 #define sim2softc(sim)	((struct nvme_sim_softc *)cam_sim_softc(sim))
-#define sim2ns(sim)	(sim2softc(sim)->s_ns)
 #define sim2ctrlr(sim)	(sim2softc(sim)->s_ctrlr)
 
 struct nvme_sim_softc
 {
 	struct nvme_controller	*s_ctrlr;
-	struct nvme_namespace	*s_ns;
 	struct cam_sim		*s_sim;
 	struct cam_path		*s_path;
 };
 
 static void
 nvme_sim_nvmeio_done(void *ccb_arg, const struct nvme_completion *cpl)
 {
 	union ccb *ccb = (union ccb *)ccb_arg;
 
 	/*
 	 * Let the periph know the completion, and let it sort out what
 	 * it means. Make our best guess, though for the status code.
 	 */
 	memcpy(&ccb->nvmeio.cpl, cpl, sizeof(*cpl));
 	ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
 	if (nvme_completion_is_error(cpl)) {
 		ccb->ccb_h.status = CAM_REQ_CMP_ERR;
 		xpt_done(ccb);
 	} else {
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		xpt_done_direct(ccb);
 	}
 }
 
 static void
 nvme_sim_nvmeio(struct cam_sim *sim, union ccb *ccb)
 {
 	struct ccb_nvmeio	*nvmeio = &ccb->nvmeio;
 	struct nvme_request	*req;
 	void			*payload;
 	uint32_t		size;
 	struct nvme_controller *ctrlr;
 
 	ctrlr = sim2ctrlr(sim);
 	payload = nvmeio->data_ptr;
 	size = nvmeio->dxfer_len;
 	/* SG LIST ??? */
 	if ((nvmeio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
 		req = nvme_allocate_request_bio((struct bio *)payload,
 		    nvme_sim_nvmeio_done, ccb);
 	else if ((nvmeio->ccb_h.flags & CAM_DATA_SG) == CAM_DATA_SG)
 		req = nvme_allocate_request_ccb(ccb, nvme_sim_nvmeio_done, ccb);
 	else if (payload == NULL)
 		req = nvme_allocate_request_null(nvme_sim_nvmeio_done, ccb);
 	else
 		req = nvme_allocate_request_vaddr(payload, size,
 		    nvme_sim_nvmeio_done, ccb);
 
 	if (req == NULL) {
 		nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL;
 		xpt_done(ccb);
 		return;
 	}
 	ccb->ccb_h.status |= CAM_SIM_QUEUED;
 
 	memcpy(&req->cmd, &ccb->nvmeio.cmd, sizeof(ccb->nvmeio.cmd));
 
 	if (ccb->ccb_h.func_code == XPT_NVME_IO)
 		nvme_ctrlr_submit_io_request(ctrlr, req);
 	else
 		nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
 
 static uint32_t
 nvme_link_kBps(struct nvme_controller *ctrlr)
 {
 	uint32_t speed, lanes, link[] = { 1, 250000, 500000, 985000, 1970000 };
 	uint32_t status;
 
 	status = pcie_read_config(ctrlr->dev, PCIER_LINK_STA, 2);
 	speed = status & PCIEM_LINK_STA_SPEED;
 	lanes = (status & PCIEM_LINK_STA_WIDTH) >> 4;
 	/*
 	 * Failsafe on link speed indicator. If it is insane report the number of
 	 * lanes as the speed. Not 100% accurate, but may be diagnostic.
 	 */
 	if (speed >= nitems(link))
 		speed = 0;
 	return link[speed] * lanes;
 }
 
 static void
 nvme_sim_action(struct cam_sim *sim, union ccb *ccb)
 {
 	struct nvme_controller *ctrlr;
-	struct nvme_namespace *ns;
 
 	CAM_DEBUG(ccb->ccb_h.path, CAM_DEBUG_TRACE,
 	    ("nvme_sim_action: func= %#x\n",
 		ccb->ccb_h.func_code));
 
-	/*
-	 * XXX when we support multiple namespaces in the base driver we'll need
-	 * to revisit how all this gets stored and saved in the periph driver's
-	 * reserved areas. Right now we store all three in the softc of the sim.
-	 */
-	ns = sim2ns(sim);
 	ctrlr = sim2ctrlr(sim);
 
 	mtx_assert(&ctrlr->lock, MA_OWNED);
 
 	switch (ccb->ccb_h.func_code) {
 	case XPT_CALC_GEOMETRY:		/* Calculate Geometry Totally nuts ? XXX */
 		/* 
 		 * Only meaningful for old-school SCSI disks since only the SCSI
 		 * da driver generates them. Reject all these that slip through.
 		 */
 		/*FALLTHROUGH*/
 	case XPT_ABORT:			/* Abort the specified CCB */
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		break;
 	case XPT_SET_TRAN_SETTINGS:
 		/*
 		 * NVMe doesn't really have different transfer settings, but
 		 * other parts of CAM think failure here is a big deal.
 		 */
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	case XPT_PATH_INQ:		/* Path routing inquiry */
 	{
 		struct ccb_pathinq	*cpi = &ccb->cpi;
 		device_t		dev = ctrlr->dev;
 
 		/*
 		 * NVMe may have multiple LUNs on the same path. Current generation
 		 * of NVMe devives support only a single name space. Multiple name
 		 * space drives are coming, but it's unclear how we should report
 		 * them up the stack.
 		 */
 		cpi->version_num = 1;
 		cpi->hba_inquiry = 0;
 		cpi->target_sprt = 0;
-		cpi->hba_misc =  PIM_UNMAPPED /* | PIM_NOSCAN */;
+		cpi->hba_misc =  PIM_UNMAPPED | PIM_NOSCAN;
 		cpi->hba_eng_cnt = 0;
 		cpi->max_target = 0;
 		cpi->max_lun = ctrlr->cdata.nn;
-		cpi->maxio = nvme_ns_get_max_io_xfer_size(ns);
+		cpi->maxio = ctrlr->max_xfer_size;
 		cpi->initiator_id = 0;
 		cpi->bus_id = cam_sim_bus(sim);
 		cpi->base_transfer_speed = nvme_link_kBps(ctrlr);
 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 		strlcpy(cpi->hba_vid, "NVMe", HBA_IDLEN);
 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 		cpi->transport = XPORT_NVME;		/* XXX XPORT_PCIE ? */
 		cpi->transport_version = nvme_mmio_read_4(ctrlr, vs);
 		cpi->protocol = PROTO_NVME;
 		cpi->protocol_version = nvme_mmio_read_4(ctrlr, vs);
-		cpi->xport_specific.nvme.nsid = ns->id;
+		cpi->xport_specific.nvme.nsid = xpt_path_lun_id(ccb->ccb_h.path);
 		cpi->xport_specific.nvme.domain = pci_get_domain(dev);
 		cpi->xport_specific.nvme.bus = pci_get_bus(dev);
 		cpi->xport_specific.nvme.slot = pci_get_slot(dev);
 		cpi->xport_specific.nvme.function = pci_get_function(dev);
 		cpi->xport_specific.nvme.extra = 0;
 		cpi->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_GET_TRAN_SETTINGS:	/* Get transport settings */
 	{
 		struct ccb_trans_settings	*cts;
 		struct ccb_trans_settings_nvme	*nvmep;
 		struct ccb_trans_settings_nvme	*nvmex;
 		device_t dev;
 		uint32_t status, caps;
 
 		dev = ctrlr->dev;
 		cts = &ccb->cts;
 		nvmex = &cts->xport_specific.nvme;
 		nvmep = &cts->proto_specific.nvme;
 
 		status = pcie_read_config(dev, PCIER_LINK_STA, 2);
 		caps = pcie_read_config(dev, PCIER_LINK_CAP, 2);
 		nvmex->valid = CTS_NVME_VALID_SPEC | CTS_NVME_VALID_LINK;
 		nvmex->spec = nvme_mmio_read_4(ctrlr, vs);
 		nvmex->speed = status & PCIEM_LINK_STA_SPEED;
 		nvmex->lanes = (status & PCIEM_LINK_STA_WIDTH) >> 4;
 		nvmex->max_speed = caps & PCIEM_LINK_CAP_MAX_SPEED;
 		nvmex->max_lanes = (caps & PCIEM_LINK_CAP_MAX_WIDTH) >> 4;
 
 		/* XXX these should be something else maybe ? */
 		nvmep->valid = 1;
 		nvmep->spec = nvmex->spec;
 
 		cts->transport = XPORT_NVME;
 		cts->protocol = PROTO_NVME;
 		cts->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_TERM_IO:		/* Terminate the I/O process */
 		/*
 		 * every driver handles this, but nothing generates it. Assume
 		 * it's OK to just say 'that worked'.
 		 */
 		/*FALLTHROUGH*/
 	case XPT_RESET_DEV:		/* Bus Device Reset the specified device */
 	case XPT_RESET_BUS:		/* Reset the specified bus */
 		/*
 		 * NVMe doesn't really support physically resetting the bus. It's part
 		 * of the bus scanning dance, so return sucess to tell the process to
 		 * proceed.
 		 */
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 	case XPT_NVME_IO:		/* Execute the requested I/O operation */
 	case XPT_NVME_ADMIN:		/* or Admin operation */
 		nvme_sim_nvmeio(sim, ccb);
 		return;			/* no done */
 	default:
 		ccb->ccb_h.status = CAM_REQ_INVALID;
 		break;
 	}
 	xpt_done(ccb);
 }
 
 static void
 nvme_sim_poll(struct cam_sim *sim)
 {
 
 	nvme_ctrlr_poll(sim2ctrlr(sim));
 }
 
 static void *
 nvme_sim_new_controller(struct nvme_controller *ctrlr)
 {
+	struct nvme_sim_softc *sc;
 	struct cam_devq *devq;
 	int max_trans;
-	int unit;
-	struct nvme_sim_softc *sc = NULL;
 
 	max_trans = ctrlr->max_hw_pend_io;
-	unit = device_get_unit(ctrlr->dev);
 	devq = cam_simq_alloc(max_trans);
 	if (devq == NULL)
-		return NULL;
+		return (NULL);
 
 	sc = malloc(sizeof(*sc), M_NVME, M_ZERO | M_WAITOK);
-
 	sc->s_ctrlr = ctrlr;
 
 	sc->s_sim = cam_sim_alloc(nvme_sim_action, nvme_sim_poll,
-	    "nvme", sc, unit, &ctrlr->lock, max_trans, max_trans, devq);
+	    "nvme", sc, device_get_unit(ctrlr->dev),
+	    &ctrlr->lock, max_trans, max_trans, devq);
 	if (sc->s_sim == NULL) {
 		printf("Failed to allocate a sim\n");
 		cam_simq_free(devq);
-		free(sc, M_NVME);
-		return NULL;
+		goto err1;
 	}
+	if (xpt_bus_register(sc->s_sim, ctrlr->dev, 0) != CAM_SUCCESS) {
+		printf("Failed to create a bus\n");
+		goto err2;
+	}
+	if (xpt_create_path(&sc->s_path, /*periph*/NULL, cam_sim_path(sc->s_sim),
+	    CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
+		printf("Failed to create a path\n");
+		goto err3;
+	}
 
-	return sc;
+	return (sc);
+
+err3:
+	xpt_bus_deregister(cam_sim_path(sc->s_sim));
+err2:
+	cam_sim_free(sc->s_sim, /*free_devq*/TRUE);
+err1:
+	free(sc, M_NVME);
+	return (NULL);
 }
 
-static void
-nvme_sim_rescan_target(struct nvme_controller *ctrlr, struct cam_path *path)
+static void *
+nvme_sim_new_ns(struct nvme_namespace *ns, void *sc_arg)
 {
+	struct nvme_sim_softc *sc = sc_arg;
+	struct nvme_controller *ctrlr = sc->s_ctrlr;
 	union ccb *ccb;
 
 	ccb = xpt_alloc_ccb_nowait();
 	if (ccb == NULL) {
 		printf("unable to alloc CCB for rescan\n");
-		return;
+		return (NULL);
 	}
 
-	if (xpt_clone_path(&ccb->ccb_h.path, path) != CAM_REQ_CMP) {
-		printf("unable to copy path for rescan\n");
+	if (xpt_create_path(&ccb->ccb_h.path, /*periph*/NULL,
+	    cam_sim_path(sc->s_sim), 0, ns->id) != CAM_REQ_CMP) {
+		printf("unable to create path for rescan\n");
 		xpt_free_ccb(ccb);
-		return;
+		return (NULL);
 	}
 
+	/* Lock only here so the error returns above cannot leak it. */
+	mtx_lock(&ctrlr->lock);
 	xpt_rescan(ccb);
-}
-	
-static void *
-nvme_sim_new_ns(struct nvme_namespace *ns, void *sc_arg)
-{
-	struct nvme_sim_softc *sc = sc_arg;
-	struct nvme_controller *ctrlr = sc->s_ctrlr;
-	int i;
 
-	sc->s_ns = ns;
-
-	/*
-	 * XXX this is creating one bus per ns, but it should be one
-	 * XXX target per controller, and one LUN per namespace.
-	 * XXX Current drives only support one NS, so there's time
-	 * XXX to fix it later when new drives arrive.
-	 *
-	 * XXX I'm pretty sure the xpt_bus_register() call below is
-	 * XXX like super lame and it really belongs in the sim_new_ctrlr
-	 * XXX callback. Then the create_path below would be pretty close
-	 * XXX to being right. Except we should be per-ns not per-ctrlr
-	 * XXX data.
-	 */
-
-	mtx_lock(&ctrlr->lock);
-/* Create bus */
-
-	/*
-	 * XXX do I need to lock ctrlr->lock ? 
-	 * XXX do I need to lock the path?
-	 * ata and scsi seem to in their code, but their discovery is
-	 * somewhat more asynchronous. We're only every called one at a
-	 * time, and nothing is in parallel.
-	 */
-
-	i = 0;
-	if (xpt_bus_register(sc->s_sim, ctrlr->dev, 0) != CAM_SUCCESS)
-		goto error;
-	i++;
-	if (xpt_create_path(&sc->s_path, /*periph*/NULL, cam_sim_path(sc->s_sim),
-	    1, ns->id) != CAM_REQ_CMP)
-		goto error;
-	i++;
-
-	sc->s_path->device->nvme_data = nvme_ns_get_data(ns);
-	sc->s_path->device->nvme_cdata = nvme_ctrlr_get_data(ns->ctrlr);
-
-/* Scan bus */
-	nvme_sim_rescan_target(ctrlr, sc->s_path);
-
 	mtx_unlock(&ctrlr->lock);
 
-	return ns;
-
-error:
-	switch (i) {
-	case 2:
-		xpt_free_path(sc->s_path);
-	case 1:
-		xpt_bus_deregister(cam_sim_path(sc->s_sim));
-	case 0:
-		cam_sim_free(sc->s_sim, /*free_devq*/TRUE);
-	}
-	mtx_unlock(&ctrlr->lock);
-	return NULL;
+	return (ns);
 }
 
 static void
 nvme_sim_controller_fail(void *ctrlr_arg)
 {
-	/* XXX cleanup XXX */
+	struct nvme_sim_softc *sc = ctrlr_arg;	/* SIM softc, not a ctrlr */
+	struct nvme_controller *ctrlr = sc->s_ctrlr;
+
+	mtx_lock(&ctrlr->lock);		/* held across all CAM teardown calls */
+	xpt_async(AC_LOST_DEVICE, sc->s_path, NULL);
+	xpt_free_path(sc->s_path);
+	xpt_bus_deregister(cam_sim_path(sc->s_sim));
+	cam_sim_free(sc->s_sim, /*free_devq*/TRUE);
+	mtx_unlock(&ctrlr->lock);
+	free(sc, M_NVME);
 }
 
 struct nvme_consumer *consumer_cookie;
 
 static void
 nvme_sim_init(void)
 {
 	if (nvme_use_nvd)
 		return;
 
 	consumer_cookie = nvme_register_consumer(nvme_sim_new_ns,
 	    nvme_sim_new_controller, NULL, nvme_sim_controller_fail);
 }
 
 SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY,
     nvme_sim_init, NULL);
 
 static void
 nvme_sim_uninit(void)
 {
 	if (nvme_use_nvd)
 		return;
 	/* XXX Cleanup */
 
 	nvme_unregister_consumer(consumer_cookie);
 }
 
 SYSUNINIT(nvme_sim_unregister, SI_SUB_DRIVERS, SI_ORDER_ANY,
     nvme_sim_uninit, NULL);