Index: head/sys/arm64/arm64/busdma_bounce.c
===================================================================
--- head/sys/arm64/arm64/busdma_bounce.c	(revision 347835)
+++ head/sys/arm64/arm64/busdma_bounce.c	(revision 347836)
@@ -1,1330 +1,1358 @@
 /*-
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * Copyright (c) 2015-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * Portions of this software were developed by Andrew Turner
  * under sponsorship of the FreeBSD Foundation.
  *
  * Portions of this software were developed by Semihalf
  * under sponsorship of the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <arm64/include/bus_dma_impl.h>
 
 #define MAX_BPAGES 4096
 
 enum {
 	BF_COULD_BOUNCE		= 0x01,
 	BF_MIN_ALLOC_COMP	= 0x02,
 	BF_KMEM_ALLOC		= 0x04,
 	BF_COHERENT		= 0x10,
 };
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	struct bus_dma_tag_common common;
 	int			map_count;
 	int			bounce_flags;
 	bus_dma_segment_t	*segments;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_page_t	datapage;	/* physical page of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 struct sync_list {
 	vm_offset_t	vaddr;		/* kva of client data */
 	bus_addr_t	paddr;		/* physical address */
 	vm_page_t	pages;		/* starting page of client data */
 	bus_size_t	datacount;	/* client data count */
 };
 
 struct bus_dmamap {
 	struct bp_list	       bpages;
 	int		       pagesneeded;
 	int		       pagesreserved;
 	bus_dma_tag_t	       dmat;
 	struct memdesc	       mem;
 	bus_dmamap_callback_t *callback;
 	void		      *callback_arg;
 	STAILQ_ENTRY(bus_dmamap) links;
 	u_int			flags;
 #define	DMAMAP_COULD_BOUNCE	(1 << 0)
 #define	DMAMAP_FROM_DMAMEM	(1 << 1)
 	int			sync_count;
 	struct sync_list	slist[];
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
+static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf,
+    bus_size_t buflen, int *pagesneeded);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     pmap_t pmap, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int flags);
 
 /*
  * Allocate a device specific dma_tag.
  */
 static int
 bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error;
 
 	*dmat = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
 	    NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg,
 	    maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof (struct bus_dma_tag), (void **)&newtag);
 	if (error != 0)
 		return (error);
 
 	newtag->common.impl = &bus_dma_bounce_impl;
 	newtag->map_count = 0;
 	newtag->segments = NULL;
 
 	if ((flags & BUS_DMA_COHERENT) != 0)
 		newtag->bounce_flags |= BF_COHERENT;
 
 	if (parent != NULL) {
 		if ((newtag->common.filter != NULL ||
 		    (parent->bounce_flags & BF_COULD_BOUNCE) != 0))
 			newtag->bounce_flags |= BF_COULD_BOUNCE;
 
 		/* Copy some flags from the parent */
 		newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT;
 	}
 
 	if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
 	    newtag->common.alignment > 1)
 		newtag->bounce_flags |= BF_COULD_BOUNCE;
 
 	if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0) {
 		struct bounce_zone *bz;
 
 		/* Must bounce */
 		if ((error = alloc_bounce_zone(newtag)) != 0) {
 			free(newtag, M_DEVBUF);
 			return (error);
 		}
 		bz = newtag->bounce_zone;
 
 		if (ptoa(bz->total_bpages) < maxsize) {
 			int pages;
 
 			pages = atop(maxsize) - bz->total_bpages;
 
 			/* Add pages to our bounce pool */
 			if (alloc_bounce_pages(newtag, pages) < pages)
 				error = ENOMEM;
 		}
 		/* Performed initial allocation */
 		newtag->bounce_flags |= BF_MIN_ALLOC_COMP;
 	} else
 		error = 0;
 
 	if (error != 0)
 		free(newtag, M_DEVBUF);
 	else
 		*dmat = newtag;
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
 static int
 bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy, parent;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			parent = (bus_dma_tag_t)dmat->common.parent;
 			atomic_subtract_int(&dmat->common.ref_count, 1);
 			if (dmat->common.ref_count == 0) {
 				if (dmat->segments != NULL)
 					free(dmat->segments, M_DEVBUF);
 				free(dmat, M_DEVBUF);
 				/*
 				 * Last reference count, so
 				 * release our reference
 				 * count on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
+static bool
+bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	/* True if no chunk of the buffer would need a bounce page. */
+	if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0)
+		return (true);
+	return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL));
+}
+
 static bus_dmamap_t
 alloc_dmamap(bus_dma_tag_t dmat, int flags)
 {
 	u_long mapsize;
 	bus_dmamap_t map;
 
 	mapsize = sizeof(*map);
 	mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
 	map = malloc(mapsize, M_DEVBUF, flags | M_ZERO);
 	if (map == NULL)
 		return (NULL);
 
 	/* Initialize the new map */
 	STAILQ_INIT(&map->bpages);
 
 	return (map);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bounce_zone *bz;
 	int error, maxpages, pages;
 
 	error = 0;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, M_NOWAIT);
 		if (dmat->segments == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	*mapp = alloc_dmamap(dmat, M_NOWAIT);
 	if (*mapp == NULL) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 		    __func__, dmat, ENOMEM);
 		return (ENOMEM);
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if (dmat->bounce_flags & BF_COULD_BOUNCE) {
 		/* Must bounce */
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0) {
 				free(*mapp, M_DEVBUF);
 				return (error);
 			}
 		}
 		bz = dmat->bounce_zone;
 
 		(*mapp)->flags = DMAMAP_COULD_BOUNCE;
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		if (dmat->common.alignment > 1)
 			maxpages = MAX_BPAGES;
 		else
 			maxpages = MIN(MAX_BPAGES, Maxmem -
 			    atop(dmat->common.lowaddr));
 		if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 ||
 		    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			pages = MAX(atop(dmat->common.maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 			if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP)
 			    == 0) {
 				if (error == 0) {
 					dmat->bounce_flags |=
 					    BF_MIN_ALLOC_COMP;
 				}
 			} else
 				error = 0;
 		}
 		bz->map_count++;
 	}
 	if (error == 0)
 		dmat->map_count++;
 	else
 		free(*mapp, M_DEVBUF);
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, error);
 	return (error);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	/* Check we are destroying the correct map type */
 	if ((map->flags & DMAMAP_FROM_DMAMEM) != 0)
 		panic("bounce_bus_dmamap_destroy: Invalid map freed\n");
 
 	if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
 		CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY);
 		return (EBUSY);
 	}
 	if (dmat->bounce_zone) {
 		KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
 		    ("%s: Bounce zone when cannot bounce", __func__));
 		dmat->bounce_zone->map_count--;
 	}
 	free(map, M_DEVBUF);
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints lited in the dma tag.
  * A dmamap to for use with dmamap_load is also allocated.
  */
 static int
 bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	/*
 	 * XXX ARM64TODO:
 	 * This bus_dma implementation requires IO-Coherent architecutre.
 	 * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has
 	 * to be implented using non-cacheable memory.
 	 */
 
 	vm_memattr_t attr;
 	int mflags;
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, mflags);
 		if (dmat->segments == NULL) {
 			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 			    __func__, dmat, dmat->common.flags, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 	if (flags & BUS_DMA_NOCACHE)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else if ((flags & BUS_DMA_COHERENT) != 0 &&
 	    (dmat->bounce_flags & BF_COHERENT) == 0)
 		/*
 		 * If we have a non-coherent tag, and are trying to allocate
 		 * a coherent block of memory it needs to be uncached.
 		 */
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else
 		attr = VM_MEMATTR_DEFAULT;
 
 	/*
 	 * Create the map, but don't set the could bounce flag as
 	 * this allocation should never bounce;
 	 */
 	*mapp = alloc_dmamap(dmat, mflags);
 	if (*mapp == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		return (ENOMEM);
 	}
 	(*mapp)->flags = DMAMAP_FROM_DMAMEM;
 
 	/*
 	 * Allocate the buffer from the malloc(9) allocator if...
 	 *  - It's small enough to fit into a single power of two sized bucket.
 	 *  - The alignment is less than or equal to the maximum size
 	 *  - The low address requirement is fulfilled.
 	 * else allocate non-contiguous pages if...
 	 *  - The page count that could get allocated doesn't exceed
 	 *    nsegments also when the maximum segment size is less
 	 *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 *
 	 * NOTE: The (dmat->common.alignment <= dmat->maxsize) check
 	 * below is just a quick hack. The exact alignment guarantees
 	 * of malloc(9) need to be nailed down, and the code below
 	 * should be rewritten to take that into account.
 	 *
 	 * In the meantime warn the user if malloc gets it wrong.
 	 */
 	if ((dmat->common.maxsize <= PAGE_SIZE) &&
 	   (dmat->common.alignment <= dmat->common.maxsize) &&
 	    dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags);
 	} else if (dmat->common.nsegments >=
 	    howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) &&
 	    dmat->common.alignment <= PAGE_SIZE &&
 	    (dmat->common.boundary % PAGE_SIZE) == 0) {
 		/* Page-based multi-segment allocations allowed */
 		*vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags,
 		    0ul, dmat->common.lowaddr, attr);
 		dmat->bounce_flags |= BF_KMEM_ALLOC;
 	} else {
 		*vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags,
 		    0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ?
 		    dmat->common.alignment : 1ul, dmat->common.boundary, attr);
 		dmat->bounce_flags |= BF_KMEM_ALLOC;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		free(*mapp, M_DEVBUF);
 		return (ENOMEM);
 	} else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
 	dmat->map_count++;
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, 0);
 	return (0);
 }
 
 /*
  * Free a piece of memory and it's allociated dmamap, that was allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 static void
 bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 
 	/*
 	 * Check the map came from bounce_bus_dmamem_alloc, so the map
 	 * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc()
 	 * was used and set if kmem_alloc_contig() was used.
 	 */
 	if ((map->flags & DMAMAP_FROM_DMAMEM) == 0)
 		panic("bus_dmamem_free: Invalid map freed\n");
 	if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0)
 		free(vaddr, M_DEVBUF);
 	else
 		kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
 	free(map, M_DEVBUF);
 	dmat->map_count--;
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
 	    dmat->bounce_flags);
 }
 
+static bool
+_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen,
+    int *pagesneeded)
+{
+	bus_addr_t curaddr;
+	bus_size_t sgsize;
+	int count;
+
+	/*
+	 * Count the bounce pages needed to complete this transfer.  If
+	 * pagesneeded is NULL, return true at the first one found.
+	 */
+	count = 0;
+	curaddr = buf;
+	while (buflen != 0) {
+		sgsize = MIN(buflen, dmat->common.maxsegsz);
+		if (bus_dma_run_filter(&dmat->common, curaddr)) {
+			sgsize = MIN(sgsize,
+			    PAGE_SIZE - (curaddr & PAGE_MASK));
+			if (pagesneeded == NULL)
+				return (true);
+			count++;
+		}
+		curaddr += sgsize;
+		buflen -= sgsize;
+	}
+
+	if (pagesneeded != NULL)
+		*pagesneeded = count;
+	return (count != 0);
+}
+
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
-	bus_addr_t curaddr;
-	bus_size_t sgsize;
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
-		/*
-		 * Count the number of bounce pages
-		 * needed in order to complete this transfer
-		 */
-		curaddr = buf;
-		while (buflen != 0) {
-			sgsize = MIN(buflen, dmat->common.maxsegsz);
-			if (bus_dma_run_filter(&dmat->common, curaddr)) {
-				sgsize = MIN(sgsize,
-				    PAGE_SIZE - (curaddr & PAGE_MASK));
-				map->pagesneeded++;
-			}
-			curaddr += sgsize;
-			buflen -= sgsize;
-		}
+		_bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded);
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	bus_addr_t paddr;
 	bus_size_t sg_len;
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map,
 		    map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->common.boundary - 1);
 	if (dmat->common.boundary > 0) {
 		baddr = (curaddr + dmat->common.boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz &&
 		    (dmat->common.boundary == 0 ||
 		     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->common.nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrace, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct sync_list *sl;
 	bus_size_t sgsize;
 	bus_addr_t curaddr, sl_end;
 	int error;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 	sl_end = 0;
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->common.maxsegsz);
 		if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr,
 			    sgsize);
 		} else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
 			if (map->sync_count > 0)
 				sl_end = sl->paddr + sl->datacount;
 
 			if (map->sync_count == 0 || curaddr != sl_end) {
 				if (++map->sync_count > dmat->common.nsegments)
 					break;
 				sl++;
 				sl->vaddr = 0;
 				sl->paddr = curaddr;
 				sl->datacount = sgsize;
 				sl->pages = PHYS_TO_VM_PAGE(curaddr);
 				KASSERT(sl->pages != NULL,
 				    ("%s: page at PA:0x%08lx is not in "
 				    "vm_page_array", __func__, curaddr));
 			} else
 				sl->datacount += sgsize;
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrace, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct sync_list *sl;
 	bus_size_t sgsize, max_sgsize;
 	bus_addr_t curaddr, sl_pend;
 	vm_offset_t kvaddr, vaddr, sl_vend;
 	int error;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	sl = map->slist + map->sync_count - 1;
 	vaddr = (vm_offset_t)buf;
 	sl_pend = 0;
 	sl_vend = 0;
 
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (pmap == kernel_pmap) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
 			    sgsize);
 		} else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
 			sgsize = MIN(sgsize, max_sgsize);
 			if (map->sync_count > 0) {
 				sl_pend = sl->paddr + sl->datacount;
 				sl_vend = sl->vaddr + sl->datacount;
 			}
 
 			if (map->sync_count == 0 ||
 			    (kvaddr != 0 && kvaddr != sl_vend) ||
 			    (curaddr != sl_pend)) {
 
 				if (++map->sync_count > dmat->common.nsegments)
 					goto cleanup;
 				sl++;
 				sl->vaddr = kvaddr;
 				sl->paddr = curaddr;
 				if (kvaddr != 0) {
 					sl->pages = NULL;
 				} else {
 					sl->pages = PHYS_TO_VM_PAGE(curaddr);
 					KASSERT(sl->pages != NULL,
 					    ("%s: page at PA:0x%08lx is not "
 					    "in vm_page_array", __func__,
 					    curaddr));
 				}
 				sl->datacount = sgsize;
 			} else
 				sl->datacount += sgsize;
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 cleanup:
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static void
 bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	if ((map->flags & DMAMAP_COULD_BOUNCE) == 0)
 		return;
 	map->mem = *mem;
 	map->dmat = dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 static bus_dma_segment_t *
 bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 static void
 bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 
 	map->sync_count = 0;
 }
 
 static void
 dma_preread_safe(vm_offset_t va, vm_size_t size)
 {
 	/*
 	 * Write back any partial cachelines immediately before and
 	 * after the DMA region.
 	 */
 	if (va & (dcache_line_size - 1))
 		cpu_dcache_wb_range(va, 1);
 	if ((va + size) & (dcache_line_size - 1))
 		cpu_dcache_wb_range(va + size, 1);
 
 	cpu_dcache_inv_range(va, size);
 }
 
 static void
 dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 	vm_paddr_t pa;
 	vm_offset_t va, tempva;
 	bus_size_t size;
 
 	offset = sl->paddr & PAGE_MASK;
 	m = sl->pages;
 	size = sl->datacount;
 	pa = sl->paddr;
 
 	for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
 		tempva = 0;
 		if (sl->vaddr == 0) {
 			len = min(PAGE_SIZE - offset, size);
 			tempva = pmap_quick_enter_page(m);
 			va = tempva | offset;
 			KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
 			    ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx",
 			    VM_PAGE_TO_PHYS(m) | offset, pa));
 		} else {
 			len = sl->datacount;
 			va = sl->vaddr;
 		}
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
 		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			cpu_dcache_wb_range(va, len);
 			break;
 		case BUS_DMASYNC_PREREAD:
 			/*
 			 * An mbuf may start in the middle of a cacheline. There
 			 * will be no cpu writes to the beginning of that line
 			 * (which contains the mbuf header) while dma is in
 			 * progress.  Handle that case by doing a writeback of
 			 * just the first cacheline before invalidating the
 			 * overall buffer.  Any mbuf in a chain may have this
 			 * misalignment.  Buffers which are not mbufs bounce if
 			 * they are not aligned to a cacheline.
 			 */
 			dma_preread_safe(va, len);
 			break;
 		case BUS_DMASYNC_POSTREAD:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
 			cpu_dcache_inv_range(va, len);
 			break;
 		default:
 			panic("unsupported combination of sync operations: "
                               "0x%08x\n", op);
 		}
 
 		if (tempva != 0)
 			pmap_quick_remove_page(tempva);
 	}
 }
 
 static void
 bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	struct sync_list *sl, *end;
 	vm_offset_t datavaddr, tempvaddr;
 
 	if (op == BUS_DMASYNC_POSTWRITE)
 		return;
 
 	if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 		/*
 		 * Wait for any DMA operations to complete before the bcopy.
 		 */
 		dsb(sy);
 	}
 
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->common.flags,
 		    op);
 
 		if ((op & BUS_DMASYNC_PREWRITE) != 0) {
 			while (bpage != NULL) {
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 
 				bcopy((void *)datavaddr,
 				    (void *)bpage->vaddr, bpage->datacount);
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_wb_range(bpage->vaddr,
 					    bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		} else if ((op & BUS_DMASYNC_PREREAD) != 0) {
 			while (bpage != NULL) {
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_wbinv_range(bpage->vaddr,
 					    bpage->datacount);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
 
 		if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 			while (bpage != NULL) {
 				if ((dmat->bounce_flags & BF_COHERENT) == 0)
 					cpu_dcache_inv_range(bpage->vaddr,
 					    bpage->datacount);
 				tempvaddr = 0;
 				datavaddr = bpage->datavaddr;
 				if (datavaddr == 0) {
 					tempvaddr = pmap_quick_enter_page(
 					    bpage->datapage);
 					datavaddr = tempvaddr | bpage->dataoffs;
 				}
 
 				bcopy((void *)bpage->vaddr,
 				    (void *)datavaddr, bpage->datacount);
 
 				if (tempvaddr != 0)
 					pmap_quick_remove_page(tempvaddr);
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
 
 	/*
 	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.
 	 */
 	if (map->sync_count != 0) {
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
 		CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x "
 		    "performing sync", __func__, dmat, op);
 
 		for ( ; sl != end; ++sl)
 			dma_dcache_sync(sl, op);
 	}
 
 	if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) {
 		/*
 		 * Wait for the bcopy to complete before any DMA operations.
 		 */
 		dsb(sy);
 	}
 }
 
 /*
  * One-time setup of the global bounce-page state: the bounce_lock mutex,
  * the zone list, the waiting and callback map queues and the global page
  * counter.  Registered to run at SI_SUB_LOCK.
  */
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_zone_list);
 	total_bpages = 0;
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 /* Accessor: the sysctl context embedded in a bounce zone. */
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 	struct sysctl_ctx_list *ctx;
 
 	ctx = &bz->sysctl_tree;
 	return (ctx);
 }
 
 /* Accessor: the top-level sysctl node recorded for a bounce zone. */
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 	struct sysctl_oid *top;
 
 	top = bz->sysctl_tree_top;
 	return (top);
 }
 
 /*
  * Attach a bounce zone to dmat.  The first existing zone whose alignment
  * is at least the tag's and whose lowaddr is no higher than the tag's is
  * reused; otherwise a new zone is allocated, appended to bounce_zone_list
  * and its counters are exported below the _hw_busdma sysctl node.
  *
  * Returns 0 on success (including when the sysctl node cannot be created)
  * or ENOMEM if the zone itself cannot be allocated.
  */
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 
 	/* Check to see if we already have a suitable zone */
 	STAILQ_FOREACH(bz, &bounce_zone_list, links) {
 		if ((dmat->common.alignment <= bz->alignment) &&
 		    (dmat->common.lowaddr >= bz->lowaddr)) {
 			dmat->bounce_zone = bz;
 			return (0);
 		}
 	}
 
 	if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF,
 	    M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	STAILQ_INIT(&bz->bounce_page_list);
 	bz->free_bpages = 0;
 	bz->reserved_bpages = 0;
 	bz->active_bpages = 0;
 	bz->lowaddr = dmat->common.lowaddr;
 	/* A zone is never less strictly aligned than one page. */
 	bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE);
 	bz->map_count = 0;
 	/* Name the zone after the running zone counter: "zone0", "zone1", ... */
 	snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr);
 	STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links);
 	dmat->bounce_zone = bz;
 
 	sysctl_ctx_init(&bz->sysctl_tree);
 	bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (bz->sysctl_tree_top == NULL) {
 		/* The zone stays usable; only its statistics are not exported. */
 		sysctl_ctx_free(&bz->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	/* Read-only statistics and parameters of this zone. */
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &bz->alignment, "");
 
 	return (0);
 }
 
 /*
  * Try to add up to numpages bounce pages to the tag's bounce zone.  Each
  * page is a descriptor plus one PAGE_SIZE contiguous allocation placed
  * below the zone's lowaddr.  Allocation never sleeps and stops at the
  * first failure.
  *
  * Returns the number of pages actually added.
  */
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *zone;
 	struct bounce_page *page;
 	int added;
 
 	zone = dmat->bounce_zone;
 	for (added = 0; numpages > 0; added++, numpages--) {
 		page = (struct bounce_page *)malloc(sizeof(*page), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
 		if (page == NULL)
 			break;
 
 		page->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF,
 		    M_NOWAIT, 0ul, zone->lowaddr, PAGE_SIZE, 0);
 		if (page->vaddr == 0) {
 			/* No backing memory: drop the descriptor and give up. */
 			free(page, M_DEVBUF);
 			break;
 		}
 		page->busaddr = pmap_kextract(page->vaddr);
 
 		/* Publish the page and account for it under bounce_lock. */
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&zone->bounce_page_list, page, links);
 		total_bpages++;
 		zone->total_bpages++;
 		zone->free_bpages++;
 		mtx_unlock(&bounce_lock);
 	}
 	return (added);
 }
 
 /*
  * Move free pages of the tag's zone into the map's reservation.  The
  * caller must hold bounce_lock.
  *
  * With commit == 0 nothing is reserved unless the whole remaining need of
  * the map can be met; with commit != 0 whatever is free is taken.
  *
  * Returns the number of pages the map still lacks (0 when fully reserved).
  */
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *zone;
 	int take;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	zone = dmat->bounce_zone;
 	take = MIN(zone->free_bpages, map->pagesneeded - map->pagesreserved);
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + take)) {
 		/* Partial reservation refused: report the shortfall only. */
 		return (map->pagesneeded - (map->pagesreserved + take));
 	}
 
 	map->pagesreserved += take;
 	zone->reserved_bpages += take;
 	zone->free_bpages -= take;
 
 	return (map->pagesneeded - map->pagesreserved);
 }
 
 /*
  * Take one previously reserved bounce page from the tag's zone, bind it
  * to the client buffer described by vaddr/addr/size and append it to the
  * map's list of bounce pages.
  *
  * vaddr is the client's kernel virtual address, or 0 when only the
  * physical address addr is known.  Panics if the map has no outstanding
  * need, has no reserved page, or the zone's page list is empty.
  *
  * Returns the bus address of the bounce page.
  */
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
 		bus_addr_t addr, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0,
 	    ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	/* Distinct message so the two failure modes can be told apart. */
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't have any reserved pages");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr & PAGE_MASK;
 		bpage->busaddr |= addr & PAGE_MASK;
 	}
 	/* Remember where the client data lives for the sync-time copies. */
 	bpage->datavaddr = vaddr;
 	bpage->datapage = PHYS_TO_VM_PAGE(addr);
 	bpage->dataoffs = addr & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 /*
  * Return a bounce page to the free list of the tag's zone.  If a map is
  * waiting for pages, try to complete its reservation; when that succeeds
  * the map is moved to the callback list and the SWI is scheduled so the
  * deferred load can be retried.
  */
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bus_dmamap *map;
 	struct bounce_zone *bz;
 
 	bz = dmat->bounce_zone;
 	/* Detach the page from the client buffer it was shadowing. */
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Reset the bounce page to start at offset 0.  Other uses
 		 * of this bounce page may need to store a full page of
 		 * data and/or assume it starts on a page boundary.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
 	bz->free_bpages++;
 	bz->active_bpages--;
 	/* Only the map at the head of the waiting list is considered. */
 	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
 		/* commit=1: take whatever is free even if it is not enough yet. */
 		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
 			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
 			    map, links);
 			busdma_swi_pending = 1;
 			bz->total_deferred++;
 			swi_sched(vm_ih, 0);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 /*
  * Drain bounce_map_callbacklist: for every queued map, retry its deferred
  * load via bus_dmamap_load_mem() using the memory descriptor, callback
  * and argument stored in the map.
  */
 void
 busdma_swi(void)
 {
 	bus_dma_tag_t dmat;
 	struct bus_dmamap *map;
 
 	mtx_lock(&bounce_lock);
 	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		/* bounce_lock is not held across the load; it is retaken below. */
 		mtx_unlock(&bounce_lock);
 		dmat = map->dmat;
 		/* The load runs bracketed by the tag's own lock function. */
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem,
 		    map->callback, map->callback_arg, BUS_DMA_WAITOK);
 		(dmat->common.lockfunc)(dmat->common.lockfuncarg,
 		    BUS_DMA_UNLOCK);
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 /*
  * Method table of the bounce-buffer bus_dma implementation.  Every slot
  * except load_ma points at a bounce_* routine; load_ma uses the generic
  * bus_dmamap_load_ma_triv().
  */
 struct bus_dma_impl bus_dma_bounce_impl = {
 	.tag_create = bounce_bus_dma_tag_create,
 	.tag_destroy = bounce_bus_dma_tag_destroy,
+	.id_mapped = bounce_bus_dma_id_mapped,
 	.map_create = bounce_bus_dmamap_create,
 	.map_destroy = bounce_bus_dmamap_destroy,
 	.mem_alloc = bounce_bus_dmamem_alloc,
 	.mem_free = bounce_bus_dmamem_free,
 	.load_phys = bounce_bus_dmamap_load_phys,
 	.load_buffer = bounce_bus_dmamap_load_buffer,
 	.load_ma = bus_dmamap_load_ma_triv,
 	.map_waitok = bounce_bus_dmamap_waitok,
 	.map_complete = bounce_bus_dmamap_complete,
 	.map_unload = bounce_bus_dmamap_unload,
 	.map_sync = bounce_bus_dmamap_sync
 };
Index: head/sys/arm64/include/bus_dma.h
===================================================================
--- head/sys/arm64/include/bus_dma.h	(revision 347835)
+++ head/sys/arm64/include/bus_dma.h	(revision 347836)
@@ -1,141 +1,153 @@
 /* $FreeBSD$ */
 
 #ifndef _MACHINE_BUS_DMA_H_
 #define	_MACHINE_BUS_DMA_H_
 
 #define WANT_INLINE_DMAMAP
 #include <sys/bus_dma.h>
 
 #include <machine/bus_dma_impl.h>
 
 /*
+ * Is the DMA address a 1:1 mapping of the physical address?
+ */
+static inline bool
+bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	struct bus_dma_impl *ops;
+
+	/* The tag is viewed through its common header to find the methods. */
+	ops = ((struct bus_dma_tag_common *)dmat)->impl;
+	return (ops->id_mapped(dmat, buf, buflen));
+}
+
+/*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static inline int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bus_dma_impl *ops;
 
 	/* Dispatch to the implementation recorded in the tag. */
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->map_create(dmat, flags, mapp));
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static inline int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bus_dma_impl *ops;
 
 	/* Dispatch to the implementation recorded in the tag. */
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->map_destroy(dmat, map));
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  */
 static inline int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	struct bus_dma_impl *ops;
 
 	/* Dispatch to the implementation recorded in the tag. */
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->mem_alloc(dmat, vaddr, flags, mapp));
 }
 
 /*
  * Free a piece of memory and its associated dmamap, that was allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 static inline void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct bus_dma_impl *ops;
 
 	/* Dispatch to the implementation recorded in the tag. */
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	ops->mem_free(dmat, vaddr, map);
 }
 
 /*
  * Release the mapping held by map.
  */
 static inline void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bus_dma_impl *ops;
 
 	/* Dispatch to the implementation recorded in the tag. */
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	ops->map_unload(dmat, map);
 }
 
 /*
  * Perform the synchronization operation op on map through the tag's
  * implementation.
  */
 static inline void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	ops->map_sync(dmat, map, op);
 }
 
 /* Forward a physical-address load to the tag's implementation. */
 static inline int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->load_phys(dmat, map, buf, buflen, flags, segs, segp));
 }
 
 /* Forward a vm_page-array load to the tag's implementation. */
 static inline int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma,
     bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->load_ma(dmat, map, ma, tlen, ma_offs, flags, segs,
 	    segp));
 }
 
 /* Forward a virtual-buffer load to the tag's implementation. */
 static inline int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->load_buffer(dmat, map, buf, buflen, pmap, flags, segs,
 	    segp));
 }
 
 /* Forward the map_waitok request to the tag's implementation. */
 static inline void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	ops->map_waitok(dmat, map, mem, callback, callback_arg);
 }
 
 /* Forward the map_complete request to the tag's implementation. */
 static inline bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct bus_dma_impl *ops;
 
 	ops = ((struct bus_dma_tag_common *)dmat)->impl;
 	return (ops->map_complete(dmat, map, segs, nsegs, error));
 }
 
 #endif /* !_MACHINE_BUS_DMA_H_ */
Index: head/sys/arm64/include/bus_dma_impl.h
===================================================================
--- head/sys/arm64/include/bus_dma_impl.h	(revision 347835)
+++ head/sys/arm64/include/bus_dma_impl.h	(revision 347836)
@@ -1,96 +1,97 @@
 /*-
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_BUS_DMA_IMPL_H_
 #define	_MACHINE_BUS_DMA_IMPL_H_
 
 /*
  * Implementation-independent part of a DMA tag.  The inline wrappers in
  * <machine/bus_dma.h> cast a bus_dma_tag_t to this structure and call
  * through impl, so impl selects the bus_dma implementation for the tag.
  */
 struct bus_dma_tag_common {
 	struct bus_dma_impl *impl;
 	struct bus_dma_tag_common *parent;
 	bus_size_t	  alignment;
 	bus_addr_t	  boundary;
 	bus_addr_t	  lowaddr;
 	bus_addr_t	  highaddr;
 	bus_dma_filter_t *filter;
 	void		 *filterarg;
 	bus_size_t	  maxsize;
 	u_int		  nsegments;
 	bus_size_t	  maxsegsz;
 	int		  flags;
 	bus_dma_lock_t	 *lockfunc;
 	void		 *lockfuncarg;
 	int		  ref_count;
 };
 
 /*
  * Method table supplied by a bus_dma implementation.  Each member backs
  * the bus_dma(9) entry point of the corresponding name; the tag's
  * bus_dma_tag_common.impl pointer selects which table is used.
  */
 struct bus_dma_impl {
 	int (*tag_create)(bus_dma_tag_t parent,
 	    bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr,
 	    bus_addr_t highaddr, bus_dma_filter_t *filter,
 	    void *filterarg, bus_size_t maxsize, int nsegments,
 	    bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
 	    void *lockfuncarg, bus_dma_tag_t *dmat);
 	int (*tag_destroy)(bus_dma_tag_t dmat);
+	bool (*id_mapped)(bus_dma_tag_t, vm_paddr_t, bus_size_t);
 	int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp);
 	int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map);
 	int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags,
 	    bus_dmamap_t *mapp);
 	void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
 	int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    vm_paddr_t buf, bus_size_t buflen, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    void *buf, bus_size_t buflen, struct pmap *pmap, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    struct memdesc *mem, bus_dmamap_callback_t *callback,
 	    void *callback_arg);
 	bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    bus_dma_segment_t *segs, int nsegs, int error);
 	void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map);
 	void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    bus_dmasync_op_t op);
 };
 
 /*
  * Helpers and the bounce method table made available to code that
  * includes this header; their definitions live outside this header.
  */
 void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op);
 int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr);
 int common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
     bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, size_t sz, void **dmat);
 
 extern struct bus_dma_impl bus_dma_bounce_impl;
 
 #endif
Index: head/sys/compat/linuxkpi/common/src/linux_pci.c
===================================================================
--- head/sys/compat/linuxkpi/common/src/linux_pci.c	(revision 347835)
+++ head/sys/compat/linuxkpi/common/src/linux_pci.c	(revision 347836)
@@ -1,817 +1,844 @@
 /*-
  * Copyright (c) 2015-2016 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/pctrie.h>
 #include <sys/rwlock.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <machine/stdarg.h>
 
 #include <linux/kobject.h>
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/cdev.h>
 #include <linux/file.h>
 #include <linux/sysfs.h>
 #include <linux/mm.h>
 #include <linux/io.h>
 #include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <linux/compat.h>
 
 static device_probe_t linux_pci_probe;
 static device_attach_t linux_pci_attach;
 static device_detach_t linux_pci_detach;
 static device_suspend_t linux_pci_suspend;
 static device_resume_t linux_pci_resume;
 static device_shutdown_t linux_pci_shutdown;
 
 /*
  * Newbus method table installed as bsddriver.methods for every Linux PCI
  * driver registered through _linux_pci_register_driver().
  */
 static device_method_t pci_methods[] = {
 	DEVMETHOD(device_probe, linux_pci_probe),
 	DEVMETHOD(device_attach, linux_pci_attach),
 	DEVMETHOD(device_detach, linux_pci_detach),
 	DEVMETHOD(device_suspend, linux_pci_suspend),
 	DEVMETHOD(device_resume, linux_pci_resume),
 	DEVMETHOD(device_shutdown, linux_pci_shutdown),
 	DEVMETHOD_END
 };
 
 /*
  * Per-device DMA state hung off dev->dma_priv.
  */
 struct linux_dma_priv {
 	uint64_t	dma_mask;	/* mask the current tag was created with */
 	struct mtx	lock;		/* taken via DMA_PRIV_LOCK/UNLOCK */
 	bus_dma_tag_t	dmat;		/* tag used for all mappings of the device */
 	struct pctrie	ptree;		/* linux_dma_obj entries keyed by dma_addr */
 };
 #define	DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock)
 #define	DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock)
 
 /*
  * Allocate and initialize the DMA private state of a PCI device and
  * create its default 64-bit DMA tag.  On failure the state is torn down
  * again and dev.dma_priv is reset to NULL.
  *
  * Returns the result of linux_dma_tag_init().
  */
 static int
 linux_pdev_dma_init(struct pci_dev *pdev)
 {
 	struct linux_dma_priv *dma;
 	int rc;
 
 	dma = malloc(sizeof(*dma), M_DEVBUF, M_WAITOK | M_ZERO);
 	pdev->dev.dma_priv = dma;
 	mtx_init(&dma->lock, "lkpi-priv-dma", NULL, MTX_DEF);
 	pctrie_init(&dma->ptree);
 
 	/* create a default DMA tag */
 	rc = linux_dma_tag_init(&pdev->dev, DMA_BIT_MASK(64));
 	if (rc == 0)
 		return (0);
 
 	/* Tag creation failed: undo everything done above. */
 	mtx_destroy(&dma->lock);
 	free(dma, M_DEVBUF);
 	pdev->dev.dma_priv = NULL;
 	return (rc);
 }
 
 /*
  * Release the DMA private state of a PCI device: destroy the tag if one
  * exists, destroy the lock, free the structure and clear dev.dma_priv.
  * Always returns 0.
  */
 static int
 linux_pdev_dma_uninit(struct pci_dev *pdev)
 {
 	struct linux_dma_priv *dma;
 
 	dma = pdev->dev.dma_priv;
 	if (dma->dmat != NULL)
 		bus_dma_tag_destroy(dma->dmat);
 	mtx_destroy(&dma->lock);
 	free(dma, M_DEVBUF);
 	pdev->dev.dma_priv = NULL;
 
 	return (0);
 }
 
 /*
  * Create (or re-create) the device's DMA tag so that it honours
  * dma_mask.  If a tag created with the same mask already exists nothing
  * is done; a tag created with a different mask is destroyed first.
  *
  * Returns 0 on success or the negated bus_dma_tag_create() error.
  */
 int
 linux_dma_tag_init(struct device *dev, u64 dma_mask)
 {
 	struct linux_dma_priv *priv;
 	int error;
 
 	priv = dev->dma_priv;
 
 	if (priv->dmat) {
 		if (priv->dma_mask == dma_mask)
 			return (0);
 
 		bus_dma_tag_destroy(priv->dmat);
 		/*
 		 * Do not keep a pointer to the destroyed tag: should the
 		 * creation below fail without storing a result, a later
 		 * uninit must not destroy the old tag a second time.
 		 */
 		priv->dmat = NULL;
 	}
 
 	priv->dma_mask = dma_mask;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(dev->bsddev),
 	    1, 0,			/* alignment, boundary */
 	    dma_mask,			/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filtfunc, filtfuncarg */
 	    BUS_SPACE_MAXSIZE,		/* maxsize */
 	    1,				/* nsegments */
 	    BUS_SPACE_MAXSIZE,		/* maxsegsz */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
 	    &priv->dmat);
 	return (-error);
 }
 
 /*
  * Search the registered Linux PCI drivers for an ID table entry matching
  * dev.  The vendor must match exactly; device, subvendor and subdevice
  * match either exactly or through PCI_ANY_ID in the table.
  *
  * Returns the driver and stores the matching entry in *idp, or returns
  * NULL (leaving *idp untouched) when nothing matches.
  */
 static struct pci_driver *
 linux_pci_find(device_t dev, const struct pci_device_id **idp)
 {
 	const struct pci_device_id *ent;
 	struct pci_driver *drv;
 	uint16_t vendor, device, subvendor, subdevice;
 
 	vendor = pci_get_vendor(dev);
 	device = pci_get_device(dev);
 	subvendor = pci_get_subvendor(dev);
 	subdevice = pci_get_subdevice(dev);
 
 	spin_lock(&pci_lock);
 	list_for_each_entry(drv, &pci_drivers, links) {
 		/* A zero vendor terminates each ID table. */
 		for (ent = drv->id_table; ent->vendor != 0; ent++) {
 			if (vendor != ent->vendor)
 				continue;
 			if (PCI_ANY_ID != ent->device && device != ent->device)
 				continue;
 			if (PCI_ANY_ID != ent->subvendor &&
 			    subvendor != ent->subvendor)
 				continue;
 			if (PCI_ANY_ID != ent->subdevice &&
 			    subdevice != ent->subdevice)
 				continue;
 
 			*idp = ent;
 			spin_unlock(&pci_lock);
 			return (drv);
 		}
 	}
 	spin_unlock(&pci_lock);
 	return (NULL);
 }
 
 /*
  * Newbus probe method: accept the device only when a registered Linux
  * driver matches it and that driver is the one currently being probed.
  * Returns 0 on a match, ENXIO otherwise.
  */
 static int
 linux_pci_probe(device_t dev)
 {
 	const struct pci_device_id *id;
 	struct pci_driver *pdrv;
 
 	pdrv = linux_pci_find(dev, &id);
 	if (pdrv == NULL || device_get_driver(dev) != &pdrv->bsddriver)
 		return (ENXIO);
 
 	device_set_desc(dev, pdrv->name);
 	return (0);
 }
 
 /*
  * Newbus attach method: fill in the Linux pci_dev kept in the softc, set
  * up its kobject, IRQ number, DMA state and bus structure, publish it on
  * pci_devices and call the Linux driver's probe routine.
  *
  * Returns 0 on success, otherwise the negation of the error reported by
  * the DMA setup or by the driver's probe routine.
  */
 static int
 linux_pci_attach(device_t dev)
 {
 	struct resource_list_entry *rle;
 	struct pci_bus *pbus;
 	struct pci_dev *pdev;
 	struct pci_devinfo *dinfo;
 	struct pci_driver *pdrv;
 	const struct pci_device_id *id;
 	device_t parent;
 	devclass_t devclass;
 	int error;
 
 	linux_set_current(curthread);
 
 	pdrv = linux_pci_find(dev, &id);
 	pdev = device_get_softc(dev);
 
 	parent = device_get_parent(dev);
 	devclass = device_get_devclass(parent);
 	if (pdrv->isdrm) {
 		/* DRM drivers share the ivars of their parent device. */
 		dinfo = device_get_ivars(parent);
 		device_set_ivars(dev, dinfo);
 	} else {
 		dinfo = device_get_ivars(dev);
 	}
 
 	pdev->dev.parent = &linux_root_device;
 	pdev->dev.bsddev = dev;
 	INIT_LIST_HEAD(&pdev->dev.irqents);
 	pdev->devfn = PCI_DEVFN(pci_get_slot(dev), pci_get_function(dev));
 	pdev->device = dinfo->cfg.device;
 	pdev->vendor = dinfo->cfg.vendor;
 	pdev->subsystem_vendor = dinfo->cfg.subvendor;
 	pdev->subsystem_device = dinfo->cfg.subdevice;
 	pdev->class = pci_get_class(dev);
 	pdev->revision = pci_get_revid(dev);
 	pdev->pdrv = pdrv;
 	kobject_init(&pdev->dev.kobj, &linux_dev_ktype);
 	kobject_set_name(&pdev->dev.kobj, device_get_nameunit(dev));
 	kobject_add(&pdev->dev.kobj, &linux_root_device.kobj,
 	    kobject_name(&pdev->dev.kobj));
 	rle = linux_pci_get_rle(pdev, SYS_RES_IRQ, 0);
 	if (rle != NULL)
 		pdev->dev.irq = rle->start;
 	else
 		pdev->dev.irq = LINUX_IRQ_INVALID;
 	pdev->irq = pdev->dev.irq;
 	error = linux_pdev_dma_init(pdev);
 	if (error)
 		goto out_dma_init;
 
 	pbus = malloc(sizeof(*pbus), M_DEVBUF, M_WAITOK | M_ZERO);
 	pbus->self = pdev;
 	pbus->number = pci_get_bus(dev);
 	pdev->bus = pbus;
 
 	spin_lock(&pci_lock);
 	list_add(&pdev->links, &pci_devices);
 	spin_unlock(&pci_lock);
 
 	error = pdrv->probe(pdev, id);
 	if (error)
 		goto out_probe;
 	return (0);
 
 out_probe:
 	/*
 	 * Unpublish the device before its resources are released.  This
 	 * must not run when DMA setup failed, because the device was never
 	 * added to pci_devices on that path.
 	 */
 	spin_lock(&pci_lock);
 	list_del(&pdev->links);
 	spin_unlock(&pci_lock);
 	free(pdev->bus, M_DEVBUF);
 	linux_pdev_dma_uninit(pdev);
 out_dma_init:
 	put_device(&pdev->dev);
 	return (-error);
 }
 
 /*
  * Newbus detach method: call the Linux driver's remove routine, then
  * release what linux_pci_attach() set up (bus structure, DMA state,
  * pci_devices membership, description and the device reference).
  * Always returns 0.
  */
 static int
 linux_pci_detach(device_t dev)
 {
 	struct pci_dev *pdev;
 
 	linux_set_current(curthread);
 	pdev = device_get_softc(dev);
 
 	/* The driver's remove hook runs first, while all state is intact. */
 	pdev->pdrv->remove(pdev);
 
 	free(pdev->bus, M_DEVBUF);
 	linux_pdev_dma_uninit(pdev);
 
 	spin_lock(&pci_lock);
 	list_del(&pdev->links);
 	spin_unlock(&pci_lock);
 	device_set_desc(dev, NULL);
 	put_device(&pdev->dev);
 
 	return (0);
 }
 
 /*
  * Newbus suspend method.  The driver's legacy suspend hook is preferred;
  * otherwise the dev_pm_ops suspend hook is called, followed by
  * suspend_late when suspend succeeded and suspend_late is provided.
  *
  * Returns the negated result of the last hook called, or 0 when the
  * driver provides no suspend hook.
  */
 static int
 linux_pci_suspend(device_t dev)
 {
 	const struct dev_pm_ops *pmops;
 	struct pm_message pm = { };
 	struct pci_dev *pdev;
 	int error;
 
 	linux_set_current(curthread);
 	pdev = device_get_softc(dev);
 	pmops = pdev->pdrv->driver.pm;
 
 	if (pdev->pdrv->suspend != NULL)
 		return (-pdev->pdrv->suspend(pdev, pm));
 	if (pmops == NULL || pmops->suspend == NULL)
 		return (0);
 
 	error = -pmops->suspend(&pdev->dev);
 	if (error == 0 && pmops->suspend_late != NULL)
 		error = -pmops->suspend_late(&pdev->dev);
 	return (error);
 }
 
 /*
  * Newbus resume method.  The driver's legacy resume hook is preferred;
  * otherwise, when dev_pm_ops provides a resume hook, resume_early (if
  * any) is called first and resume follows only if that succeeded.
  *
  * Returns the negated result of the last hook called, or 0 when the
  * driver provides no resume hook.
  */
 static int
 linux_pci_resume(device_t dev)
 {
 	const struct dev_pm_ops *pmops;
 	struct pci_dev *pdev;
 	int error;
 
 	linux_set_current(curthread);
 	pdev = device_get_softc(dev);
 	pmops = pdev->pdrv->driver.pm;
 
 	if (pdev->pdrv->resume != NULL)
 		return (-pdev->pdrv->resume(pdev));
 	if (pmops == NULL || pmops->resume == NULL)
 		return (0);
 
 	error = 0;
 	if (pmops->resume_early != NULL)
 		error = -pmops->resume_early(&pdev->dev);
 	if (error == 0)
 		error = -pmops->resume(&pdev->dev);
 	return (error);
 }
 
 /*
  * Newbus shutdown method: call the Linux driver's shutdown hook when it
  * has one.  Always returns 0.
  */
 static int
 linux_pci_shutdown(device_t dev)
 {
 	struct pci_dev *pdev;
 
 	linux_set_current(curthread);
 	pdev = device_get_softc(dev);
 	if (pdev->pdrv->shutdown == NULL)
 		return (0);
 
 	pdev->pdrv->shutdown(pdev);
 	return (0);
 }
 
 /*
  * Common part of driver registration: put pdrv on pci_drivers, fill in
  * its embedded newbus driver and add that driver to devclass dc.
  *
  * Returns the negated devclass_add_driver() result.
  */
 static int
 _linux_pci_register_driver(struct pci_driver *pdrv, devclass_t dc)
 {
 	int error;
 
 	linux_set_current(curthread);
 	spin_lock(&pci_lock);
 	list_add(&pdrv->links, &pci_drivers);
 	spin_unlock(&pci_lock);
 	/* The softc of every attached device is a struct pci_dev. */
 	pdrv->bsddriver.name = pdrv->name;
 	pdrv->bsddriver.methods = pci_methods;
 	pdrv->bsddriver.size = sizeof(struct pci_dev);
 
 	/* devclass_add_driver() is called with Giant held. */
 	mtx_lock(&Giant);
 	error = devclass_add_driver(dc, &pdrv->bsddriver,
 	    BUS_PASS_DEFAULT, &pdrv->bsdclass);
 	mtx_unlock(&Giant);
 	return (-error);
 }
 
 /*
  * Register a non-DRM Linux PCI driver on the existing "pci" devclass.
  * Returns -ENXIO when that devclass does not exist, otherwise the result
  * of _linux_pci_register_driver().
  */
 int
 linux_pci_register_driver(struct pci_driver *pdrv)
 {
 	devclass_t pci_dc;
 
 	pci_dc = devclass_find("pci");
 	if (pci_dc != NULL) {
 		pdrv->isdrm = false;
 		return (_linux_pci_register_driver(pdrv, pci_dc));
 	}
 	return (-ENXIO);
 }
 
 /*
  * Register a DRM Linux PCI driver on the "vgapci" devclass, creating the
  * devclass if needed.  The driver is marked as DRM and renamed "drmn".
  * Returns -ENXIO when the devclass is unavailable, otherwise the result
  * of _linux_pci_register_driver().
  */
 int
 linux_pci_register_drm_driver(struct pci_driver *pdrv)
 {
 	devclass_t vga_dc;
 
 	vga_dc = devclass_create("vgapci");
 	if (vga_dc != NULL) {
 		pdrv->isdrm = true;
 		pdrv->name = "drmn";
 		return (_linux_pci_register_driver(pdrv, vga_dc));
 	}
 	return (-ENXIO);
 }
 
 void
 linux_pci_unregister_driver(struct pci_driver *pdrv)
 {
 	devclass_t bus;
 
 	bus = devclass_find("pci");
 
 	spin_lock(&pci_lock);
 	list_del(&pdrv->links);
 	spin_unlock(&pci_lock);
 	mtx_lock(&Giant);
 	if (bus != NULL)
 		devclass_delete_driver(bus, &pdrv->bsddriver);
 	mtx_unlock(&Giant);
 }
 
 /*
  * Unregister a DRM Linux PCI driver: remove it from pci_drivers and, if
  * the "vgapci" devclass exists, delete its newbus driver from it.
  */
 void
 linux_pci_unregister_drm_driver(struct pci_driver *pdrv)
 {
 	devclass_t vga_dc;
 
 	vga_dc = devclass_find("vgapci");
 
 	/* Drop the driver from the LinuxKPI list first. */
 	spin_lock(&pci_lock);
 	list_del(&pdrv->links);
 	spin_unlock(&pci_lock);
 
 	/* The newbus driver is deleted with Giant held. */
 	mtx_lock(&Giant);
 	if (vga_dc != NULL)
 		devclass_delete_driver(vga_dc, &pdrv->bsddriver);
 	mtx_unlock(&Giant);
 }
 
 CTASSERT(sizeof(dma_addr_t) <= sizeof(uint64_t));
 
 /*
  * Tracking record for one mapping made by linux_dma_map_phys().  Records
  * are stored in the per-device pctrie keyed by dma_addr so that
  * linux_dma_unmap() can find the bus_dma map to release.
  */
 struct linux_dma_obj {
 	void		*vaddr;
 	uint64_t	dma_addr;	/* pctrie key: address handed to the caller */
 	bus_dmamap_t	dmamap;		/* map loaded for this mapping */
 };
 
 /* UMA zones backing the pctrie nodes and the linux_dma_obj records. */
 static uma_zone_t linux_dma_trie_zone;
 static uma_zone_t linux_dma_obj_zone;
 
 /*
  * Create the two UMA zones used by the DMA mapping code.  Registered to
  * run at SI_SUB_DRIVERS.
  */
 static void
 linux_dma_init(void *arg)
 {
 
 	linux_dma_trie_zone = uma_zcreate("linux_dma_pctrie",
 	    pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL,
 	    UMA_ALIGN_PTR, 0);
 	linux_dma_obj_zone = uma_zcreate("linux_dma_object",
 	    sizeof(struct linux_dma_obj), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 }
 SYSINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_init, NULL);
 
 /*
  * Destroy the UMA zones created by linux_dma_init(), in the reverse
  * order of their creation.
  */
 static void
 linux_dma_uninit(void *arg)
 {
 
 	uma_zdestroy(linux_dma_obj_zone);
 	uma_zdestroy(linux_dma_trie_zone);
 }
 SYSUNINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_uninit, NULL);
 
 /* pctrie node allocator: nodes come from linux_dma_trie_zone. */
 static void *
 linux_dma_trie_alloc(struct pctrie *ptree)
 {
 	void *node;
 
 	node = uma_zalloc(linux_dma_trie_zone, 0);
 	return (node);
 }
 
 static void
 linux_dma_trie_free(struct pctrie *ptree, void *node)
 {
 
 	uma_zfree(linux_dma_trie_zone, node);
 }
 
 
 PCTRIE_DEFINE(LINUX_DMA, linux_dma_obj, dma_addr, linux_dma_trie_alloc,
     linux_dma_trie_free);
 
 /*
  * Allocate size bytes of physically contiguous kernel memory for dev and
  * map it for DMA.  The upper physical bound is the device's DMA mask if
  * one is set, else 32 bits when GFP_DMA32 is requested, else unlimited.
  * The allocation is aligned to PAGE_SIZE << get_order(size).
  *
  * Returns the kernel address and stores the DMA address in *dma_handle;
  * on any failure returns NULL with *dma_handle set to 0.
  */
 void *
 linux_dma_alloc_coherent(struct device *dev, size_t size,
     dma_addr_t *dma_handle, gfp_t flag)
 {
 	struct linux_dma_priv *priv;
 	vm_paddr_t high;
 	size_t align;
 	void *mem;
 
 	if (dev == NULL || dev->dma_priv == NULL) {
 		*dma_handle = 0;
 		return (NULL);
 	}
 	priv = dev->dma_priv;
 
 	high = BUS_SPACE_MAXADDR;
 	if (priv->dma_mask)
 		high = priv->dma_mask;
 	else if (flag & GFP_DMA32)
 		high = BUS_SPACE_MAXADDR_32BIT;
 
 	align = PAGE_SIZE << get_order(size);
 	mem = (void *)kmem_alloc_contig(size, flag, 0, high, align, 0,
 	    VM_MEMATTR_DEFAULT);
 	if (mem == NULL) {
 		*dma_handle = 0;
 		return (NULL);
 	}
 
 	*dma_handle = linux_dma_map_phys(dev, vtophys(mem), size);
 	if (*dma_handle != 0)
 		return (mem);
 
 	/* Mapping failed: give the memory back. */
 	kmem_free((vm_offset_t)mem, size);
 	return (NULL);
 }
 
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
 /*
  * Map the physical range [phys, phys + len) for DMA by dev.
  *
  * Returns the DMA address of the mapping, or 0 on failure.  A mapping
  * that is not 1:1 is recorded in the device's pctrie so that
  * linux_dma_unmap() can release it.
  */
 dma_addr_t
 linux_dma_map_phys(struct device *dev, vm_paddr_t phys, size_t len)
 {
 	struct linux_dma_priv *priv;
 	struct linux_dma_obj *obj;
 	int error, nseg;
 	bus_dma_segment_t seg;
 
 	priv = dev->dma_priv;
 
+	/*
+	 * If the resultant mapping will be entirely 1:1 with the
+	 * physical address, short-circuit the remainder of the
+	 * bus_dma API.  This avoids tracking collisions in the pctrie
+	 * with the additional benefit of reducing overhead.
+	 */
+	if (bus_dma_id_mapped(priv->dmat, phys, len))
+		return (phys);
+
 	/*
 	 * Do not sleep for memory here; an allocation failure is reported
 	 * like every other failure in this function, as a zero DMA address.
 	 */
 	obj = uma_zalloc(linux_dma_obj_zone, M_NOWAIT);
 	if (obj == NULL)
 		return (0);
 
 	DMA_PRIV_LOCK(priv);
 	if (bus_dmamap_create(priv->dmat, 0, &obj->dmamap) != 0) {
 		DMA_PRIV_UNLOCK(priv);
 		uma_zfree(linux_dma_obj_zone, obj);
 		return (0);
 	}
 
 	/* nseg is the index of the last segment filled in; -1 means none. */
 	nseg = -1;
 	if (_bus_dmamap_load_phys(priv->dmat, obj->dmamap, phys, len,
 	    BUS_DMA_NOWAIT, &seg, &nseg) != 0) {
 		bus_dmamap_destroy(priv->dmat, obj->dmamap);
 		DMA_PRIV_UNLOCK(priv);
 		uma_zfree(linux_dma_obj_zone, obj);
 		return (0);
 	}
 
 	/* The assertion must not modify nseg; it may be compiled out. */
 	KASSERT(nseg == 0, ("More than one segment (nseg=%d)", nseg + 1));
 	obj->dma_addr = seg.ds_addr;
 
 	error = LINUX_DMA_PCTRIE_INSERT(&priv->ptree, obj);
 	if (error != 0) {
 		bus_dmamap_unload(priv->dmat, obj->dmamap);
 		bus_dmamap_destroy(priv->dmat, obj->dmamap);
 		DMA_PRIV_UNLOCK(priv);
 		uma_zfree(linux_dma_obj_zone, obj);
 		return (0);
 	}
 	DMA_PRIV_UNLOCK(priv);
 	return (obj->dma_addr);
 }
+#else
+/* On the remaining architectures the physical address is used as-is. */
+dma_addr_t
+linux_dma_map_phys(struct device *dev, vm_paddr_t phys, size_t len)
+{
+	return (phys);
+}
+#endif
 
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
 /*
  * Undo a mapping made by linux_dma_map_phys().  Addresses that have no
  * record in the device's pctrie are ignored.
  */
 void
 linux_dma_unmap(struct device *dev, dma_addr_t dma_addr, size_t len)
 {
 	struct linux_dma_priv *priv;
 	struct linux_dma_obj *rec;
 
 	priv = dev->dma_priv;
 
+	/* Nothing is tracked for this device, so there is nothing to undo. */
+	if (pctrie_is_empty(&priv->ptree))
+		return;
+
 	DMA_PRIV_LOCK(priv);
 	rec = LINUX_DMA_PCTRIE_LOOKUP(&priv->ptree, dma_addr);
 	if (rec != NULL) {
 		LINUX_DMA_PCTRIE_REMOVE(&priv->ptree, dma_addr);
 		bus_dmamap_unload(priv->dmat, rec->dmamap);
 		bus_dmamap_destroy(priv->dmat, rec->dmamap);
 	}
 	DMA_PRIV_UNLOCK(priv);
 
 	/* The record itself is freed after the lock has been dropped. */
 	if (rec != NULL)
 		uma_zfree(linux_dma_obj_zone, rec);
 }
+#else
+void
+linux_dma_unmap(struct device *dev, dma_addr_t dma_addr, size_t len)
+{
+}
+#endif
 
 /*
  * Map every entry of a scatter/gather list for DMA by dev.
  *
  * One bus_dma map is created and stored in the first entry
  * (sgl->dma_map).  Each entry's physical range is then loaded into that
  * map and the resulting bus address is written back through
  * sg_dma_address().
  *
  * Returns nents on success, or 0 if the map cannot be created or any
  * entry fails to load; on a load failure the map is unloaded and
  * destroyed before returning.  The dir and attrs arguments are not used.
  */
 int
 linux_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents,
     enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct linux_dma_priv *priv;
 	struct scatterlist *sg;
 	int i, nseg;
 	bus_dma_segment_t seg;
 
 	priv = dev->dma_priv;
 
 	DMA_PRIV_LOCK(priv);
 
 	/* create common DMA map in the first S/G entry */
 	if (bus_dmamap_create(priv->dmat, 0, &sgl->dma_map) != 0) {
 		DMA_PRIV_UNLOCK(priv);
 		return (0);
 	}
 
 	/* load all S/G list entries */
 	for_each_sg(sgl, sg, nents, i) {
 		/* Restart the segment index so each entry lands in seg. */
 		nseg = -1;
 		if (_bus_dmamap_load_phys(priv->dmat, sgl->dma_map,
 		    sg_phys(sg), sg->length, BUS_DMA_NOWAIT,
 		    &seg, &nseg) != 0) {
 			bus_dmamap_unload(priv->dmat, sgl->dma_map);
 			bus_dmamap_destroy(priv->dmat, sgl->dma_map);
 			DMA_PRIV_UNLOCK(priv);
 			return (0);
 		}
 		KASSERT(nseg == 0,
 		    ("More than one segment (nseg=%d)", nseg + 1));
 
 		sg_dma_address(sg) = seg.ds_addr;
 	}
 	DMA_PRIV_UNLOCK(priv);
 
 	return (nents);
 }
 
 /*
  * Undo linux_dma_map_sg_attrs(): unload and destroy the bus_dma map
  * stored in the first scatter/gather entry (sgl->dma_map), under the
  * per-device DMA lock.  The nents, dir and attrs arguments are not used.
  */
 void
 linux_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
     int nents, enum dma_data_direction dir, struct dma_attrs *attrs)
 {
 	struct linux_dma_priv *priv;
 
 	priv = dev->dma_priv;
 
 	DMA_PRIV_LOCK(priv);
 	bus_dmamap_unload(priv->dmat, sgl->dma_map);
 	bus_dmamap_destroy(priv->dmat, sgl->dma_map);
 	DMA_PRIV_UNLOCK(priv);
 }
 
 /*
  * State of one DMA pool created by linux_dma_pool_create():
  *
  *	pool_device	device the pool was created for
  *	pool_zone	UMA cache zone that hands out pool objects
  *	pool_lock	mutex taken around map loads/unloads and pctrie updates
  *	pool_dmat	bus_dma tag used for the pool's memory and maps
  *	pool_entry_size	size passed at creation; length of each load
  *	pool_ptree	pctrie of objects currently handed out, looked up
  *			by bus address in linux_dma_pool_free()
  */
 struct dma_pool {
 	struct device  *pool_device;
 	uma_zone_t	pool_zone;
 	struct mtx	pool_lock;
 	bus_dma_tag_t	pool_dmat;
 	size_t		pool_entry_size;
 	struct pctrie	pool_ptree;
 };
 
 /* Acquire and release the pool mutex. */
 #define	DMA_POOL_LOCK(pool) mtx_lock(&(pool)->pool_lock)
 #define	DMA_POOL_UNLOCK(pool) mtx_unlock(&(pool)->pool_lock)
 
 /*
  * UMA constructor for pool objects (registered in linux_dma_pool_create()).
  *
  * mem is the struct linux_dma_obj being handed out and arg is the owning
  * struct dma_pool.  The physical address of obj->vaddr is loaded into
  * obj->dmamap under the pool lock and the resulting bus address is stored
  * in obj->dma_addr.  The size and flags arguments are not used.
  *
  * Returns 0 on success or the error from _bus_dmamap_load_phys().
  */
 static inline int
 dma_pool_obj_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct linux_dma_obj *obj = mem;
 	struct dma_pool *pool = arg;
 	int error, nseg;
 	bus_dma_segment_t seg;
 
 	/* nseg is the index of the last segment used; -1 means none yet. */
 	nseg = -1;
 	DMA_POOL_LOCK(pool);
 	error = _bus_dmamap_load_phys(pool->pool_dmat, obj->dmamap,
 	    vtophys(obj->vaddr), pool->pool_entry_size, BUS_DMA_NOWAIT,
 	    &seg, &nseg);
 	DMA_POOL_UNLOCK(pool);
 	if (error != 0) {
 		return (error);
 	}
 	/* Keep the assertion free of side effects: it vanishes without INVARIANTS. */
 	KASSERT(nseg == 0, ("More than one segment (nseg=%d)", nseg + 1));
 	obj->dma_addr = seg.ds_addr;
 
 	return (0);
 }
 
 /*
  * UMA destructor for pool objects (registered in linux_dma_pool_create()).
  *
  * mem is the struct linux_dma_obj being returned and arg is the owning
  * struct dma_pool.  Unloads obj->dmamap under the pool lock, undoing the
  * load done by dma_pool_obj_ctor().  The size argument is not used.
  */
 static void
 dma_pool_obj_dtor(void *mem, int size, void *arg)
 {
 	struct linux_dma_obj *obj = mem;
 	struct dma_pool *pool = arg;
 
 	DMA_POOL_LOCK(pool);
 	bus_dmamap_unload(pool->pool_dmat, obj->dmamap);
 	DMA_POOL_UNLOCK(pool);
 }
 
 /*
  * UMA import callback for the pool zone (registered in
  * linux_dma_pool_create()).
  *
  * Fills store[] with up to count freshly allocated objects.  Each one is
  * a struct linux_dma_obj taken from linux_dma_obj_zone with DMA memory
  * obtained from bus_dmamem_alloc() on the pool's tag.  arg is the owning
  * struct dma_pool; flags is passed through to uma_zalloc().
  *
  * Allocation stops at the first failure.  Returns the number of objects
  * actually placed in store[], which may be less than count.
  */
 static int
 dma_pool_obj_import(void *arg, void **store, int count, int domain __unused,
     int flags)
 {
 	struct dma_pool *pool = arg;
 	struct linux_dma_obj *obj;
 	int error, i;
 
 	for (i = 0; i < count; i++) {
 		obj = uma_zalloc(linux_dma_obj_zone, flags);
 		if (obj == NULL)
 			break;
 
 		error = bus_dmamem_alloc(pool->pool_dmat, &obj->vaddr,
 		    BUS_DMA_NOWAIT, &obj->dmamap);
 		if (error != 0) {
 			/* Do not leak the tracking object on failure. */
 			uma_zfree(linux_dma_obj_zone, obj);
 			break;
 		}
 
 		store[i] = obj;
 	}
 
 	return (i);
 }
 
 /*
  * UMA release callback for the pool zone (registered in
  * linux_dma_pool_create()).
  *
  * For each of the count objects in store[], frees the DMA memory obtained
  * in dma_pool_obj_import() and returns the struct linux_dma_obj to
  * linux_dma_obj_zone.  arg is the owning struct dma_pool.
  */
 static void
 dma_pool_obj_release(void *arg, void **store, int count)
 {
 	struct dma_pool *pool = arg;
 	struct linux_dma_obj *obj;
 	int i;
 
 	for (i = 0; i < count; i++) {
 		obj = store[i];
 		bus_dmamem_free(pool->pool_dmat, obj->vaddr, obj->dmamap);
 		uma_zfree(linux_dma_obj_zone, obj);
 	}
 }
 
 /*
  * Create a DMA pool for dev.
  *
  *	name		name given to the backing UMA cache zone
  *	dev		device the pool belongs to
  *	size		size of each pool entry; also the tag's maxsize and
  *			maxsegsz
  *	align		alignment passed to bus_dma_tag_create()
  *	boundary	boundary passed to bus_dma_tag_create()
  *
  * A single-segment bus_dma tag is derived from the device's parent tag
  * with priv->dma_mask as lowaddr, and a UMA cache zone is set up with the
  * dma_pool_obj_* callbacks and the pool as their argument.
  *
  * Returns the new pool, or NULL if the tag cannot be created.
  */
 struct dma_pool *
 linux_dma_pool_create(char *name, struct device *dev, size_t size,
     size_t align, size_t boundary)
 {
 	struct linux_dma_priv *priv;
 	struct dma_pool *pool;
 
 	priv = dev->dma_priv;
 
 	/*
 	 * NOTE(review): the kzalloc() result is used unchecked; confirm
 	 * that a GFP_KERNEL allocation cannot fail here.
 	 */
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	pool->pool_device = dev;
 	pool->pool_entry_size = size;
 
 	if (bus_dma_tag_create(bus_get_dma_tag(dev->bsddev),
 	    align, boundary,		/* alignment, boundary */
 	    priv->dma_mask,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filtfunc, filtfuncarg */
 	    size,			/* maxsize */
 	    1,				/* nsegments */
 	    size,			/* maxsegsz */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
 	    &pool->pool_dmat)) {
 		kfree(pool);
 		return (NULL);
 	}
 
 	/* ctor, dtor, import and release all receive the pool as arg. */
 	pool->pool_zone = uma_zcache_create(name, -1, dma_pool_obj_ctor,
 	    dma_pool_obj_dtor, NULL, NULL, dma_pool_obj_import,
 	    dma_pool_obj_release, pool, 0);
 
 	mtx_init(&pool->pool_lock, "lkpi-dma-pool", NULL, MTX_DEF);
 	pctrie_init(&pool->pool_ptree);
 
 	return (pool);
 }
 
 /*
  * Destroy a pool created by linux_dma_pool_create(): tear down the UMA
  * zone, the bus_dma tag and the mutex, then free the pool structure.
  *
  * NOTE(review): pool_ptree is not examined here; presumably the caller
  * has already freed every object it allocated -- confirm.
  */
 void
 linux_dma_pool_destroy(struct dma_pool *pool)
 {
 
 	uma_zdestroy(pool->pool_zone);
 	bus_dma_tag_destroy(pool->pool_dmat);
 	mtx_destroy(&pool->pool_lock);
 	kfree(pool);
 }
 
 /*
  * Allocate one entry from the pool.
  *
  * An object is taken from the pool zone (mem_flags is passed to UMA and
  * the pool is passed as the callback argument) and inserted into
  * pool_ptree so that linux_dma_pool_free() can find it again.
  *
  * On success the bus address is stored in *handle and the kernel virtual
  * address of the entry is returned.  Returns NULL, leaving *handle
  * untouched, if the zone allocation or the pctrie insertion fails; in the
  * latter case the object is given back to the zone first.
  */
 void *
 linux_dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
     dma_addr_t *handle)
 {
 	struct linux_dma_obj *obj;
 
 	obj = uma_zalloc_arg(pool->pool_zone, pool, mem_flags);
 	if (obj == NULL)
 		return (NULL);
 
 	DMA_POOL_LOCK(pool);
 	if (LINUX_DMA_PCTRIE_INSERT(&pool->pool_ptree, obj) != 0) {
 		DMA_POOL_UNLOCK(pool);
 		uma_zfree_arg(pool->pool_zone, obj, pool);
 		return (NULL);
 	}
 	DMA_POOL_UNLOCK(pool);
 
 	*handle = obj->dma_addr;
 	return (obj->vaddr);
 }
 
 /*
  * Return an entry obtained from linux_dma_pool_alloc() to the pool.
  *
  * The object is looked up in pool_ptree by dma_addr; if no such object
  * is tracked the call silently does nothing.  Otherwise it is removed
  * from the trie under the pool lock and then handed back to the pool
  * zone.  The vaddr argument is not used.
  */
 void
 linux_dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma_addr)
 {
 	struct linux_dma_obj *obj;
 
 	DMA_POOL_LOCK(pool);
 	obj = LINUX_DMA_PCTRIE_LOOKUP(&pool->pool_ptree, dma_addr);
 	if (obj == NULL) {
 		DMA_POOL_UNLOCK(pool);
 		return;
 	}
 	LINUX_DMA_PCTRIE_REMOVE(&pool->pool_ptree, dma_addr);
 	DMA_POOL_UNLOCK(pool);
 
 	/* Freed outside the lock; the zone is given the pool as argument. */
 	uma_zfree_arg(pool->pool_zone, obj, pool);
 }
Index: head/sys/sys/bus_dma.h
===================================================================
--- head/sys/sys/bus_dma.h	(revision 347835)
+++ head/sys/sys/bus_dma.h	(revision 347836)
@@ -1,306 +1,310 @@
 /*	$NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $	*/
 
 /*-
  * SPDX-License-Identifier: (BSD-2-Clause-NetBSD AND BSD-4-Clause)
  *
  * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1996 Charles M. Hannum.  All rights reserved.
  * Copyright (c) 1996 Christopher G. Demetriou.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christopher G. Demetriou
  *	for the NetBSD Project.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /* $FreeBSD$ */
 
 #ifndef _BUS_DMA_H_
 #define _BUS_DMA_H_
 
+#ifdef _KERNEL
 #include <sys/_bus_dma.h>
+#endif
 
 /*
  * Machine independent interface for mapping physical addresses to peripheral
  * bus 'physical' addresses, and assisting with DMA operations.
  *
  * XXX This file is always included from <machine/bus_dma.h> and should not
  *     (yet) be included directly.
  */
 
 /*
  * Flags used in various bus DMA methods.
  */
 #define	BUS_DMA_WAITOK		0x00	/* safe to sleep (pseudo-flag) */
 #define	BUS_DMA_NOWAIT		0x01	/* not safe to sleep */
 #define	BUS_DMA_ALLOCNOW	0x02	/* perform resource allocation now */
 #define	BUS_DMA_COHERENT	0x04	/* hint: map memory in a coherent way */
 #define	BUS_DMA_ZERO		0x08	/* allocate zero'ed memory */
 #define	BUS_DMA_BUS1		0x10	/* placeholders for bus functions... */
 #define	BUS_DMA_BUS2		0x20
 #define	BUS_DMA_BUS3		0x40
 #define	BUS_DMA_BUS4		0x80
 
 /*
  * The following two flags are non-standard or specific to only certain
  * architectures
  */
 #define	BUS_DMA_NOWRITE		0x100
 #define	BUS_DMA_NOCACHE		0x200
 
 /*
  * The following flag is a DMA tag hint that the page offset of the
  * loaded kernel virtual address must be preserved in the first
  * physical segment address, when the KVA is loaded into DMA.
  */
 #define	BUS_DMA_KEEP_PG_OFFSET	0x400
 
 #define	BUS_DMA_LOAD_MBUF	0x800
 
 /* Forwards needed by prototypes below. */
 union ccb;
 struct bio;
 struct mbuf;
 struct memdesc;
 struct pmap;
 struct uio;
 
 /*
  * Operations performed by bus_dmamap_sync().
  */
 #define	BUS_DMASYNC_PREREAD	1
 #define	BUS_DMASYNC_POSTREAD	2
 #define	BUS_DMASYNC_PREWRITE	4
 #define	BUS_DMASYNC_POSTWRITE	8
 
 /*
  *	bus_dma_segment_t
  *
  *	Describes a single contiguous DMA transaction.  Values
  *	are suitable for programming into DMA registers.
  */
 typedef struct bus_dma_segment {
 	bus_addr_t	ds_addr;	/* DMA address */
 	bus_size_t	ds_len;		/* length of transfer */
 } bus_dma_segment_t;
 
+#ifdef _KERNEL
 /*
  * A function that returns 1 if the address cannot be accessed by
  * a device and 0 if it can be.
  */
 typedef int bus_dma_filter_t(void *, bus_addr_t);
 
 /*
  * Generic helper function for manipulating mutexes.
  */
 void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op);
 
 /*
  * Allocate a device specific dma_tag encapsulating the constraints of
  * the parent tag in addition to other restrictions specified:
  *
  *	alignment:	Alignment for segments.
  *	boundary:	Boundary that segments cannot cross.
  *	lowaddr:	Low restricted address that cannot appear in a mapping.
  *	highaddr:	High restricted address that cannot appear in a mapping.
  *	filtfunc:	An optional function to further test if an address
  *			within the range of lowaddr and highaddr cannot appear
  *			in a mapping.
  *	filtfuncarg:	An argument that will be passed to filtfunc in addition
  *			to the address to test.
  *	maxsize:	Maximum mapping size supported by this tag.
  *	nsegments:	Number of discontinuities allowed in maps.
  *	maxsegsz:	Maximum size of a segment in the map.
  *	flags:		Bus DMA flags.
  *	lockfunc:	An optional function to handle driver-defined lock
  *			operations.
  *	lockfuncarg:	An argument that will be passed to lockfunc in addition
  *			to the lock operation.
  *	dmat:		A pointer to set to a valid dma tag should the return
  *			value of this function indicate success.
  */
 /* XXX Should probably allow specification of alignment */
 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
 		       bus_addr_t boundary, bus_addr_t lowaddr,
 		       bus_addr_t highaddr, bus_dma_filter_t *filtfunc,
 		       void *filtfuncarg, bus_size_t maxsize, int nsegments,
 		       bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
 		       void *lockfuncarg, bus_dma_tag_t *dmat);
 
 /*
  * Set the memory domain to be used for allocations.
  *
  * Automatic for PCI devices.  Must be set prior to creating maps or
  * allocating memory.
  */
 int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain);
 
 int bus_dma_tag_destroy(bus_dma_tag_t dmat);
 
 /*
  * A function that processes a successfully loaded dma map or an error
  * from a delayed load map.
  */
 typedef void bus_dmamap_callback_t(void *, bus_dma_segment_t *, int, int);
 
 /*
  * Like bus_dmamap_callback but includes map size in bytes.  This is
  * defined as a separate interface to maintain compatibility for users
  * of bus_dmamap_callback_t--at some point these interfaces should be merged.
  */
 typedef void bus_dmamap_callback2_t(void *, bus_dma_segment_t *, int, bus_size_t, int);
 
 /*
  * Map the buffer buf into bus space using the dmamap map.
  */
 int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
 		    bus_size_t buflen, bus_dmamap_callback_t *callback,
 		    void *callback_arg, int flags);
 
 /*
  * Like bus_dmamap_load but for mbufs.  Note the use of the
  * bus_dmamap_callback2_t interface.
  */
 int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
 			 struct mbuf *mbuf,
 			 bus_dmamap_callback2_t *callback, void *callback_arg,
 			 int flags);
 
 int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
 			    struct mbuf *mbuf, bus_dma_segment_t *segs,
 			    int *nsegs, int flags);
 
 /*
  * Like bus_dmamap_load but for uios.  Note the use of the
  * bus_dmamap_callback2_t interface.
  */
 int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
 			struct uio *ui,
 			bus_dmamap_callback2_t *callback, void *callback_arg,
 			int flags);
 
 /*
  * Like bus_dmamap_load but for cam control blocks.
  */
 int bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
 			bus_dmamap_callback_t *callback, void *callback_arg,
 			int flags);
 
 /*
  * Like bus_dmamap_load but for bios.
  */
 int bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
 			bus_dmamap_callback_t *callback, void *callback_arg,
 			int flags);
 
 /*
  * Loads any memory descriptor.
  */
 int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
 			struct memdesc *mem, bus_dmamap_callback_t *callback,
 			void *callback_arg, int flags);
 
 /*
  * Placeholder for use by busdma implementations which do not benefit
  * from optimized procedure to load an array of vm_page_t.  Falls back
  * to do _bus_dmamap_load_phys() in loop.
  */
 int bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp);
 
 #ifdef WANT_INLINE_DMAMAP
 #define BUS_DMAMAP_OP static inline
 #else
 #define BUS_DMAMAP_OP
 #endif
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 BUS_DMAMAP_OP int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp);
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 BUS_DMAMAP_OP int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map);
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  */
 BUS_DMAMAP_OP int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
 		     bus_dmamap_t *mapp);
 
 /*
  * Free a piece of memory and its allocated dmamap, that was allocated
  * via bus_dmamem_alloc.
  */
 BUS_DMAMAP_OP void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
 
 /*
  * Perform a synchronization operation on the given map. If the map
  * is NULL we have a fully IO-coherent system.
  */
 BUS_DMAMAP_OP void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t dmamap, bus_dmasync_op_t op);
 
 /*
  * Release the mapping held by map.
  */
 BUS_DMAMAP_OP void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t dmamap);
 
 #undef BUS_DMAMAP_OP
+#endif /* _KERNEL */
 
 #endif /* _BUS_DMA_H_ */
Index: head/sys/x86/include/bus_dma.h
===================================================================
--- head/sys/x86/include/bus_dma.h	(revision 347835)
+++ head/sys/x86/include/bus_dma.h	(revision 347836)
@@ -1,183 +1,195 @@
 /*-
  * Copyright (c) 2017 Jason A. Harmening.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _X86_BUS_DMA_H_
 #define _X86_BUS_DMA_H_
 
 #define WANT_INLINE_DMAMAP
 #include <sys/bus_dma.h>
 #include <sys/_null.h>
 
 #include <x86/busdma_impl.h>
 
 /*
+ * Report whether the DMA address for the physical range starting at buf,
+ * buflen bytes long, is a 1:1 mapping of the physical address.  The
+ * answer comes from the id_mapped method of the tag's implementation.
+ */
+static inline bool
+bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	struct bus_dma_tag_common *tc;
+
+	tc = (struct bus_dma_tag_common *)dmat;
+	return (tc->impl->id_mapped(dmat, buf, buflen));
+}
+
+/*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  *
  * Dispatches to the map_create method of the tag's implementation and
  * returns its result.
  */
 static inline int
 bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->map_create(dmat, flags, mapp));
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  *
  * Dispatches to the map_destroy method of the tag's implementation and
  * returns its result.
  */
 static inline int
 bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->map_destroy(dmat, map));
 }
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints listed in the dma tag.
  * A dmamap for use with dmamap_load is also allocated.
  *
  * Dispatches to the mem_alloc method of the tag's implementation and
  * returns its result.
  */
 static inline int
 bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp));
 }
 
 /*
  * Free a piece of memory and its associated dmamap, that was allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  *
  * Dispatches to the mem_free method of the tag's implementation.
  */
 static inline void
 bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	tc->impl->mem_free(dmat, vaddr, map);
 }
 
 /*
  * Release the mapping held by map.
  *
  * A NULL map is accepted and ignored; otherwise the call is dispatched
  * to the map_unload method of the tag's implementation.
  */
 static inline void
 bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bus_dma_tag_common *tc;
 
 	if (map != NULL) {
 		tc = (struct bus_dma_tag_common *)dmat;
 		tc->impl->map_unload(dmat, map);
 	}
 }
 
 /*
  * Perform the synchronization operation op on map.
  *
  * A NULL map is accepted and ignored; otherwise the call is dispatched
  * to the map_sync method of the tag's implementation.
  */
 static inline void
 bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
 {
 	struct bus_dma_tag_common *tc;
 
 	if (map != NULL) {
 		tc = (struct bus_dma_tag_common *)dmat;
 		tc->impl->map_sync(dmat, map, op);
 	}
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  *
  * Dispatches to the load_phys method of the tag's implementation and
  * returns its result.
  */
 static inline int
 _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs,
 	    segp));
 }
 
 /*
  * Utility function to load an array of vm_page pointers.
  *
  * Dispatches to the load_ma method of the tag's implementation, passing
  * every argument through unchanged, and returns its result.
  */
 static inline int
 _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma,
     bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags,
 	    segs, segp));
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrance, and the ending segment on exit.
  *
  * Dispatches to the load_buffer method of the tag's implementation and
  * returns its result.
  */
 static inline int
 _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs,
 	    segp));
 }
 
 /*
  * Hand the memory descriptor and completion callback for map to the
  * map_waitok method of the tag's implementation.  A NULL map is accepted
  * and ignored.
  */
 static inline void
 _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 	struct bus_dma_tag_common *tc;
 
 	if (map != NULL) {
 		tc = (struct bus_dma_tag_common *)dmat;
 		tc->impl->map_waitok(dmat, map, mem, callback, callback_arg);
 	}
 }
 
 /*
  * Dispatch to the map_complete method of the tag's implementation with
  * the segment array, segment count and load error, and return the
  * segment array that method yields.
  */
 static inline bus_dma_segment_t *
 _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct bus_dma_tag_common *tc;
 
 	tc = (struct bus_dma_tag_common *)dmat;
 	return (tc->impl->map_complete(dmat, map, segs, nsegs, error));
 }
 
 #endif /* !_X86_BUS_DMA_H_ */
 
Index: head/sys/x86/include/busdma_impl.h
===================================================================
--- head/sys/x86/include/busdma_impl.h	(revision 347835)
+++ head/sys/x86/include/busdma_impl.h	(revision 347836)
@@ -1,100 +1,101 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	__X86_BUSDMA_IMPL_H
 #define	__X86_BUSDMA_IMPL_H
 
 /*
  * State shared by every x86 DMA tag.  The inline wrappers in
  * <x86/bus_dma.h> cast a bus_dma_tag_t to this type to reach impl, so
  * presumably each implementation's tag begins with this structure --
  * confirm against the implementations.
  *
  * Most fields carry the same names as the arguments of
  * bus_dma_tag_create() and common_bus_dma_tag_create().
  */
 struct bus_dma_tag_common {
 	struct bus_dma_impl *impl;	/* method table used for dispatch */
 	struct bus_dma_tag_common *parent;
 	bus_size_t	  alignment;
 	bus_addr_t	  boundary;
 	bus_addr_t	  lowaddr;
 	bus_addr_t	  highaddr;
 	bus_dma_filter_t *filter;
 	void		 *filterarg;
 	bus_size_t	  maxsize;
 	u_int		  nsegments;
 	bus_size_t	  maxsegsz;
 	int		  flags;
 	bus_dma_lock_t	 *lockfunc;
 	void		 *lockfuncarg;
 	int		  ref_count;
 	int		  domain;
 };
 
 /*
  * Method table of a busdma implementation.  A tag points at one of these
  * through bus_dma_tag_common.impl, and the inline wrappers in
  * <x86/bus_dma.h> call the like-named member (for example
  * bus_dmamap_create() calls map_create and bus_dma_id_mapped() calls
  * id_mapped).  bus_dma_bounce_impl, declared below, is one instance.
  */
 struct bus_dma_impl {
 	int (*tag_create)(bus_dma_tag_t parent,
 	    bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr,
 	    bus_addr_t highaddr, bus_dma_filter_t *filter,
 	    void *filterarg, bus_size_t maxsize, int nsegments,
 	    bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
 	    void *lockfuncarg, bus_dma_tag_t *dmat);
 	int (*tag_destroy)(bus_dma_tag_t dmat);
 	int (*tag_set_domain)(bus_dma_tag_t);
+	bool (*id_mapped)(bus_dma_tag_t, vm_paddr_t, bus_size_t);
 	int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp);
 	int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map);
 	int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags,
 	    bus_dmamap_t *mapp);
 	void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
 	int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    vm_paddr_t buf, bus_size_t buflen, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    void *buf, bus_size_t buflen, struct pmap *pmap, int flags,
 	    bus_dma_segment_t *segs, int *segp);
 	void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    struct memdesc *mem, bus_dmamap_callback_t *callback,
 	    void *callback_arg);
 	bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    bus_dma_segment_t *segs, int nsegs, int error);
 	void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map);
 	void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map,
 	    bus_dmasync_op_t op);
 };
 
 void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op);
 int bus_dma_run_filter(struct bus_dma_tag_common *dmat, vm_paddr_t paddr);
 int common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
     bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, size_t sz, void **dmat);
 
 extern struct bus_dma_impl bus_dma_bounce_impl;
 
 #endif
Index: head/sys/x86/iommu/busdma_dmar.c
===================================================================
--- head/sys/x86/iommu/busdma_dmar.c	(revision 347835)
+++ head/sys/x86/iommu/busdma_dmar.c	(revision 347836)
@@ -1,939 +1,947 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domainset.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/rman.h>
 #include <sys/taskqueue.h>
 #include <sys/tree.h>
 #include <sys/uio.h>
 #include <sys/vmem.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/include/busdma_impl.h>
 #include <x86/iommu/intel_reg.h>
 #include <x86/iommu/busdma_dmar.h>
 #include <x86/iommu/intel_dmar.h>
 
 /*
  * busdma_dmar.c, the implementation of the busdma(9) interface using
  * DMAR units from Intel VT-d.
  */
 
 /*
  * Report whether the PCI function domain:bus:slot:func is configured to
  * bypass DMAR translation.
  *
  * Two kernel environment variables are consulted.  "hw.busdma.default"
  * gives the global policy and "hw.busdma.pci<domain>.<bus>.<slot>.<func>"
  * may override it for this one function.  The value "bounce" yields true
  * and "dmar" yields false.  An unset or unrecognized per-device value
  * falls back to the global policy, and an unset or unrecognized global
  * policy is treated as false.
  */
 static bool
 dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
 {
 	static const char bounce_str[] = "bounce";
 	static const char dmar_str[] = "dmar";
 	char name[128], *val;
 	bool global_bounce, disabled;
 
 	/* Global policy: only an explicit "bounce" turns it on. */
 	global_bounce = false;
 	val = kern_getenv("hw.busdma.default");
 	if (val != NULL) {
 		global_bounce = strcmp(val, bounce_str) == 0;
 		freeenv(val);
 	}
 
 	/* Per-device override, honoured only for a recognized value. */
 	snprintf(name, sizeof(name), "hw.busdma.pci%d.%d.%d.%d",
 	    domain, bus, slot, func);
 	val = kern_getenv(name);
 	if (val == NULL)
 		return (global_bounce);
 	disabled = global_bounce;
 	if (strcmp(val, bounce_str) == 0)
 		disabled = true;
 	else if (strcmp(val, dmar_str) == 0)
 		disabled = false;
 	freeenv(val);
 	return (disabled);
 }
 
 /*
  * Given original device, find the requester ID that will be seen by
  * the DMAR unit and used for page table lookup.  PCI bridges may take
  * ownership of transactions from downstream devices, so it may not be
  * the same as the BSF of the target device.  In those cases, all
  * devices downstream of the bridge must share a single mapping
  * domain, and must collectively be assigned to use either DMAR or
  * bounce mapping.
  */
 device_t
 dmar_get_requester(device_t dev, uint16_t *rid)
 {
 	devclass_t pci_class;
 	device_t l, pci, pcib, pcip, pcibp, requester;
 	int cap_offset;
 	uint16_t pcie_flags;
 	bool bridge_is_pcie;
 
 	pci_class = devclass_find("pci");
 	l = requester = dev;
 
 	/*
 	 * Start from the device's own RID; it is rewritten below whenever
 	 * a bridge on the path is found to take over the transactions.
 	 */
 	*rid = pci_get_rid(dev);
 
 	/*
 	 * Walk the bridge hierarchy from the target device to the
 	 * host port to find the translating bridge nearest the DMAR
 	 * unit.
 	 */
 	for (;;) {
 		pci = device_get_parent(l);
 		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
 		    "for %s", device_get_name(dev), device_get_name(l)));
 		KASSERT(device_get_devclass(pci) == pci_class,
 		    ("dmar_get_requester(%s): non-pci parent %s for %s",
 		    device_get_name(dev), device_get_name(pci),
 		    device_get_name(l)));
 
 		pcib = device_get_parent(pci);
 		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
 		    "for %s", device_get_name(dev), device_get_name(pci)));
 
 		/*
 		 * The parent of our "bridge" isn't another PCI bus,
 		 * so pcib isn't a PCI->PCI bridge but rather a host
 		 * port, and the requester ID won't be translated
 		 * further.
 		 */
 		pcip = device_get_parent(pcib);
 		if (device_get_devclass(pcip) != pci_class)
 			break;
 		/*
 		 * The bridge one level further up.  It is consulted by the
 		 * buggy-bridge check below and becomes the next device to
 		 * examine after a PCIe->PCI bridge.
 		 */
 		pcibp = device_get_parent(pcip);
 
 		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
 			/*
 			 * Do not stop the loop even if the target
 			 * device is PCIe, because it is possible (but
 			 * unlikely) to have a PCI->PCIe bridge
 			 * somewhere in the hierarchy.
 			 */
 			l = pcib;
 		} else {
 			/*
 			 * Device is not PCIe, it cannot be seen as a
 			 * requester by DMAR unit.  Check whether the
 			 * bridge is PCIe.
 			 */
 			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
 			    &cap_offset) == 0;
 			requester = pcib;
 
 			/*
 			 * Check for a buggy PCIe/PCI bridge that
 			 * doesn't report the express capability.  If
 			 * the bridge above it is express but isn't a
 			 * PCI bridge, then we know pcib is actually a
 			 * PCIe/PCI bridge.
 			 */
 			if (!bridge_is_pcie && pci_find_cap(pcibp,
 			    PCIY_EXPRESS, &cap_offset) == 0) {
 				pcie_flags = pci_read_config(pcibp,
 				    cap_offset + PCIER_FLAGS, 2);
 				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
 				    PCIEM_TYPE_PCI_BRIDGE)
 					bridge_is_pcie = true;
 			}
 
 			if (bridge_is_pcie) {
 				/*
 				 * The current device is not PCIe, but
 				 * the bridge above it is.  This is a
 				 * PCIe->PCI bridge.  Assume that the
 				 * requester ID will be the secondary
 				 * bus number with slot and function
 				 * set to zero.
 				 *
 				 * XXX: Doesn't handle the case where
 				 * the bridge is PCIe->PCI-X, and the
 				 * bridge will only take ownership of
 				 * requests in some cases.  We should
 				 * provide context entries with the
 				 * same page tables for taken and
 				 * non-taken transactions.
 				 */
 				*rid = PCI_RID(pci_get_bus(l), 0, 0);
 				l = pcibp;
 			} else {
 				/*
 				 * Neither the device nor the bridge
 				 * above it are PCIe.  This is a
 				 * conventional PCI->PCI bridge, which
 				 * will use the bridge's BSF as the
 				 * requester ID.
 				 */
 				*rid = pci_get_rid(pcib);
 				l = pcib;
 			}
 		}
 	}
 	/*
 	 * requester is only reassigned in the non-PCIe branch above, so
 	 * it is still dev when every hop reported the express capability.
 	 */
 	return (requester);
 }
 
 /*
  * Obtain the DMAR context for the requester that stands in for dev.
  *
  * Returns the context when translation is to be used.  Returns NULL when
  * dmar_get_ctx_for_dev() fails, and also when the requester is configured
  * for bounce mode; in the latter case the context is still set up but is
  * not handed to the caller.
  */
 struct dmar_ctx *
 dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
 {
 	struct dmar_ctx *ctx;
 	device_t req;
 	uint16_t rid;
 	bool bounce;
 
 	req = dmar_get_requester(dev, &rid);
 
 	/*
 	 * If the user requested the IOMMU disabled for the device, we
 	 * cannot disable the DMAR, due to possibility of other
 	 * devices on the same DMAR still requiring translation.
 	 * Instead provide the identity mapping for the device
 	 * context.
 	 */
 	bounce = dmar_bus_dma_is_dev_disabled(pci_get_domain(req),
 	    pci_get_bus(req), pci_get_slot(req), pci_get_function(req));
 	ctx = dmar_get_ctx_for_dev(dmar, req, rid, bounce, rmrr);
 	if (ctx == NULL || !bounce)
 		return (ctx);
 
 	/*
 	 * Bounce mode.  The first caller marks the context disabled and
 	 * leaves its reference in place; every later caller gives its
 	 * reference back.  Either way the caller sees NULL.
 	 */
 	DMAR_LOCK(dmar);
 	if ((ctx->flags & DMAR_CTX_DISABLED) != 0) {
 		/*
 		 * NOTE(review): no DMAR_UNLOCK() on this path, so
 		 * dmar_free_ctx_locked() presumably drops the lock.
 		 */
 		dmar_free_ctx_locked(dmar, ctx);
 	} else {
 		ctx->flags |= DMAR_CTX_DISABLED;
 		DMAR_UNLOCK(dmar);
 	}
 	return (NULL);
 }
 
 /*
  * Return the DMAR-backed DMA tag for child, or NULL when child is not
  * covered by a DMAR unit, the unit has DMA translation turned off, or
  * dmar_instantiate_ctx() returns no context.  The dev argument is not used.
  */
 bus_dma_tag_t
 dmar_get_dma_tag(device_t dev, device_t child)
 {
 	struct dmar_unit *unit;
 	struct dmar_ctx *ctx;
 
 	unit = dmar_find(child, bootverbose);
 	/* Not in scope of any DMAR, or translation disabled for the unit. */
 	if (unit == NULL || !unit->dma_enabled)
 		return (NULL);
 
 	dmar_quirks_pre_use(unit);
 	dmar_instantiate_rmrr_ctxs(unit);
 
 	ctx = dmar_instantiate_ctx(unit, child, false);
 	if (ctx == NULL)
 		return (NULL);
 	return ((bus_dma_tag_t)&ctx->ctx_tag);
 }
 
 static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");
 
 static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
     struct bus_dmamap_dmar *map);
 
 /*
  * tag_create method of the DMAR implementation.
  *
  * Allocates a new tag through common_bus_dma_tag_create() and makes it
  * inherit the context and owner of the parent tag.  On success the new tag
  * is stored in *dmat and 0 is returned; on failure *dmat is NULL and the
  * error from the common helper is returned.
  *
  * NOTE(review): parent is dereferenced unconditionally on the success
  * path, so callers are expected to always pass a DMAR parent tag.
  */
 static int
 dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	struct bus_dma_tag_dmar *newtag, *oldtag;
 	int error;
 
 	*dmat = NULL;
 	/*
 	 * The trace record at "out" reads newtag even when the helper
 	 * failed.  Initialize it here so that read never sees an
 	 * indeterminate pointer if the helper bails out before storing
 	 * through its output argument.
 	 */
 	newtag = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ?
 	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
 	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
 	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
 	if (error != 0)
 		goto out;
 
 	oldtag = (struct bus_dma_tag_dmar *)parent;
 	newtag->common.impl = &bus_dma_dmar_impl;
 	newtag->ctx = oldtag->ctx;
 	newtag->owner = oldtag->owner;
 
 	*dmat = (bus_dma_tag_t)newtag;
 out:
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
 /*
  * tag_set_domain method of the DMAR implementation.  Nothing is done here:
  * dmat is not examined and 0 is always returned.
  */
 static int
 dmar_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
 {
 
 	return (0);
 }
 
 /*
  * tag_destroy method of the DMAR implementation.
  *
  * Fails with EBUSY while the tag still has maps (map_count != 0).
  * Otherwise one reference is dropped from the tag; each time a reference
  * count reaches zero that tag is freed and the walk continues with its
  * parent.  Returns 0 on success, including when dmat1 is NULL.
  */
 static int
 dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
 {
 	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
 	int error;
 
 	error = 0;
 	/* dmat_copy keeps the original pointer for the trace record. */
 	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			/* Fetch the parent before dmat may be freed. */
 			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
 			/* fetchadd returns the old value: 1 means last ref. */
 			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
 			    1) {
 				/*
 				 * This tag is the one embedded in its
 				 * context; hand the context to
 				 * dmar_free_ctx() as well.
 				 *
 				 * NOTE(review): dmat is still dereferenced
 				 * and passed to free() after this call even
 				 * though it lives inside the context --
 				 * confirm this cannot touch released memory.
 				 */
 				if (dmat == &dmat->ctx->ctx_tag)
 					dmar_free_ctx(dmat->ctx);
 				free_domain(dmat->segments, M_DMAR_DMAMAP);
 				free(dmat, M_DEVBUF);
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
+static bool
+dmar_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	/* Always returns false; dmat, buf and buflen are not examined. */
+	return (false);
+}
+
 /*
  * map_create method of the DMAR implementation.
  *
  * Allocates a zeroed map without sleeping and, the first time a map is
  * created for the tag, also the tag's segment array.  On success the map is
  * stored in *mapp, the tag's map_count is bumped and 0 is returned.  If
  * either allocation fails *mapp is set to NULL and ENOMEM is returned.
  * The flags argument is not used.
  */
 static int
 dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_dmar *dtag;
 	struct bus_dmamap_dmar *dmap;
 
 	dtag = (struct bus_dma_tag_dmar *)dmat;
 	dmap = malloc_domainset(sizeof(*dmap), M_DMAR_DMAMAP,
 	    DOMAINSET_PREF(dtag->common.domain), M_NOWAIT | M_ZERO);
 	if (dmap == NULL)
 		goto nomem;
 
 	/* The segment array is shared by all maps of the tag. */
 	if (dtag->segments == NULL) {
 		dtag->segments = malloc_domainset(
 		    sizeof(bus_dma_segment_t) * dtag->common.nsegments,
 		    M_DMAR_DMAMAP, DOMAINSET_PREF(dtag->common.domain),
 		    M_NOWAIT);
 		if (dtag->segments == NULL) {
 			free_domain(dmap, M_DMAR_DMAMAP);
 			goto nomem;
 		}
 	}
 
 	dmap->tag = dtag;
 	dmap->locked = true;
 	dmap->cansleep = false;
 	TAILQ_INIT(&dmap->map_entries);
 	dtag->map_count++;
 	*mapp = (bus_dmamap_t)dmap;
 	return (0);
 
 nomem:
 	*mapp = NULL;
 	return (ENOMEM);
 }
 
 /*
  * map_destroy method of the DMAR implementation.
  *
  * A non-NULL map is freed unless it still has entries on its list, in
  * which case EBUSY is returned and nothing changes.  The tag's map_count
  * is decremented on every successful return, including for a NULL map.
  */
 static int
 dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *dtag;
 	struct bus_dmamap_dmar *dmap;
 	struct dmar_domain *dom;
 	bool busy;
 
 	dtag = (struct bus_dma_tag_dmar *)dmat;
 	dmap = (struct bus_dmamap_dmar *)map1;
 	if (dmap != NULL) {
 		dom = dtag->ctx->domain;
 		/* The entry list is examined under the domain lock. */
 		DMAR_DOMAIN_LOCK(dom);
 		busy = !TAILQ_EMPTY(&dmap->map_entries);
 		DMAR_DOMAIN_UNLOCK(dom);
 		if (busy)
 			return (EBUSY);
 		free_domain(dmap, M_DMAR_DMAMAP);
 	}
 	dtag->map_count--;
 	return (0);
 }
 
 
 /*
  * mem_alloc method of the DMAR implementation.
  *
  * Creates a map and allocates tag->common.maxsize bytes of memory for it.
  * malloc is used for requests smaller than a page whose alignment does not
  * exceed the size and which use the default memory attribute; everything
  * else comes from kmem_alloc_attr_domainset().  The map records which
  * allocator was used so dmar_bus_dmamem_free() can match it.
  *
  * Returns 0 on success, the map-creation error, or ENOMEM when the memory
  * allocation fails (the map is destroyed again and *mapp set to NULL).
  */
 static int
 dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	struct bus_dma_tag_dmar *dtag;
 	struct bus_dmamap_dmar *dmap;
 	vm_memattr_t memattr;
 	int error, mflags;
 
 	error = dmar_bus_dmamap_create(dmat, flags, mapp);
 	if (error != 0)
 		return (error);
 	dtag = (struct bus_dma_tag_dmar *)dmat;
 	dmap = (struct bus_dmamap_dmar *)*mapp;
 
 	/* Translate the busdma flags into allocator flags. */
 	mflags = M_WAITOK;
 	if ((flags & BUS_DMA_NOWAIT) != 0)
 		mflags = M_NOWAIT;
 	if ((flags & BUS_DMA_ZERO) != 0)
 		mflags |= M_ZERO;
 	memattr = VM_MEMATTR_DEFAULT;
 	if ((flags & BUS_DMA_NOCACHE) != 0)
 		memattr = VM_MEMATTR_UNCACHEABLE;
 
 	if (memattr != VM_MEMATTR_DEFAULT ||
 	    dtag->common.maxsize >= PAGE_SIZE ||
 	    dtag->common.alignment > dtag->common.maxsize) {
 		*vaddr = (void *)kmem_alloc_attr_domainset(
 		    DOMAINSET_PREF(dtag->common.domain), dtag->common.maxsize,
 		    mflags, 0ul, BUS_SPACE_MAXADDR, memattr);
 		dmap->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
 	} else {
 		*vaddr = malloc_domainset(dtag->common.maxsize, M_DEVBUF,
 		    DOMAINSET_PREF(dtag->common.domain), mflags);
 		dmap->flags |= BUS_DMAMAP_DMAR_MALLOC;
 	}
 	if (*vaddr != NULL)
 		return (0);
 
 	dmar_bus_dmamap_destroy(dmat, *mapp);
 	*mapp = NULL;
 	return (ENOMEM);
 }
 
 /*
  * mem_free method of the DMAR implementation.
  *
  * Returns vaddr to whichever allocator the map's flags say produced it,
  * clears that flag, and then destroys the map.
  */
 static void
 dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *dtag;
 	struct bus_dmamap_dmar *dmap;
 
 	dtag = (struct bus_dma_tag_dmar *)dmat;
 	dmap = (struct bus_dmamap_dmar *)map1;
 
 	if ((dmap->flags & BUS_DMAMAP_DMAR_MALLOC) == 0) {
 		KASSERT((dmap->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
 		    ("dmar_bus_dmamem_free for non alloced map %p", dmap));
 		kmem_free((vm_offset_t)vaddr, dtag->common.maxsize);
 		dmap->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
 	} else {
 		free_domain(vaddr, M_DEVBUF);
 		dmap->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
 	}
 
 	dmar_bus_dmamap_destroy(dmat, map1);
 }
 
 /*
  * Map the pages in ma[] that cover buflen bytes, beginning offset bytes
  * into the first page, into the context's domain and record the resulting
  * segments in segs[] starting after index *segp.
  *
  * One dmar_gas_map() call is made per segment.  Every entry created is
  * linked onto both the map's entry list and the caller's unroll_list so
  * the caller can undo a partially completed load.  On success *segp is
  * advanced to the last segment written and 0 is returned.  On failure
  * *segp is left untouched and the error is returned: EFBIG when the tag's
  * segment limit is reached, otherwise whatever dmar_gas_map() reported.
  */
 static int
 dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
     struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
     int flags, bus_dma_segment_t *segs, int *segp,
     struct dmar_map_entries_tailq *unroll_list)
 {
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 	struct dmar_map_entry *entry;
 	dmar_gaddr_t size;
 	bus_size_t buflen1;
 	int error, idx, gas_flags, seg;
 
 	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
 	/* Fall back to the tag's own segment array. */
 	if (segs == NULL)
 		segs = tag->segments;
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	seg = *segp;
 	error = 0;
 	/* idx is the index into ma[] of the page the next segment starts in. */
 	idx = 0;
 	while (buflen > 0) {
 		seg++;
 		if (seg >= tag->common.nsegments) {
 			error = EFBIG;
 			break;
 		}
 		/* Tentative segment length, capped at maxsegsz. */
 		buflen1 = buflen > tag->common.maxsegsz ?
 		    tag->common.maxsegsz : buflen;
 		size = round_page(offset + buflen1);
 
 		/*
 		 * (Too) optimistically allow split if there is more
 		 * than one segment left.
 		 */
 		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
 		if (seg + 1 < tag->common.nsegments)
 			gas_flags |= DMAR_GM_CANSPLIT;
 
 		error = dmar_gas_map(domain, &tag->common, size, offset,
 		    DMAR_MAP_ENTRY_READ |
 		    ((flags & BUS_DMA_NOWRITE) == 0 ? DMAR_MAP_ENTRY_WRITE : 0),
 		    gas_flags, ma + idx, &entry);
 		if (error != 0)
 			break;
 		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
 			KASSERT(size >= entry->end - entry->start,
 			    ("split increased entry size %jx %jx %jx",
 			    (uintmax_t)size, (uintmax_t)entry->start,
 			    (uintmax_t)entry->end));
 			/* Shrink to what the allocator actually mapped. */
 			size = entry->end - entry->start;
 			if (buflen1 > size)
 				buflen1 = size;
 		} else {
 			KASSERT(entry->end - entry->start == size,
 			    ("no split allowed %jx %jx %jx",
 			    (uintmax_t)size, (uintmax_t)entry->start,
 			    (uintmax_t)entry->end));
 		}
 		/* The segment must fit in the entry past the page offset. */
 		if (offset + buflen1 > size)
 			buflen1 = size - offset;
 		if (buflen1 > tag->common.maxsegsz)
 			buflen1 = tag->common.maxsegsz;
 
 		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
 		    == 0,
 		    ("alignment failed: ctx %p start 0x%jx offset %x "
 		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
 		    (uintmax_t)tag->common.alignment));
 		KASSERT(entry->end <= tag->common.lowaddr ||
 		    entry->start >= tag->common.highaddr,
 		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
 		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
 		    (uintmax_t)tag->common.lowaddr,
 		    (uintmax_t)tag->common.highaddr));
 		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
 		    tag->common.boundary),
 		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
 		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
 		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
 		KASSERT(buflen1 <= tag->common.maxsegsz,
 		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
 		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
 		    (uintmax_t)entry->start, (uintmax_t)entry->end,
 		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
 
 		/* The map's entry list is modified under the domain lock. */
 		DMAR_DOMAIN_LOCK(domain);
 		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
 		entry->flags |= DMAR_MAP_ENTRY_MAP;
 		DMAR_DOMAIN_UNLOCK(domain);
 		/* Remembered so that the caller can roll back on failure. */
 		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
 
 		segs[seg].ds_addr = entry->start + offset;
 		segs[seg].ds_len = buflen1;
 
 		/* Advance to the page and in-page offset of the next segment. */
 		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
 		offset += buflen1;
 		offset &= DMAR_PAGE_MASK;
 		buflen -= buflen1;
 	}
 	if (error == 0)
 		*segp = seg;
 	return (error);
 }
 
 /*
  * Common back end of the load_ma, load_phys and load_buffer methods.
  *
  * Performs the load through dmar_bus_dmamap_load_something1().  If that
  * fails, every entry created by the failed attempt is moved to the
  * domain's unload list and the domain's unload task is enqueued.
  *
  * ENOMEM is converted to EINPROGRESS when the caller did not pass
  * BUS_DMA_NOWAIT and the map is not already running with cansleep set.
  * Whenever the result is EINPROGRESS the map is queued for a deferred
  * retry via dmar_bus_schedule_dmamap().
  */
 static int
 dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
     struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
     int flags, bus_dma_segment_t *segs, int *segp)
 {
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 	struct dmar_map_entry *entry, *entry1;
 	struct dmar_map_entries_tailq unroll_list;
 	int error;
 
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	/* Per-context counter of load attempts. */
 	atomic_add_long(&ctx->loads, 1);
 
 	TAILQ_INIT(&unroll_list);
 	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
 	    buflen, flags, segs, segp, &unroll_list);
 	if (error != 0) {
 		/*
 		 * The busdma interface does not allow us to report
 		 * partial buffer load, so unfortunately we have to
 		 * revert all work done.
 		 */
 		DMAR_DOMAIN_LOCK(domain);
 		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
 		    entry1) {
 			/*
 			 * No entries other than what we have created
 			 * during the failed run might have been
 			 * inserted there in between, since we own ctx
 			 * pglock.
 			 */
 			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
 			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
 			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
 			    dmamap_link);
 		}
 		DMAR_DOMAIN_UNLOCK(domain);
 		/* The actual unmapping happens in the unload task. */
 		taskqueue_enqueue(domain->dmar->delayed_taskqueue,
 		    &domain->unload_task);
 	}
 
 	/* A caller that may wait gets a deferred retry instead of ENOMEM. */
 	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
 	    !map->cansleep)
 		error = EINPROGRESS;
 	if (error == EINPROGRESS)
 		dmar_bus_schedule_dmamap(domain->dmar, map);
 	return (error);
 }
 
 /*
  * load_ma method of the DMAR implementation: the caller already supplies a
  * vm_page array, so the request is passed straight through to
  * dmar_bus_dmamap_load_something() with the tag and map converted to their
  * DMAR-specific types.
  */
 static int
 dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
     struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 
 	return (dmar_bus_dmamap_load_something(
 	    (struct bus_dma_tag_dmar *)dmat, (struct bus_dmamap_dmar *)map1,
 	    ma, ma_offs, tlen, flags, segs, segp));
 }
 
 /*
  * load_phys method of the DMAR implementation: load the physical range
  * [buf, buf + buflen).
  *
  * Builds a temporary array with one vm_page pointer per page of the range.
  * Pages for which PHYS_TO_VM_PAGE() yields nothing usable are represented
  * by fake pages from a second temporary array.  The arrays are handed to
  * dmar_bus_dmamap_load_something() and freed afterwards.  Returns ENOMEM
  * if either temporary array cannot be allocated, otherwise the result of
  * the load.
  */
 static int
 dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	vm_page_t *ma, fma;
 	vm_paddr_t pstart, pend, paddr;
 	int error, i, ma_cnt, mflags, offset;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	pstart = trunc_page(buf);
 	pend = round_page(buf + buflen);
 	offset = buf & PAGE_MASK;
 	ma_cnt = OFF_TO_IDX(pend - pstart);
 	/* Only sleep for memory when running from the deferred context. */
 	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
 	if (ma == NULL)
 		return (ENOMEM);
 	fma = NULL;
 	for (i = 0; i < ma_cnt; i++) {
 		/*
 		 * Widen the index before scaling it: multiplying two ints
 		 * would overflow once the range reaches 2GB.
 		 */
 		paddr = pstart + (vm_paddr_t)i * PAGE_SIZE;
 		ma[i] = PHYS_TO_VM_PAGE(paddr);
 		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
 			/*
 			 * If PHYS_TO_VM_PAGE() returned NULL or the
 			 * vm_page was not initialized we'll use a
 			 * fake page.
 			 */
 			if (fma == NULL) {
 				/* Allocated lazily, on first need only. */
 				fma = malloc(sizeof(struct vm_page) * ma_cnt,
 				    M_DEVBUF, M_ZERO | mflags);
 				if (fma == NULL) {
 					free(ma, M_DEVBUF);
 					return (ENOMEM);
 				}
 			}
 			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
 			ma[i] = &fma[i];
 		}
 	}
 	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
 	    flags, segs, segp);
 	free(fma, M_DEVBUF);
 	free(ma, M_DEVBUF);
 	return (error);
 }
 
 /*
  * load_buffer method of the DMAR implementation: load the virtual range
  * [buf, buf + buflen) of pmap.
  *
  * Builds a temporary array with one vm_page pointer per page of the range.
  * Each page is translated with pmap_kextract() for the kernel pmap or
  * pmap_extract() for any other pmap; where PHYS_TO_VM_PAGE() yields
  * nothing usable a fake page from a second temporary array is used.  The
  * arrays are handed to dmar_bus_dmamap_load_something() and freed
  * afterwards.  Returns ENOMEM if either temporary array cannot be
  * allocated, otherwise the result of the load.
  */
 static int
 dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	vm_page_t *ma, fma;
 	vm_paddr_t pstart, pend, paddr;
 	int error, i, ma_cnt, mflags, offset;
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	/*
 	 * NOTE(review): pstart and pend are declared vm_paddr_t but hold
 	 * virtual addresses in this function; only paddr is physical.
 	 */
 	pstart = trunc_page((vm_offset_t)buf);
 	pend = round_page((vm_offset_t)buf + buflen);
 	offset = (vm_offset_t)buf & PAGE_MASK;
 	ma_cnt = OFF_TO_IDX(pend - pstart);
 	/* Only sleep for memory when running from the deferred context. */
 	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
 	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
 	if (ma == NULL)
 		return (ENOMEM);
 	fma = NULL;
 	for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
 		/*
 		 * NOTE(review): the translation result is used without
 		 * checking whether the address was actually mapped.
 		 */
 		if (pmap == kernel_pmap)
 			paddr = pmap_kextract(pstart);
 		else
 			paddr = pmap_extract(pmap, pstart);
 		ma[i] = PHYS_TO_VM_PAGE(paddr);
 		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
 			/*
 			 * If PHYS_TO_VM_PAGE() returned NULL or the
 			 * vm_page was not initialized we'll use a
 			 * fake page.
 			 */
 			if (fma == NULL) {
 				fma = malloc(sizeof(struct vm_page) * ma_cnt,
 				    M_DEVBUF, M_ZERO | mflags);
 				if (fma == NULL) {
 					free(ma, M_DEVBUF);
 					return (ENOMEM);
 				}
 			}
 			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
 			ma[i] = &fma[i];
 		}
 	}
 	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
 	    flags, segs, segp);
 	free(ma, M_DEVBUF);
 	free(fma, M_DEVBUF);
 	return (error);
 }
 
 /*
  * map_waitok method of the DMAR implementation: stash a copy of the memory
  * descriptor together with the tag and the callback and its argument in the
  * map, which is everything dmar_bus_task_dmamap() needs to repeat the load
  * later.  A NULL map is ignored.
  */
 static void
 dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 	struct bus_dmamap_dmar *dmap;
 
 	dmap = (struct bus_dmamap_dmar *)map1;
 	if (dmap != NULL) {
 		dmap->mem = *mem;
 		dmap->tag = (struct bus_dma_tag_dmar *)dmat;
 		dmap->callback = callback;
 		dmap->callback_arg = callback_arg;
 	}
 }
 
 /*
  * map_complete method of the DMAR implementation.
  *
  * When the map is marked as not locked, the tag's lock function is called
  * with BUS_DMA_LOCK and the map is marked locked.  Returns the segment
  * array to report: segs if the caller supplied one, otherwise the tag's
  * own array.  The nsegs and error arguments are not used.
  */
 static bus_dma_segment_t *
 dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 	struct bus_dma_tag_dmar *dtag;
 	struct bus_dmamap_dmar *dmap;
 
 	dtag = (struct bus_dma_tag_dmar *)dmat;
 	dmap = (struct bus_dmamap_dmar *)map1;
 
 	if (!dmap->locked) {
 		KASSERT(dmap->cansleep,
 		    ("map not locked and not sleepable context %p", dmap));
 
 		/*
 		 * We are called from the delayed context.  Relock the
 		 * driver.
 		 */
 		(dtag->common.lockfunc)(dtag->common.lockfuncarg,
 		    BUS_DMA_LOCK);
 		dmap->locked = true;
 	}
 
 	return (segs != NULL ? segs : dtag->segments);
 }
 
 /*
  * The limitations of busdma KPI forces the dmar to perform the actual
  * unload, consisting of the unmapping of the map entries page tables,
  * from the delayed context on i386, since page table page mapping
  * might require a sleep to be successful.  The unfortunate
  * consequence is that the DMA requests can be served some time after
  * the bus_dmamap_unload() call returned.
  *
  * On amd64, we assume that sf allocation cannot fail.
  */
 static void
 dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	struct dmar_ctx *ctx;
 	struct dmar_domain *domain;
 #if defined(__amd64__)
 	struct dmar_map_entries_tailq entries;
 #endif
 
 	tag = (struct bus_dma_tag_dmar *)dmat;
 	map = (struct bus_dmamap_dmar *)map1;
 	ctx = tag->ctx;
 	domain = ctx->domain;
 	/* Per-context counter of unload requests. */
 	atomic_add_long(&ctx->unloads, 1);
 
 #if defined(__i386__)
 	/* Hand all entries of the map to the domain's unload task. */
 	DMAR_DOMAIN_LOCK(domain);
 	TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link);
 	DMAR_DOMAIN_UNLOCK(domain);
 	taskqueue_enqueue(domain->dmar->delayed_taskqueue,
 	    &domain->unload_task);
 #else /* defined(__amd64__) */
 	/*
 	 * Detach the entries from the map under the domain lock, then
 	 * unload them right here with sleeping forbidden.
 	 *
 	 * NOTE(review): this branch is compiled for every non-i386 target
 	 * while "entries" is declared only under __amd64__.
 	 */
 	TAILQ_INIT(&entries);
 	DMAR_DOMAIN_LOCK(domain);
 	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
 	DMAR_DOMAIN_UNLOCK(domain);
 	THREAD_NO_SLEEPING();
 	dmar_domain_unload(domain, &entries, false);
 	THREAD_SLEEPING_OK();
 	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
 #endif
 }
 
 /*
  * map_sync method of the DMAR implementation.  The body is empty: no work
  * is performed for any sync operation.
  */
 static void
 dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 }
 
 /*
  * Method table of the DMAR busdma implementation.  Tags created by
  * dmar_bus_dma_tag_create() point their common.impl member at this table.
  */
 struct bus_dma_impl bus_dma_dmar_impl = {
 	.tag_create = dmar_bus_dma_tag_create,
 	.tag_destroy = dmar_bus_dma_tag_destroy,
 	.tag_set_domain = dmar_bus_dma_tag_set_domain,
+	.id_mapped = dmar_bus_dma_id_mapped,
 	.map_create = dmar_bus_dmamap_create,
 	.map_destroy = dmar_bus_dmamap_destroy,
 	.mem_alloc = dmar_bus_dmamem_alloc,
 	.mem_free = dmar_bus_dmamem_free,
 	.load_phys = dmar_bus_dmamap_load_phys,
 	.load_buffer = dmar_bus_dmamap_load_buffer,
 	.load_ma = dmar_bus_dmamap_load_ma,
 	.map_waitok = dmar_bus_dmamap_waitok,
 	.map_complete = dmar_bus_dmamap_complete,
 	.map_unload = dmar_bus_dmamap_unload,
 	.map_sync = dmar_bus_dmamap_sync,
 };
 
 /*
  * Taskqueue handler that retries the loads queued on a unit's
  * delayed_maps list.
  *
  * arg is the struct dmar_unit; pending is not used.  Each map is taken off
  * the list under the unit lock, then the load is repeated with the unit
  * lock dropped and the map marked as sleepable and not locked.  If the
  * completion path took the driver lock (map->locked became true) it is
  * released here; otherwise the map is simply put back into its default
  * "locked" state.
  */
 static void
 dmar_bus_task_dmamap(void *arg, int pending)
 {
 	struct bus_dma_tag_dmar *tag;
 	struct bus_dmamap_dmar *map;
 	struct dmar_unit *unit;
 
 	unit = arg;
 	DMAR_LOCK(unit);
 	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
 		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
 		DMAR_UNLOCK(unit);
 		tag = map->tag;
 		map->cansleep = true;
 		map->locked = false;
 		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
 		    &map->mem, map->callback, map->callback_arg,
 		    BUS_DMA_WAITOK);
 		/* Back to the default non-sleepable state (stored once). */
 		map->cansleep = false;
 		if (map->locked) {
 			(tag->common.lockfunc)(tag->common.lockfuncarg,
 			    BUS_DMA_UNLOCK);
 		} else
 			map->locked = true;
 		DMAR_LOCK(unit);
 	}
 	DMAR_UNLOCK(unit);
 }
 
 /*
  * Queue map on the unit's delayed_maps list and enqueue the unit's
  * dmamap_load_task so the load is retried from the taskqueue.
  * map->locked is cleared first; dmar_bus_dmamap_complete() tests it to
  * decide whether it has to call the tag's lock function itself.
  */
 static void
 dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
 {
 
 	map->locked = false;
 	/* The delayed_maps list is modified under the unit lock. */
 	DMAR_LOCK(unit);
 	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
 	DMAR_UNLOCK(unit);
 	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
 }
 
 /*
  * Set up the busdma state of a DMAR unit.  dma_enabled defaults to 1 and
  * may be overridden by the "hw.dmar.dma" tunable.  The delayed-map list,
  * the deferred-load task and the unit's taskqueue (started with one
  * thread) are initialized.  Always returns 0.
  */
 int
 dmar_init_busdma(struct dmar_unit *unit)
 {
 
 	unit->dma_enabled = 1;
 	TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
 	TAILQ_INIT(&unit->delayed_maps);
 	/* dmar_bus_task_dmamap() receives the unit as its argument. */
 	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
 	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
 	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
 	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
 	    "dmar%d busdma taskq", unit->unit);
 	return (0);
 }
 
 /*
  * Tear down the busdma state of a DMAR unit: drain the deferred-load task,
  * free the unit's taskqueue and clear the pointer.  Does nothing when the
  * taskqueue pointer is already NULL.
  */
 void
 dmar_fini_busdma(struct dmar_unit *unit)
 {
 
 	if (unit->delayed_taskqueue != NULL) {
 		taskqueue_drain(unit->delayed_taskqueue,
 		    &unit->dmamap_load_task);
 		taskqueue_free(unit->delayed_taskqueue);
 		unit->delayed_taskqueue = NULL;
 	}
 }
Index: head/sys/x86/x86/busdma_bounce.c
===================================================================
--- head/sys/x86/x86/busdma_bounce.c	(revision 347835)
+++ head/sys/x86/x86/busdma_bounce.c	(revision 347836)
@@ -1,1319 +1,1347 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domainset.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/memdesc.h>
 #include <sys/mutex.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/include/busdma_impl.h>
 
 #ifdef __i386__
 #define MAX_BPAGES (Maxmem > atop(0x100000000ULL) ? 8192 : 512)
 #else
 #define MAX_BPAGES 8192
 #endif
 
 enum {
 	BUS_DMA_COULD_BOUNCE	= 0x01,
 	BUS_DMA_MIN_ALLOC_COMP	= 0x02,
 	BUS_DMA_KMEM_ALLOC	= 0x04,
 };
 
 struct bounce_zone;
 
 struct bus_dma_tag {
 	struct bus_dma_tag_common common;
 	int			map_count;
 	int			bounce_flags;
 	bus_dma_segment_t	*segments;
 	struct bounce_zone	*bounce_zone;
 };
 
 struct bounce_page {
 	vm_offset_t	vaddr;		/* kva of bounce buffer */
 	bus_addr_t	busaddr;	/* Physical address */
 	vm_offset_t	datavaddr;	/* kva of client data */
 	vm_offset_t	dataoffs;	/* page offset of client data */
 	vm_page_t	datapage[2];	/* physical page(s) of client data */
 	bus_size_t	datacount;	/* client data count */
 	STAILQ_ENTRY(bounce_page) links;
 };
 
 int busdma_swi_pending;
 
 struct bounce_zone {
 	STAILQ_ENTRY(bounce_zone) links;
 	STAILQ_HEAD(bp_list, bounce_page) bounce_page_list;
 	int		total_bpages;
 	int		free_bpages;
 	int		reserved_bpages;
 	int		active_bpages;
 	int		total_bounced;
 	int		total_deferred;
 	int		map_count;
 	int		domain;
 	bus_size_t	alignment;
 	bus_addr_t	lowaddr;
 	char		zoneid[8];
 	char		lowaddrid[20];
 	struct sysctl_ctx_list sysctl_tree;
 	struct sysctl_oid *sysctl_tree_top;
 };
 
 static struct mtx bounce_lock;
 static int total_bpages;
 static int busdma_zonecount;
 static STAILQ_HEAD(, bounce_zone) bounce_zone_list;
 
 static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters");
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
 	   "Total bounce pages");
 
 struct bus_dmamap {
 	struct bp_list	       bpages;
 	int		       pagesneeded;
 	int		       pagesreserved;
 	bus_dma_tag_t	       dmat;
 	struct memdesc	       mem;
 	bus_dmamap_callback_t *callback;
 	void		      *callback_arg;
 	STAILQ_ENTRY(bus_dmamap) links;
 };
 
 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
 static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
 static struct bus_dmamap nobounce_dmamap;
 
 static void init_bounce_pages(void *dummy);
 static int alloc_bounce_zone(bus_dma_tag_t dmat);
 static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages);
 static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int commit);
 static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_offset_t vaddr, vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
+static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf,
+    bus_size_t buflen, int *pagesneeded);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     pmap_t pmap, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags);
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
     int flags);
 
 static int
 bounce_bus_dma_zone_setup(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *bz;
 	int error;
 
 	/* Must bounce */
 	if ((error = alloc_bounce_zone(dmat)) != 0)
 		return (error);
 	bz = dmat->bounce_zone;
 
 	if (ptoa(bz->total_bpages) < dmat->common.maxsize) {
 		int pages;
 
 		pages = atop(dmat->common.maxsize) - bz->total_bpages;
 
 		/* Add pages to our bounce pool */
 		if (alloc_bounce_pages(dmat, pages) < pages)
 			return (ENOMEM);
 	}
 	/* Performed initial allocation */
 	dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP;
 
 	return (0);
 }
 
 /*
  * Allocate a device specific dma_tag.
  */
 static int
 bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
     bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
     bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
     int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
     void *lockfuncarg, bus_dma_tag_t *dmat)
 {
 	bus_dma_tag_t newtag;
 	int error;
 
 	*dmat = NULL;
 	error = common_bus_dma_tag_create(parent != NULL ? &parent->common :
 	    NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg,
 	    maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
 	    sizeof (struct bus_dma_tag), (void **)&newtag);
 	if (error != 0)
 		return (error);
 
 	newtag->common.impl = &bus_dma_bounce_impl;
 	newtag->map_count = 0;
 	newtag->segments = NULL;
 
 	if (parent != NULL && (newtag->common.filter != NULL ||
 	    (parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0))
 		newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE;
 
 	if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
 	    newtag->common.alignment > 1)
 		newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE;
 
 	if ((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
 	    (flags & BUS_DMA_ALLOCNOW) != 0)
 		error = bounce_bus_dma_zone_setup(newtag);
 	else
 		error = 0;
 	
 	if (error != 0)
 		free(newtag, M_DEVBUF);
 	else
 		*dmat = newtag;
 	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
 	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
 	    error);
 	return (error);
 }
 
+static bool
+bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	/* True when [buf, buf + buflen) needs no bounce pages under dmat. */
+	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0)
+		return (true);	/* COULD_BOUNCE clear: loads never bounce */
+	return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL));
+}
+
 /*
  * Update the domain for the tag.  We may need to reallocate the zone and
  * bounce pages.
  */ 
 static int
 bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
 {
 
 	KASSERT(dmat->map_count == 0,
 	    ("bounce_bus_dma_tag_set_domain:  Domain set after use.\n"));
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0 ||
 	    dmat->bounce_zone == NULL)
 		return (0);
 	dmat->bounce_flags &= ~BUS_DMA_MIN_ALLOC_COMP;
 	return (bounce_bus_dma_zone_setup(dmat));
 }
 
 static int
 bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat)
 {
 	bus_dma_tag_t dmat_copy, parent;
 	int error;
 
 	error = 0;
 	dmat_copy = dmat;
 
 	if (dmat != NULL) {
 		if (dmat->map_count != 0) {
 			error = EBUSY;
 			goto out;
 		}
 		while (dmat != NULL) {
 			parent = (bus_dma_tag_t)dmat->common.parent;
 			atomic_subtract_int(&dmat->common.ref_count, 1);
 			if (dmat->common.ref_count == 0) {
 				if (dmat->segments != NULL)
 					free_domain(dmat->segments, M_DEVBUF);
 				free(dmat, M_DEVBUF);
 				/*
 				 * Last reference count, so
 				 * release our reference
 				 * count on our parent.
 				 */
 				dmat = parent;
 			} else
 				dmat = NULL;
 		}
 	}
 out:
 	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
 	return (error);
 }
 
 /*
  * Allocate a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
 {
 	struct bounce_zone *bz;
 	int error, maxpages, pages;
 
 	error = 0;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc_domainset(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT);
 		if (dmat->segments == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 
 	/*
 	 * Bouncing might be required if the driver asks for an active
 	 * exclusion region, a data alignment that is stricter than 1, and/or
 	 * an active address boundary.
 	 */
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		/* Must bounce */
 		if (dmat->bounce_zone == NULL) {
 			if ((error = alloc_bounce_zone(dmat)) != 0)
 				return (error);
 		}
 		bz = dmat->bounce_zone;
 
 		*mapp = (bus_dmamap_t)malloc_domainset(sizeof(**mapp), M_DEVBUF,
 		    DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO);
 		if (*mapp == NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, ENOMEM);
 			return (ENOMEM);
 		}
 
 		/* Initialize the new map */
 		STAILQ_INIT(&((*mapp)->bpages));
 
 		/*
 		 * Attempt to add pages to our pool on a per-instance
 		 * basis up to a sane limit.
 		 */
 		if (dmat->common.alignment > 1)
 			maxpages = MAX_BPAGES;
 		else
 			maxpages = MIN(MAX_BPAGES, Maxmem -
 			    atop(dmat->common.lowaddr));
 		if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 ||
 		    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
 			pages = MAX(atop(dmat->common.maxsize), 1);
 			pages = MIN(maxpages - bz->total_bpages, pages);
 			pages = MAX(pages, 1);
 			if (alloc_bounce_pages(dmat, pages) < pages)
 				error = ENOMEM;
 			if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP)
 			    == 0) {
 				if (error == 0) {
 					dmat->bounce_flags |=
 					    BUS_DMA_MIN_ALLOC_COMP;
 				}
 			} else
 				error = 0;
 		}
 		bz->map_count++;
 	} else {
 		*mapp = NULL;
 	}
 	if (error == 0)
 		dmat->map_count++;
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, error);
 	return (error);
 }
 
 /*
  * Destroy a handle for mapping from kva/uva/physical
  * address space into bus device space.
  */
 static int
 bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 
 	if (map != NULL && map != &nobounce_dmamap) {
 		if (STAILQ_FIRST(&map->bpages) != NULL) {
 			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
 			    __func__, dmat, EBUSY);
 			return (EBUSY);
 		}
 		if (dmat->bounce_zone)
 			dmat->bounce_zone->map_count--;
 		free_domain(map, M_DEVBUF);
 	}
 	dmat->map_count--;
 	CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
 	return (0);
 }
 
 
 /*
  * Allocate a piece of memory that can be efficiently mapped into
  * bus device space based on the constraints lited in the dma tag.
  * A dmamap to for use with dmamap_load is also allocated.
  */
 static int
 bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
     bus_dmamap_t *mapp)
 {
 	vm_memattr_t attr;
 	int mflags;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__);
 
 	if (flags & BUS_DMA_NOWAIT)
 		mflags = M_NOWAIT;
 	else
 		mflags = M_WAITOK;
 
 	/* If we succeed, no mapping/bouncing will be required */
 	*mapp = NULL;
 
 	if (dmat->segments == NULL) {
 		dmat->segments = (bus_dma_segment_t *)malloc_domainset(
 		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
 		    M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), mflags);
 		if (dmat->segments == NULL) {
 			CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 			    __func__, dmat, dmat->common.flags, ENOMEM);
 			return (ENOMEM);
 		}
 	}
 	if (flags & BUS_DMA_ZERO)
 		mflags |= M_ZERO;
 	if (flags & BUS_DMA_NOCACHE)
 		attr = VM_MEMATTR_UNCACHEABLE;
 	else
 		attr = VM_MEMATTR_DEFAULT;
 
 	/*
 	 * Allocate the buffer from the malloc(9) allocator if...
 	 *  - It's small enough to fit into a single power of two sized bucket.
 	 *  - The alignment is less than or equal to the maximum size
 	 *  - The low address requirement is fulfilled.
 	 * else allocate non-contiguous pages if...
 	 *  - The page count that could get allocated doesn't exceed
 	 *    nsegments also when the maximum segment size is less
 	 *    than PAGE_SIZE.
 	 *  - The alignment constraint isn't larger than a page boundary.
 	 *  - There are no boundary-crossing constraints.
 	 * else allocate a block of contiguous pages because one or more of the
 	 * constraints is something that only the contig allocator can fulfill.
 	 *
 	 * NOTE: The (dmat->common.alignment <= dmat->maxsize) check
 	 * below is just a quick hack. The exact alignment guarantees
 	 * of malloc(9) need to be nailed down, and the code below
 	 * should be rewritten to take that into account.
 	 *
 	 * In the meantime warn the user if malloc gets it wrong.
 	 */
 	if (dmat->common.maxsize <= PAGE_SIZE &&
 	    dmat->common.alignment <= dmat->common.maxsize &&
 	    dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) &&
 	    attr == VM_MEMATTR_DEFAULT) {
 		*vaddr = malloc_domainset(dmat->common.maxsize, M_DEVBUF,
 		    DOMAINSET_PREF(dmat->common.domain), mflags);
 	} else if (dmat->common.nsegments >=
 	    howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz,
 	    PAGE_SIZE)) &&
 	    dmat->common.alignment <= PAGE_SIZE &&
 	    (dmat->common.boundary % PAGE_SIZE) == 0) {
 		/* Page-based multi-segment allocations allowed */
 		*vaddr = (void *)kmem_alloc_attr_domainset(
 		    DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize,
 		    mflags, 0ul, dmat->common.lowaddr, attr);
 		dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC;
 	} else {
 		*vaddr = (void *)kmem_alloc_contig_domainset(
 		    DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize,
 		    mflags, 0ul, dmat->common.lowaddr,
 		    dmat->common.alignment != 0 ? dmat->common.alignment : 1ul,
 		    dmat->common.boundary, attr);
 		dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC;
 	}
 	if (*vaddr == NULL) {
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 		    __func__, dmat, dmat->common.flags, ENOMEM);
 		return (ENOMEM);
 	} else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) {
 		printf("bus_dmamem_alloc failed to align memory properly.\n");
 	}
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
 	    __func__, dmat, dmat->common.flags, 0);
 	return (0);
 }
 
 /*
  * Free a piece of memory and it's allociated dmamap, that was allocated
  * via bus_dmamem_alloc.  Make the same choice for free/contigfree.
  */
 static void
 bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
 {
 	/*
 	 * dmamem does not need to be bounced, so the map should be
 	 * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc()
 	 * was used and set if kmem_alloc_contig() was used.
 	 */
 	if (map != NULL)
 		panic("bus_dmamem_free: Invalid map freed\n");
 	if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0)
 		free_domain(vaddr, M_DEVBUF);
 	else
 		kmem_free((vm_offset_t)vaddr, dmat->common.maxsize);
 	CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat,
 	    dmat->bounce_flags);
 }
 
+static bool
+_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen,
+    int *pagesneeded)
+{
+	vm_paddr_t curaddr;
+	bus_size_t sgsize;
+	int count;
+
+	/*
+	 * Count the bounce pages needed for this transfer; returns true if
+	 * any are.  A NULL pagesneeded stops the walk at the first hit.
+	 */
+	count = 0;
+	curaddr = buf;
+	while (buflen != 0) {
+		sgsize = MIN(buflen, dmat->common.maxsegsz);
+		if (bus_dma_run_filter(&dmat->common, curaddr)) {
+			sgsize = MIN(sgsize,
+			    PAGE_SIZE - (curaddr & PAGE_MASK));
+			if (pagesneeded == NULL)
+				return (true);	/* caller only wants yes/no */
+			count++;
+		}
+		curaddr += sgsize;
+		buflen -= sgsize;
+	}
+
+	if (pagesneeded != NULL)
+		*pagesneeded = count;
+	return (count != 0);
+}
+
 static void
 _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
     bus_size_t buflen, int flags)
 {
-	vm_paddr_t curaddr;
-	bus_size_t sgsize;
 
 	if (map != &nobounce_dmamap && map->pagesneeded == 0) {
-		/*
-		 * Count the number of bounce pages
-		 * needed in order to complete this transfer
-		 */
-		curaddr = buf;
-		while (buflen != 0) {
-			sgsize = MIN(buflen, dmat->common.maxsegsz);
-			if (bus_dma_run_filter(&dmat->common, curaddr)) {
-				sgsize = MIN(sgsize,
-				    PAGE_SIZE - (curaddr & PAGE_MASK));
-				map->pagesneeded++;
-			}
-			curaddr += sgsize;
-			buflen -= sgsize;
-		}
+		_bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded);
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
     void *buf, bus_size_t buflen, int flags)
 {
 	vm_offset_t vaddr;
 	vm_offset_t vendaddr;
 	vm_paddr_t paddr;
 	bus_size_t sg_len;
 
 	if (map != &nobounce_dmamap && map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d",
 		    map, &nobounce_dmamap, map->pagesneeded);
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		vaddr = (vm_offset_t)buf;
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
 			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap == kernel_pmap)
 				paddr = pmap_kextract(vaddr);
 			else
 				paddr = pmap_extract(pmap, vaddr);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				map->pagesneeded++;
 			}
 			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static void
 _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma,
     int ma_offs, bus_size_t buflen, int flags)
 {
 	bus_size_t sg_len, max_sgsize;
 	int page_index;
 	vm_paddr_t paddr;
 
 	if (map != &nobounce_dmamap && map->pagesneeded == 0) {
 		CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, "
 		    "alignment= %d", dmat->common.lowaddr,
 		    ptoa((vm_paddr_t)Maxmem),
 		    dmat->common.boundary, dmat->common.alignment);
 		CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d",
 		    map, &nobounce_dmamap, map->pagesneeded);
 
 		/*
 		 * Count the number of bounce pages
 		 * needed in order to complete this transfer
 		 */
 		page_index = 0;
 		while (buflen > 0) {
 			paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs;
 			sg_len = PAGE_SIZE - ma_offs;
 			max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 			sg_len = MIN(sg_len, max_sgsize);
 			if (bus_dma_run_filter(&dmat->common, paddr) != 0) {
 				sg_len = roundup2(sg_len,
 				    dmat->common.alignment);
 				sg_len = MIN(sg_len, max_sgsize);
 				KASSERT((sg_len & (dmat->common.alignment - 1))
 				    == 0, ("Segment size is not aligned"));
 				map->pagesneeded++;
 			}
 			if (((ma_offs + sg_len) & ~PAGE_MASK) != 0)
 				page_index++;
 			ma_offs = (ma_offs + sg_len) & PAGE_MASK;
 			KASSERT(buflen >= sg_len,
 			    ("Segment length overruns original buffer"));
 			buflen -= sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
 }
 
 static int
 _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags)
 {
 
 	/* Reserve Necessary Bounce Pages */
 	mtx_lock(&bounce_lock);
 	if (flags & BUS_DMA_NOWAIT) {
 		if (reserve_bounce_pages(dmat, map, 0) != 0) {
 			mtx_unlock(&bounce_lock);
 			return (ENOMEM);
 		}
 	} else {
 		if (reserve_bounce_pages(dmat, map, 1) != 0) {
 			/* Queue us for resources */
 			STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links);
 			mtx_unlock(&bounce_lock);
 			return (EINPROGRESS);
 		}
 	}
 	mtx_unlock(&bounce_lock);
 
 	return (0);
 }
 
 /*
  * Add a single contiguous physical range to the segment list.
  */
 static int
 _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t curaddr,
     bus_size_t sgsize, bus_dma_segment_t *segs, int *segp)
 {
 	bus_addr_t baddr, bmask;
 	int seg;
 
 	KASSERT(curaddr <= BUS_SPACE_MAXADDR,
 	    ("ds_addr %#jx > BUS_SPACE_MAXADDR %#jx; dmat %p fl %#x low %#jx "
 	    "hi %#jx",
 	    (uintmax_t)curaddr, (uintmax_t)BUS_SPACE_MAXADDR,
 	    dmat, dmat->bounce_flags, (uintmax_t)dmat->common.lowaddr,
 	    (uintmax_t)dmat->common.highaddr));
 
 	/*
 	 * Make sure we don't cross any boundaries.
 	 */
 	bmask = ~(dmat->common.boundary - 1);
 	if (dmat->common.boundary > 0) {
 		baddr = (curaddr + dmat->common.boundary) & bmask;
 		if (sgsize > (baddr - curaddr))
 			sgsize = (baddr - curaddr);
 	}
 
 	/*
 	 * Insert chunk into a segment, coalescing with
 	 * previous segment if possible.
 	 */
 	seg = *segp;
 	if (seg == -1) {
 		seg = 0;
 		segs[seg].ds_addr = curaddr;
 		segs[seg].ds_len = sgsize;
 	} else {
 		if (curaddr == segs[seg].ds_addr + segs[seg].ds_len &&
 		    (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz &&
 		    (dmat->common.boundary == 0 ||
 		     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
 			segs[seg].ds_len += sgsize;
 		else {
 			if (++seg >= dmat->common.nsegments)
 				return (0);
 			segs[seg].ds_addr = curaddr;
 			segs[seg].ds_len = sgsize;
 		}
 	}
 	*segp = seg;
 	return (sgsize);
 }
 
 /*
  * Utility function to load a physical buffer.  segp contains
  * the starting segment on entrace, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize;
 	vm_paddr_t curaddr;
 	int error;
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	while (buflen > 0) {
 		curaddr = buf;
 		sgsize = MIN(buflen, dmat->common.maxsegsz);
 		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
 			curaddr = add_bounce_page(dmat, map, 0, curaddr, 0,
 			    sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		buf += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 /*
  * Utility function to load a linear buffer.  segp contains
  * the starting segment on entrace, and the ending segment on exit.
  */
 static int
 bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
 	bus_size_t sgsize, max_sgsize;
 	vm_paddr_t curaddr;
 	vm_offset_t kvaddr, vaddr;
 	int error;
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	vaddr = (vm_offset_t)buf;
 	while (buflen > 0) {
 		/*
 		 * Get the physical address for this segment.
 		 */
 		if (pmap == kernel_pmap) {
 			curaddr = pmap_kextract(vaddr);
 			kvaddr = vaddr;
 		} else {
 			curaddr = pmap_extract(pmap, vaddr);
 			kvaddr = 0;
 		}
 
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
 		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, curaddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0,
 			    sgsize);
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		vaddr += sgsize;
 		buflen -= sgsize;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static int
 bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags,
     bus_dma_segment_t *segs, int *segp)
 {
 	vm_paddr_t paddr, next_paddr;
 	int error, page_index;
 	bus_size_t sgsize, max_sgsize;
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * If we have to keep the offset of each page this function
 		 * is not suitable, switch back to bus_dmamap_load_ma_triv
 		 * which is going to do the right thing in this case.
 		 */
 		error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs,
 		    flags, segs, segp);
 		return (error);
 	}
 
 	if (map == NULL)
 		map = &nobounce_dmamap;
 
 	if (segs == NULL)
 		segs = dmat->segments;
 
 	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
 		_bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags);
 		if (map->pagesneeded != 0) {
 			error = _bus_dmamap_reserve_pages(dmat, map, flags);
 			if (error)
 				return (error);
 		}
 	}
 
 	page_index = 0;
 	while (buflen > 0) {
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
 		paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs;
 		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
 		sgsize = PAGE_SIZE - ma_offs;
 		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
 		    map->pagesneeded != 0 &&
 		    bus_dma_run_filter(&dmat->common, paddr)) {
 			sgsize = roundup2(sgsize, dmat->common.alignment);
 			sgsize = MIN(sgsize, max_sgsize);
 			KASSERT((sgsize & (dmat->common.alignment - 1)) == 0,
 			    ("Segment size is not aligned"));
 			/*
 			 * Check if two pages of the user provided buffer
 			 * are used.
 			 */
 			if ((ma_offs + sgsize) > PAGE_SIZE)
 				next_paddr =
 				    VM_PAGE_TO_PHYS(ma[page_index + 1]);
 			else
 				next_paddr = 0;
 			paddr = add_bounce_page(dmat, map, 0, paddr,
 			    next_paddr, sgsize);
 		} else {
 			sgsize = MIN(sgsize, max_sgsize);
 		}
 		sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs,
 		    segp);
 		if (sgsize == 0)
 			break;
 		KASSERT(buflen >= sgsize,
 		    ("Segment length overruns original buffer"));
 		buflen -= sgsize;
 		if (((ma_offs + sgsize) & ~PAGE_MASK) != 0)
 			page_index++;
 		ma_offs = (ma_offs + sgsize) & PAGE_MASK;
 	}
 
 	/*
 	 * Did we fit?
 	 */
 	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
 }
 
 static void
 bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
     struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
 {
 
 	if (map == NULL)
 		return;
 	map->mem = *mem;
 	map->dmat = dmat;
 	map->callback = callback;
 	map->callback_arg = callback_arg;
 }
 
 static bus_dma_segment_t *
 bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (segs == NULL)
 		segs = dmat->segments;
 	return (segs);
 }
 
 /*
  * Release the mapping held by map.
  */
 static void
 bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
 {
 	struct bounce_page *bpage;
 
 	if (map == NULL)
 		return;
 
 	while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		STAILQ_REMOVE_HEAD(&map->bpages, links);
 		free_bounce_page(dmat, bpage);
 	}
 }
 
 static void
 bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dmasync_op_t op)
 {
 	struct bounce_page *bpage;
 	vm_offset_t datavaddr, tempvaddr;
 	bus_size_t datacount1, datacount2;
 
 	if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL)
 		return;
 
 	/*
 	 * Handle data bouncing.  We might also want to add support for
 	 * invalidating the caches on broken hardware.
 	 */
 	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 	    "performing bounce", __func__, dmat, dmat->common.flags, op);
 
 	if ((op & BUS_DMASYNC_PREWRITE) != 0) {
 		while (bpage != NULL) {
 			tempvaddr = 0;
 			datavaddr = bpage->datavaddr;
 			datacount1 = bpage->datacount;
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage[0]);
 				datavaddr = tempvaddr | bpage->dataoffs;
 				datacount1 = min(PAGE_SIZE - bpage->dataoffs,
 				    datacount1);
 			}
 
 			bcopy((void *)datavaddr,
 			    (void *)bpage->vaddr, datacount1);
 
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 
 			if (bpage->datapage[1] == 0) {
 				KASSERT(datacount1 == bpage->datacount,
 		("Mismatch between data size and provided memory space"));
 				goto next_w;
 			}
 
 			/*
 			 * We are dealing with an unmapped buffer that expands
 			 * over two pages.
 			 */
 			datavaddr = pmap_quick_enter_page(bpage->datapage[1]);
 			datacount2 = bpage->datacount - datacount1;
 			bcopy((void *)datavaddr,
 			    (void *)(bpage->vaddr + datacount1), datacount2);
 			pmap_quick_remove_page(datavaddr);
 
 next_w:
 			bpage = STAILQ_NEXT(bpage, links);
 		}
 		dmat->bounce_zone->total_bounced++;
 	}
 
 	if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 		while (bpage != NULL) {
 			tempvaddr = 0;
 			datavaddr = bpage->datavaddr;
 			datacount1 = bpage->datacount;
 			if (datavaddr == 0) {
 				tempvaddr =
 				    pmap_quick_enter_page(bpage->datapage[0]);
 				datavaddr = tempvaddr | bpage->dataoffs;
 				datacount1 = min(PAGE_SIZE - bpage->dataoffs,
 				    datacount1);
 			}
 
 			bcopy((void *)bpage->vaddr, (void *)datavaddr,
 			    datacount1);
 
 			if (tempvaddr != 0)
 				pmap_quick_remove_page(tempvaddr);
 
 			if (bpage->datapage[1] == 0) {
 				KASSERT(datacount1 == bpage->datacount,
 		("Mismatch between data size and provided memory space"));
 				goto next_r;
 			}
 
 			/*
 			 * We are dealing with an unmapped buffer that expands
 			 * over two pages.
 			 */
 			datavaddr = pmap_quick_enter_page(bpage->datapage[1]);
 			datacount2 = bpage->datacount - datacount1;
 			bcopy((void *)(bpage->vaddr + datacount1),
 			    (void *)datavaddr, datacount2);
 			pmap_quick_remove_page(datavaddr);
 
 next_r:
 			bpage = STAILQ_NEXT(bpage, links);
 		}
 		dmat->bounce_zone->total_bounced++;
 	}
 }
 
 static void
 init_bounce_pages(void *dummy __unused)
 {
 
 	total_bpages = 0;
 	STAILQ_INIT(&bounce_zone_list);
 	STAILQ_INIT(&bounce_map_waitinglist);
 	STAILQ_INIT(&bounce_map_callbacklist);
 	mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF);
 }
 SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL);
 
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
 
 	return (&bz->sysctl_tree);
 }
 
 /* Return the top-level sysctl node recorded for the given bounce zone. */
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
 	struct sysctl_oid *top;
 
 	top = bz->sysctl_tree_top;
 	return (top);
 }
 
 /*
  * Attach a bounce zone to the tag.  An existing zone is reused when its
  * alignment is at least the tag's, its lowaddr is no higher than the
  * tag's and it belongs to the same domain; otherwise a new zone is
  * allocated, linked onto bounce_zone_list and exported through sysctl.
  *
  * Returns 0 on success (including the case where the sysctl node could
  * not be created) or ENOMEM if the zone itself cannot be allocated.
  */
 static int
 alloc_bounce_zone(bus_dma_tag_t dmat)
 {
 	struct bounce_zone *zone;
 
 	/* Look for a zone that already satisfies this tag. */
 	STAILQ_FOREACH(zone, &bounce_zone_list, links) {
 		if (dmat->common.alignment > zone->alignment ||
 		    dmat->common.lowaddr < zone->lowaddr ||
 		    dmat->common.domain != zone->domain)
 			continue;
 		dmat->bounce_zone = zone;
 		return (0);
 	}
 
 	zone = (struct bounce_zone *)malloc(sizeof(*zone), M_DEVBUF,
 	    M_NOWAIT | M_ZERO);
 	if (zone == NULL)
 		return (ENOMEM);
 
 	/* Constraints inherited from the tag. */
 	zone->lowaddr = dmat->common.lowaddr;
 	zone->alignment = MAX(dmat->common.alignment, PAGE_SIZE);
 	zone->domain = dmat->common.domain;
 
 	/* Empty page list and zeroed counters. */
 	STAILQ_INIT(&zone->bounce_page_list);
 	zone->free_bpages = 0;
 	zone->reserved_bpages = 0;
 	zone->active_bpages = 0;
 	zone->map_count = 0;
 
 	/* Names used for the sysctl node and the lowaddr string. */
 	snprintf(zone->zoneid, 8, "zone%d", busdma_zonecount);
 	busdma_zonecount++;
 	snprintf(zone->lowaddrid, 18, "%#jx", (uintmax_t)zone->lowaddr);
 
 	STAILQ_INSERT_TAIL(&bounce_zone_list, zone, links);
 	dmat->bounce_zone = zone;
 
 	sysctl_ctx_init(&zone->sysctl_tree);
 	zone->sysctl_tree_top = SYSCTL_ADD_NODE(&zone->sysctl_tree,
 	    SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, zone->zoneid,
 	    CTLFLAG_RD, 0, "");
 	if (zone->sysctl_tree_top == NULL) {
 		/* The zone stays usable; it just has no sysctl entries. */
 		sysctl_ctx_free(&zone->sysctl_tree);
 		return (0);	/* XXX error code? */
 	}
 
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "total_bpages", CTLFLAG_RD, &zone->total_bpages, 0,
 	    "Total bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "free_bpages", CTLFLAG_RD, &zone->free_bpages, 0,
 	    "Free bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "reserved_bpages", CTLFLAG_RD, &zone->reserved_bpages, 0,
 	    "Reserved bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "active_bpages", CTLFLAG_RD, &zone->active_bpages, 0,
 	    "Active bounce pages");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "total_bounced", CTLFLAG_RD, &zone->total_bounced, 0,
 	    "Total bounce requests");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "total_deferred", CTLFLAG_RD, &zone->total_deferred, 0,
 	    "Total bounce requests that were deferred");
 	SYSCTL_ADD_STRING(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "lowaddr", CTLFLAG_RD, zone->lowaddrid, 0, "");
 	SYSCTL_ADD_UAUTO(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "alignment", CTLFLAG_RD, &zone->alignment, "");
 	SYSCTL_ADD_INT(busdma_sysctl_tree(zone),
 	    SYSCTL_CHILDREN(busdma_sysctl_tree_top(zone)), OID_AUTO,
 	    "domain", CTLFLAG_RD, &zone->domain, 0,
 	    "memory domain");
 
 	return (0);
 }
 
 /*
  * Try to add up to numpages bounce pages to the tag's bounce zone.
  * Allocation stops at the first failure; the number of pages actually
  * added is returned.
  */
 static int
 alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages)
 {
 	struct bounce_zone *zone;
 	struct bounce_page *page;
 	int added;
 
 	zone = dmat->bounce_zone;
 	for (added = 0; numpages > 0; numpages--, added++) {
 		/* Descriptor first, then the page-sized backing memory. */
 		page = malloc_domainset(sizeof(*page), M_DEVBUF,
 		    DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO);
 		if (page == NULL)
 			break;
 
 		page->vaddr = (vm_offset_t)contigmalloc_domainset(PAGE_SIZE,
 		    M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT,
 		    0ul, zone->lowaddr, PAGE_SIZE, 0);
 		if (page->vaddr == 0) {
 			free_domain(page, M_DEVBUF);
 			break;
 		}
 		page->busaddr = pmap_kextract(page->vaddr);
 
 		/* Publish the page and bump the counters under the lock. */
 		mtx_lock(&bounce_lock);
 		STAILQ_INSERT_TAIL(&zone->bounce_page_list, page, links);
 		total_bpages++;
 		zone->total_bpages++;
 		zone->free_bpages++;
 		mtx_unlock(&bounce_lock);
 	}
 	return (added);
 }
 
 /*
  * Reserve free pages from the tag's zone for the map.  The caller must
  * hold bounce_lock.
  *
  * With commit == 0 nothing is reserved unless the whole outstanding
  * need can be met; with commit != 0 whatever is available is taken.
  * Returns the number of pages the map is still short of (0 when the
  * map is fully reserved).
  */
 static int
 reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit)
 {
 	struct bounce_zone *zone;
 	int grab;
 
 	mtx_assert(&bounce_lock, MA_OWNED);
 	zone = dmat->bounce_zone;
 
 	/* Never take more than is free or more than is still needed. */
 	grab = MIN(zone->free_bpages, map->pagesneeded - map->pagesreserved);
 
 	/* All-or-nothing mode: report the shortfall and touch nothing. */
 	if (commit == 0 && map->pagesneeded > (map->pagesreserved + grab))
 		return (map->pagesneeded - (map->pagesreserved + grab));
 
 	zone->free_bpages -= grab;
 	zone->reserved_bpages += grab;
 	map->pagesreserved += grab;
 
 	return (map->pagesneeded - map->pagesreserved);
 }
 
 /*
  * Take one reserved bounce page from the tag's zone and bind it to a
  * piece of client data on the given map.
  *
  * vaddr is recorded as the client's virtual address, addr1 and addr2 are
  * the physical addresses of the first and (optional, page aligned)
  * second client page, and size is the byte count to bounce.  The page is
  * appended to map->bpages and its bus address is returned.
  *
  * Panics if the map has no outstanding need, has no reserved pages, or
  * if the zone's page list is empty.
  */
 static bus_addr_t
 add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr,
     vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size)
 {
 	struct bounce_zone *bz;
 	struct bounce_page *bpage;
 
 	KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag"));
 	KASSERT(map != NULL && map != &nobounce_dmamap,
 	    ("add_bounce_page: bad map %p", map));
 
 	bz = dmat->bounce_zone;
 	if (map->pagesneeded == 0)
 		panic("add_bounce_page: map doesn't need any pages");
 	map->pagesneeded--;
 
 	/* Distinct message so the two failure modes can be told apart. */
 	if (map->pagesreserved == 0)
 		panic("add_bounce_page: map doesn't have any pages reserved");
 	map->pagesreserved--;
 
 	mtx_lock(&bounce_lock);
 	bpage = STAILQ_FIRST(&bz->bounce_page_list);
 	if (bpage == NULL)
 		panic("add_bounce_page: free page list is empty");
 
 	STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links);
 	bz->reserved_bpages--;
 	bz->active_bpages++;
 	mtx_unlock(&bounce_lock);
 
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/* Page offset needs to be preserved. */
 		bpage->vaddr |= addr1 & PAGE_MASK;
 		bpage->busaddr |= addr1 & PAGE_MASK;
 		KASSERT(addr2 == 0,
 	("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET"));
 	}
 	bpage->datavaddr = vaddr;
 	bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1);
 	KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned"));
 	bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2);
 	bpage->dataoffs = addr1 & PAGE_MASK;
 	bpage->datacount = size;
 	STAILQ_INSERT_TAIL(&(map->bpages), bpage, links);
 	return (bpage->busaddr);
 }
 
 /*
  * Return a bounce page to its zone's free list and, if a map is waiting
  * for pages and can now be fully reserved, move that map to the callback
  * list and schedule the software interrupt.
  */
 static void
 free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage)
 {
 	struct bounce_zone *zone;
 	struct bus_dmamap *waiter;
 
 	zone = dmat->bounce_zone;
 
 	/* Detach the page from the client data it was bouncing. */
 	bpage->datavaddr = 0;
 	bpage->datacount = 0;
 	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
 		/*
 		 * Strip the preserved page offset again so the next user
 		 * gets a page that starts on a page boundary and can hold
 		 * a full page of data.
 		 */
 		bpage->vaddr &= ~PAGE_MASK;
 		bpage->busaddr &= ~PAGE_MASK;
 	}
 
 	mtx_lock(&bounce_lock);
 	STAILQ_INSERT_HEAD(&zone->bounce_page_list, bpage, links);
 	zone->free_bpages++;
 	zone->active_bpages--;
 
 	waiter = STAILQ_FIRST(&bounce_map_waitinglist);
 	if (waiter != NULL &&
 	    reserve_bounce_pages(waiter->dmat, waiter, 1) == 0) {
 		STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
 		STAILQ_INSERT_TAIL(&bounce_map_callbacklist, waiter, links);
 		busdma_swi_pending = 1;
 		zone->total_deferred++;
 		swi_sched(vm_ih, 0);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 /*
  * Drain the callback list: for each queued map, re-issue its deferred
  * load with the tag's lock function held.  bounce_lock is dropped around
  * each load and re-taken before the list is examined again.
  */
 void
 busdma_swi(void)
 {
 	struct bus_dmamap *map;
 	bus_dma_tag_t tag;
 
 	mtx_lock(&bounce_lock);
 	for (;;) {
 		map = STAILQ_FIRST(&bounce_map_callbacklist);
 		if (map == NULL)
 			break;
 		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
 		mtx_unlock(&bounce_lock);
 
 		tag = map->dmat;
 		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
 		bus_dmamap_load_mem(map->dmat, map, &map->mem,
 		    map->callback, map->callback_arg, BUS_DMA_WAITOK);
 		(tag->common.lockfunc)(tag->common.lockfuncarg,
 		    BUS_DMA_UNLOCK);
 
 		mtx_lock(&bounce_lock);
 	}
 	mtx_unlock(&bounce_lock);
 }
 
 /*
  * Method table that binds each bus_dma_impl operation to its bounce_*
  * implementation.
  */
 struct bus_dma_impl bus_dma_bounce_impl = {
 	.tag_create = bounce_bus_dma_tag_create,
 	.tag_destroy = bounce_bus_dma_tag_destroy,
 	.tag_set_domain = bounce_bus_dma_tag_set_domain,
+	.id_mapped = bounce_bus_dma_id_mapped,
 	.map_create = bounce_bus_dmamap_create,
 	.map_destroy = bounce_bus_dmamap_destroy,
 	.mem_alloc = bounce_bus_dmamem_alloc,
 	.mem_free = bounce_bus_dmamem_free,
 	.load_phys = bounce_bus_dmamap_load_phys,
 	.load_buffer = bounce_bus_dmamap_load_buffer,
 	.load_ma = bounce_bus_dmamap_load_ma,
 	.map_waitok = bounce_bus_dmamap_waitok,
 	.map_complete = bounce_bus_dmamap_complete,
 	.map_unload = bounce_bus_dmamap_unload,
 	.map_sync = bounce_bus_dmamap_sync,
 };
Index: head/usr.sbin/camdd/camdd.c
===================================================================
--- head/usr.sbin/camdd/camdd.c	(revision 347835)
+++ head/usr.sbin/camdd/camdd.c	(revision 347836)
@@ -1,3510 +1,3509 @@
 /*-
  * Copyright (c) 1997-2007 Kenneth D. Merry
  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    substantially similar to the "NO WARRANTY" disclaimer below
  *    ("Disclaimer") and any redistribution must be conditioned upon
  *    including a substantially similar Disclaimer requirement for further
  *    binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGES.
  *
  * Authors: Ken Merry           (Spectra Logic Corporation)
  */
 
 /*
  * This is eventually intended to be:
  * - A basic data transfer/copy utility
  * - A simple benchmark utility
  * - An example of how to use the asynchronous pass(4) driver interface.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/ioctl.h>
 #include <sys/stdint.h>
 #include <sys/types.h>
 #include <sys/endian.h>
 #include <sys/param.h>
 #include <sys/sbuf.h>
 #include <sys/stat.h>
 #include <sys/event.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 #include <vm/vm.h>
-#include <machine/bus.h>
 #include <sys/bus.h>
 #include <sys/bus_dma.h>
 #include <sys/mtio.h>
 #include <sys/conf.h>
 #include <sys/disk.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <semaphore.h>
 #include <string.h>
 #include <unistd.h>
 #include <inttypes.h>
 #include <limits.h>
 #include <fcntl.h>
 #include <ctype.h>
 #include <err.h>
 #include <libutil.h>
 #include <pthread.h>
 #include <assert.h>
 #include <bsdxml.h>
 
 #include <cam/cam.h>
 #include <cam/cam_debug.h>
 #include <cam/cam_ccb.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_da.h>
 #include <cam/scsi/scsi_pass.h>
 #include <cam/scsi/scsi_message.h>
 #include <cam/scsi/smp_all.h>
 #include <camlib.h>
 #include <mtlib.h>
 #include <zlib.h>
 
 /*
  * Command selector.  Note that the values are not independent bits:
  * CAMDD_CMD_READ (0x3) equals CAMDD_CMD_HELP | CAMDD_CMD_WRITE, so the
  * values must be compared for equality rather than tested with '&'.
  */
 typedef enum {
 	CAMDD_CMD_NONE		= 0x00000000,
 	CAMDD_CMD_HELP		= 0x00000001,
 	CAMDD_CMD_WRITE		= 0x00000002,
 	CAMDD_CMD_READ		= 0x00000003
 } camdd_cmdmask;
 
 /*
  * Bit flags, one per argument.  parse_btl() sets CAMDD_ARG_BUS,
  * CAMDD_ARG_TARGET and CAMDD_ARG_LUN as it parses each component.
  */
 typedef enum {
 	CAMDD_ARG_NONE		= 0x00000000,
 	CAMDD_ARG_VERBOSE	= 0x00000001,
 	CAMDD_ARG_DEVICE	= 0x00000002,
 	CAMDD_ARG_BUS		= 0x00000004,
 	CAMDD_ARG_TARGET	= 0x00000008,
 	CAMDD_ARG_LUN		= 0x00000010,
 	CAMDD_ARG_UNIT		= 0x00000020,
 	CAMDD_ARG_TIMEOUT	= 0x00000040,
 	CAMDD_ARG_ERR_RECOVER	= 0x00000080,
 	CAMDD_ARG_RETRIES	= 0x00000100
 } camdd_argmask;
 
 /*
  * Kind of device behind a struct camdd_dev; selects which member of
  * union camdd_dev_spec is in use (see camdd_free_dev()).
  */
 typedef enum {
 	CAMDD_DEV_NONE		= 0x00,
 	CAMDD_DEV_PASS		= 0x01,	/* dev_spec.pass */
 	CAMDD_DEV_FILE		= 0x02	/* dev_spec.file */
 } camdd_dev_type;
 
 /*
  * I/O options for one device, filled in by camdd_parse_io_opts() and
  * consumed by the probe routines.
  * NOTE(review): field meanings below are inferred from their names and
  * from the like-named fields of struct camdd_dev -- confirm against the
  * option parser.
  */
 struct camdd_io_opts {
 	camdd_dev_type	dev_type;
 	char		*dev_name;
 	uint64_t	blocksize;
 	uint64_t	queue_depth;
 	uint64_t	offset;
 	int		min_cmd_size;
 	int		write_dev;	/* presumably non-zero for the writer */
 	uint64_t	debug;
 };
 
 /*
  * Discriminator for struct camdd_buf: selects which member of
  * union camdd_buf_types is valid.
  */
 typedef enum {
 	CAMDD_BUF_NONE,
 	CAMDD_BUF_DATA,		/* buf_type_spec.data; owns backing storage */
 	CAMDD_BUF_INDIRECT	/* buf_type_spec.indirect; refers to another buffer */
 } camdd_buf_type;
 
 /*
  * Type-specific part of a CAMDD_BUF_INDIRECT buffer: a (pointer, length)
  * window onto another buffer's data.
  */
 struct camdd_buf_indirect {
 	/*
 	 * Pointer to the source buffer.
 	 */
 	struct camdd_buf *src_buf;
 
 	/*
 	 * Offset into the source buffer, in bytes.
 	 */
 	uint64_t	  offset;
 	/*
 	 * Pointer to the starting point in the source buffer.
 	 */
 	uint8_t		 *start_ptr;
 
 	/*
 	 * Length of this chunk in bytes.
 	 */
 	size_t		  len;
 };
 
 /*
  * Type-specific part of a CAMDD_BUF_DATA buffer: the backing storage
  * plus the scatter/gather state built by camdd_buf_sg_create() and torn
  * down by camdd_release_buf().
  */
 struct camdd_buf_data {
 	/*
 	 * Buffer allocated when we allocate this camdd_buf.  This should
 	 * be the size of the blocksize for this device.
 	 */
 	uint8_t			*buf;
 
 	/*
 	 * The amount of backing store allocated in buf.  Generally this
 	 * will be the blocksize of the device.
 	 */
 	uint32_t		 alloc_len;
 
 	/*
 	 * The amount of data that was put into the buffer (on reads) or
 	 * the amount of data we have put onto the src_list so far (on
 	 * writes).
 	 */
 	uint32_t		 fill_len;
 
 	/*
 	 * The amount of data that was not transferred.
 	 */
 	uint32_t		 resid;
 
 	/*
 	 * Starting byte offset on the reader.
 	 */
 	uint64_t		 src_start_offset;
 	
 	/*
 	 * CCB used for pass(4) device targets.
 	 */
 	union ccb		 ccb;
 
 	/*
 	 * Number of scatter/gather segments.
 	 */
 	int			 sg_count;
 
 	/*
 	 * Set if we had to tack on an extra buffer to round the transfer
 	 * up to a sector size.  When set, the last S/G entry points at
 	 * that extra buffer.
 	 */
 	int			 extra_buf;
 
 	/*
 	 * Scatter/gather list used generally when we're the writer for a
 	 * pass(4) device.
 	 */
 	bus_dma_segment_t	*segs;
 
 	/*
 	 * Scatter/gather list used generally when we're the writer for a
 	 * file or block device.
 	 */
 	struct iovec		*iovec;
 };
 
 /* Type-specific buffer state; the valid member is chosen by buf_type. */
 union camdd_buf_types {
 	struct camdd_buf_indirect	indirect;
 	struct camdd_buf_data		data;
 };
 
 /* Completion status recorded in struct camdd_buf. */
 typedef enum {
 	CAMDD_STATUS_NONE,
 	CAMDD_STATUS_OK,
 	CAMDD_STATUS_SHORT_IO,
 	CAMDD_STATUS_EOF,
 	CAMDD_STATUS_ERROR
 } camdd_buf_status;
 
 /*
  * One I/O buffer.  buf_type selects the valid member of buf_type_spec.
  * A buffer can sit on several lists at once, which is why it carries
  * three separate list linkages (links, work_links, src_links).
  */
 struct camdd_buf {
 	camdd_buf_type		 buf_type;
 	union camdd_buf_types	 buf_type_spec;
 
 	camdd_buf_status	 status;
 
 	uint64_t		 lba;
 	size_t			 len;
 
 	/*
 	 * A reference count of how many indirect buffers point to this
 	 * buffer.
 	 */
 	int			 refcount;
 
 	/*
 	 * A link back to our parent device.
 	 */
 	struct camdd_dev	*dev;
 	STAILQ_ENTRY(camdd_buf)  links;
 	STAILQ_ENTRY(camdd_buf)  work_links;
 
 	/*
 	 * A count of the buffers on the src_list.
 	 */
 	int			 src_count;
 
 	/*
 	 * List of buffers from our partner thread that are the components
 	 * of this buffer for the I/O.  Uses src_links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 src_list;
 	STAILQ_ENTRY(camdd_buf)  src_links;
 };
 
 #define	NUM_DEV_TYPES	2
 
 /*
  * Device-specific state for a CAMDD_DEV_PASS device.  The cam_device
  * handle is closed by camdd_free_dev().
  */
 struct camdd_dev_pass {
 	int			 scsi_dev_type;
 	int			 protocol;
 	struct cam_device	*dev;
 	uint64_t		 max_sector;
 	uint32_t		 block_len;
 	uint32_t		 cpi_maxio;	/* NOTE(review): presumably the max I/O size from path inquiry -- confirm */
 };
 
 /* Classification of the file behind a CAMDD_DEV_FILE device. */
 typedef enum {
 	CAMDD_FILE_NONE,
 	CAMDD_FILE_REG,
 	CAMDD_FILE_STD,
 	CAMDD_FILE_PIPE,
 	CAMDD_FILE_DISK,
 	CAMDD_FILE_TAPE,
 	CAMDD_FILE_TTY,
 	CAMDD_FILE_MEM
 } camdd_file_type;
 
 /* Capability flags for a CAMDD_DEV_FILE device. */
 typedef enum {
 	CAMDD_FF_NONE 		= 0x00,
 	CAMDD_FF_CAN_SEEK	= 0x01
 } camdd_file_flags;
 
 /*
  * Device-specific state for a CAMDD_DEV_FILE device.  camdd_free_dev()
  * closes fd (unless it is -1) and frees tmp_buf.
  */
 struct camdd_dev_file {
 	int			 fd;
 	struct stat		 sb;
 	char			 filename[MAXPATHLEN + 1];
 	camdd_file_type		 file_type;
 	camdd_file_flags	 file_flags;
 	uint8_t			*tmp_buf;
 };
 
 /*
  * Device-specific state for a block device.
  * NOTE(review): no camdd_dev_type value visible here selects this
  * member -- confirm whether it is still used.
  */
 struct camdd_dev_block {
 	int			 fd;
 	uint64_t		 size_bytes;
 	uint32_t		 block_len;
 };
 
 /* Device-specific state; the valid member is chosen by dev_type. */
 union camdd_dev_spec {
 	struct camdd_dev_pass	pass;
 	struct camdd_dev_file	file;
 	struct camdd_dev_block	block;
 };
 
 /* Bit flags describing the run-time state of a struct camdd_dev. */
 typedef enum {
 	CAMDD_DEV_FLAG_NONE		= 0x00,
 	CAMDD_DEV_FLAG_EOF		= 0x01,
 	CAMDD_DEV_FLAG_PEER_EOF		= 0x02,
 	CAMDD_DEV_FLAG_ACTIVE		= 0x04,
 	CAMDD_DEV_FLAG_EOF_SENT		= 0x08,
 	CAMDD_DEV_FLAG_EOF_QUEUED	= 0x10
 } camdd_dev_flags;
 
 /*
  * State for one device taking part in a transfer.  Allocated and
  * initialised by camdd_alloc_dev() (queues, mutex, condition variable
  * and kqueue) and released by camdd_free_dev().  dev_type selects the
  * valid member of dev_spec.
  */
 struct camdd_dev {
 	camdd_dev_type		 dev_type;
 	union camdd_dev_spec	 dev_spec;
 	camdd_dev_flags		 flags;
 	char			 device_name[MAXPATHLEN+1];
 	uint32_t		 blocksize;
 	uint32_t		 sector_size;
 	uint64_t		 max_sector;
 	uint64_t		 sector_io_limit;
 	int			 min_cmd_size;
 	int			 write_dev;
 	int			 retry_count;
 	int			 io_timeout;
 	int			 debug;
 	uint64_t		 start_offset_bytes;
 	uint64_t		 next_io_pos_bytes;
 	uint64_t		 next_peer_pos_bytes;
 	uint64_t		 next_completion_pos_bytes;
 	uint64_t		 peer_bytes_queued;
 	uint64_t		 bytes_transferred;
 	uint32_t		 target_queue_depth;
 	uint32_t		 cur_active_io;
 	uint8_t			*extra_buf;
 	uint32_t		 extra_buf_len;
 	struct camdd_dev	*peer_dev;
 	pthread_mutex_t		 mutex;
 	pthread_cond_t		 cond;
 	/* kqueue descriptor created in camdd_alloc_dev(). */
 	int			 kq;
 
 	/* Per-device-type operations. */
 	int			 (*run)(struct camdd_dev *dev);
 	int			 (*fetch)(struct camdd_dev *dev);
 
 	/*
 	 * Buffers that are available for I/O.  Uses links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 free_queue;
 
 	/*
 	 * Free indirect buffers.  These are used for breaking a large
 	 * buffer into multiple pieces.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
 
 	/*
 	 * Buffers that have been queued to the kernel.  Uses links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 active_queue;
 
 	/*
 	 * Will generally contain one of our buffers that is waiting for enough
 	 * I/O from our partner thread to be able to execute.  This will
 	 * generally happen when our per-I/O-size is larger than the
 	 * partner thread's per-I/O-size.  Uses links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
 
 	/*
 	 * Number of buffers on the pending queue
 	 */
 	int			 num_pending_queue;
 
 	/*
 	 * Buffers that are filled and ready to execute.  This is used when
 	 * our partner (reader) thread sends us blocks that are larger than
 	 * our blocksize, and so we have to split them into multiple pieces.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 run_queue;
 
 	/*
 	 * Number of buffers on the run queue.
 	 */
 	int			 num_run_queue;
 
 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
 
 	int			 num_reorder_queue;
 
 	/*
 	 * Buffers that have been queued to us by our partner thread
 	 * (generally the reader thread) to be written out.  Uses
 	 * work_links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 work_queue;
 
 	/*
 	 * Buffers that have been completed by our partner thread.  Uses
 	 * work_links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
 
 	/*
 	 * Number of buffers on the peer done queue.
 	 */
 	uint32_t		 num_peer_done_queue;
 
 	/*
 	 * A list of buffers that we have queued to our peer thread.  Uses
 	 * links.
 	 */
 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
 
 	/*
 	 * Number of buffers on the peer work queue.
 	 */
 	uint32_t		 num_peer_work_queue;
 };
 
 static sem_t camdd_sem;
 static sig_atomic_t need_exit = 0;
 static sig_atomic_t error_exit = 0;
 static sig_atomic_t need_status = 0;
 
 /*
  * Smaller of two values.  Arguments and the whole expansion are
  * parenthesised so the macro is safe inside larger expressions; each
  * argument may still be evaluated twice, so avoid side effects.
  */
 #ifndef min
 #define	min(a, b) (((a) < (b)) ? (a) : (b))
 #endif
 
 
 /* Generically useful offsets into the peripheral private area */
 #define ppriv_ptr0 periph_priv.entries[0].ptr
 #define ppriv_ptr1 periph_priv.entries[1].ptr
 #define ppriv_field0 periph_priv.entries[0].field
 #define ppriv_field1 periph_priv.entries[1].field
 
 #define	ccb_buf	ppriv_ptr0
 
 /*
  * Default block sizes and queue depths for file and pass(4) devices,
  * and the read/write timeout for pass(4) devices.  The timeout is
  * parenthesised so it behaves as a single value in any expression.
  */
 #define	CAMDD_FILE_DEFAULT_BLOCK	524288
 #define	CAMDD_FILE_DEFAULT_DEPTH	1
 #define	CAMDD_PASS_MAX_BLOCK		1048576
 #define	CAMDD_PASS_DEFAULT_DEPTH	6
 #define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
 
 static int parse_btl(char *tstr, int *bus, int *target, int *lun,
 		     camdd_argmask *arglst);
 void camdd_free_dev(struct camdd_dev *dev);
 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
 				  struct kevent *new_ke, int num_ke,
 				  int retry_count, int timeout);
 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
 					 camdd_buf_type buf_type);
 void camdd_release_buf(struct camdd_buf *buf);
 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
 			uint32_t sector_size, uint32_t *num_sectors_used,
 			int *double_buf_needed);
 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
          camdd_argmask arglist, int probe_retry_count,
          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
 				   int retry_count, int timeout);
 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
 				   struct camdd_io_opts *io_opts,
 				   camdd_argmask arglist, int probe_retry_count,
 				   int probe_timeout, int io_retry_count,
 				   int io_timeout);
 void *camdd_file_worker(void *arg);
 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
 void camdd_peer_done(struct camdd_buf *buf);
 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
 			int *error_count);
 int camdd_pass_fetch(struct camdd_dev *dev);
 int camdd_file_run(struct camdd_dev *dev);
 int camdd_pass_run(struct camdd_dev *dev);
 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
 		     uint32_t *peer_depth, uint32_t *our_bytes,
 		     uint32_t *peer_bytes);
 void *camdd_worker(void *arg);
 void camdd_sig_handler(int sig);
 void camdd_print_status(struct camdd_dev *camdd_dev,
 			struct camdd_dev *other_dev,
 			struct timespec *start_time);
 int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
 	     uint64_t max_io, int retry_count, int timeout);
 int camdd_parse_io_opts(char *args, int is_write,
 			struct camdd_io_opts *io_opts);
 void usage(void);
 
 /*
  * Parse out a bus, or a bus, target and lun in the following
  * format:
  * bus
  * bus:target
  * bus:target:lun
  *
  * Returns the number of parsed components, or 0.
  */
 /*
  * Parse "bus", "bus:target" or "bus:target:lun" from tstr.
  *
  * tstr is modified in place (it is tokenised with strtok()).  Each
  * component found is stored through the matching pointer and the
  * corresponding CAMDD_ARG_* bit is ORed into *arglst.  Returns the
  * number of components parsed (0-3).
  */
 static int
 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
 {
 	char *tmpstr;
 	int convs = 0;
 
 	/*
 	 * Skip leading whitespace.  The cast is required: passing a
 	 * negative char value to isspace() is undefined.  isspace('\0')
 	 * is false, so the loop stops at the terminator on its own.
 	 */
 	while (isspace((unsigned char)*tstr))
 		tstr++;
 
 	tmpstr = strtok(tstr, ":");
 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
 		*bus = strtol(tmpstr, NULL, 0);
 		*arglst |= CAMDD_ARG_BUS;
 		convs++;
 		tmpstr = strtok(NULL, ":");
 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
 			*target = strtol(tmpstr, NULL, 0);
 			*arglst |= CAMDD_ARG_TARGET;
 			convs++;
 			tmpstr = strtok(NULL, ":");
 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
 				*lun = strtol(tmpstr, NULL, 0);
 				*arglst |= CAMDD_ARG_LUN;
 				convs++;
 			}
 		}
 	}
 
 	return convs;
 }
 
 /*
  * XXX KDM clean up and free all of the buffers on the queue!
  */
 /*
  * Release a device: close the underlying file descriptor or CAM device
  * handle, free the file temporary buffer, then free the structure.
  * A NULL dev is ignored.
  */
 void
 camdd_free_dev(struct camdd_dev *dev)
 {
 	if (dev == NULL)
 		return;
 
 	if (dev->dev_type == CAMDD_DEV_FILE) {
 		struct camdd_dev_file *fdev = &dev->dev_spec.file;
 
 		if (fdev->fd != -1)
 			close(fdev->fd);
 		free(fdev->tmp_buf);
 	} else if (dev->dev_type == CAMDD_DEV_PASS) {
 		struct camdd_dev_pass *pdev = &dev->dev_spec.pass;
 
 		if (pdev->dev != NULL)
 			cam_close_device(pdev->dev);
 	}
 
 	free(dev);
 }
 
 /*
  * Allocate and initialise a device structure.
  *
  * Sets up the buffer queues, mutex, condition variable and a kqueue,
  * then registers the caller's num_ke events from new_ke together with
  * four standard ones (two EVFILT_USER events keyed on the work and
  * peer-done queues, plus SIGINFO and SIGINT).
  *
  * Returns the new device, or NULL after printing a warning.  On failure
  * everything acquired so far (kqueue, mutex, condition variable, event
  * array, the structure itself) is released.
  */
 struct camdd_dev *
 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
 		int retry_count, int timeout)
 {
 	struct camdd_dev *dev = NULL;
 	struct kevent *ke = NULL;
 	size_t ke_size;
 	int mutex_ready = 0, cond_ready = 0;
 	int retval = 0;
 
 	dev = calloc(1, sizeof(*dev));
 	if (dev == NULL) {
 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
 		goto bailout;
 	}
 
 	dev->dev_type = dev_type;
 	dev->io_timeout = timeout;
 	dev->retry_count = retry_count;
 	/* calloc() left kq at 0, which is a valid descriptor. */
 	dev->kq = -1;
 	STAILQ_INIT(&dev->free_queue);
 	STAILQ_INIT(&dev->free_indirect_queue);
 	STAILQ_INIT(&dev->active_queue);
 	STAILQ_INIT(&dev->pending_queue);
 	STAILQ_INIT(&dev->run_queue);
 	STAILQ_INIT(&dev->reorder_queue);
 	STAILQ_INIT(&dev->work_queue);
 	STAILQ_INIT(&dev->peer_done_queue);
 	STAILQ_INIT(&dev->peer_work_queue);
 	retval = pthread_mutex_init(&dev->mutex, NULL);
 	if (retval != 0) {
 		warnc(retval, "%s: failed to initialize mutex", __func__);
 		goto bailout;
 	}
 	mutex_ready = 1;
 
 	retval = pthread_cond_init(&dev->cond, NULL);
 	if (retval != 0) {
 		warnc(retval, "%s: failed to initialize condition variable",
 		      __func__);
 		goto bailout;
 	}
 	cond_ready = 1;
 
 	dev->kq = kqueue();
 	if (dev->kq == -1) {
 		warn("%s: Unable to create kqueue", __func__);
 		goto bailout;
 	}
 
 	/* Room for the caller's events plus the four added below. */
 	ke_size = sizeof(struct kevent) * (num_ke + 4);
 	ke = calloc(1, ke_size);
 	if (ke == NULL) {
 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
 		goto bailout;
 	}
 	if (num_ke > 0)
 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
 
 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
 
 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
 	if (retval == -1) {
 		warn("%s: Unable to register kevents", __func__);
 		goto bailout;
 	}
 
 	/* The change list is not needed once kevent() has returned. */
 	free(ke);
 
 	return (dev);
 
 bailout:
 	free(ke);
 	if (dev != NULL) {
 		if (dev->kq != -1)
 			close(dev->kq);
 		if (cond_ready)
 			pthread_cond_destroy(&dev->cond);
 		if (mutex_ready)
 			pthread_mutex_destroy(&dev->mutex);
 		free(dev);
 	}
 
 	return (NULL);
 }
 
 /*
  * Allocate a fresh buffer of the given type for dev.  Data buffers also
  * get dev->blocksize bytes of backing storage; other types carry no
  * storage of their own.  Returns NULL (after a warning) if either
  * allocation fails.
  */
 static struct camdd_buf *
 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
 {
 	struct camdd_buf *new_buf;
 	uint8_t *storage = NULL;
 
 	/* Only data buffers own backing storage. */
 	if (buf_type == CAMDD_BUF_DATA) {
 		storage = malloc(dev->blocksize);
 		if (storage == NULL) {
 			warn("unable to allocate %u bytes", dev->blocksize);
 			return (NULL);
 		}
 	}
 
 	new_buf = calloc(1, sizeof(*new_buf));
 	if (new_buf == NULL) {
 		warn("unable to allocate %zu bytes", sizeof(*new_buf));
 		free(storage);
 		return (NULL);
 	}
 
 	new_buf->buf_type = buf_type;
 	new_buf->dev = dev;
 	if (buf_type == CAMDD_BUF_DATA) {
 		new_buf->buf_type_spec.data.alloc_len = dev->blocksize;
 		new_buf->buf_type_spec.data.buf = storage;
 	}
 	STAILQ_INIT(&new_buf->src_list);
 
 	return (new_buf);
 }
 
 /*
  * Return a buffer to its device's free list.  For data buffers any
  * scatter/gather list is freed first, together with the extra padding
  * buffer hanging off its last entry when extra_buf is set.  Any other
  * buffer type is a fatal error.
  */
 void
 camdd_release_buf(struct camdd_buf *buf)
 {
 	struct camdd_dev *owner = buf->dev;
 	struct camdd_buf_data *d;
 
 	if (buf->buf_type == CAMDD_BUF_INDIRECT) {
 		STAILQ_INSERT_TAIL(&owner->free_indirect_queue, buf, links);
 		return;
 	}
 	if (buf->buf_type != CAMDD_BUF_DATA) {
 		err(1, "%s: Invalid buffer type %d for released buffer",
 		    __func__, buf->buf_type);
 	}
 
 	d = &buf->buf_type_spec.data;
 	if (d->segs != NULL) {
 		if (d->extra_buf != 0) {
 			/* The pad buffer is always the last segment. */
 			void *pad = (void *)d->segs[d->sg_count - 1].ds_addr;
 
 			free(pad);
 			d->extra_buf = 0;
 		}
 		free(d->segs);
 		d->segs = NULL;
 		d->sg_count = 0;
 	} else if (d->iovec != NULL) {
 		if (d->extra_buf != 0) {
 			free(d->iovec[d->sg_count - 1].iov_base);
 			d->extra_buf = 0;
 		}
 		free(d->iovec);
 		d->iovec = NULL;
 		d->sg_count = 0;
 	}
 	STAILQ_INSERT_TAIL(&owner->free_queue, buf, links);
 }
 
 /*
  * Get a buffer of the requested type, recycling one from the device's
  * free list when possible and allocating a new one otherwise.  A
  * recycled buffer is zeroed; a recycled data buffer keeps its backing
  * storage pointer and allocation length.
  */
 struct camdd_buf *
 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
 {
 	struct camdd_buf *buf = NULL;
 
 	if (buf_type == CAMDD_BUF_DATA) {
 		buf = STAILQ_FIRST(&dev->free_queue);
 		if (buf != NULL) {
 			uint8_t *kept_ptr;
 			uint32_t kept_len;
 
 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
 			/* Carry the storage across the wipe. */
 			kept_ptr = buf->buf_type_spec.data.buf;
 			kept_len = buf->buf_type_spec.data.alloc_len;
 			bzero(buf, sizeof(*buf));
 			buf->buf_type_spec.data.buf = kept_ptr;
 			buf->buf_type_spec.data.alloc_len = kept_len;
 		}
 	} else if (buf_type == CAMDD_BUF_INDIRECT) {
 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
 		if (buf != NULL) {
 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
 			bzero(buf, sizeof(*buf));
 		}
 	} else {
 		warnx("Unknown buffer type %d requested", buf_type);
 	}
 
 	/* Nothing to recycle: fall back to a fresh allocation. */
 	if (buf == NULL)
 		return (camdd_alloc_buf(dev, buf_type));
 
 	STAILQ_INIT(&buf->src_list);
 	buf->dev = dev;
 	buf->buf_type = buf_type;
 
 	return (buf);
 }
 
 /*
  * Build a scatter/gather list for buf from the buffers on its src_list.
  *
  * iovec == 0 builds a bus_dma_segment_t array in data->segs; otherwise a
  * struct iovec array in data->iovec.  If the fill length is not a
  * multiple of sector_size, a zeroed pad buffer is appended as the last
  * entry and data->extra_buf is set.
  *
  * On success returns 0 and stores the number of sectors covered
  * (including padding) in *num_sectors_used.  *double_buf_needed is set
  * to 1 if any individual entry is not a multiple of sector_size; it is
  * never cleared here.  Returns 1 on allocation failure or if src_count
  * does not match the list.
  */
 int
 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
 		    uint32_t *num_sectors_used, int *double_buf_needed)
 {
 	struct camdd_buf *tmp_buf;
 	struct camdd_buf_data *data;
 	uint8_t *extra_buf = NULL;
 	size_t extra_buf_len = 0;
 	int extra_buf_attached = 0;
 	int i, retval = 0;
 
 	data = &buf->buf_type_spec.data;
 
 	data->sg_count = buf->src_count;
 	/*
 	 * Compose a scatter/gather list from all of the buffers in the list.
 	 * If the length of the buffer isn't a multiple of the sector size,
 	 * we'll have to add an extra buffer.  This should only happen
 	 * at the end of a transfer.
 	 */
 	if ((data->fill_len % sector_size) != 0) {
 		extra_buf_len = sector_size - (data->fill_len % sector_size);
 		extra_buf = calloc(extra_buf_len, 1);
 		if (extra_buf == NULL) {
 			warn("%s: unable to allocate %zu bytes for extra "
 			    "buffer space", __func__, extra_buf_len);
 			retval = 1;
 			goto bailout;
 		}
 		data->extra_buf = 1;
 		data->sg_count++;
 	}
 	if (iovec == 0) {
 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
 		if (data->segs == NULL) {
 			warn("%s: unable to allocate %zu bytes for S/G list",
 			    __func__, sizeof(bus_dma_segment_t) *
 			    data->sg_count);
 			retval = 1;
 			goto bailout;
 		}
 
 	} else {
 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
 		if (data->iovec == NULL) {
 			warn("%s: unable to allocate %zu bytes for S/G list",
 			    __func__, sizeof(struct iovec) * data->sg_count);
 			retval = 1;
 			goto bailout;
 		}
 	}
 
 	/* One S/G entry per source buffer, in list order. */
 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
 	     i < buf->src_count && tmp_buf != NULL; i++,
 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
 
 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
 			struct camdd_buf_data *tmp_data;
 
 			tmp_data = &tmp_buf->buf_type_spec.data;
 			if (iovec == 0) {
 				data->segs[i].ds_addr =
 				    (bus_addr_t) tmp_data->buf;
 				data->segs[i].ds_len = tmp_data->fill_len -
 				    tmp_data->resid;
 			} else {
 				data->iovec[i].iov_base = tmp_data->buf;
 				data->iovec[i].iov_len = tmp_data->fill_len -
 				    tmp_data->resid;
 			}
 			if (((tmp_data->fill_len - tmp_data->resid) %
 			     sector_size) != 0)
 				*double_buf_needed = 1;
 		} else {
 			struct camdd_buf_indirect *tmp_ind;
 
 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
 			if (iovec == 0) {
 				data->segs[i].ds_addr =
 				    (bus_addr_t)tmp_ind->start_ptr;
 				data->segs[i].ds_len = tmp_ind->len;
 			} else {
 				data->iovec[i].iov_base = tmp_ind->start_ptr;
 				data->iovec[i].iov_len = tmp_ind->len;
 			}
 			if ((tmp_ind->len % sector_size) != 0)
 				*double_buf_needed = 1;
 		}
 	}
 
 	/* The pad buffer, if any, goes after the last source entry. */
 	if (extra_buf != NULL) {
 		if (iovec == 0) {
 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
 			data->segs[i].ds_len = extra_buf_len;
 		} else {
 			data->iovec[i].iov_base = extra_buf;
 			data->iovec[i].iov_len = extra_buf_len;
 		}
 		extra_buf_attached = 1;
 		i++;
 	}
 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
 		warnx("buffer source count does not match "
 		      "number of buffers in list!");
 		retval = 1;
 		goto bailout;
 	}
 
 bailout:
 	if (retval == 0) {
 		*num_sectors_used = (data->fill_len + extra_buf_len) /
 		    sector_size;
 	} else if (extra_buf != NULL && extra_buf_attached == 0) {
 		/*
 		 * A pad buffer was allocated and counted but never made it
 		 * into the list: free it and undo its accounting.  When no
 		 * pad buffer exists, sg_count and extra_buf were never
 		 * adjusted and must be left alone.
 		 */
 		free(extra_buf);
 		data->extra_buf = 0;
 		data->sg_count--;
 	}
 	return (retval);
 }
 
 /*
  * Report the length recorded for a buffer: the fill length for a
  * CAMDD_BUF_DATA buffer, or the indirect segment length for any other
  * buffer type.
  */
 uint32_t
 camdd_buf_get_len(struct camdd_buf *buf)
 {
 	if (buf->buf_type == CAMDD_BUF_DATA)
 		return (buf->buf_type_spec.data.fill_len);
 
 	/* Anything that is not a data buffer is treated as indirect. */
 	return (buf->buf_type_spec.indirect.len);
 }
 
 /*
  * Attach child_buf to the tail of buf's source list and add the child's
  * length to buf's fill length.  buf must be a CAMDD_BUF_DATA buffer; this
  * is enforced with assert().
  */
 void
 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
 {
 	assert(buf->buf_type == CAMDD_BUF_DATA);
 
 	/* Account for the child's data in the parent. */
 	buf->buf_type_spec.data.fill_len += camdd_buf_get_len(child_buf);
 
 	/* Link the child in and bump the source count to match. */
 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
 	buf->src_count++;
 }
 
 /*
  * Indices into req_status_items[] below.  The enumerators must stay in the
  * same order as the table entries, because camdd_probe_tape() uses them to
  * index the table directly.
  */
 typedef enum {
 	CAMDD_TS_MAX_BLK,
 	CAMDD_TS_MIN_BLK,
 	CAMDD_TS_BLK_GRAN,
 	CAMDD_TS_EFF_IOSIZE
 } camdd_status_item_index;
 
 /*
  * Tape status entries that camdd_probe_tape() requires.  "name" is the
  * entry name looked up in the parsed status data; "entry" starts out NULL
  * and is filled in with the lookup result.
  */
 static struct camdd_status_items {
 	const char *name;
 	struct mt_status_entry *entry;
 } req_status_items[] = {
 	{ "max_blk", NULL },
 	{ "min_blk", NULL },
 	{ "blk_gran", NULL },
 	{ "max_effective_iosize", NULL }
 };
 
 /*
  * Query a tape device for its block limits.
  *
  * Fetches the extended status XML for fd (MTIOCEXTGET), parses it, and
  * looks up every entry listed in req_status_items[].  On success the
  * values are stored through max_iosize, max_blk, min_blk and blk_gran.
  *
  * Returns 0 on success and 1 if the status could not be parsed or the
  * parsed status reports an error.  Failure to fetch the XML, or a missing
  * required entry, terminates the program via err()/errx().
  */
 int
 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
 {
 	struct mt_status_data status_data;
 	char *xml_str = NULL;
 	unsigned int i;
 	int retval = 1;		/* assume failure until everything is read */
 
 	if (mt_get_xml_str(fd, MTIOCEXTGET, &xml_str) != 0)
 		err(1, "Couldn't get XML string from %s", filename);
 
 	if (mt_get_status(xml_str, &status_data) != XML_STATUS_OK) {
 		warn("couldn't get status for %s", filename);
 		goto bailout;
 	}
 
 	if (status_data.error != 0) {
 		warnx("%s", status_data.error_str);
 		goto bailout;
 	}
 
 	/* Resolve every required entry; a missing one is fatal. */
 	for (i = 0; i < nitems(req_status_items); i++) {
 		req_status_items[i].entry = mt_status_entry_find(&status_data,
 		    __DECONST(char *, req_status_items[i].name));
 		if (req_status_items[i].entry == NULL) {
 			errx(1, "Cannot find status entry %s",
 			    req_status_items[i].name);
 		}
 	}
 
 	*max_iosize =
 	    req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
 	*max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
 	*min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
 	retval = 0;
 
 bailout:
 	/* Both the XML text and the parsed status are released on all paths. */
 	free(xml_str);
 	mt_status_free(&status_data);
 
 	return (retval);
 }
 
 /*
  * Set up a camdd device that does its I/O through a file descriptor.
  *
  * A CAMDD_DEV_FILE device is allocated and filled in from io_opts: the
  * name is copied, and the blocksize is io_opts->blocksize or, when that is
  * 0, CAMDD_FILE_DEFAULT_BLOCK.  A requested queue depth other than 0 or 1
  * is reported and ignored.  For descriptors other than stdin/stdout the
  * file is fstat()ed and classified, and tapes and disks are probed further
  * to derive blocksize, sector size and maximum sector.
  *
  * Returns the new device, or NULL if the device could not be allocated,
  * fstat() failed, or a non-zero offset was requested for something that
  * cannot seek.  Several unrecoverable conditions (failed ioctls, a
  * directory, an unusable blocksize) exit the program via err()/errx().
  */
 struct camdd_dev *
 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
     int timeout)
 {
 	struct camdd_dev *dev = NULL;
 	struct camdd_dev_file *file_dev;
 	uint64_t blocksize = io_opts->blocksize;
 
 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
 	if (dev == NULL)
 		goto bailout;
 
 	file_dev = &dev->dev_spec.file;
 	file_dev->fd = fd;
 	strlcpy(file_dev->filename, io_opts->dev_name,
 	    sizeof(file_dev->filename));
 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
 	if (blocksize == 0)
 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
 	else
 		dev->blocksize = blocksize;
 
 	if ((io_opts->queue_depth != 0)
 	 && (io_opts->queue_depth != 1)) {
 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
 		    "command supported", (uintmax_t)io_opts->queue_depth,
 		    io_opts->dev_name);
 	}
 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
 	dev->run = camdd_file_run;
 	dev->fetch = NULL;
 
 	/*
 	 * We can effectively access files on byte boundaries.  We'll reset
 	 * this for devices like disks that can be accessed on sector
 	 * boundaries.
 	 */
 	dev->sector_size = 1;
 
 	if ((fd != STDIN_FILENO)
 	 && (fd != STDOUT_FILENO)) {
 		int retval;
 
 		retval = fstat(fd, &file_dev->sb);
 		if (retval != 0) {
 			warn("Cannot stat %s", dev->device_name);
 			goto bailout_error;
 		}
 		if (S_ISREG(file_dev->sb.st_mode)) {
 			file_dev->file_type = CAMDD_FILE_REG;
 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
 			int type;
 
 			if (ioctl(fd, FIODTYPE, &type) == -1)
 				err(1, "FIODTYPE ioctl failed on %s",
 				    dev->device_name);
 			else {
 				if (type & D_TAPE)
 					file_dev->file_type = CAMDD_FILE_TAPE;
 				else if (type & D_DISK)
 					file_dev->file_type = CAMDD_FILE_DISK;
 				else if (type & D_MEM)
 					file_dev->file_type = CAMDD_FILE_MEM;
 				else if (type & D_TTY)
 					file_dev->file_type = CAMDD_FILE_TTY;
 			}
 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
 			errx(1, "cannot operate on directory %s",
 			    dev->device_name);
 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
 			file_dev->file_type = CAMDD_FILE_PIPE;
 		} else
 			errx(1, "Cannot determine file type for %s",
 			    dev->device_name);
 
 		switch (file_dev->file_type) {
 		case CAMDD_FILE_REG:
 			if (file_dev->sb.st_size != 0)
 				dev->max_sector = file_dev->sb.st_size - 1;
 			else
 				dev->max_sector = 0;
 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
 			break;
 		case CAMDD_FILE_TAPE: {
 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
 			/*
 			 * Check block limits and maximum effective iosize.
 			 * Make sure the blocksize is within the block
 			 * limits (and a multiple of the minimum blocksize)
 			 * and that the blocksize is <= maximum effective
 			 * iosize.
 			 */
 			retval = camdd_probe_tape(fd, dev->device_name,
 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
 			if (retval != 0)
 				errx(1, "Unable to probe tape %s",
 				    dev->device_name);
 
 			/*
 			 * The blocksize needs to be <= the maximum
 			 * effective I/O size of the tape device.  Note
 			 * that this also takes into account the maximum
 			 * blocksize reported by READ BLOCK LIMITS.
 			 */
 			if (dev->blocksize > max_iosize) {
 				warnx("Blocksize %u too big for %s, limiting "
 				    "to %ju", dev->blocksize, dev->device_name,
 				    (uintmax_t)max_iosize);
 				dev->blocksize = max_iosize;
 			}
 
 			/*
 			 * The blocksize needs to be at least min_blk;
 			 */
 			if (dev->blocksize < min_blk) {
 				warnx("Blocksize %u too small for %s, "
 				    "increasing to %ju", dev->blocksize,
 				    dev->device_name, (uintmax_t)min_blk);
 				dev->blocksize = min_blk;
 			}
 
 			/*
 			 * And the blocksize needs to be a multiple of
 			 * the block granularity.  blk_gran is used as a
 			 * power-of-two exponent, so do the shift on an
 			 * unsigned 32-bit value and refuse exponents that
 			 * would shift past its width instead of shifting
 			 * a signed int out of range.
 			 */
 			if (blk_gran != 0) {
 				uint32_t gran, rounded;
 
 				if (blk_gran >= 32) {
 					errx(1, "Block granularity exponent "
 					    "%ju for %s is out of range",
 					    (uintmax_t)blk_gran,
 					    dev->device_name);
 				}
 
 				gran = (uint32_t)1 << blk_gran;
 				if ((dev->blocksize % gran) != 0) {
 					rounded = dev->blocksize & ~(gran - 1);
 					warnx("Blocksize %u for %s not a "
 					    "multiple of %u, adjusting to %u",
 					    dev->blocksize, dev->device_name,
 					    gran, rounded);
 					dev->blocksize = rounded;
 				}
 			}
 
 			if (dev->blocksize == 0) {
 				errx(1, "Unable to derive valid blocksize for "
 				    "%s", dev->device_name);
 			}
 
 			/*
 			 * For tape drives, set the sector size to the
 			 * blocksize so that we make sure not to write
 			 * less than the blocksize out to the drive.
 			 */
 			dev->sector_size = dev->blocksize;
 			break;
 		}
 		case CAMDD_FILE_DISK: {
 			off_t media_size;
 			unsigned int sector_size;
 
 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
 
 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
 				    dev->device_name);
 			}
 
 			if (sector_size == 0) {
 				errx(1, "DIOCGSECTORSIZE ioctl returned "
 				    "invalid sector size %u for %s",
 				    sector_size, dev->device_name);
 			}
 
 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
 				    dev->device_name);
 			}
 
 			if (media_size == 0) {
 				errx(1, "DIOCGMEDIASIZE ioctl returned "
 				    "invalid media size %ju for %s",
 				    (uintmax_t)media_size, dev->device_name);
 			}
 
 			if (dev->blocksize % sector_size) {
 				errx(1, "%s blocksize %u not a multiple of "
 				    "sector size %u", dev->device_name,
 				    dev->blocksize, sector_size);
 			}
 
 			dev->sector_size = sector_size;
 			dev->max_sector = (media_size / sector_size) - 1;
 			break;
 		}
 		case CAMDD_FILE_MEM:
 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
 			break;
 		default:
 			break;
 		}
 	}
 
 	/* An offset is only meaningful if the target was marked seekable. */
 	if ((io_opts->offset != 0)
 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
 		    (uintmax_t)io_opts->offset, io_opts->dev_name,
 		    io_opts->dev_name);
 		goto bailout_error;
 	}
 #if 0
 	else if ((io_opts->offset != 0)
 		&& ((io_opts->offset % dev->sector_size) != 0)) {
 		warnx("Offset %ju for %s is not a multiple of the "
 		      "sector size %u", io_opts->offset, 
 		      io_opts->dev_name, dev->sector_size);
 		goto bailout_error;
 	} else {
 		dev->start_offset_bytes = io_opts->offset;
 	}
 #endif
 
 bailout:
 	return (dev);
 
 bailout_error:
 	camdd_free_dev(dev);
 	return (NULL);
 }
 
 /*
  * Fetch the "get device" information for a CAM device.
  *
  * Allocates a CCB, sends an XPT_GDEV_TYPE request and, when the request
  * completes with CAM_REQ_CMP, copies the result into *cgd.  The CCB is
  * freed before returning.
  *
  * Returns 0 on success and -1 if the CCB could not be allocated, could not
  * be sent, or completed with any other status.
  */
 int
 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
 {
 	union ccb *ccb;
 	int retval = -1;
 
 	if ((ccb = cam_getccb(device)) == NULL) {
 		warnx("%s: couldn't allocate CCB", __func__);
 		return (-1);
 	}
 
 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
 
 	if (cam_send_ccb(device, ccb) < 0) {
 		warn("%s: error sending Get Device Information CCB", __func__);
 		cam_error_print(device, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 	} else if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 		cam_error_print(device, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 	} else {
 		bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
 		retval = 0;
 	}
 
 	cam_freeccb(ccb);
 
 	return (retval);
 }
 
 /*
  * Read the capacity of a SCSI device using the caller's CCB.
  *
  * A READ CAPACITY command is issued first.  If it reports a last block of
  * 0xffffffff, READ CAPACITY (16) is issued as well to get the real value.
  * Both commands disable device queue freezing, and request error recovery
  * when CAMDD_ARG_ERR_RECOVER is set in arglist.  A probe_timeout of 0
  * selects a timeout value of 5000.
  *
  * On success *maxsector and *block_len hold the reported last block
  * address and block length and 0 is returned.  Returns -1 if ccb is NULL
  * or either command fails; the outputs may already have been written with
  * the READ CAPACITY result in the latter case.
  */
 int
 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
 		 camdd_argmask arglist, int probe_retry_count,
 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
 {
 	struct scsi_read_capacity_data rcap;
 	struct scsi_read_capacity_data_long rcaplong;
 	int cmd_timeout;
 
 	if (ccb == NULL) {
 		warnx("%s: error passed ccb is NULL", __func__);
 		return (-1);
 	}
 
 	cmd_timeout = (probe_timeout != 0) ? probe_timeout : 5000;
 
 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
 
 	scsi_read_capacity(&ccb->csio,
 			   /*retries*/ probe_retry_count,
 			   /*cbfcnp*/ NULL,
 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
 			   &rcap,
 			   SSD_FULL_SIZE,
 			   /*timeout*/ cmd_timeout);
 
 	/* Disable freezing the device queue */
 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
 	if ((arglist & CAMDD_ARG_ERR_RECOVER) != 0)
 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
 
 	if (cam_send_ccb(cam_dev, ccb) < 0) {
 		warn("error sending READ CAPACITY command");
 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 		return (-1);
 	}
 
 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 		return (-1);
 	}
 
 	*maxsector = scsi_4btoul(rcap.addr);
 	*block_len = scsi_4btoul(rcap.length);
 
 	/*
 	 * Only a last block of 2^32-1 means the true capacity did not fit
 	 * and the long form of the command is needed; anything else is
 	 * already the final answer.
 	 */
 	if (*maxsector != 0xffffffff)
 		return (0);
 
 	scsi_read_capacity_16(&ccb->csio,
 			      /*retries*/ probe_retry_count,
 			      /*cbfcnp*/ NULL,
 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
 			      /*lba*/ 0,
 			      /*reladdr*/ 0,
 			      /*pmi*/ 0,
 			      (uint8_t *)&rcaplong,
 			      sizeof(rcaplong),
 			      /*sense_len*/ SSD_FULL_SIZE,
 			      /*timeout*/ cmd_timeout);
 
 	/* Disable freezing the device queue */
 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
 	if ((arglist & CAMDD_ARG_ERR_RECOVER) != 0)
 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
 
 	if (cam_send_ccb(cam_dev, ccb) < 0) {
 		warn("error sending READ CAPACITY (16) command");
 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 		return (-1);
 	}
 
 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
 		return (-1);
 	}
 
 	*maxsector = scsi_8btou64(rcaplong.addr);
 	*block_len = scsi_4btoul(rcaplong.length);
 
 	return (0);
 }
 
 /*
  * Probe a CAM passthrough device and build a camdd device for it.
  *
  * - Fetch the device's "get device" data and dispatch on its protocol.
  *   Only PROTO_SCSI is handled, and only for device types that support
  *   READ CAPACITY (direct, WORM, CD, optical, RBC, host-managed ZBC);
  *   anything else exits via errx().
  * - Get the capacity and block size.
  * - Send an XPT_PATH_INQ to learn the controller's maximum I/O size.
  * - Allocate the device and derive its blocksize, queue depth and the
  *   run/fetch callbacks.
  *
  * Returns the new device, or NULL on failure.  The CCB allocated here is
  * freed on every return path after it has been allocated.
  */
 struct camdd_dev *
 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
 		 camdd_argmask arglist, int probe_retry_count,
 		 int probe_timeout, int io_retry_count, int io_timeout)
 {
 	union ccb *ccb;
 	uint64_t maxsector = 0;
 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
 	uint32_t block_len = 0;
 	struct camdd_dev *dev = NULL;
 	struct camdd_dev_pass *pass_dev;
 	struct kevent ke;
 	struct ccb_getdev cgd;
 	int retval;
 	int scsi_dev_type;
 
 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
 		warnx("%s: error retrieving CGD", __func__);
 		return NULL;
 	}
 
 	ccb = cam_getccb(cam_dev);
 
 	if (ccb == NULL) {
 		warnx("%s: error allocating ccb", __func__);
 		goto bailout;
 	}
 
 	switch (cgd.protocol) {
 	case PROTO_SCSI:
 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
 
 		/*
 		 * For devices that support READ CAPACITY, we'll attempt to get the
 		 * capacity.  Otherwise, we really don't support tape or other
 		 * devices via SCSI passthrough, so just return an error in that case.
 		 */
 		switch (scsi_dev_type) {
 		case T_DIRECT:
 		case T_WORM:
 		case T_CDROM:
 		case T_OPTICAL:
 		case T_RBC:
 		case T_ZBC_HM:
 			break;
 		default:
 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
 			break; /*NOTREACHED*/
 		}
 
 		/*
 		 * The argument mask comes before the retry count in
 		 * camdd_probe_pass_scsi()'s parameter list; passing them
 		 * the other way around uses the mask as the retry count
 		 * and tests the retry count for CAMDD_ARG_ERR_RECOVER.
 		 */
 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, arglist,
 						probe_retry_count, probe_timeout,
 						&maxsector, &block_len))) {
 			goto bailout;
 		}
 		break;
 	default:
 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
 		break; /*NOTREACHED*/
 	}
 
 	if (block_len == 0) {
 		warnx("Sector size for %s%u is 0, cannot continue",
 		    cam_dev->device_name, cam_dev->dev_unit_num);
 		goto bailout_error;
 	}
 
 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
 
 	ccb->ccb_h.func_code = XPT_PATH_INQ;
 	ccb->ccb_h.flags = CAM_DIR_NONE;
 	ccb->ccb_h.retry_count = 1;
 	
 	if (cam_send_ccb(cam_dev, ccb) < 0) {
 		warn("error sending XPT_PATH_INQ CCB");
 
 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
 				CAM_EPF_ALL, stderr);
 		goto bailout;
 	}
 
 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
 
 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
 			      io_timeout);
 	if (dev == NULL)
 		goto bailout;
 
 	pass_dev = &dev->dev_spec.pass;
 	pass_dev->scsi_dev_type = scsi_dev_type;
 	pass_dev->protocol = cgd.protocol;
 	pass_dev->dev = cam_dev;
 	pass_dev->max_sector = maxsector;
 	pass_dev->block_len = block_len;
 	pass_dev->cpi_maxio = ccb->cpi.maxio;
 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
 	dev->sector_size = block_len;
 	dev->max_sector = maxsector;
 	
 
 	/*
 	 * Determine the optimal blocksize to use for this device.
 	 */
 
 	/*
 	 * If the controller has not specified a maximum I/O size,
 	 * just go with 128K as a somewhat conservative value.
 	 */
 	if (pass_dev->cpi_maxio == 0)
 		cpi_maxio = 131072;
 	else
 		cpi_maxio = pass_dev->cpi_maxio;
 
 	/*
 	 * If the controller has a large maximum I/O size, limit it
 	 * to something smaller so that the kernel doesn't have trouble
 	 * allocating buffers to copy data in and out for us.
 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
 	 */
 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
 
 	/*
 	 * If we weren't able to get a block size for some reason,
 	 * default to 512 bytes.
 	 */
 	block_len = pass_dev->block_len;
 	if (block_len == 0)
 		block_len = 512;
 
 	/*
 	 * Figure out how many blocksize chunks will fit in the
 	 * maximum I/O size.
 	 */
 	pass_numblocks = max_iosize / block_len;
 
 	/*
 	 * And finally, multiply the number of blocks by the LBA
 	 * length to get our maximum block size;
 	 */
 	dev->blocksize = pass_numblocks * block_len;
 
 	if (io_opts->blocksize != 0) {
 		if ((io_opts->blocksize % dev->sector_size) != 0) {
 			warnx("Blocksize %ju for %s is not a multiple of "
 			      "sector size %u", (uintmax_t)io_opts->blocksize, 
 			      dev->device_name, dev->sector_size);
 			goto bailout_error;
 		}
 		dev->blocksize = io_opts->blocksize;
 	}
 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
 	if (io_opts->queue_depth != 0)
 		dev->target_queue_depth = io_opts->queue_depth;
 
 	if (io_opts->offset != 0) {
 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
 			warnx("Offset %ju is past the end of device %s",
 			    (uintmax_t)io_opts->offset, dev->device_name);
 			goto bailout_error;
 		}
 #if 0
 		else if ((io_opts->offset % dev->sector_size) != 0) {
 			warnx("Offset %ju for %s is not a multiple of the "
 			      "sector size %u", io_opts->offset, 
 			      dev->device_name, dev->sector_size);
 			goto bailout_error;
 		}
 		dev->start_offset_bytes = io_opts->offset;
 #endif
 	}
 
 	dev->min_cmd_size = io_opts->min_cmd_size;
 
 	dev->run = camdd_pass_run;
 	dev->fetch = camdd_pass_fetch;
 
 bailout:
 	cam_freeccb(ccb);
 
 	return (dev);
 
 bailout_error:
 	cam_freeccb(ccb);
 
 	camdd_free_dev(dev);
 
 	return (NULL);
 }
 
 /*
  * Per-device worker thread body.  arg is the struct camdd_dev to service.
  *
  * The device is marked CAMDD_DEV_FLAG_ACTIVE for the lifetime of the loop,
  * which runs with dev->mutex held except while blocked in kevent().  Each
  * pass:
  *  - on the reader side (write_dev == 0), queues new I/O with
  *    camdd_queue(dev, NULL) until the depth and byte targets are met;
  *  - starts queued I/O through dev->run while there is queue depth left;
  *  - leaves the loop once EOF has been seen and outstanding work drained;
  *  - waits for and dispatches one kevent.
  *
  * On exit the ACTIVE flag is cleared, need_exit is set and camdd_sem is
  * posted.  Errors are reported through the error_exit variable; the thread
  * return value is always NULL.
  */
 void *
 camdd_worker(void *arg)
 {
 	struct camdd_dev *dev = arg;
 	struct camdd_buf *buf;
 	struct timespec ts, *kq_ts;
 
 	/* A zero timespec makes kevent() poll instead of block. */
 	ts.tv_sec = 0;
 	ts.tv_nsec = 0;
 
 	pthread_mutex_lock(&dev->mutex);
 
 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
 
 	for (;;) {
 		struct kevent ke;
 		int retval = 0;
 
 		/*
 		 * XXX KDM check the reorder queue depth?
 		 */
 		if (dev->write_dev == 0) {
 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
 			uint32_t target_depth = dev->target_queue_depth;
 			uint32_t peer_target_depth =
 			    dev->peer_dev->target_queue_depth;
 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
 
 			camdd_get_depth(dev, &our_depth, &peer_depth,
 					&our_bytes, &peer_bytes);
 
 #if 0
 			while (((our_depth < target_depth)
 			     && (peer_depth < peer_target_depth))
 			    || ((peer_bytes + our_bytes) <
 				 (peer_blocksize * 2))) {
 #endif
 			/*
 			 * Keep queueing while the combined queue depth is
 			 * below the combined target, or fewer than three
 			 * peer blocks' worth of bytes are queued.
 			 */
 			while (((our_depth + peer_depth) <
 			        (target_depth + peer_target_depth))
 			    || ((peer_bytes + our_bytes) <
 				(peer_blocksize * 3))) {
 
 				/*
 				 * A return of 1 stops queueing for this
 				 * pass; any other non-zero value is fatal.
 				 */
 				retval = camdd_queue(dev, NULL);
 				if (retval == 1)
 					break;
 				else if (retval != 0) {
 					error_exit = 1;
 					goto bailout;
 				}
 
 				camdd_get_depth(dev, &our_depth, &peer_depth,
 						&our_bytes, &peer_bytes);
 			}
 		}
 		/*
 		 * See if we have any I/O that is ready to execute.
 		 */
 		buf = STAILQ_FIRST(&dev->run_queue);
 		if (buf != NULL) {
 			while (dev->target_queue_depth > dev->cur_active_io) {
 				/*
 				 * -1 from the run callback is treated as an
 				 * error and as EOF for this device; any
 				 * other non-zero value just stops issuing.
 				 */
 				retval = dev->run(dev);
 				if (retval == -1) {
 					dev->flags |= CAMDD_DEV_FLAG_EOF;
 					error_exit = 1;
 					break;
 				} else if (retval != 0) {
 					break;
 				}
 			}
 		}
 
 		/*
 		 * We've reached EOF, or our partner has reached EOF.
 		 */
 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
 			if (dev->write_dev != 0) {
 			 	if ((STAILQ_EMPTY(&dev->work_queue))
 				 && (dev->num_run_queue == 0)
 				 && (dev->cur_active_io == 0)) {
 					goto bailout;
 				}
 			} else {
 				/*
 				 * If we're the reader, and the writer
 				 * got EOF, he is already done.  If we got
 				 * the EOF, then we need to wait until
 				 * everything is flushed out for the writer.
 				 */
 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
 					goto bailout;
 				} else if ((dev->num_peer_work_queue == 0)
 					&& (dev->num_peer_done_queue == 0)
 					&& (dev->cur_active_io == 0)
 					&& (dev->num_run_queue == 0)) {
 					goto bailout;
 				}
 			}
 			/*
 			 * XXX KDM need to do something about the pending
 			 * queue and cleanup resources.
 			 */
 		} 
 
 		/*
 		 * A reader with no active I/O and less than one peer block
 		 * queued only polls for events (zero timeout); in every
 		 * other case kevent() blocks until an event arrives.
 		 */
 		if ((dev->write_dev == 0)
 		 && (dev->cur_active_io == 0)
 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
 			kq_ts = &ts;
 		else
 			kq_ts = NULL;
 
 		/*
 		 * Run kevent to see if there are events to process.
 		 */
 		pthread_mutex_unlock(&dev->mutex);
 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
 		pthread_mutex_lock(&dev->mutex);
 		if (retval == -1) {
 			warn("%s: error returned from kevent",__func__);
 			goto bailout;
 		} else if (retval != 0) {
 			switch (ke.filter) {
 			case EVFILT_READ:
 				/* Collect completions, if we have a fetch callback. */
 				if (dev->fetch != NULL) {
 					retval = dev->fetch(dev);
 					if (retval == -1) {
 						error_exit = 1;
 						goto bailout;
 					}
 				}
 				break;
 			case EVFILT_SIGNAL:
 				/*
 				 * We register for this so we don't get
 				 * an error as a result of a SIGINFO or a
 				 * SIGINT.  It will actually get handled
 				 * by the signal handler.  If we get a
 				 * SIGINT, bail out without printing an
 				 * error message.  Any other signals 
 				 * will result in the error message above.
 				 */
 				if (ke.ident == SIGINT)
 					goto bailout;
 				break;
 			case EVFILT_USER:
 				retval = 0;
 				/*
 				 * Check to see if the other thread has
 				 * queued any I/O for us to do.  (In this
 				 * case we're the writer.)
 				 */
 				for (buf = STAILQ_FIRST(&dev->work_queue);
 				     buf != NULL;
 				     buf = STAILQ_FIRST(&dev->work_queue)) {
 					STAILQ_REMOVE_HEAD(&dev->work_queue,
 							   work_links);
 					retval = camdd_queue(dev, buf);
 					/*
 					 * We keep going unless we get an
 					 * actual error.  If we get EOF, we
 					 * still want to remove the buffers
 					 * from the queue and send the back
 					 * to the reader thread.
 					 */
 					if (retval == -1) {
 						error_exit = 1;
 						goto bailout;
 					} else
 						retval = 0;
 				}
 
 				/*
 				 * Next check to see if the other thread has
 				 * queued any completed buffers back to us.
 				 * (In this case we're the reader.)
 				 */
 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
 				     buf != NULL;
 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
 					STAILQ_REMOVE_HEAD(
 					    &dev->peer_done_queue, work_links);
 					dev->num_peer_done_queue--;
 					camdd_peer_done(buf);
 				}
 				break;
 			default:
 				warnx("%s: unknown kevent filter %d",
 				      __func__, ke.filter);
 				break;
 			}
 		}
 	}
 
 bailout:
 
 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
 
 	/* XXX KDM cleanup resources here? */
 
 	pthread_mutex_unlock(&dev->mutex);
 
 	/* Flag the exit and post the semaphore so a waiter can notice it. */
 	need_exit = 1;
 	sem_post(&camdd_sem);
 
 	return (NULL);
 }
 
 /*
  * Map a completed CCB onto camdd's own buffer status.
  *
  * Only PROTO_SCSI is understood; every other protocol yields
  * CAMDD_STATUS_ERROR.  For SCSI:
  *  - CAM_REQ_CMP with no residual is OK, with a residual smaller than the
  *    transfer length is SHORT_IO, and otherwise is EOF;
  *  - CAM_SCSI_STATUS_ERROR is OK for the OK / CONDITION MET /
  *    INTERMEDIATE SCSI statuses and ERROR for everything else (CHECK
  *    CONDITION, BUSY, QUEUE FULL, RESERVATION CONFLICT, COMMAND
  *    TERMINATED, ...);
  *  - any other CAM status is ERROR.
  */
 camdd_buf_status
 camdd_ccb_status(union ccb *ccb, int protocol)
 {
 	cam_status ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
 
 	if (protocol != PROTO_SCSI)
 		return (CAMDD_STATUS_ERROR);
 
 	if (ccb_status == CAM_REQ_CMP) {
 		if (ccb->csio.resid == 0)
 			return (CAMDD_STATUS_OK);
 		if (ccb->csio.dxfer_len > ccb->csio.resid)
 			return (CAMDD_STATUS_SHORT_IO);
 		return (CAMDD_STATUS_EOF);
 	}
 
 	if (ccb_status != CAM_SCSI_STATUS_ERROR)
 		return (CAMDD_STATUS_ERROR);
 
 	switch (ccb->csio.scsi_status) {
 	case SCSI_STATUS_OK:
 	case SCSI_STATUS_COND_MET:
 	case SCSI_STATUS_INTERMED:
 	case SCSI_STATUS_INTERMED_COND_MET:
 		return (CAMDD_STATUS_OK);
 	default:
 		return (CAMDD_STATUS_ERROR);
 	}
 }
 
 /*
  * Queue a buffer to our peer's work thread for writing.
  *
  * Buffers are handed over strictly in ascending position order.  A buffer
  * that is not the next expected one is parked on dev->reorder_queue
  * (kept sorted by LBA) and 0 is returned.  Otherwise the buffer, plus any
  * parked buffers that now follow on contiguously, is recorded on
  * dev->peer_work_queue and moved to the peer's work_queue, and the peer
  * is notified through its kqueue.
  *
  * Called with dev->mutex held.  The mutex is dropped while the peer's
  * mutex is held and is reacquired before returning.
  *
  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
  * 1 is also returned when the only buffers left to send were EOF
  * buffers after an EOF had already been sent.
  */
 int
 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
 {
 	struct kevent ke;
 	STAILQ_HEAD(, camdd_buf) local_queue;
 	struct camdd_buf *buf1, *buf2;
 	struct camdd_buf_data *data = NULL;
 	uint64_t peer_bytes_queued = 0;
 	int active = 1;
 	int retval = 0;
 
 	STAILQ_INIT(&local_queue);
 
 	/*
 	 * Since we're the reader, we need to queue our I/O to the writer
 	 * in sequential order in order to make sure it gets written out
 	 * in sequential order.
 	 *
 	 * Check the next expected I/O starting offset.  If this doesn't
 	 * match, put it on the reorder queue.
 	 */
 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
 
 		/*
 		 * If there is nothing on the queue, there is no sorting
 		 * needed.
 		 */
 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
 			dev->num_reorder_queue++;
 			goto bailout;
 		}
 
 		/*
 		 * Sort in ascending order by starting LBA.  There should
 		 * be no identical LBAs.
 		 */
 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
 		     buf1 = buf2) {
 			buf2 = STAILQ_NEXT(buf1, links);
 			if (buf->lba < buf1->lba) {
 				/*
 				 * If we're less than the first one, then
 				 * we insert at the head of the list
 				 * because this has to be the first element
 				 * on the list.
 				 */
 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
 						   buf, links);
 				dev->num_reorder_queue++;
 				break;
 			} else if (buf->lba > buf1->lba) {
 				/*
 				 * We belong somewhere after buf1: at the
 				 * tail if buf1 is last, or directly after
 				 * buf1 if we sort before its successor.
 				 * Otherwise keep walking.
 				 */
 				if (buf2 == NULL) {
 					STAILQ_INSERT_TAIL(&dev->reorder_queue, 
 					    buf, links);
 					dev->num_reorder_queue++;
 					break;
 				} else if (buf->lba < buf2->lba) {
 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
 					    buf1, buf, links);
 					dev->num_reorder_queue++;
 					break;
 				}
 			} else {
 				errx(1, "Found buffers with duplicate LBA %ju!",
 				     buf->lba);
 			}
 		}
 		goto bailout;
 	} else {
 
 		/*
 		 * We're the next expected I/O completion, so put ourselves
 		 * on the local queue to be sent to the writer.  We use
 		 * work_links here so that we can queue this to the 
 		 * peer_work_queue before taking the buffer off of the
 		 * local_queue.
 		 */
 		dev->next_completion_pos_bytes += buf->len;
 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
 
 		/*
 		 * Go through the reorder queue looking for more sequential
 		 * I/O and add it to the local queue.
 		 */
 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
 			/*
 			 * As soon as we see an I/O that is out of sequence,
 			 * we're done.
 			 */
 			if ((buf1->lba * dev->sector_size) !=
 			     dev->next_completion_pos_bytes)
 				break;
 
 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
 			dev->num_reorder_queue--;
 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
 			dev->next_completion_pos_bytes += buf1->len;
 		}
 	}
 
 	/*
 	 * Setup the event to let the other thread know that it has work
 	 * pending.
 	 */
 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
 	       NOTE_TRIGGER, 0, NULL);
 
 	/*
 	 * Put this on our shadow queue so that we know what we've queued
 	 * to the other thread.
 	 */
 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
 		if (buf1->buf_type != CAMDD_BUF_DATA) {
 			errx(1, "%s: should have a data buffer, not an "
 			    "indirect buffer", __func__);
 		}
 		data = &buf1->buf_type_spec.data;
 
 		/*
 		 * We only need to send one EOF to the writer, and don't
 		 * need to continue sending EOFs after that.
 		 */
 		if (buf1->status == CAMDD_STATUS_EOF) {
 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
 				    work_links);
 				camdd_release_buf(buf1);
 				retval = 1;
 				continue;
 			}
 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
 		}
 
 
 		/*
 		 * Track the buffer, and the bytes it carries, both for
 		 * this call (peer_bytes_queued) and for the device.
 		 */
 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
 		peer_bytes_queued += (data->fill_len - data->resid);
 		dev->peer_bytes_queued += (data->fill_len - data->resid);
 		dev->num_peer_work_queue++;
 	}
 
 	/* Everything may have been dropped above as a redundant EOF. */
 	if (STAILQ_FIRST(&local_queue) == NULL)
 		goto bailout;
 
 	/*
 	 * Drop our mutex and pick up the other thread's mutex.  We need to
 	 * do this to avoid deadlocks.
 	 */
 	pthread_mutex_unlock(&dev->mutex);
 	pthread_mutex_lock(&dev->peer_dev->mutex);
 
 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
 		/*
 		 * Put the buffers on the other thread's incoming work queue.
 		 */
 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
 		     buf1 = STAILQ_FIRST(&local_queue)) {
 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
 					   work_links);
 		}
 		/*
 		 * Send an event to the other thread's kqueue to let it know
 		 * that there is something on the work queue.
 		 */
 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
 		if (retval == -1)
 			warn("%s: unable to add peer work_queue kevent",
 			     __func__);
 		else
 			retval = 0;
 	} else
 		active = 0;
 
 	pthread_mutex_unlock(&dev->peer_dev->mutex);
 	pthread_mutex_lock(&dev->mutex);
 
 	/*
 	 * If the other side isn't active, run through the queue and
 	 * release all of the buffers.
 	 */
 	if (active == 0) {
 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
 		     buf1 = STAILQ_FIRST(&local_queue)) {
 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
 				      links);
 			dev->num_peer_work_queue--;
 			camdd_release_buf(buf1);
 		}
 		/* Undo the byte accounting done for this call. */
 		dev->peer_bytes_queued -= peer_bytes_queued;
 		retval = 1;
 	}
 
 bailout:
 	return (retval);
 }
 
 /*
  * Hand a buffer back to the peer thread by placing it on the peer's
  * peer_done_queue and triggering the peer's kqueue.
  *
  * Called with dev->mutex held.  The mutex is released while the peer's
  * mutex is held and is reacquired before returning.
  *
  * Returns 0 on success, or -1 if the kevent could not be posted (the
  * buffer has been queued regardless).
  */
 int
 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
 {
 	struct camdd_dev *peer = dev->peer_dev;
 	struct kevent ke;
 	int retval;
 
 	/* The user event is identified by the peer's done-queue address. */
 	EV_SET(&ke, (uintptr_t)&peer->peer_done_queue, EVFILT_USER, 0,
 	       NOTE_TRIGGER, 0, NULL);
 
 	/*
 	 * Swap locks: ours is dropped before the peer's is taken, so the
 	 * two are never held together while we touch the peer's queue.
 	 */
 	pthread_mutex_unlock(&dev->mutex);
 	pthread_mutex_lock(&peer->mutex);
 
 	STAILQ_INSERT_TAIL(&peer->peer_done_queue, peer_buf, work_links);
 	peer->num_peer_done_queue++;
 
 	/* Tell the peer there is something on its done queue. */
 	if (kevent(peer->kq, &ke, 1, NULL, 0, NULL) == -1) {
 		warn("%s: unable to add peer_done_queue kevent", __func__);
 		retval = -1;
 	} else {
 		retval = 0;
 	}
 
 	/* Swap back to our own lock before returning to the caller. */
 	pthread_mutex_unlock(&peer->mutex);
 	pthread_mutex_lock(&dev->mutex);
 
 	return (retval);
 }
 
 /*
  * Reclaim a buffer that the peer thread has finished with.
  *
  * The buffer is taken off its owning device's peer_work_queue, the
  * device's queued-count and queued-bytes accounting is reduced to match,
  * and the buffer is put on the device's free queue.  A buffer returned
  * with CAMDD_STATUS_EOF sets CAMDD_DEV_FLAG_PEER_EOF on the device.
  * Only data buffers are accepted; anything else exits via errx().
  */
 void
 camdd_peer_done(struct camdd_buf *buf)
 {
 	struct camdd_dev *dev = buf->dev;
 
 	if (buf->buf_type != CAMDD_BUF_DATA) {
 		errx(1, "%s: should have a data buffer, not an "
 		    "indirect buffer", __func__);
 	}
 
 	/* Stop tracking the buffer as outstanding at the peer. */
 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
 	dev->num_peer_work_queue--;
 	dev->peer_bytes_queued -= (buf->buf_type_spec.data.fill_len -
 	    buf->buf_type_spec.data.resid);
 
 	if (buf->status == CAMDD_STATUS_EOF)
 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
 
 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
 }
 
 /*
  * Finish processing a buffer whose I/O has completed.
  *
  * On the reader side (dev->write_dev == 0) the buffer is handed to the
  * writer with camdd_queue_peer_buf().  On the writer side every buffer
  * chained on buf->src_list is given buf's status and sent back to the
  * reader with camdd_complete_peer_buf(); indirect buffers first drop
  * their reference on the source buffer and are then returned to our
  * free indirect queue.  Finally buf itself goes back on our free queue.
  *
  * error_count, if not NULL, is incremented when queueing to the peer
  * fails.  Callers that do not collect an error count may pass NULL.
  *
  * Assumes caller holds the lock for this device.  Note that
  * camdd_complete_peer_buf() temporarily drops that lock while it takes
  * the peer's lock.
  */
 void
 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
 		   int *error_count)
 {
 	int retval = 0;
 
 	/*
 	 * If we're the reader, we need to send the completed I/O
 	 * to the writer.  If we're the writer, we need to just
 	 * free up resources, or let the reader know if we've
 	 * encountered an error.
 	 */
 	if (dev->write_dev == 0) {
 		retval = camdd_queue_peer_buf(dev, buf);
 		/*
 		 * The error counter is optional, so never dereference it
 		 * without checking.
 		 */
 		if ((retval != 0) && (error_count != NULL))
 			(*error_count)++;
 	} else {
 		struct camdd_buf *tmp_buf, *next_buf;
 
 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
 				    next_buf) {
 			struct camdd_buf *src_buf;
 			struct camdd_buf_indirect *indirect;
 
 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
 				      camdd_buf, src_links);
 
 			/* Every source buffer inherits the write status. */
 			tmp_buf->status = buf->status;
 
 			/* A data buffer goes straight back to the reader. */
 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
 				camdd_complete_peer_buf(dev, tmp_buf);
 				continue;
 			}
 
 			indirect = &tmp_buf->buf_type_spec.indirect;
 			src_buf = indirect->src_buf;
 			src_buf->refcount--;
 			/*
 			 * XXX KDM we probably need to account for
 			 * exactly how many bytes we were able to
 			 * write.  Allocate the residual to the
 			 * first N buffers?  Or just track the
 			 * number of bytes written?  Right now the reader
 			 * doesn't do anything with a residual.
 			 */
 			src_buf->status = buf->status;
 			/*
 			 * Only return the source buffer once the last
 			 * indirect reference to it is gone.
 			 */
 			if (src_buf->refcount <= 0)
 				camdd_complete_peer_buf(dev, src_buf);
 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
 					   tmp_buf, links);
 		}
 
 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
 	}
 }
 
 /*
  * Fetch all completed commands from the pass(4) device.
  *
  * Returns the number of commands received, or -1 if any of the commands
  * completed with an error.  Returns 0 if no commands are available.
  *
  * The function starts by unlocking dev->mutex, so the caller is expected
  * to hold it.  The mutex is released around each CAMIOGET ioctl, taken
  * again while a completed buffer is processed, and held on every return
  * path.
  */
 int
 camdd_pass_fetch(struct camdd_dev *dev)
 {
 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
 	union ccb ccb;
 	int retval = 0, num_fetched = 0, error_count = 0;
 
 	pthread_mutex_unlock(&dev->mutex);
 	/*
 	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
 	 */
 	/* Keep pulling completed CCBs until the ioctl reports failure. */
 	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
 		struct camdd_buf *buf;
 		struct camdd_buf_data *data;
 		cam_status ccb_status;
 		union ccb *buf_ccb;
 
 		/*
 		 * The buffer pointer was stored in the CCB header by
 		 * camdd_pass_run() when the command was queued.
 		 */
 		buf = ccb.ccb_h.ccb_buf;
 		data = &buf->buf_type_spec.data;
 		buf_ccb = &data->ccb;
 
 		num_fetched++;
 
 		/*
 		 * Copy the CCB back out so we get status, sense data, etc.
 		 */
 		bcopy(&ccb, buf_ccb, sizeof(ccb));
 
 		pthread_mutex_lock(&dev->mutex);
 
 		/*
 		 * We're now done, so take this off the active queue.
 		 */
 		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
 		dev->cur_active_io--;
 
 		/* Print full error details for anything but clean completion. */
 		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
 		if (ccb_status != CAM_REQ_CMP) {
 			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
 					CAM_EPF_ALL, stderr);
 		}
 
 		switch (pass_dev->protocol) {
 		case PROTO_SCSI:
 			/* Account only for the bytes actually moved. */
 			data->resid = ccb.csio.resid;
 			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
 			break;
 		default:
 			/*
 			 * NOTE(review): this returns with the mutex held
 			 * (as on the normal exit), but buf has already been
 			 * removed from the active queue and is not placed on
 			 * any other queue here -- confirm this path cannot
 			 * be reached for a probed device.
 			 */
 			return -1;
 			break;
 		}
 
 		/* Do not overwrite a status that was set before completion. */
 		if (buf->status == CAMDD_STATUS_NONE)
 			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
 		if (buf->status == CAMDD_STATUS_ERROR)
 			error_count++;
 		else if (buf->status == CAMDD_STATUS_EOF) {
 			/*
 			 * Once we queue this buffer to our partner thread,
 			 * he will know that we've hit EOF.
 			 */
 			dev->flags |= CAMDD_DEV_FLAG_EOF;
 		}
 
 		camdd_complete_buf(dev, buf, &error_count);
 
 		/*
 		 * Unlock in preparation for the ioctl call.
 		 */
 		pthread_mutex_unlock(&dev->mutex);
 	}
 
 	/* Hand the lock back to the caller. */
 	pthread_mutex_lock(&dev->mutex);
 
 	if (error_count > 0)
 		return (-1);
 	else
 		return (num_fetched);
 }
 
 /*
  * Perform one synchronous read or write on a file device, using the
  * buffer at the head of the run queue, and then complete that buffer
  * via camdd_complete_buf().
  *
  * Returns -1 for error, 0 for success/continue, and 1 for resource
  * shortage/stop processing.  Note that reaching EOF is counted as an
  * error for the purposes of the return value.
  *
  * The device mutex is released while the I/O system call is in progress
  * and is held again by the time the buffer is completed.
  */
 int
 camdd_file_run(struct camdd_dev *dev)
 {
 	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
 	struct camdd_buf_data *data;
 	struct camdd_buf *buf;
 	off_t io_offset;
 	int retval = 0, write_dev = dev->write_dev;
 	int error_count = 0, no_resources = 0, double_buf_needed = 0;
 	uint32_t num_sectors = 0, db_len = 0;
 
 	buf = STAILQ_FIRST(&dev->run_queue);
 	if (buf == NULL) {
 		/* Nothing to run. */
 		no_resources = 1;
 		goto bailout;
 	} else if ((dev->write_dev == 0)
 		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
 				  CAMDD_DEV_FLAG_EOF_SENT))) {
 		/*
 		 * A reader that has already seen or sent EOF does no more
 		 * I/O; the buffer is completed with EOF status instead.
 		 */
 		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
 		dev->num_run_queue--;
 		buf->status = CAMDD_STATUS_EOF;
 		error_count++;
 		goto bailout;
 	}
 
 	/*
 	 * If we're writing, we need to go through the source buffer list
 	 * and create an S/G list.
 	 */
 	if (write_dev != 0) {
 		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
 		    dev->sector_size, &num_sectors, &double_buf_needed);
 		if (retval != 0) {
 			/*
 			 * The buffer is still on the run queue at this
 			 * point; bailout will pass it to
 			 * camdd_complete_buf().
 			 */
 			no_resources = 1;
 			goto bailout;
 		}
 	}
 
 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
 	dev->num_run_queue--;
 
 	data = &buf->buf_type_spec.data;
 
 	/*
 	 * pread(2) and pwrite(2) offsets are byte offsets.
 	 */
 	io_offset = buf->lba * dev->sector_size;
 
 	/*
 	 * Unlock the mutex while we read or write.
 	 */
 	pthread_mutex_unlock(&dev->mutex);
 
 	/*
 	 * Note that we don't need to double buffer if we're the reader
 	 * because in that case, we have allocated a single buffer of
 	 * sufficient size to do the read.  This copy is necessary on
 	 * writes because if one of the components of the S/G list is not
 	 * a sector size multiple, the kernel will reject the write.  This
 	 * is unfortunate but not surprising.  So this will make sure that
 	 * we're using a single buffer that is a multiple of the sector size.
 	 */
 	if ((double_buf_needed != 0)
 	 && (data->sg_count > 1)
 	 && (write_dev != 0)) {
 		uint32_t cur_offset;
 		int i;
 
 		/* The bounce buffer is allocated once and then reused. */
 		if (file_dev->tmp_buf == NULL)
 			file_dev->tmp_buf = calloc(dev->blocksize, 1);
 		if (file_dev->tmp_buf == NULL) {
 			buf->status = CAMDD_STATUS_ERROR;
 			error_count++;
 			/* bailout expects the mutex to be held. */
 			pthread_mutex_lock(&dev->mutex);
 			goto bailout;
 		}
 		/*
 		 * Flatten the S/G list into the bounce buffer.
 		 * NOTE(review): presumably the segment lengths never add up
 		 * to more than dev->blocksize -- confirm against
 		 * camdd_buf_sg_create().
 		 */
 		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
 			bcopy(data->iovec[i].iov_base,
 			    &file_dev->tmp_buf[cur_offset],
 			    data->iovec[i].iov_len);
 			cur_offset += data->iovec[i].iov_len;
 		}
 		db_len = cur_offset;
 	}
 
 	/*
 	 * Seekable files use positioned I/O at io_offset; everything else
 	 * uses plain sequential read/write.
 	 *
 	 * NOTE(review): the write branches below select the bounce buffer
 	 * on double_buf_needed alone, while the copy above also requires
 	 * sg_count > 1.  Confirm that camdd_buf_sg_create() never sets
 	 * double_buf_needed for a list of one element or fewer.
 	 */
 	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
 		if (write_dev == 0) {
 			/*
 			 * XXX KDM is there any way we would need a S/G
 			 * list here?
 			 */
 			retval = pread(file_dev->fd, data->buf,
 			    buf->len, io_offset);
 		} else {
 			if (double_buf_needed != 0) {
 				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
 				    db_len, io_offset);
 			} else if (data->sg_count == 0) {
 				retval = pwrite(file_dev->fd, data->buf,
 				    data->fill_len, io_offset);
 			} else {
 				retval = pwritev(file_dev->fd, data->iovec,
 				    data->sg_count, io_offset);
 			}
 		}
 	} else {
 		if (write_dev == 0) {
 			/*
 			 * XXX KDM is there any way we would need a S/G
 			 * list here?
 			 */
 			retval = read(file_dev->fd, data->buf, buf->len);
 		} else {
 			if (double_buf_needed != 0) {
 				retval = write(file_dev->fd, file_dev->tmp_buf,
 				    db_len);
 			} else if (data->sg_count == 0) {
 				retval = write(file_dev->fd, data->buf,
 				    data->fill_len);
 			} else {
 				retval = writev(file_dev->fd, data->iovec,
 				    data->sg_count);
 			}
 		}
 	}
 
 	/* We're done, re-acquire the lock */
 	pthread_mutex_lock(&dev->mutex);
 
 	/*
 	 * Classify the result: complete (or overrun), error, EOF, or short
 	 * transfer.
 	 */
 	if (retval >= (ssize_t)data->fill_len) {
 		/*
 		 * If the bytes transferred is more than the request size,
 		 * that indicates an overrun, which should only happen at
 		 * the end of a transfer if we have to round up to a sector
 		 * boundary.
 		 */
 		if (buf->status == CAMDD_STATUS_NONE)
 			buf->status = CAMDD_STATUS_OK;
 		data->resid = 0;
 		dev->bytes_transferred += retval;
 	} else if (retval == -1) {
 		warn("Error %s %s", (write_dev) ? "writing to" :
 		    "reading from", file_dev->filename);
 
 		buf->status = CAMDD_STATUS_ERROR;
 		data->resid = data->fill_len;
 		error_count++;
 
 		/* The remaining output is debug-only detail. */
 		if (dev->debug == 0)
 			goto bailout;
 
 		if ((double_buf_needed != 0)
 		 && (write_dev != 0)) {
 			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
 			    "offset %ju\n", __func__, file_dev->fd,
 			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
 			    (uintmax_t)io_offset);
 		} else if (data->sg_count == 0) {
 			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
 			    "offset %ju\n", __func__, file_dev->fd, data->buf,
 			    data->fill_len, (uintmax_t)buf->lba,
 			    (uintmax_t)io_offset);
 		} else {
 			int i;
 
 			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
 			    "offset %ju\n", __func__, file_dev->fd, 
 			    data->fill_len, (uintmax_t)buf->lba,
 			    (uintmax_t)io_offset);
 
 			for (i = 0; i < data->sg_count; i++) {
 				fprintf(stderr, "index %d ptr %p len %zu\n",
 				    i, data->iovec[i].iov_base,
 				    data->iovec[i].iov_len);
 			}
 		}
 	} else if (retval == 0) {
 		/* A zero-byte transfer is treated as end of file. */
 		buf->status = CAMDD_STATUS_EOF;
 		if (dev->debug != 0)
 			printf("%s: got EOF from %s!\n", __func__,
 			    file_dev->filename);
 		data->resid = data->fill_len;
 		error_count++;
 	} else if (retval < (ssize_t)data->fill_len) {
 		/* Short transfer: record how much is missing. */
 		if (buf->status == CAMDD_STATUS_NONE)
 			buf->status = CAMDD_STATUS_SHORT_IO;
 		data->resid = data->fill_len - retval;
 		dev->bytes_transferred += retval;
 	}
 
 bailout:
 	if (buf != NULL) {
 		if (buf->status == CAMDD_STATUS_EOF) {
 			struct camdd_buf *buf2;
 			/*
 			 * On EOF, flag the device and mark everything still
 			 * waiting on the run queue as EOF too.
 			 */
 			dev->flags |= CAMDD_DEV_FLAG_EOF;
 			STAILQ_FOREACH(buf2, &dev->run_queue, links)
 				buf2->status = CAMDD_STATUS_EOF;
 		}
 
 		camdd_complete_buf(dev, buf, &error_count);
 	}
 
 	/* An error takes precedence over a resource shortage. */
 	if (error_count != 0)
 		return (-1);
 	else if (no_resources != 0)
 		return (1);
 	else
 		return (0);
 }
 
 /*
  * Execute one command from the run queue.  Returns 0 for success, 1 for
  * stop processing, and -1 for error.
  */
 int
 camdd_pass_run(struct camdd_dev *dev)
 {
 	struct camdd_buf *buf = NULL;
 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
 	struct camdd_buf_data *data;
 	uint32_t num_blocks, sectors_used = 0;
 	union ccb *ccb;
 	int retval = 0, is_write = dev->write_dev;
 	int double_buf_needed = 0;
 
 	buf = STAILQ_FIRST(&dev->run_queue);
 	if (buf == NULL) {
 		retval = 1;
 		goto bailout;
 	}
 
 	/*
 	 * If we're writing, we need to go through the source buffer list
 	 * and create an S/G list.
 	 */
 	if (is_write != 0) {
 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
 		    &sectors_used, &double_buf_needed);
 		if (retval != 0) {
 			retval = -1;
 			goto bailout;
 		}
 	}
 
 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
 	dev->num_run_queue--;
 
 	data = &buf->buf_type_spec.data;
 
 	/*
 	 * In almost every case the number of blocks should be the device
 	 * block size.  The exception may be at the end of an I/O stream
 	 * for a partial block or at the end of a device.
 	 */
 	if (is_write != 0)
 		num_blocks = sectors_used;
 	else
 		num_blocks = data->fill_len / pass_dev->block_len;
 
 	ccb = &data->ccb;
 
 	switch (pass_dev->protocol) {
 	case PROTO_SCSI:
 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
 
 		scsi_read_write(&ccb->csio,
 				/*retries*/ dev->retry_count,
 				/*cbfcnp*/ NULL,
 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
 					   SCSI_RW_WRITE,
 				/*byte2*/ 0,
 				/*minimum_cmd_size*/ dev->min_cmd_size,
 				/*lba*/ buf->lba,
 				/*block_count*/ num_blocks,
 				/*data_ptr*/ (data->sg_count != 0) ?
 					     (uint8_t *)data->segs : data->buf,
 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
 				/*sense_len*/ SSD_FULL_SIZE,
 				/*timeout*/ dev->io_timeout);
 
 		if (data->sg_count != 0) {
 			ccb->csio.sglist_cnt = data->sg_count;
 		}
 		break;
 	default:
 		retval = -1;
 		goto bailout;
 	}
 
 	/* Disable freezing the device queue */
 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
 
 	if (dev->retry_count != 0)
 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
 
 	if (data->sg_count != 0) {
 		ccb->ccb_h.flags |= CAM_DATA_SG;
 	}
 
 	/*
 	 * Store a pointer to the buffer in the CCB.  The kernel will
 	 * restore this when we get it back, and we'll use it to identify
 	 * the buffer this CCB came from.
 	 */
 	ccb->ccb_h.ccb_buf = buf;
 
 	/*
 	 * Unlock our mutex in preparation for issuing the ioctl.
 	 */
 	pthread_mutex_unlock(&dev->mutex);
 	/*
 	 * Queue the CCB to the pass(4) driver.
 	 */
 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
 		pthread_mutex_lock(&dev->mutex);
 
 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
 		warn("%s: CCB address is %p", __func__, ccb);
 		retval = -1;
 
 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
 	} else {
 		pthread_mutex_lock(&dev->mutex);
 
 		dev->cur_active_io++;
 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
 	}
 
 bailout:
 	return (retval);
 }
 
 /*
  * Work out the starting LBA and byte length of the next I/O for this
  * device, and advance dev->next_io_pos_bytes by that length.
  *
  * *lba is set from the current I/O position and *len starts out as
  * dev->blocksize.  When an end limit is configured (dev->max_sector,
  * dev->sector_io_limit, or the smaller of the two when both are set),
  * the length is trimmed so the I/O does not run past the last allowed
  * sector.
  *
  * Returns 0 if a full-sized I/O fits.  Returns 1 if the I/O had to be
  * trimmed to reach the end, or if the position is already past the end,
  * in which case *len is set to 0.
  */
 int
 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
 {
 	uint32_t num_blocks;
 	int retval = 0;
 
 	*lba = dev->next_io_pos_bytes / dev->sector_size;
 	*len = dev->blocksize;
 	num_blocks = *len / dev->sector_size;
 
 	/*
 	 * If max_sector is 0, then we have no set limit.  This can happen
 	 * if we're writing to a file in a filesystem, or reading from
 	 * something like /dev/zero.
 	 */
 	if ((dev->max_sector != 0)
 	 || (dev->sector_io_limit != 0)) {
 		uint64_t max_sector;
 
 		/* Use whichever configured limit is the most restrictive. */
 		if ((dev->max_sector != 0)
 		 && (dev->sector_io_limit != 0))
 			max_sector = min(dev->sector_io_limit, dev->max_sector);
 		else if (dev->max_sector != 0)
 			max_sector = dev->max_sector;
 		else
 			max_sector = dev->sector_io_limit;
 
 		/*
 		 * Check to see whether we're starting off past the end of
 		 * the device.  If so, we need to just send an EOF
 		 * notification to the writer.
 		 */
 		if (*lba > max_sector) {
 			*len = 0;
 			retval = 1;
 		} else if (((*lba + num_blocks) > max_sector + 1)
 			|| ((*lba + num_blocks) < *lba)) {
 			/*
 			 * If we get here (but pass the first check), we
 			 * can trim the request length down to go to the
 			 * end of the device.  The second test above catches
 			 * wraparound of the LBA arithmetic.
 			 */
 			num_blocks = (max_sector + 1) - *lba;
 			*len = num_blocks * dev->sector_size;
 			retval = 1;
 		}
 	}
 
 	dev->next_io_pos_bytes += *len;
 
 	return (retval);
 }
 
 /*
  * Queue work for this device.
  *
  * For a reader (dev->write_dev == 0) this allocates a data buffer,
  * assigns it the next LBA/length and places it on the run queue;
  * read_buf is not used on that path.  For a writer, read_buf is a buffer
  * received from the reader; its data is gathered into write buffers of
  * the writer's own block size.  A write buffer stays on the pending
  * queue until it is full or an EOF flush is needed, and then moves to
  * the run queue.  Data that does not fit in a single write buffer is
  * split across several of them using indirect buffers that point into
  * read_buf.
  *
  * Returns 0 for success, 1 for EOF detected, and -1 for failure.  On the
  * writer path 1 is also returned when a buffer cannot be obtained.
  */
 int
 camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
 {
 	struct camdd_buf *buf = NULL;
 	struct camdd_buf_data *data;
 	struct camdd_dev_pass *pass_dev;
 	size_t new_len;
 	struct camdd_buf_data *rb_data;
 	int is_write = dev->write_dev;
 	int eof_flush_needed = 0;
 	int retval = 0;
 	int error;
 
 	/*
 	 * NOTE(review): pass_dev and error are assigned in this function
 	 * but their values are never read.
 	 */
 	pass_dev = &dev->dev_spec.pass;
 
 	/*
 	 * If we've gotten EOF or our partner has, we should not continue
 	 * queueing I/O.  If we're a writer, though, we should continue
 	 * to write any buffers that don't have EOF status.
 	 */
 	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
 	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
 	  && (is_write == 0))) {
 		/*
 		 * Tell the worker thread that we have seen EOF.
 		 */
 		retval = 1;
 
 		/*
 		 * If we're the writer, send the buffer back with EOF status.
 		 */
 		if (is_write) {
 			read_buf->status = CAMDD_STATUS_EOF;
 			
 			error = camdd_complete_peer_buf(dev, read_buf);
 		}
 		goto bailout;
 	}
 
 	/* Reader path: set up the next read and put it on the run queue. */
 	if (is_write == 0) {
 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
 		if (buf == NULL) {
 			retval = -1;
 			goto bailout;
 		}
 		data = &buf->buf_type_spec.data;
 
 		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
 		if (retval != 0) {
 			buf->status = CAMDD_STATUS_EOF;
 
 			/*
 			 * A zero-length EOF buffer is only needed once; if
 			 * EOF has already been queued or sent, drop this
 			 * one.
 			 */
 		 	if ((buf->len == 0)
 			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
 			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
 				camdd_release_buf(buf);
 				goto bailout;
 			}
 			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
 		}
 
 		data->fill_len = buf->len;
 		data->src_start_offset = buf->lba * dev->sector_size;
 
 		/*
 		 * Put this on the run queue.
 		 */
 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
 		dev->num_run_queue++;
 
 		/* We're done. */
 		goto bailout;
 	}
 
 	/* Everything from here on is the writer path. */
 
 	/*
 	 * Check for new EOF status from the reader.
 	 */
 	if ((read_buf->status == CAMDD_STATUS_EOF)
 	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
 		/*
 		 * With nothing pending and no data in this buffer there is
 		 * nothing to flush, so hand the buffer straight back.
 		 */
 		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
 		 && (read_buf->len == 0)) {
 			camdd_complete_peer_buf(dev, read_buf);
 			retval = 1;
 			goto bailout;
 		} else
 			eof_flush_needed = 1;
 	}
 
 	/*
 	 * See if we have a buffer we're composing with pieces from our
 	 * partner thread.
 	 */
 	buf = STAILQ_FIRST(&dev->pending_queue);
 	if (buf == NULL) {
 		uint64_t lba;
 		ssize_t len;
 
 		retval = camdd_get_next_lba_len(dev, &lba, &len);
 		if (retval != 0) {
 			read_buf->status = CAMDD_STATUS_EOF;
 
 			/* No room left on the output: we are at EOF too. */
 			if (len == 0) {
 				dev->flags |= CAMDD_DEV_FLAG_EOF;
 				error = camdd_complete_peer_buf(dev, read_buf);
 				goto bailout;
 			}
 		}
 
 		/*
 		 * If we don't have a pending buffer, we need to grab a new
 		 * one from the free list or allocate another one.
 		 */
 		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
 		if (buf == NULL) {
 			retval = 1;
 			goto bailout;
 		}
 
 		buf->lba = lba;
 		buf->len = len;
 
 		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
 		dev->num_pending_queue++;
 	}
 
 	data = &buf->buf_type_spec.data;
 
 	rb_data = &read_buf->buf_type_spec.data;
 
 	/* In debug mode, report a gap or overlap in the reader's stream. */
 	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
 	 && (dev->debug != 0)) {
 		printf("%s: WARNING: reader offset %#jx != expected offset "
 		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
 		    (uintmax_t)dev->next_peer_pos_bytes);
 	}
 	dev->next_peer_pos_bytes = rb_data->src_start_offset +
 	    (rb_data->fill_len - rb_data->resid);
 
 	/*
 	 * new_len is what the pending write buffer would hold once the
 	 * valid bytes of read_buf are added to it.
 	 */
 	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
 	if (new_len < buf->len) {
 		/*
 		 * There are three cases here:
 		 * 1. We need more data to fill up a block, so we put 
 		 *    this I/O on the queue and wait for more I/O.
 		 * 2. We have a pending buffer in the queue that is
 		 *    smaller than our blocksize, but we got an EOF.  So we
 		 *    need to go ahead and flush the write out.
 		 * 3. We got an error.
 		 */
 
 		/*
 		 * Increment our fill length.
 		 */
 		data->fill_len += (rb_data->fill_len - rb_data->resid);
 
 		/*
 		 * Add the new read buffer to the list for writing.
 		 */
 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
 
 		/* Increment the count */
 		buf->src_count++;
 
 		if (eof_flush_needed == 0) {
 			/*
 			 * We need to exit, because we don't have enough
 			 * data yet.
 			 */
 			goto bailout;
 		} else {
 			/*
 			 * Take the buffer off of the pending queue.
 			 */
 			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
 				      links);
 			dev->num_pending_queue--;
 
 			/*
 			 * If we need an EOF flush, but there is no data
 			 * to flush, go ahead and return this buffer.
 			 * No error count is collected here: a null
 			 * pointer is passed for error_count.
 			 */
 			if (data->fill_len == 0) {
 				camdd_complete_buf(dev, buf, /*error_count*/0);
 				retval = 1;
 				goto bailout;
 			}
 
 			/*
 			 * Put this on the next queue for execution.
 			 */
 			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
 			dev->num_run_queue++;
 		}
 	} else if (new_len == buf->len) {
 		/*
 		 * We have enough data to completely fill one block,
 		 * so we're ready to issue the I/O.
 		 */
 
 		/*
 		 * Take the buffer off of the pending queue.
 		 */
 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
 		dev->num_pending_queue--;
 
 		/*
 		 * Add the new read buffer to the list for writing.
 		 */
 		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
 
 		/* Increment the count */
 		buf->src_count++;
 
 		/*
 		 * Increment our fill length.
 		 */
 		data->fill_len += (rb_data->fill_len - rb_data->resid);
 
 		/*
 		 * Put this on the next queue for execution.
 		 */
 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
 		dev->num_run_queue++;
 	} else {
 		/*
 		 * read_buf holds more than the pending write buffer can
 		 * take.  Indirect buffers are used to reference slices of
 		 * read_buf; each one takes a reference on read_buf.
 		 */
 		struct camdd_buf *idb;
 		struct camdd_buf_indirect *indirect;
 		uint32_t len_to_go, cur_offset;
 
 		
 		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
 		if (idb == NULL) {
 			retval = 1;
 			goto bailout;
 		}
 		indirect = &idb->buf_type_spec.indirect;
 		indirect->src_buf = read_buf;
 		read_buf->refcount++;
 		indirect->offset = 0;
 		indirect->start_ptr = rb_data->buf;
 		/*
 		 * We've already established that there is more
 		 * data in read_buf than we have room for in our
 		 * current write request.  So this particular chunk
 		 * of the request should just be the remainder
 		 * needed to fill up a block.
 		 */
 		indirect->len = buf->len - (data->fill_len - data->resid);
 
 		camdd_buf_add_child(buf, idb);
 
 		/*
 		 * This buffer is ready to execute, so we can take
 		 * it off the pending queue and put it on the run
 		 * queue.
 		 */
 		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
 			      links);
 		dev->num_pending_queue--;
 		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
 		dev->num_run_queue++;
 
 		/* Offset in read_buf of the first byte not yet assigned. */
 		cur_offset = indirect->offset + indirect->len;
 
 		/*
 		 * The resulting I/O would be too large to fit in
 		 * one block.  We need to split this I/O into
 		 * multiple pieces.  Allocate as many buffers as needed.
 		 */
 		for (len_to_go = rb_data->fill_len - rb_data->resid -
 		     indirect->len; len_to_go > 0;) {
 			struct camdd_buf *new_buf;
 			struct camdd_buf_data *new_data;
 			uint64_t lba;
 			ssize_t len;
 
 			retval = camdd_get_next_lba_len(dev, &lba, &len);
 			if ((retval != 0)
 			 && (len == 0)) {
 				/*
 				 * The device has already been marked
 				 * as EOF, and there is no space left.
 				 */
 				goto bailout;
 			}
 
 			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
 			if (new_buf == NULL) {
 				retval = 1;
 				goto bailout;
 			}
 
 			new_buf->lba = lba;
 			new_buf->len = len;
 
 			/*
 			 * NOTE(review): if this allocation fails, new_buf
 			 * has not been put on any queue or released before
 			 * the bailout -- confirm whether it is reclaimed
 			 * elsewhere.
 			 */
 			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
 			if (idb == NULL) {
 				retval = 1;
 				goto bailout;
 			}
 
 			indirect = &idb->buf_type_spec.indirect;
 
 			indirect->src_buf = read_buf;
 			read_buf->refcount++;
 			indirect->offset = cur_offset;
 			indirect->start_ptr = rb_data->buf + cur_offset;
 			/* Take as much as is left, up to one write buffer. */
 			indirect->len = min(len_to_go, new_buf->len);
 #if 0
 			if (((indirect->len % dev->sector_size) != 0)
 			 || ((indirect->offset % dev->sector_size) != 0)) {
 				warnx("offset %ju len %ju not aligned with "
 				    "sector size %u", indirect->offset,
 				    (uintmax_t)indirect->len, dev->sector_size);
 			}
 #endif
 			cur_offset += indirect->len;
 			len_to_go -= indirect->len;
 
 			camdd_buf_add_child(new_buf, idb);
 
 			new_data = &new_buf->buf_type_spec.data;
 
 			/*
 			 * A full buffer, or any buffer when flushing for
 			 * EOF, is ready to run; a partial one waits on the
 			 * pending queue for more data.
 			 * NOTE(review): the second test compares against
 			 * buf->len rather than new_buf->len -- confirm
 			 * that this is intended.
 			 */
 			if ((new_data->fill_len == new_buf->len)
 			 || (eof_flush_needed != 0)) {
 				STAILQ_INSERT_TAIL(&dev->run_queue,
 						   new_buf, links);
 				dev->num_run_queue++;
 			} else if (new_data->fill_len < buf->len) {
 				STAILQ_INSERT_TAIL(&dev->pending_queue,
 					   	new_buf, links);
 				dev->num_pending_queue++;
 			} else {
 				warnx("%s: too much data in new "
 				      "buffer!", __func__);
 				retval = 1;
 				goto bailout;
 			}
 		}
 	}
 
 bailout:
 	return (retval);
 }
 
 /*
  * Report how much work is outstanding.
  *
  * *our_depth is the number of I/Os that are active or waiting on the
  * run queue, and *our_bytes is that depth multiplied by dev->blocksize.
  * *peer_depth is the number of buffers on the peer work queue that are
  * not yet on the peer done queue (never less than zero), and
  * *peer_bytes is dev->peer_bytes_queued.
  */
 void
 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
 {
 	*our_depth = dev->cur_active_io + dev->num_run_queue;
 
 	/* Clamp at zero instead of letting the subtraction go negative. */
 	if (dev->num_peer_work_queue <= dev->num_peer_done_queue) {
 		*peer_depth = 0;
 	} else {
 		*peer_depth = dev->num_peer_work_queue -
 			      dev->num_peer_done_queue;
 	}
 
 	*our_bytes = *our_depth * dev->blocksize;
 	*peer_bytes = dev->peer_bytes_queued;
 }
 
 /*
  * Signal handler.  SIGINFO requests a status report; any other signal
  * requests an exit and marks the run as failed.  In both cases the
  * semaphore is posted so that the thread waiting on it notices.
  */
 void
 camdd_sig_handler(int sig)
 {
 	switch (sig) {
 	case SIGINFO:
 		need_status = 1;
 		break;
 	default:
 		need_exit = 1;
 		error_exit = 1;
 		break;
 	}
 
 	sem_post(&camdd_sem);
 }
 
 /*
  * Print a transfer summary to stderr: the number of bytes moved by each
  * of the two devices, the time elapsed since *start_time, and the
  * resulting throughput in MB/sec (based on the smaller of the two byte
  * counts).
  *
  * If the current time cannot be read, a warning is printed and nothing
  * else is reported.
  */
 void
 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
 		   struct timespec *start_time)
 {
 	struct timespec done_time;
 	uint64_t total_ns;
 	long double mb_sec, total_sec;
 	int error = 0;
 
 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
 	if (error != 0) {
 		warn("Unable to get done time");
 		return;
 	}
 
 	/* done_time now holds the elapsed time. */
 	timespecsub(&done_time, start_time, &done_time);
 
 	/*
 	 * Widen tv_sec before multiplying.  Otherwise the product is
 	 * computed in time_t, which overflows after about two seconds
 	 * where time_t is 32 bits wide.
 	 */
 	total_ns = ((uint64_t)done_time.tv_sec * 1000000000ULL) +
 	    (uint64_t)done_time.tv_nsec;
 	total_sec = total_ns;
 	total_sec /= 1000000000;
 
 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
 		"%.4Lf seconds elapsed\n",
 		(uintmax_t)camdd_dev->bytes_transferred,
 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
 		camdd_dev->device_name,
 		(uintmax_t)other_dev->bytes_transferred,
 		(other_dev->write_dev == 0) ? "read from" : "written to",
 		other_dev->device_name, total_sec);
 
 	/* Throughput: bytes -> MB, then scale nanoseconds to seconds. */
 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
 	mb_sec /= 1024 * 1024;
 	mb_sec *= 1000000000;
 	mb_sec /= total_ns;
 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
 }
 
 int
 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
 	 int retry_count, int timeout)
 {
 	struct cam_device *new_cam_dev = NULL;
 	struct camdd_dev *devs[2];
 	struct timespec start_time;
 	pthread_t threads[2];
 	int unit = 0;
 	int error = 0;
 	int i;
 
 	if (num_io_opts != 2) {
 		warnx("Must have one input and one output path");
 		error = 1;
 		goto bailout;
 	}
 
 	bzero(devs, sizeof(devs));
 
 	for (i = 0; i < num_io_opts; i++) {
 		switch (io_opts[i].dev_type) {
 		case CAMDD_DEV_PASS: {
 			if (isdigit(io_opts[i].dev_name[0])) {
 				camdd_argmask new_arglist = CAMDD_ARG_NONE;
 				int bus = 0, target = 0, lun = 0;
 				int rv;
 
 				/* device specified as bus:target[:lun] */
 				rv = parse_btl(io_opts[i].dev_name, &bus,
 				    &target, &lun, &new_arglist);
 				if (rv < 2) {
 					warnx("numeric device specification "
 					     "must be either bus:target, or "
 					     "bus:target:lun");
 					error = 1;
 					goto bailout;
 				}
 				/* default to 0 if lun was not specified */
 				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
 					lun = 0;
 					new_arglist |= CAMDD_ARG_LUN;
 				}
 				new_cam_dev = cam_open_btl(bus, target, lun,
 				    O_RDWR, NULL);
 			} else {
 				char name[30];
 
 				if (cam_get_device(io_opts[i].dev_name, name,
 						   sizeof name, &unit) == -1) {
 					warnx("%s", cam_errbuf);
 					error = 1;
 					goto bailout;
 				}
 				new_cam_dev = cam_open_spec_device(name, unit,
 				    O_RDWR, NULL);
 			}
 
 			if (new_cam_dev == NULL) {
 				warnx("%s", cam_errbuf);
 				error = 1;
 				goto bailout;
 			}
 
 			devs[i] = camdd_probe_pass(new_cam_dev,
 			    /*io_opts*/ &io_opts[i],
 			    CAMDD_ARG_ERR_RECOVER, 
 			    /*probe_retry_count*/ 3,
 			    /*probe_timeout*/ 5000,
 			    /*io_retry_count*/ retry_count,
 			    /*io_timeout*/ timeout);
 			if (devs[i] == NULL) {
 				warn("Unable to probe device %s%u",
 				     new_cam_dev->device_name,
 				     new_cam_dev->dev_unit_num);
 				error = 1;
 				goto bailout;
 			}
 			break;
 		}
 		case CAMDD_DEV_FILE: {
 			int fd = -1;
 
 			if (io_opts[i].dev_name[0] == '-') {
 				if (io_opts[i].write_dev != 0)
 					fd = STDOUT_FILENO;
 				else
 					fd = STDIN_FILENO;
 			} else {
 				if (io_opts[i].write_dev != 0) {
 					fd = open(io_opts[i].dev_name,
 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
 				} else {
 					fd = open(io_opts[i].dev_name,
 					    O_RDONLY);
 				}
 			}
 			if (fd == -1) {
 				warn("error opening file %s",
 				    io_opts[i].dev_name);
 				error = 1;
 				goto bailout;
 			}
 
 			devs[i] = camdd_probe_file(fd, &io_opts[i],
 			    retry_count, timeout);
 			if (devs[i] == NULL) {
 				error = 1;
 				goto bailout;
 			}
 
 			break;
 		}
 		default:
 			warnx("Unknown device type %d (%s)",
 			    io_opts[i].dev_type, io_opts[i].dev_name);
 			error = 1;
 			goto bailout;
 			break; /*NOTREACHED */
 		}
 
 		devs[i]->write_dev = io_opts[i].write_dev;
 
 		devs[i]->start_offset_bytes = io_opts[i].offset;
 
 		if (max_io != 0) {
 			devs[i]->sector_io_limit =
 			    (devs[i]->start_offset_bytes /
 			    devs[i]->sector_size) +
 			    (max_io / devs[i]->sector_size) - 1;
 		}
 
 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
 	}
 
 	devs[0]->peer_dev = devs[1];
 	devs[1]->peer_dev = devs[0];
 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
 
 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
 
 	signal(SIGINFO, camdd_sig_handler);
 	signal(SIGINT, camdd_sig_handler);
 
 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
 	if (error != 0) {
 		warn("Unable to get start time");
 		goto bailout;
 	}
 
 	for (i = 0; i < num_io_opts; i++) {
 		error = pthread_create(&threads[i], NULL, camdd_worker,
 				       (void *)devs[i]);
 		if (error != 0) {
 			warnc(error, "pthread_create() failed");
 			goto bailout;
 		}
 	}
 
 	for (;;) {
 		if ((sem_wait(&camdd_sem) == -1)
 		 || (need_exit != 0)) {
 			struct kevent ke;
 
 			for (i = 0; i < num_io_opts; i++) {
 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
 
 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
 
 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
 						NULL);
 				if (error == -1)
 					warn("%s: unable to wake up thread",
 					    __func__);
 				error = 0;
 			}
 			break;
 		} else if (need_status != 0) {
 			camdd_print_status(devs[0], devs[1], &start_time);
 			need_status = 0;
 		}
 	} 
 	for (i = 0; i < num_io_opts; i++) {
 		pthread_join(threads[i], NULL);
 	}
 
 	camdd_print_status(devs[0], devs[1], &start_time);
 
 bailout:
 
 	for (i = 0; i < num_io_opts; i++)
 		camdd_free_dev(devs[i]);
 
 	return (error + error_exit);
 }
 
 /*
  * Write the command synopsis and the option summary to stderr.
  */
 void
 usage(void)
 {
 	static const char usage_text[] =
 "usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
 "              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
 "              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
 "              <-i|-o file=/dev/nsa0,bs=512K>\n"
 "              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
 "Option description\n"
 "-i <arg=val>  Specify input device/file and parameters\n"
 "-o <arg=val>  Specify output device/file and parameters\n"
 "Input and Output parameters\n"
 "pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
 "file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
 "              or - for stdin/stdout\n"
 "bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
 "offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
 "              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
 "depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
 "mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
 "Optional arguments\n"
 "-C retry_cnt  Specify a retry count for pass(4) devices\n"
 "-E            Enable CAM error recovery for pass(4) devices\n"
 "-m max_io     Specify the maximum amount to be transferred in bytes or\n"
 "              using K, G, M, etc. suffixes\n"
 "-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
 "-v            Enable verbose error recovery\n"
 "-h            Print this message\n";
 
 	/* The text has no conversion specifications; emit it verbatim. */
 	fputs(usage_text, stderr);
 }
 
 
 /*
  * Parse the comma separated list of "name=value" parameters supplied to
  * -i or -o and record the results in io_opts.
  *
  * args     - the parameter string.  It is duplicated before being split,
  *            so the caller's copy is left untouched.
  * is_write - stored verbatim in io_opts->write_dev.
  * io_opts  - receives the parsed settings.  dev_name is allocated with
  *            strdup(3) and is not freed by this function.
  *
  * Parameter names are compared case-insensitively and only over the
  * length of the keyword, so a longer name that merely starts with a
  * keyword (e.g. "filename") is accepted as that keyword.  A name that
  * matches no keyword only produces a warning; it is not an error.
  *
  * Returns 0 on success and 1 on failure, after printing a warning.
  */
 int
 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
 {
 	char *tmpstr, *tmpstr2;
 	char *orig_tmpstr = NULL;
 	int retval = 0;
 
 	io_opts->write_dev = is_write;
 
 	tmpstr = strdup(args);
 	if (tmpstr == NULL) {
 		warn("strdup failed");
 		retval = 1;
 		goto bailout;
 	}
 	/* strsep(3) advances tmpstr, so remember what has to be freed. */
 	orig_tmpstr = tmpstr;
 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
 		char *name, *value;
 
 		/*
 		 * If the user creates an empty parameter by putting in two
 		 * commas, skip over it and look for the next field.
 		 */
 		if (*tmpstr2 == '\0')
 			continue;
 
 		name = strsep(&tmpstr2, "=");
 		if (*name == '\0') {
 			warnx("Got empty I/O parameter name");
 			retval = 1;
 			goto bailout;
 		}
 		/*
 		 * Everything after the first '=' is the value.  After the
 		 * strsep() above, tmpstr2 points at that remainder, or is
 		 * NULL when the field had no '=' at all.  Splitting a
 		 * second time would silently cut the value off at the next
 		 * '=', e.g. turning file=/tmp/a=b into /tmp/a.
 		 */
 		value = tmpstr2;
 		if ((value == NULL)
 		 || (*value == '\0')) {
 			warnx("Empty I/O parameter value for %s", name);
 			retval = 1;
 			goto bailout;
 		}
 		if (strncasecmp(name, "file", 4) == 0) {
 			io_opts->dev_type = CAMDD_DEV_FILE;
 			io_opts->dev_name = strdup(value);
 			if (io_opts->dev_name == NULL) {
 				warn("Error allocating memory");
 				retval = 1;
 				goto bailout;
 			}
 		} else if (strncasecmp(name, "pass", 4) == 0) {
 			io_opts->dev_type = CAMDD_DEV_PASS;
 			io_opts->dev_name = strdup(value);
 			if (io_opts->dev_name == NULL) {
 				warn("Error allocating memory");
 				retval = 1;
 				goto bailout;
 			}
 		} else if ((strncasecmp(name, "bs", 2) == 0)
 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
 			retval = expand_number(value, &io_opts->blocksize);
 			if (retval == -1) {
 				warn("expand_number(3) failed on %s=%s", name,
 				    value);
 				retval = 1;
 				goto bailout;
 			}
 		} else if (strncasecmp(name, "depth", 5) == 0) {
 			char *endptr;
 
 			/*
 			 * value is known to be non-empty here, so a
 			 * terminating NUL at endptr means the whole string
 			 * was consumed as a number.
 			 */
 			io_opts->queue_depth = strtoull(value, &endptr, 0);
 			if (*endptr != '\0') {
 				warnx("invalid queue depth %s", value);
 				retval = 1;
 				goto bailout;
 			}
 		} else if (strncasecmp(name, "mcs", 3) == 0) {
 			char *endptr;
 			long cmd_size;
 
 			/*
 			 * Range check the full-width result before storing
 			 * it, so that an out of range number cannot be
 			 * narrowed into the accepted 0..16 window by the
 			 * assignment.
 			 */
 			cmd_size = strtol(value, &endptr, 0);
 			if ((*endptr != '\0')
 			 || ((cmd_size > 16)
 			  || (cmd_size < 0))) {
 				warnx("invalid minimum cmd size %s", value);
 				retval = 1;
 				goto bailout;
 			}
 			io_opts->min_cmd_size = cmd_size;
 		} else if (strncasecmp(name, "offset", 6) == 0) {
 			retval = expand_number(value, &io_opts->offset);
 			if (retval == -1) {
 				warn("expand_number(3) failed on %s=%s", name,
 				    value);
 				retval = 1;
 				goto bailout;
 			}
 		} else if (strncasecmp(name, "debug", 5) == 0) {
 			char *endptr;
 
 			io_opts->debug = strtoull(value, &endptr, 0);
 			if (*endptr != '\0') {
 				warnx("invalid debug level %s", value);
 				retval = 1;
 				goto bailout;
 			}
 		} else {
 			/* Unknown names are reported but otherwise ignored. */
 			warnx("Unrecognized parameter %s=%s", name, value);
 		}
 	}
 bailout:
 	free(orig_tmpstr);
 
 	return (retval);
 }
 
 /*
  * Program entry point: parse the command line, then hand the input and
  * output descriptions to camdd_rw().
  *
  * opt_list[0] describes the input (-i) and opt_list[1] the output (-o);
  * both must be given, and each may be given only once.  The array is
  * zero-filled by calloc(), and the "already specified" tests compare
  * dev_type against CAMDD_DEV_NONE.
  *
  * The process exits with the value returned by camdd_rw(), or with 1 when
  * argument parsing or allocation fails.
  *
  * NOTE(review): arglist is accumulated below but is not consumed anywhere
  * in this function; it is not among the arguments given to camdd_rw().
  */
 int
 main(int argc, char **argv)
 {
 	int c;
 	camdd_argmask arglist = CAMDD_ARG_NONE;
 	int timeout = 0, retry_count = 1;
 	int error = 0;
 	uint64_t max_io = 0;
 	struct camdd_io_opts *opt_list = NULL;
 
 	if (argc == 1) {
 		usage();
 		exit(1);
 	}
 
 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
 	if (opt_list == NULL) {
 		warn("Unable to allocate option list");
 		error = 1;
 		goto bailout;
 	}
 
 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
 		switch (c) {
 		case 'C': {
 			char *endptr;
 			long long count;
 
 			/*
 			 * Refuse empty, non-numeric and trailing-garbage
 			 * arguments instead of quietly using whatever
 			 * prefix happened to parse, and make sure the
 			 * number survives being narrowed to an int.
 			 */
 			count = strtoll(optarg, &endptr, 0);
 			if ((endptr == optarg)
 			 || (*endptr != '\0')
 			 || ((long long)(int)count != count))
 				errx(1, "invalid retry count %s", optarg);
 			retry_count = (int)count;
 			if (retry_count < 0)
 				errx(1, "retry count %d is < 0",
 				     retry_count);
 			arglist |= CAMDD_ARG_RETRIES;
 			break;
 		}
 		case 'E':
 			arglist |= CAMDD_ARG_ERR_RECOVER;
 			break;
 		case 'i':
 		case 'o':
 			if (((c == 'i')
 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
 			 || ((c == 'o')
 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
 				errx(1, "Only one input and output path "
 				    "allowed");
 			}
 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
 			if (error != 0)
 				goto bailout;
 			break;
 		case 'm':
 			error = expand_number(optarg, &max_io);
 			if (error == -1) {
 				warn("invalid maximum I/O amount %s", optarg);
 				error = 1;
 				goto bailout;
 			}
 			break;
 		case 't': {
 			char *endptr;
 			long long secs, msecs;
 
 			/* Same validation as for the retry count above. */
 			secs = strtoll(optarg, &endptr, 0);
 			if ((endptr == optarg)
 			 || (*endptr != '\0')
 			 || ((long long)(int)secs != secs))
 				errx(1, "invalid timeout %s", optarg);
 			if (secs < 0)
 				errx(1, "invalid timeout %d", (int)secs);
 			/*
 			 * Convert the timeout from seconds to ms.  The
 			 * multiplication is done in a wider type, and the
 			 * result checked, so that a large value is
 			 * rejected rather than overflowing the int.
 			 */
 			msecs = secs * 1000;
 			if ((long long)(int)msecs != msecs)
 				errx(1, "timeout %s is too large", optarg);
 			timeout = (int)msecs;
 			arglist |= CAMDD_ARG_TIMEOUT;
 			break;
 		}
 		case 'v':
 			arglist |= CAMDD_ARG_VERBOSE;
 			break;
 		case 'h':
 		default:
 			usage();
 			exit(1);
 			break; /*NOTREACHED*/
 		}
 	}
 
 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
 		errx(1, "Must specify both -i and -o");
 
 	/*
 	 * Set the timeout if the user hasn't specified one.  An explicit
 	 * "-t 0" also ends up here and gets the default.
 	 */
 	if (timeout == 0)
 		timeout = CAMDD_PASS_RW_TIMEOUT;
 
 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
 
 bailout:
 	free(opt_list);
 
 	exit(error);
 }