Index: head/sys/arm64/arm64/busdma_bounce.c =================================================================== --- head/sys/arm64/arm64/busdma_bounce.c (revision 347835) +++ head/sys/arm64/arm64/busdma_bounce.c (revision 347836) @@ -1,1330 +1,1358 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship of the FreeBSD Foundation. * * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 4096 enum { BF_COULD_BOUNCE = 0x01, BF_MIN_ALLOC_COMP = 0x02, BF_KMEM_ALLOC = 0x04, BF_COHERENT = 0x10, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct sync_list { vm_offset_t vaddr; /* kva of client data */ bus_addr_t paddr; /* physical address */ vm_page_t pages; /* starting page of client data */ bus_size_t datacount; /* client data count */ }; struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; 
STAILQ_ENTRY(bus_dmamap) links; u_int flags; #define DMAMAP_COULD_BOUNCE (1 << 0) #define DMAMAP_FROM_DMAMEM (1 << 1) int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, + bus_size_t buflen, int *pagesneeded); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? 
&parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if ((flags & BUS_DMA_COHERENT) != 0) newtag->bounce_flags |= BF_COHERENT; if (parent != NULL) { if ((newtag->common.filter != NULL || (parent->bounce_flags & BF_COULD_BOUNCE) != 0)) newtag->bounce_flags |= BF_COULD_BOUNCE; /* Copy some flags from the parent */ newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT; } if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BF_COULD_BOUNCE; if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BF_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? 
newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } +static bool +bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen) +{ + + if ((dmat->bounce_flags & BF_COULD_BOUNCE) == 0) + return (true); + return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL)); +} + static bus_dmamap_t alloc_dmamap(bus_dma_tag_t dmat, int flags) { u_long mapsize; bus_dmamap_t map; mapsize = sizeof(*map); mapsize += sizeof(struct sync_list) * dmat->common.nsegments; map = malloc(mapsize, M_DEVBUF, flags | M_ZERO); if (map == NULL) return (NULL); /* Initialize the new map */ STAILQ_INIT(&map->bpages); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
*/
/*
 * dmat  - tag the new map belongs to; its segment array, bounce zone and
 *         bounce page pool are created or grown here as needed.
 * flags - not consulted by this implementation; every allocation made
 *         here uses M_NOWAIT.
 * mapp  - receives the new map on success and is set to NULL on any
 *         failure that occurs after the map itself was allocated.
 *
 * Returns 0 on success, ENOMEM when the segment array, the map or the
 * initial set of bounce pages cannot be allocated, or the error reported
 * by alloc_bounce_zone().
 */
static int
bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bounce_zone *bz;
	int error, maxpages, pages;

	error = 0;

	/* The per-tag segment array is allocated lazily, on first use. */
	if (dmat->segments == NULL) {
		dmat->segments = (bus_dma_segment_t *)malloc(
		    sizeof(bus_dma_segment_t) * dmat->common.nsegments,
		    M_DEVBUF, M_NOWAIT);
		if (dmat->segments == NULL) {
			CTR3(KTR_BUSDMA, "%s: tag %p error %d",
			    __func__, dmat, ENOMEM);
			return (ENOMEM);
		}
	}

	*mapp = alloc_dmamap(dmat, M_NOWAIT);
	if (*mapp == NULL) {
		CTR3(KTR_BUSDMA, "%s: tag %p error %d",
		    __func__, dmat, ENOMEM);
		return (ENOMEM);
	}

	/*
	 * Bouncing might be required if the driver asks for an active
	 * exclusion region, a data alignment that is stricter than 1, and/or
	 * an active address boundary.
	 */
	if (dmat->bounce_flags & BF_COULD_BOUNCE) {
		/* Must bounce */
		if (dmat->bounce_zone == NULL) {
			if ((error = alloc_bounce_zone(dmat)) != 0) {
				free(*mapp, M_DEVBUF);
				/* Do not hand a freed map back to the caller. */
				*mapp = NULL;
				return (error);
			}
		}
		bz = dmat->bounce_zone;

		(*mapp)->flags = DMAMAP_COULD_BOUNCE;

		/*
		 * Attempt to add pages to our pool on a per-instance
		 * basis up to a sane limit.
		 */
		if (dmat->common.alignment > 1)
			maxpages = MAX_BPAGES;
		else
			maxpages = MIN(MAX_BPAGES, Maxmem -
			    atop(dmat->common.lowaddr));
		if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 ||
		    (bz->map_count > 0 && bz->total_bpages < maxpages)) {
			pages = MAX(atop(dmat->common.maxsize), 1);
			pages = MIN(maxpages - bz->total_bpages, pages);
			pages = MAX(pages, 1);
			if (alloc_bounce_pages(dmat, pages) < pages)
				error = ENOMEM;
			if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0) {
				/*
				 * The minimum allocation has not been made
				 * yet, so a shortfall here is fatal.
				 */
				if (error == 0) {
					dmat->bounce_flags |=
					    BF_MIN_ALLOC_COMP;
				}
			} else {
				/*
				 * The minimum pool already exists; failing
				 * to grow it further is not an error.
				 */
				error = 0;
			}
		}
		/*
		 * Only count maps that are really handed out.  A map freed
		 * below never reaches bounce_bus_dmamap_destroy(), so
		 * counting it here would leave the zone's map_count
		 * permanently too high.
		 */
		if (error == 0)
			bz->map_count++;
	}
	if (error == 0) {
		dmat->map_count++;
	} else {
		free(*mapp, M_DEVBUF);
		/* Do not hand a freed map back to the caller. */
		*mapp = NULL;
	}
	CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
	    __func__, dmat, dmat->common.flags, error);
	return (error);
}

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
*/ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { /* Check we are destroying the correct map type */ if ((map->flags & DMAMAP_FROM_DMAMEM) != 0) panic("bounce_bus_dmamap_destroy: Invalid map freed\n"); if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) { KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, ("%s: Bounce zone when cannot bounce", __func__)); dmat->bounce_zone->map_count--; } free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { /* * XXX ARM64TODO: * This bus_dma implementation requires IO-Coherent architecutre. * If IO-Coherency is not guaranteed, the BUS_DMA_COHERENT flag has * to be implented using non-cacheable memory. */ vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else if ((flags & BUS_DMA_COHERENT) != 0 && (dmat->bounce_flags & BF_COHERENT) == 0) /* * If we have a non-coherent tag, and are trying to allocate * a coherent block of memory it needs to be uncached. 
*/ attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Create the map, but don't set the could bounce flag as * this allocation should never bounce; */ *mapp = alloc_dmamap(dmat, mflags); if (*mapp == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } (*mapp)->flags = DMAMAP_FROM_DMAMEM; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. 
*/ if ((dmat->common.maxsize <= PAGE_SIZE) && (dmat->common.alignment <= dmat->common.maxsize) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig(dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); free(*mapp, M_DEVBUF); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * Check the map came from bounce_bus_dmamem_alloc, so the map * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. 
*/ if ((map->flags & DMAMAP_FROM_DMAMEM) == 0) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free((vm_offset_t)vaddr, dmat->common.maxsize); free(map, M_DEVBUF); dmat->map_count--; CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } +static bool +_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen, + int *pagesneeded) +{ + bus_addr_t curaddr; + bus_size_t sgsize; + int count; + + /* + * Count the number of bounce pages needed in order to + * complete this transfer + */ + count = 0; + curaddr = buf; + while (buflen != 0) { + sgsize = MIN(buflen, dmat->common.maxsegsz); + if (bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = MIN(sgsize, + PAGE_SIZE - (curaddr & PAGE_MASK)); + if (pagesneeded == NULL) + return (true); + count++; + } + curaddr += sgsize; + buflen -= sgsize; + } + + if (pagesneeded != NULL) + *pagesneeded = count; + return (count != 0); +} + static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { - bus_addr_t curaddr; - bus_size_t sgsize; if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && map->pagesneeded == 0) { - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - curaddr = buf; - while (buflen != 0) { - sgsize = MIN(buflen, dmat->common.maxsegsz); - if (bus_dma_run_filter(&dmat->common, curaddr)) { - sgsize = MIN(sgsize, - PAGE_SIZE - (curaddr & PAGE_MASK)); - map->pagesneeded++; - } - curaddr += sgsize; - buflen -= sgsize; - } + _bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded); CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if ((map->flags & DMAMAP_COULD_BOUNCE) != 0 && 
map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. 
*/ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize; bus_addr_t curaddr, sl_end; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; sl_end = 0; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->common.nsegments) break; sl++; sl->vaddr = 0; sl->paddr = curaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not in " "vm_page_array", __func__, curaddr)); } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, 
segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize, max_sgsize; bus_addr_t curaddr, sl_pend; vm_offset_t kvaddr, vaddr, sl_vend; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; vaddr = (vm_offset_t)buf; sl_pend = 0; sl_vend = 0; while (buflen > 0) { /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. 
*/ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->bounce_flags & BF_COHERENT) == 0) { sgsize = MIN(sgsize, max_sgsize); if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (curaddr != sl_pend)) { if (++map->sync_count > dmat->common.nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->paddr = curaddr; if (kvaddr != 0) { sl->pages = NULL; } else { sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not " "in vm_page_array", __func__, curaddr)); } sl->datacount = sgsize; } else sl->datacount += sgsize; } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if ((map->flags & DMAMAP_COULD_BOUNCE) == 0) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. 
*/ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; } static void dma_preread_safe(vm_offset_t va, vm_size_t size) { /* * Write back any partial cachelines immediately before and * after the DMA region. */ if (va & (dcache_line_size - 1)) cpu_dcache_wb_range(va, 1); if ((va + size) & (dcache_line_size - 1)) cpu_dcache_wb_range(va + size, 1); cpu_dcache_inv_range(va, size); } static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) { uint32_t len, offset; vm_page_t m; vm_paddr_t pa; vm_offset_t va, tempva; bus_size_t size; offset = sl->paddr & PAGE_MASK; m = sl->pages; size = sl->datacount; pa = sl->paddr; for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { tempva = 0; if (sl->vaddr == 0) { len = min(PAGE_SIZE - offset, size); tempva = pmap_quick_enter_page(m); va = tempva | offset; KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx", VM_PAGE_TO_PHYS(m) | offset, pa)); } else { len = sl->datacount; va = sl->vaddr; } switch (op) { case BUS_DMASYNC_PREWRITE: case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: cpu_dcache_wb_range(va, len); break; case BUS_DMASYNC_PREREAD: /* * An mbuf may start in the middle of a cacheline. There * will be no cpu writes to the beginning of that line * (which contains the mbuf header) while dma is in * progress. Handle that case by doing a writeback of * just the first cacheline before invalidating the * overall buffer. Any mbuf in a chain may have this * misalignment. Buffers which are not mbufs bounce if * they are not aligned to a cacheline. 
*/ dma_preread_safe(va, len); break; case BUS_DMASYNC_POSTREAD: case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: cpu_dcache_inv_range(va, len); break; default: panic("unsupported combination of sync operations: " "0x%08x\n", op); } if (tempva != 0) pmap_quick_remove_page(tempva); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; struct sync_list *sl, *end; vm_offset_t datavaddr, tempvaddr; if (op == BUS_DMASYNC_POSTWRITE) return; if ((op & BUS_DMASYNC_POSTREAD) != 0) { /* * Wait for any DMA operations to complete before the bcopy. */ dsb(sy); } if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_wb_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } else if ((op & BUS_DMASYNC_PREREAD) != 0) { while (bpage != NULL) { if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_wbinv_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { if ((dmat->bounce_flags & BF_COHERENT) == 0) cpu_dcache_inv_range(bpage->vaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = 
STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } /* * Cache maintenance for normal (non-COHERENT non-bounce) buffers. */ if (map->sync_count != 0) { sl = &map->slist[0]; end = &map->slist[map->sync_count]; CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x " "performing sync", __func__, dmat, op); for ( ; sl != end; ++sl) dma_dcache_sync(sl, op); } if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) { /* * Wait for the bcopy to complete before any DMA operations. */ dsb(sy); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), 
OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); 
mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT((map->flags & DMAMAP_COULD_BOUNCE) != 0, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, + .id_mapped = bounce_bus_dma_id_mapped, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = 
bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bus_dmamap_load_ma_triv, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; Index: head/sys/arm64/include/bus_dma.h =================================================================== --- head/sys/arm64/include/bus_dma.h (revision 347835) +++ head/sys/arm64/include/bus_dma.h (revision 347836) @@ -1,141 +1,153 @@ /* $FreeBSD$ */ #ifndef _MACHINE_BUS_DMA_H_ #define _MACHINE_BUS_DMA_H_ #define WANT_INLINE_DMAMAP #include #include /* + * Is DMA address 1:1 mapping of physical address + */ +static inline bool +bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->id_mapped(dmat, buf, buflen)); +} + +/* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ static inline int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->map_create(dmat, flags, mapp)); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static inline int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->map_destroy(dmat, map)); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints listed in the dma tag. * A dmamap to for use with dmamap_load is also allocated. 
*/ static inline int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); } /* * Free a piece of memory and its associated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static inline void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; tc->impl->mem_free(dmat, vaddr, map); } /* * Release the mapping held by map. */ static inline void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; tc->impl->map_unload(dmat, map); } /* * Perform the sync operation op on map by dispatching to the map_sync * method of the tag's implementation. */ static inline void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; tc->impl->map_sync(dmat, map, op); } /* * Dispatch a load of the physical range buf/buflen to the load_phys * method of the tag's implementation and return its result. */ static inline int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, segp)); } /* * Dispatch a load of the vm_page array ma (tlen bytes starting at * ma_offs) to the load_ma method of the tag's implementation and * return its result. */ static inline int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Dispatch a load of the buffer buf/buflen, together with its pmap, to * the load_buffer method of the tag's implementation and return its * result. */ static inline int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, segp)); } static inline void
_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); } static inline bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); } #endif /* !_MACHINE_BUS_DMA_H_ */ Index: head/sys/arm64/include/bus_dma_impl.h =================================================================== --- head/sys/arm64/include/bus_dma_impl.h (revision 347835) +++ head/sys/arm64/include/bus_dma_impl.h (revision 347836) @@ -1,96 +1,97 @@ /*- * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_BUS_DMA_IMPL_H_ #define _MACHINE_BUS_DMA_IMPL_H_ struct bus_dma_tag_common { struct bus_dma_impl *impl; struct bus_dma_tag_common *parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; bus_dma_lock_t *lockfunc; void *lockfuncarg; int ref_count; }; struct bus_dma_impl { int (*tag_create)(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat); int (*tag_destroy)(bus_dma_tag_t dmat); + bool (*id_mapped)(bus_dma_tag_t, vm_paddr_t, bus_size_t); int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map); int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp); void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp); int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t 
map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp); void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg); bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error); void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map); void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op); }; void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op); int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr); int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat); extern struct bus_dma_impl bus_dma_bounce_impl; #endif Index: head/sys/compat/linuxkpi/common/src/linux_pci.c =================================================================== --- head/sys/compat/linuxkpi/common/src/linux_pci.c (revision 347835) +++ head/sys/compat/linuxkpi/common/src/linux_pci.c (revision 347836) @@ -1,817 +1,844 @@ /*- * Copyright (c) 2015-2016 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static device_probe_t linux_pci_probe; static device_attach_t linux_pci_attach; static device_detach_t linux_pci_detach; static device_suspend_t linux_pci_suspend; static device_resume_t linux_pci_resume; static device_shutdown_t linux_pci_shutdown; static device_method_t pci_methods[] = { DEVMETHOD(device_probe, linux_pci_probe), DEVMETHOD(device_attach, linux_pci_attach), DEVMETHOD(device_detach, linux_pci_detach), DEVMETHOD(device_suspend, linux_pci_suspend), DEVMETHOD(device_resume, linux_pci_resume), DEVMETHOD(device_shutdown, linux_pci_shutdown), DEVMETHOD_END }; struct linux_dma_priv { uint64_t dma_mask; struct mtx lock; bus_dma_tag_t dmat; struct pctrie ptree; }; #define DMA_PRIV_LOCK(priv) mtx_lock(&(priv)->lock) #define DMA_PRIV_UNLOCK(priv) mtx_unlock(&(priv)->lock) static int linux_pdev_dma_init(struct pci_dev *pdev) { struct linux_dma_priv *priv; int error; priv = malloc(sizeof(*priv), M_DEVBUF, M_WAITOK | M_ZERO); pdev->dev.dma_priv = priv; 
mtx_init(&priv->lock, "lkpi-priv-dma", NULL, MTX_DEF); pctrie_init(&priv->ptree); /* create a default DMA tag */ error = linux_dma_tag_init(&pdev->dev, DMA_BIT_MASK(64)); if (error) { mtx_destroy(&priv->lock); free(priv, M_DEVBUF); pdev->dev.dma_priv = NULL; } return (error); } static int linux_pdev_dma_uninit(struct pci_dev *pdev) { struct linux_dma_priv *priv; priv = pdev->dev.dma_priv; if (priv->dmat) bus_dma_tag_destroy(priv->dmat); mtx_destroy(&priv->lock); free(priv, M_DEVBUF); pdev->dev.dma_priv = NULL; return (0); } int linux_dma_tag_init(struct device *dev, u64 dma_mask) { struct linux_dma_priv *priv; int error; priv = dev->dma_priv; if (priv->dmat) { if (priv->dma_mask == dma_mask) return (0); bus_dma_tag_destroy(priv->dmat); } priv->dma_mask = dma_mask; error = bus_dma_tag_create(bus_get_dma_tag(dev->bsddev), 1, 0, /* alignment, boundary */ dma_mask, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ BUS_SPACE_MAXSIZE, /* maxsize */ 1, /* nsegments */ BUS_SPACE_MAXSIZE, /* maxsegsz */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &priv->dmat); return (-error); } static struct pci_driver * linux_pci_find(device_t dev, const struct pci_device_id **idp) { const struct pci_device_id *id; struct pci_driver *pdrv; uint16_t vendor; uint16_t device; uint16_t subvendor; uint16_t subdevice; vendor = pci_get_vendor(dev); device = pci_get_device(dev); subvendor = pci_get_subvendor(dev); subdevice = pci_get_subdevice(dev); spin_lock(&pci_lock); list_for_each_entry(pdrv, &pci_drivers, links) { for (id = pdrv->id_table; id->vendor != 0; id++) { if (vendor == id->vendor && (PCI_ANY_ID == id->device || device == id->device) && (PCI_ANY_ID == id->subvendor || subvendor == id->subvendor) && (PCI_ANY_ID == id->subdevice || subdevice == id->subdevice)) { *idp = id; spin_unlock(&pci_lock); return (pdrv); } } } spin_unlock(&pci_lock); return (NULL); } static int linux_pci_probe(device_t dev) { const struct pci_device_id *id; struct 
pci_driver *pdrv; if ((pdrv = linux_pci_find(dev, &id)) == NULL) return (ENXIO); if (device_get_driver(dev) != &pdrv->bsddriver) return (ENXIO); device_set_desc(dev, pdrv->name); return (0); } static int linux_pci_attach(device_t dev) { struct resource_list_entry *rle; struct pci_bus *pbus; struct pci_dev *pdev; struct pci_devinfo *dinfo; struct pci_driver *pdrv; const struct pci_device_id *id; device_t parent; devclass_t devclass; int error; linux_set_current(curthread); pdrv = linux_pci_find(dev, &id); pdev = device_get_softc(dev); parent = device_get_parent(dev); devclass = device_get_devclass(parent); if (pdrv->isdrm) { dinfo = device_get_ivars(parent); device_set_ivars(dev, dinfo); } else { dinfo = device_get_ivars(dev); } pdev->dev.parent = &linux_root_device; pdev->dev.bsddev = dev; INIT_LIST_HEAD(&pdev->dev.irqents); pdev->devfn = PCI_DEVFN(pci_get_slot(dev), pci_get_function(dev)); pdev->device = dinfo->cfg.device; pdev->vendor = dinfo->cfg.vendor; pdev->subsystem_vendor = dinfo->cfg.subvendor; pdev->subsystem_device = dinfo->cfg.subdevice; pdev->class = pci_get_class(dev); pdev->revision = pci_get_revid(dev); pdev->pdrv = pdrv; kobject_init(&pdev->dev.kobj, &linux_dev_ktype); kobject_set_name(&pdev->dev.kobj, device_get_nameunit(dev)); kobject_add(&pdev->dev.kobj, &linux_root_device.kobj, kobject_name(&pdev->dev.kobj)); rle = linux_pci_get_rle(pdev, SYS_RES_IRQ, 0); if (rle != NULL) pdev->dev.irq = rle->start; else pdev->dev.irq = LINUX_IRQ_INVALID; pdev->irq = pdev->dev.irq; error = linux_pdev_dma_init(pdev); if (error) goto out_dma_init; pbus = malloc(sizeof(*pbus), M_DEVBUF, M_WAITOK | M_ZERO); pbus->self = pdev; pbus->number = pci_get_bus(dev); pdev->bus = pbus; spin_lock(&pci_lock); list_add(&pdev->links, &pci_devices); spin_unlock(&pci_lock); error = pdrv->probe(pdev, id); if (error) goto out_probe; return (0); out_probe: free(pdev->bus, M_DEVBUF); linux_pdev_dma_uninit(pdev); out_dma_init: spin_lock(&pci_lock); list_del(&pdev->links); 
spin_unlock(&pci_lock); put_device(&pdev->dev); return (-error); } static int linux_pci_detach(device_t dev) { struct pci_dev *pdev; linux_set_current(curthread); pdev = device_get_softc(dev); pdev->pdrv->remove(pdev); free(pdev->bus, M_DEVBUF); linux_pdev_dma_uninit(pdev); spin_lock(&pci_lock); list_del(&pdev->links); spin_unlock(&pci_lock); device_set_desc(dev, NULL); put_device(&pdev->dev); return (0); } static int linux_pci_suspend(device_t dev) { const struct dev_pm_ops *pmops; struct pm_message pm = { }; struct pci_dev *pdev; int error; error = 0; linux_set_current(curthread); pdev = device_get_softc(dev); pmops = pdev->pdrv->driver.pm; if (pdev->pdrv->suspend != NULL) error = -pdev->pdrv->suspend(pdev, pm); else if (pmops != NULL && pmops->suspend != NULL) { error = -pmops->suspend(&pdev->dev); if (error == 0 && pmops->suspend_late != NULL) error = -pmops->suspend_late(&pdev->dev); } return (error); } static int linux_pci_resume(device_t dev) { const struct dev_pm_ops *pmops; struct pci_dev *pdev; int error; error = 0; linux_set_current(curthread); pdev = device_get_softc(dev); pmops = pdev->pdrv->driver.pm; if (pdev->pdrv->resume != NULL) error = -pdev->pdrv->resume(pdev); else if (pmops != NULL && pmops->resume != NULL) { if (pmops->resume_early != NULL) error = -pmops->resume_early(&pdev->dev); if (error == 0 && pmops->resume != NULL) error = -pmops->resume(&pdev->dev); } return (error); } static int linux_pci_shutdown(device_t dev) { struct pci_dev *pdev; linux_set_current(curthread); pdev = device_get_softc(dev); if (pdev->pdrv->shutdown != NULL) pdev->pdrv->shutdown(pdev); return (0); } static int _linux_pci_register_driver(struct pci_driver *pdrv, devclass_t dc) { int error; linux_set_current(curthread); spin_lock(&pci_lock); list_add(&pdrv->links, &pci_drivers); spin_unlock(&pci_lock); pdrv->bsddriver.name = pdrv->name; pdrv->bsddriver.methods = pci_methods; pdrv->bsddriver.size = sizeof(struct pci_dev); mtx_lock(&Giant); error = 
devclass_add_driver(dc, &pdrv->bsddriver, BUS_PASS_DEFAULT, &pdrv->bsdclass); mtx_unlock(&Giant); return (-error); } int linux_pci_register_driver(struct pci_driver *pdrv) { devclass_t dc; dc = devclass_find("pci"); if (dc == NULL) return (-ENXIO); pdrv->isdrm = false; return (_linux_pci_register_driver(pdrv, dc)); } int linux_pci_register_drm_driver(struct pci_driver *pdrv) { devclass_t dc; dc = devclass_create("vgapci"); if (dc == NULL) return (-ENXIO); pdrv->isdrm = true; pdrv->name = "drmn"; return (_linux_pci_register_driver(pdrv, dc)); } void linux_pci_unregister_driver(struct pci_driver *pdrv) { devclass_t bus; bus = devclass_find("pci"); spin_lock(&pci_lock); list_del(&pdrv->links); spin_unlock(&pci_lock); mtx_lock(&Giant); if (bus != NULL) devclass_delete_driver(bus, &pdrv->bsddriver); mtx_unlock(&Giant); } void linux_pci_unregister_drm_driver(struct pci_driver *pdrv) { devclass_t bus; bus = devclass_find("vgapci"); spin_lock(&pci_lock); list_del(&pdrv->links); spin_unlock(&pci_lock); mtx_lock(&Giant); if (bus != NULL) devclass_delete_driver(bus, &pdrv->bsddriver); mtx_unlock(&Giant); } CTASSERT(sizeof(dma_addr_t) <= sizeof(uint64_t)); struct linux_dma_obj { void *vaddr; uint64_t dma_addr; bus_dmamap_t dmamap; }; static uma_zone_t linux_dma_trie_zone; static uma_zone_t linux_dma_obj_zone; static void linux_dma_init(void *arg) { linux_dma_trie_zone = uma_zcreate("linux_dma_pctrie", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 0); linux_dma_obj_zone = uma_zcreate("linux_dma_object", sizeof(struct linux_dma_obj), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_init, NULL); static void linux_dma_uninit(void *arg) { uma_zdestroy(linux_dma_obj_zone); uma_zdestroy(linux_dma_trie_zone); } SYSUNINIT(linux_dma, SI_SUB_DRIVERS, SI_ORDER_THIRD, linux_dma_uninit, NULL); static void * linux_dma_trie_alloc(struct pctrie *ptree) { return (uma_zalloc(linux_dma_trie_zone, 0)); } static void 
linux_dma_trie_free(struct pctrie *ptree, void *node) { uma_zfree(linux_dma_trie_zone, node); } PCTRIE_DEFINE(LINUX_DMA, linux_dma_obj, dma_addr, linux_dma_trie_alloc, linux_dma_trie_free); void * linux_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { struct linux_dma_priv *priv; vm_paddr_t high; size_t align; void *mem; if (dev == NULL || dev->dma_priv == NULL) { *dma_handle = 0; return (NULL); } priv = dev->dma_priv; if (priv->dma_mask) high = priv->dma_mask; else if (flag & GFP_DMA32) high = BUS_SPACE_MAXADDR_32BIT; else high = BUS_SPACE_MAXADDR; align = PAGE_SIZE << get_order(size); mem = (void *)kmem_alloc_contig(size, flag, 0, high, align, 0, VM_MEMATTR_DEFAULT); if (mem != NULL) { *dma_handle = linux_dma_map_phys(dev, vtophys(mem), size); if (*dma_handle == 0) { kmem_free((vm_offset_t)mem, size); mem = NULL; } } else { *dma_handle = 0; } return (mem); } +#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) dma_addr_t linux_dma_map_phys(struct device *dev, vm_paddr_t phys, size_t len) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; int error, nseg; bus_dma_segment_t seg; priv = dev->dma_priv; + /* + * If the resultant mapping will be entirely 1:1 with the + * physical address, short-circuit the remainder of the + * bus_dma API. This avoids tracking collisions in the pctrie + * with the additional benefit of reducing overhead. 
+ */ + if (bus_dma_id_mapped(priv->dmat, phys, len)) + return (phys); + obj = uma_zalloc(linux_dma_obj_zone, 0); DMA_PRIV_LOCK(priv); if (bus_dmamap_create(priv->dmat, 0, &obj->dmamap) != 0) { DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } nseg = -1; if (_bus_dmamap_load_phys(priv->dmat, obj->dmamap, phys, len, BUS_DMA_NOWAIT, &seg, &nseg) != 0) { bus_dmamap_destroy(priv->dmat, obj->dmamap); DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } KASSERT(++nseg == 1, ("More than one segment (nseg=%d)", nseg)); obj->dma_addr = seg.ds_addr; error = LINUX_DMA_PCTRIE_INSERT(&priv->ptree, obj); if (error != 0) { bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); return (0); } DMA_PRIV_UNLOCK(priv); return (obj->dma_addr); } +#else +dma_addr_t +linux_dma_map_phys(struct device *dev, vm_paddr_t phys, size_t len) +{ + return (phys); +} +#endif +#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) void linux_dma_unmap(struct device *dev, dma_addr_t dma_addr, size_t len) { struct linux_dma_priv *priv; struct linux_dma_obj *obj; priv = dev->dma_priv; + if (pctrie_is_empty(&priv->ptree)) + return; + DMA_PRIV_LOCK(priv); obj = LINUX_DMA_PCTRIE_LOOKUP(&priv->ptree, dma_addr); if (obj == NULL) { DMA_PRIV_UNLOCK(priv); return; } LINUX_DMA_PCTRIE_REMOVE(&priv->ptree, dma_addr); bus_dmamap_unload(priv->dmat, obj->dmamap); bus_dmamap_destroy(priv->dmat, obj->dmamap); DMA_PRIV_UNLOCK(priv); uma_zfree(linux_dma_obj_zone, obj); } +#else +void +linux_dma_unmap(struct device *dev, dma_addr_t dma_addr, size_t len) +{ +} +#endif int linux_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct linux_dma_priv *priv; struct scatterlist *sg; int i, nseg; bus_dma_segment_t seg; priv = dev->dma_priv; DMA_PRIV_LOCK(priv); /* create common DMA map in the first S/G 
entry */ if (bus_dmamap_create(priv->dmat, 0, &sgl->dma_map) != 0) { DMA_PRIV_UNLOCK(priv); return (0); } /* load all S/G list entries */ for_each_sg(sgl, sg, nents, i) { nseg = -1; if (_bus_dmamap_load_phys(priv->dmat, sgl->dma_map, sg_phys(sg), sg->length, BUS_DMA_NOWAIT, &seg, &nseg) != 0) { bus_dmamap_unload(priv->dmat, sgl->dma_map); bus_dmamap_destroy(priv->dmat, sgl->dma_map); DMA_PRIV_UNLOCK(priv); return (0); } KASSERT(nseg == 0, ("More than one segment (nseg=%d)", nseg + 1)); sg_dma_address(sg) = seg.ds_addr; } DMA_PRIV_UNLOCK(priv); return (nents); } void linux_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, struct dma_attrs *attrs) { struct linux_dma_priv *priv; priv = dev->dma_priv; DMA_PRIV_LOCK(priv); bus_dmamap_unload(priv->dmat, sgl->dma_map); bus_dmamap_destroy(priv->dmat, sgl->dma_map); DMA_PRIV_UNLOCK(priv); } struct dma_pool { struct device *pool_device; uma_zone_t pool_zone; struct mtx pool_lock; bus_dma_tag_t pool_dmat; size_t pool_entry_size; struct pctrie pool_ptree; }; #define DMA_POOL_LOCK(pool) mtx_lock(&(pool)->pool_lock) #define DMA_POOL_UNLOCK(pool) mtx_unlock(&(pool)->pool_lock) static inline int dma_pool_obj_ctor(void *mem, int size, void *arg, int flags) { struct linux_dma_obj *obj = mem; struct dma_pool *pool = arg; int error, nseg; bus_dma_segment_t seg; nseg = -1; DMA_POOL_LOCK(pool); error = _bus_dmamap_load_phys(pool->pool_dmat, obj->dmamap, vtophys(obj->vaddr), pool->pool_entry_size, BUS_DMA_NOWAIT, &seg, &nseg); DMA_POOL_UNLOCK(pool); if (error != 0) { return (error); } KASSERT(++nseg == 1, ("More than one segment (nseg=%d)", nseg)); obj->dma_addr = seg.ds_addr; return (0); } static void dma_pool_obj_dtor(void *mem, int size, void *arg) { struct linux_dma_obj *obj = mem; struct dma_pool *pool = arg; DMA_POOL_LOCK(pool); bus_dmamap_unload(pool->pool_dmat, obj->dmamap); DMA_POOL_UNLOCK(pool); } static int dma_pool_obj_import(void *arg, void **store, int count, int domain 
__unused, int flags) { struct dma_pool *pool = arg; struct linux_dma_priv *priv; struct linux_dma_obj *obj; int error, i; priv = pool->pool_device->dma_priv; for (i = 0; i < count; i++) { obj = uma_zalloc(linux_dma_obj_zone, flags); if (obj == NULL) break; error = bus_dmamem_alloc(pool->pool_dmat, &obj->vaddr, BUS_DMA_NOWAIT, &obj->dmamap); if (error!= 0) { uma_zfree(linux_dma_obj_zone, obj); break; } store[i] = obj; } return (i); } static void dma_pool_obj_release(void *arg, void **store, int count) { struct dma_pool *pool = arg; struct linux_dma_priv *priv; struct linux_dma_obj *obj; int i; priv = pool->pool_device->dma_priv; for (i = 0; i < count; i++) { obj = store[i]; bus_dmamem_free(pool->pool_dmat, obj->vaddr, obj->dmamap); uma_zfree(linux_dma_obj_zone, obj); } } struct dma_pool * linux_dma_pool_create(char *name, struct device *dev, size_t size, size_t align, size_t boundary) { struct linux_dma_priv *priv; struct dma_pool *pool; priv = dev->dma_priv; pool = kzalloc(sizeof(*pool), GFP_KERNEL); pool->pool_device = dev; pool->pool_entry_size = size; if (bus_dma_tag_create(bus_get_dma_tag(dev->bsddev), align, boundary, /* alignment, boundary */ priv->dma_mask, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsz */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &pool->pool_dmat)) { kfree(pool); return (NULL); } pool->pool_zone = uma_zcache_create(name, -1, dma_pool_obj_ctor, dma_pool_obj_dtor, NULL, NULL, dma_pool_obj_import, dma_pool_obj_release, pool, 0); mtx_init(&pool->pool_lock, "lkpi-dma-pool", NULL, MTX_DEF); pctrie_init(&pool->pool_ptree); return (pool); } void linux_dma_pool_destroy(struct dma_pool *pool) { uma_zdestroy(pool->pool_zone); bus_dma_tag_destroy(pool->pool_dmat); mtx_destroy(&pool->pool_lock); kfree(pool); } void * linux_dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { struct linux_dma_obj *obj; obj = 
uma_zalloc_arg(pool->pool_zone, pool, mem_flags); if (obj == NULL) return (NULL); DMA_POOL_LOCK(pool); if (LINUX_DMA_PCTRIE_INSERT(&pool->pool_ptree, obj) != 0) { DMA_POOL_UNLOCK(pool); uma_zfree_arg(pool->pool_zone, obj, pool); return (NULL); } DMA_POOL_UNLOCK(pool); *handle = obj->dma_addr; return (obj->vaddr); } void linux_dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma_addr) { struct linux_dma_obj *obj; DMA_POOL_LOCK(pool); obj = LINUX_DMA_PCTRIE_LOOKUP(&pool->pool_ptree, dma_addr); if (obj == NULL) { DMA_POOL_UNLOCK(pool); return; } LINUX_DMA_PCTRIE_REMOVE(&pool->pool_ptree, dma_addr); DMA_POOL_UNLOCK(pool); uma_zfree_arg(pool->pool_zone, obj, pool); } Index: head/sys/sys/bus_dma.h =================================================================== --- head/sys/sys/bus_dma.h (revision 347835) +++ head/sys/sys/bus_dma.h (revision 347836) @@ -1,306 +1,310 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * SPDX-License-Identifier: (BSD-2-Clause-NetBSD AND BSD-4-Clause) * * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* $FreeBSD$ */

#ifndef _BUS_DMA_H_
#define _BUS_DMA_H_

/*
 * NOTE(review): the target of the include below is missing in this copy;
 * presumably it is <sys/_bus_dma.h> -- confirm against the repository.
 */
+#ifdef _KERNEL
#include
+#endif

/*
 * Machine independent interface for mapping physical addresses to peripheral
 * bus 'physical' addresses, and assisting with DMA operations.
 *
 * XXX This file is always included from <machine/bus_dma.h> and should not
 * (yet) be included directly.
 */

/*
 * Flags used in various bus DMA methods.
 */
#define BUS_DMA_WAITOK 0x00 /* safe to sleep (pseudo-flag) */
#define BUS_DMA_NOWAIT 0x01 /* not safe to sleep */
#define BUS_DMA_ALLOCNOW 0x02 /* perform resource allocation now */
#define BUS_DMA_COHERENT 0x04 /* hint: map memory in a coherent way */
#define BUS_DMA_ZERO 0x08 /* allocate zero'ed memory */
#define BUS_DMA_BUS1 0x10 /* placeholders for bus functions... */
#define BUS_DMA_BUS2 0x20
#define BUS_DMA_BUS3 0x40
#define BUS_DMA_BUS4 0x80

/*
 * The following two flags are non-standard or specific to only certain
 * architectures
 */
#define BUS_DMA_NOWRITE 0x100
#define BUS_DMA_NOCACHE 0x200

/*
 * The following flag is a DMA tag hint that the page offset of the
 * loaded kernel virtual address must be preserved in the first
 * physical segment address, when the KVA is loaded into DMA.
 */
#define BUS_DMA_KEEP_PG_OFFSET 0x400

#define BUS_DMA_LOAD_MBUF 0x800

/* Forwards needed by prototypes below. */
union ccb;
struct bio;
struct mbuf;
struct memdesc;
struct pmap;
struct uio;

/*
 * Operations performed by bus_dmamap_sync().
*/
/* Each operation is a distinct bit value. */
#define BUS_DMASYNC_PREREAD 1
#define BUS_DMASYNC_POSTREAD 2
#define BUS_DMASYNC_PREWRITE 4
#define BUS_DMASYNC_POSTWRITE 8

/*
 * bus_dma_segment_t
 *
 * Describes a single contiguous DMA transaction. Values
 * are suitable for programming into DMA registers.
 */
typedef struct bus_dma_segment {
	bus_addr_t ds_addr; /* DMA address */
	bus_size_t ds_len; /* length of transfer */
} bus_dma_segment_t;

/*
 * The declarations from here to the matching "#endif" marked _KERNEL are
 * only visible to kernel code; the definitions above are visible to
 * every includer.
 */
+#ifdef _KERNEL
/*
 * A function that returns 1 if the address cannot be accessed by
 * a device and 0 if it can be.
 */
typedef int bus_dma_filter_t(void *, bus_addr_t);

/*
 * Generic helper function for manipulating mutexes.
 */
void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op);

/*
 * Allocate a device specific dma_tag encapsulating the constraints of
 * the parent tag in addition to other restrictions specified:
 *
 * alignment: Alignment for segments.
 * boundary: Boundary that segments cannot cross.
 * lowaddr: Low restricted address that cannot appear in a mapping.
 * highaddr: High restricted address that cannot appear in a mapping.
 * filtfunc: An optional function to further test if an address
 * within the range of lowaddr and highaddr cannot appear
 * in a mapping.
 * filtfuncarg: An argument that will be passed to filtfunc in addition
 * to the address to test.
 * maxsize: Maximum mapping size supported by this tag.
 * nsegments: Number of discontinuities allowed in maps.
 * maxsegsz: Maximum size of a segment in the map.
 * flags: Bus DMA flags.
 * lockfunc: An optional function to handle driver-defined lock
 * operations.
 * lockfuncarg: An argument that will be passed to lockfunc in addition
 * to the lock operation.
 * dmat: A pointer to set to a valid dma tag should the return
 * value of this function indicate success.
*/
/* XXX Should probably allow specification of alignment */
int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filtfunc, void *filtfuncarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat);

/*
 * Set the memory domain to be used for allocations.
 *
 * Automatic for PCI devices. Must be set prior to creating maps or
 * allocating memory.
 */
int bus_dma_tag_set_domain(bus_dma_tag_t dmat, int domain);

/*
 * Destroy a dma tag.  Presumably the counterpart of bus_dma_tag_create();
 * the return value is an error code -- TODO confirm the exact contract
 * against bus_dma(9).
 */
int bus_dma_tag_destroy(bus_dma_tag_t dmat);

/*
 * A function that processes a successfully loaded dma map or an error
 * from a delayed load map.
 */
typedef void bus_dmamap_callback_t(void *, bus_dma_segment_t *, int, int);

/*
 * Like bus_dmamap_callback but includes map size in bytes. This is
 * defined as a separate interface to maintain compatibility for users
 * of bus_dmamap_callback_t--at some point these interfaces should be merged.
 */
typedef void bus_dmamap_callback2_t(void *, bus_dma_segment_t *, int,
    bus_size_t, int);

/*
 * Map the buffer buf into bus space using the dmamap map.
 */
int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, bus_dmamap_callback_t *callback, void *callback_arg,
    int flags);

/*
 * Like bus_dmamap_load but for mbufs. Note the use of the
 * bus_dmamap_callback2_t interface.
 */
int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct mbuf *mbuf, bus_dmamap_callback2_t *callback, void *callback_arg,
    int flags);

/*
 * Variant of bus_dmamap_load_mbuf that takes no callback: the caller
 * passes a segment array (segs) and a segment count pointer (nsegs)
 * instead.
 */
int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct mbuf *mbuf, bus_dma_segment_t *segs, int *nsegs, int flags);

/*
 * Like bus_dmamap_load but for uios. Note the use of the
 * bus_dmamap_callback2_t interface.
*/
int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *ui,
    bus_dmamap_callback2_t *callback, void *callback_arg, int flags);

/*
 * Like bus_dmamap_load but for cam control blocks.
 */
int bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
    bus_dmamap_callback_t *callback, void *callback_arg, int flags);

/*
 * Like bus_dmamap_load but for bios.
 */
int bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
    bus_dmamap_callback_t *callback, void *callback_arg, int flags);

/*
 * Loads any memory descriptor.
 */
int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg,
    int flags);

/*
 * Placeholder for use by busdma implementations which do not benefit
 * from an optimized procedure to load an array of vm_page_t. Falls back
 * to calling _bus_dmamap_load_phys() in a loop.
 */
int bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp);

/*
 * When the includer defines WANT_INLINE_DMAMAP the map operations below
 * are declared "static inline"; otherwise BUS_DMAMAP_OP expands to nothing
 * and they are ordinary external declarations.
 */
#ifdef WANT_INLINE_DMAMAP
#define BUS_DMAMAP_OP static inline
#else
#define BUS_DMAMAP_OP
#endif

/*
 * Allocate a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
BUS_DMAMAP_OP int bus_dmamap_create(bus_dma_tag_t dmat, int flags,
    bus_dmamap_t *mapp);

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
 */
BUS_DMAMAP_OP int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map);

/*
 * Allocate a piece of memory that can be efficiently mapped into
 * bus device space based on the constraints listed in the dma tag.
 * A dmamap for use with dmamap_load is also allocated.
 */
BUS_DMAMAP_OP int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp);

/*
 * Free a piece of memory and its allocated dmamap, that was allocated
 * via bus_dmamem_alloc.
*/
BUS_DMAMAP_OP void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr,
    bus_dmamap_t map);

/*
 * Perform a synchronization operation on the given map. If the map
 * is NULL we have a fully IO-coherent system.
 */
BUS_DMAMAP_OP void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t dmamap,
    bus_dmasync_op_t op);

/*
 * Release the mapping held by map.
 */
BUS_DMAMAP_OP void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t dmamap);

/* BUS_DMAMAP_OP is only used by the declarations in this header. */
#undef BUS_DMAMAP_OP
+#endif /* _KERNEL */

#endif /* _BUS_DMA_H_ */
Index: head/sys/x86/include/bus_dma.h
===================================================================
--- head/sys/x86/include/bus_dma.h (revision 347835)
+++ head/sys/x86/include/bus_dma.h (revision 347836)
@@ -1,183 +1,195 @@
/*-
 * Copyright (c) 2017 Jason A. Harmening.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _X86_BUS_DMA_H_
#define _X86_BUS_DMA_H_

/* Selects the "static inline" form of BUS_DMAMAP_OP in <sys/bus_dma.h>. */
#define WANT_INLINE_DMAMAP
/* NOTE(review): the include targets below are missing in this copy. */
#include
#include
#include

/*
+ * Is the DMA address a 1:1 mapping of the physical address?
+ * The question is answered by the id_mapped method of the tag's
+ * implementation, which receives the tag, buf and buflen unchanged.
+ */
+static inline bool
+bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+	struct bus_dma_tag_common *tc;
+
+	tc = (struct bus_dma_tag_common *)dmat;
+	return (tc->impl->id_mapped(dmat, buf, buflen));
+}
+
+/*
 * Allocate a handle for mapping from kva/uva/physical
 * address space into bus device space.
 * Dispatches to the map_create method of the tag's implementation.
 */
static inline int
bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->map_create(dmat, flags, mapp));
}

/*
 * Destroy a handle for mapping from kva/uva/physical
 * address space into bus device space.
 * Dispatches to the map_destroy method of the tag's implementation.
 */
static inline int
bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->map_destroy(dmat, map));
}

/*
 * Allocate a piece of memory that can be efficiently mapped into
 * bus device space based on the constraints lited in the dma tag.
 * A dmamap to for use with dmamap_load is also allocated.
*/
static inline int
bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp));
}

/*
 * Free a piece of memory and its allocated dmamap, that was allocated
 * via bus_dmamem_alloc. Make the same choice for free/contigfree.
 */
static inline void
bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	tc->impl->mem_free(dmat, vaddr, map);
}

/*
 * Release the mapping held by map.
 * A NULL map is ignored.
 */
static inline void
bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	struct bus_dma_tag_common *tc;

	if (map != NULL) {
		tc = (struct bus_dma_tag_common *)dmat;
		tc->impl->map_unload(dmat, map);
	}
}

/*
 * Perform the synchronization operation op on map through the map_sync
 * method of the tag's implementation.  A NULL map is ignored.
 */
static inline void
bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
{
	struct bus_dma_tag_common *tc;

	if (map != NULL) {
		tc = (struct bus_dma_tag_common *)dmat;
		tc->impl->map_sync(dmat, map, op);
	}
}

/*
 * Utility function to load a physical buffer. segp contains
 * the starting segment on entrance, and the ending segment on exit.
 */
static inline int
_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf,
    bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs,
	    segp));
}

/*
 * Utility function to load an array of pages; dispatches to the load_ma
 * method of the tag's implementation with all arguments passed through.
 */
static inline int
_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma,
    bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags,
	    segs, segp));
}

/*
 * Utility function to load a linear buffer. segp contains
 * the starting segment on entrace, and the ending segment on exit.
*/
static inline int
_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs,
	    segp));
}

/*
 * Forward the memory descriptor and callback of a load to the map_waitok
 * method of the tag's implementation.  A NULL map is ignored.
 */
static inline void
_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem,
    bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dma_tag_common *tc;

	if (map != NULL) {
		tc = (struct bus_dma_tag_common *)dmat;
		tc->impl->map_waitok(dmat, map, mem, callback, callback_arg);
	}
}

/*
 * Hand segs, nsegs and error to the map_complete method of the tag's
 * implementation and return the segment array that method returns.
 */
static inline bus_dma_segment_t *
_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_common *tc;

	tc = (struct bus_dma_tag_common *)dmat;
	return (tc->impl->map_complete(dmat, map, segs, nsegs, error));
}

#endif /* !_X86_BUS_DMA_H_ */
Index: head/sys/x86/include/busdma_impl.h
===================================================================
--- head/sys/x86/include/busdma_impl.h (revision 347835)
+++ head/sys/x86/include/busdma_impl.h (revision 347836)
@@ -1,100 +1,101 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef __X86_BUSDMA_IMPL_H
#define __X86_BUSDMA_IMPL_H

/*
 * State shared by all x86 busdma tag implementations: the method table
 * (impl), the parent tag, the constraints the tag was created with, the
 * lock function and its argument, a reference count and the memory domain.
 *
 * The inline wrappers in the x86 bus_dma.h cast a bus_dma_tag_t directly
 * to this structure, so presumably it has to be the first member of every
 * implementation-specific tag -- confirm.
 */
struct bus_dma_tag_common {
	struct bus_dma_impl *impl;
	struct bus_dma_tag_common *parent;
	bus_size_t alignment;
	bus_addr_t boundary;
	bus_addr_t lowaddr;
	bus_addr_t highaddr;
	bus_dma_filter_t *filter;
	void *filterarg;
	bus_size_t maxsize;
	u_int nsegments;
	bus_size_t maxsegsz;
	int flags;
	bus_dma_lock_t *lockfunc;
	void *lockfuncarg;
	int ref_count;
	int domain;
};

/*
 * Method table of a busdma implementation.  The busdma entry points reach
 * the implementation through the impl pointer of the tag; the signatures
 * of most methods mirror those of the entry points that call them.
 */
struct bus_dma_impl {
	int (*tag_create)(bus_dma_tag_t parent,
	    bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr,
	    bus_addr_t highaddr, bus_dma_filter_t *filter,
	    void *filterarg, bus_size_t maxsize, int nsegments,
	    bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
	    void *lockfuncarg, bus_dma_tag_t *dmat);
	int (*tag_destroy)(bus_dma_tag_t dmat);
	int (*tag_set_domain)(bus_dma_tag_t);
	/* Called by bus_dma_id_mapped() with the tag, address and length. */
+	bool (*id_mapped)(bus_dma_tag_t, vm_paddr_t, bus_size_t);
	int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp);
	int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map);
	int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags,
	    bus_dmamap_t *mapp);
	void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map);
	int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
	    bus_dma_segment_t *segs, int *segp);
	int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    vm_paddr_t buf, bus_size_t buflen, int flags,
	    bus_dma_segment_t *segs, int *segp);
	int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    void *buf, bus_size_t buflen, struct pmap *pmap, int flags,
	    bus_dma_segment_t *segs, int *segp);
	void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    struct memdesc *mem, bus_dmamap_callback_t *callback,
	    void *callback_arg);
	bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    bus_dma_segment_t *segs, int nsegs, int error);
	void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map);
	void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map,
	    bus_dmasync_op_t op);
};

/* Helpers shared by the implementations; defined outside this header. */
void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op);
int bus_dma_run_filter(struct bus_dma_tag_common *dmat, vm_paddr_t paddr);
/*
 * Sets up the common part of a new tag.  sz is the size of the
 * implementation-specific tag (callers pass sizeof of their own tag
 * structure) and the new tag is returned through dmat.
 */
int common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
    bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, size_t sz, void **dmat);

/* Method table of the bounce-buffer implementation. */
extern struct bus_dma_impl bus_dma_bounce_impl;

#endif
Index: head/sys/x86/iommu/busdma_dmar.c
===================================================================
--- head/sys/x86/iommu/busdma_dmar.c (revision 347835)
+++ head/sys/x86/iommu/busdma_dmar.c (revision 347836)
@@ -1,939 +1,947 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * busdma_dmar.c, the implementation of the busdma(9) interface using * DMAR units from Intel VT-d. 
*/

/*
 * Consult the kernel environment to decide whether the PCI device at
 * domain:bus:slot:func should use bounce mapping instead of the DMAR.
 *
 * The per-device variable hw.busdma.pci<domain>.<bus>.<slot>.<func> is
 * checked first: "bounce" yields true and "dmar" yields false.  When it is
 * unset, or set to anything else, the answer comes from hw.busdma.default,
 * which accepts the same two values and defaults to DMAR (false).
 */
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;
	int default_bounce;
	bool ret;
	static const char bounce_str[] = "bounce";
	static const char dmar_str[] = "dmar";

	/* Global default; unknown values leave it at "dmar". */
	default_bounce = 0;
	env = kern_getenv("hw.busdma.default");
	if (env != NULL) {
		if (strcmp(env, bounce_str) == 0)
			default_bounce = 1;
		else if (strcmp(env, dmar_str) == 0)
			default_bounce = 0;
		freeenv(env);
	}

	/* Per-device override. */
	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
	    domain, bus, slot, func);
	env = kern_getenv(str);
	if (env == NULL)
		return (default_bounce != 0);
	if (strcmp(env, bounce_str) == 0)
		ret = true;
	else if (strcmp(env, dmar_str) == 0)
		ret = false;
	else
		ret = default_bounce != 0;
	freeenv(env);
	return (ret);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the DMAR unit and used for page table lookup. PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device. In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either DMAR or
 * bounce mapping.
 *
 * The requester ID is stored through rid and the requester device is
 * the return value.
 */
device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	/* Start with the device itself as the requester. */
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
*/
	for (;;) {
		/* pci is the parent of l; it is asserted to be a "pci" bus. */
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("dmar_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		/* pcib is the bridge that provides that bus. */
		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		/* pcibp is the bridge above pcib. */
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * Device is not PCIe, it cannot be seen as a
			 * requester by DMAR unit. Check whether the
			 * bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability. If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				/* 2-byte read of the PCIe flags register. */
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is. This is a
				 * PCIe->PCI bridge. Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.
We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it are PCIe. This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

/*
 * Obtain the DMAR context for dev.  The context is looked up for the
 * requester found by dmar_get_requester().
 *
 * Returns NULL when no context can be obtained, and also when the kernel
 * environment selects bounce mapping for the requester (see
 * dmar_bus_dma_is_dev_disabled()).  In the latter case the context is
 * still requested from dmar_get_ctx_for_dev() and is flagged
 * DMAR_CTX_DISABLED, but it is not handed to the caller.
 */
struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested the IOMMU disabled for the device, we
	 * cannot disable the DMAR, due to possibility of other
	 * devices on the same DMAR still requiring translation.
	 * Instead provide the identity mapping for the device
	 * context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			/*
			 * NOTE(review): there is no DMAR_UNLOCK() on this
			 * path; presumably dmar_free_ctx_locked() drops
			 * the lock -- confirm.
			 */
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

/*
 * Return the DMA tag embedded in the DMAR context of child, or NULL when
 * child is not in the scope of any DMAR unit, when the unit does not have
 * DMA enabled, or when dmar_instantiate_ctx() returns no context.
 * The dev argument is not used.
 */
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child, bootverbose);
	/* Not in scope of any DMAR ? */
	if (dmar == NULL)
		return (NULL);
	if (!dmar->dma_enabled)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ?
NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

/* Defined later in the file; used by the load path below. */
static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

/*
 * tag_create method of the DMAR implementation.
 *
 * The common part of the new tag is set up by common_bus_dma_tag_create();
 * the method table is then pointed at bus_dma_dmar_impl and the DMAR
 * context and owner are copied from the parent tag.  On success the new
 * tag is stored through dmat; on failure *dmat is left NULL and the error
 * from the common routine is returned.
 *
 * NOTE(review): parent is tested against NULL for the common call, but
 * oldtag->ctx is dereferenced unconditionally afterwards, so a NULL parent
 * would fault here -- confirm that callers never pass one.
 */
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
	/*
	 * NOTE(review): newtag is read below on the error path as well;
	 * this relies on common_bus_dma_tag_create() storing a value
	 * through its last argument on every failure -- confirm.
	 */
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ?
newtag->common.flags : 0),
	    error);
	return (error);
}

/*
 * tag_set_domain method of the DMAR implementation: does nothing and
 * reports success.
 */
static int
dmar_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

/*
 * tag_destroy method of the DMAR implementation.
 *
 * Drops one reference on the tag.  When that was the last reference the
 * tag's segment array and the tag itself are freed (and the DMAR context
 * is released if the tag is the one embedded in its context), and the
 * same is then repeated for the parent tag.  The walk stops at the first
 * tag that still has references.
 *
 * Returns EBUSY, without touching any reference count, while maps are
 * still counted against the tag.  A NULL tag is accepted and returns 0.
 */
static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	/* dmat_copy keeps the original pointer for the trace record. */
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			/* Read the parent before dmat may be freed. */
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free_domain(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

/*
 * id_mapped method of the DMAR implementation: always reports false,
 * whatever the tag, address and length.
 */
+static bool
+dmar_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
+{
+
+	return (false);
+}
+
/*
 * map_create method of the DMAR implementation.
 *
 * Allocates a zeroed map and, if the tag does not have one yet, the tag's
 * segment array sized for common.nsegments entries.  Both allocations use
 * M_NOWAIT.  On success the map is initialised, counted in the tag's
 * map_count and returned through mapp.  On allocation failure *mapp is
 * set to NULL and ENOMEM is returned.
 */
static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc_domainset(sizeof(*map), M_DMAR_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free_domain(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

/*
 * map_destroy method of the DMAR implementation.  Fails with EBUSY while
 * the map still has entries queued on map_entries.
 */
static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_domain *domain;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		DMAR_DOMAIN_LOCK(domain);
		if
(!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		DMAR_DOMAIN_UNLOCK(domain);
		free_domain(map, M_DMAR_DMAMAP);
	}
	/* Reached for a NULL map as well. */
	tag->map_count--;
	return (0);
}

/*
 * mem_alloc method of the DMAR implementation.
 *
 * Creates a map and allocates common.maxsize bytes of memory for it.
 * Requests smaller than a page, with an alignment no larger than the size
 * and with the default memory attribute, are served by malloc_domainset();
 * all others by kmem_alloc_attr_domainset().  The map flags record which
 * allocator was used so that dmar_bus_dmamem_free() releases the memory
 * the same way.
 *
 * BUS_DMA_NOWAIT, BUS_DMA_ZERO and BUS_DMA_NOCACHE in flags select
 * M_NOWAIT (instead of M_WAITOK), M_ZERO and an uncacheable mapping.
 * When the memory cannot be allocated the map is destroyed, *mapp is set
 * to NULL and ENOMEM is returned.
 */
static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		/* The allocator flag set above goes away with the map. */
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

/*
 * mem_free method of the DMAR implementation.
 *
 * Releases memory obtained from dmar_bus_dmamem_alloc() with the
 * allocator recorded in the map flags, clears that flag and destroys the
 * map.
 */
static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free_domain(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

/*
 * Map buflen bytes, starting offset bytes into the first page of ma,
 * into the DMAR domain of the tag, one segment per loop iteration.
 *
 * Segments are stored in segs (or in the tag's own segment array when
 * segs is NULL) starting at index *segp + 1.  Every entry created is
 * linked on the map and also on unroll_list, so that the caller can undo
 * a partial load.  On success *segp is set to the index of the last
 * segment used.  EFBIG is returned when the tag's segment limit is
 * reached; errors from dmar_gas_map() are returned unchanged.
 */
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map,
vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_domain *domain;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
	/* Without a caller-supplied array, use the one owned by the tag. */
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	seg = *segp;
	error = 0;
	/* idx is the index into ma of the page the next segment starts in. */
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		/* Candidate segment length, capped at maxsegsz. */
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		/* Read access always; write unless BUS_DMA_NOWRITE is set. */
		error = dmar_gas_map(domain, &tag->common, size, offset,
		    DMAR_MAP_ENTRY_READ |
		    ((flags & BUS_DMA_NOWRITE) == 0 ? DMAR_MAP_ENTRY_WRITE : 0),
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			/* The entry may be smaller than what was asked for. */
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		/* Clamp the segment to the end of the mapped entry. */
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
(uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.lowaddr, (uintmax_t)tag->common.highaddr)); KASSERT(dmar_test_boundary(entry->start + offset, buflen1, tag->common.boundary), ("boundary failed: ctx %p start 0x%jx end 0x%jx " "boundary 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.boundary)); KASSERT(buflen1 <= tag->common.maxsegsz, ("segment too large: ctx %p start 0x%jx end 0x%jx " "buflen1 0x%jx maxsegsz 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz)); DMAR_DOMAIN_LOCK(domain); TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); entry->flags |= DMAR_MAP_ENTRY_MAP; DMAR_DOMAIN_UNLOCK(domain); TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); segs[seg].ds_addr = entry->start + offset; segs[seg].ds_len = buflen1; idx += OFF_TO_IDX(trunc_page(offset + buflen1)); offset += buflen1; offset &= DMAR_PAGE_MASK; buflen -= buflen1; } if (error == 0) *segp = seg; return (error); } static int dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag, struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct dmar_ctx *ctx; struct dmar_domain *domain; struct dmar_map_entry *entry, *entry1; struct dmar_map_entries_tailq unroll_list; int error; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->loads, 1); TAILQ_INIT(&unroll_list); error = dmar_bus_dmamap_load_something1(tag, map, ma, offset, buflen, flags, segs, segp, &unroll_list); if (error != 0) { /* * The busdma interface does not allow us to report * partial buffer load, so unfortunately we have to * revert all work done. */ DMAR_DOMAIN_LOCK(domain); TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link, entry1) { /* * No entries other than what we have created * during the failed run might have been * inserted there in between, since we own ctx * pglock. 
*/ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); TAILQ_REMOVE(&unroll_list, entry, unroll_link); TAILQ_INSERT_TAIL(&domain->unload_entries, entry, dmamap_link); } DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); } if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 && !map->cansleep) error = EINPROGRESS; if (error == EINPROGRESS) dmar_bus_schedule_dmamap(domain->dmar, map); return (error); } static int dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen, flags, segs, segp)); } static int dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma, fma; vm_paddr_t pstart, pend, paddr; int error, i, ma_cnt, mflags, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page(buf); pend = round_page(buf + buflen); offset = buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); mflags = map->cansleep ? M_WAITOK : M_NOWAIT; ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags); if (ma == NULL) return (ENOMEM); fma = NULL; for (i = 0; i < ma_cnt; i++) { paddr = pstart + i * PAGE_SIZE; ma[i] = PHYS_TO_VM_PAGE(paddr); if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) { /* * If PHYS_TO_VM_PAGE() returned NULL or the * vm_page was not initialized we'll use a * fake page. 
*/ if (fma == NULL) { fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | mflags); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } } vm_page_initfake(&fma[i], pstart + i * PAGE_SIZE, VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(fma, M_DEVBUF); free(ma, M_DEVBUF); return (error); } static int dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma, fma; vm_paddr_t pstart, pend, paddr; int error, i, ma_cnt, mflags, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page((vm_offset_t)buf); pend = round_page((vm_offset_t)buf + buflen); offset = (vm_offset_t)buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); mflags = map->cansleep ? M_WAITOK : M_NOWAIT; ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags); if (ma == NULL) return (ENOMEM); fma = NULL; for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { if (pmap == kernel_pmap) paddr = pmap_kextract(pstart); else paddr = pmap_extract(pmap, pstart); ma[i] = PHYS_TO_VM_PAGE(paddr); if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) { /* * If PHYS_TO_VM_PAGE() returned NULL or the * vm_page was not initialized we'll use a * fake page. 
*/ if (fma == NULL) { fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | mflags); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } } vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); free(fma, M_DEVBUF); return (error); } static void dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { struct bus_dmamap_dmar *map; if (map1 == NULL) return; map = (struct bus_dmamap_dmar *)map1; map->mem = *mem; map->tag = (struct bus_dma_tag_dmar *)dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1, bus_dma_segment_t *segs, int nsegs, int error) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if (!map->locked) { KASSERT(map->cansleep, ("map not locked and not sleepable context %p", map)); /* * We are called from the delayed context. Relock the * driver. */ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK); map->locked = true; } if (segs == NULL) segs = tag->segments; return (segs); } /* * The limitations of busdma KPI forces the dmar to perform the actual * unload, consisting of the unmapping of the map entries page tables, * from the delayed context on i386, since page table page mapping * might require a sleep to be successfull. The unfortunate * consequence is that the DMA requests can be served some time after * the bus_dmamap_unload() call returned. * * On amd64, we assume that sf allocation cannot fail. 
*/ static void dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_ctx *ctx; struct dmar_domain *domain; #if defined(__amd64__) struct dmar_map_entries_tailq entries; #endif tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->unloads, 1); #if defined(__i386__) DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); #else /* defined(__amd64__) */ TAILQ_INIT(&entries); DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); THREAD_NO_SLEEPING(); dmar_domain_unload(domain, &entries, false); THREAD_SLEEPING_OK(); KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx)); #endif } static void dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { } struct bus_dma_impl bus_dma_dmar_impl = { .tag_create = dmar_bus_dma_tag_create, .tag_destroy = dmar_bus_dma_tag_destroy, .tag_set_domain = dmar_bus_dma_tag_set_domain, + .id_mapped = dmar_bus_dma_id_mapped, .map_create = dmar_bus_dmamap_create, .map_destroy = dmar_bus_dmamap_destroy, .mem_alloc = dmar_bus_dmamem_alloc, .mem_free = dmar_bus_dmamem_free, .load_phys = dmar_bus_dmamap_load_phys, .load_buffer = dmar_bus_dmamap_load_buffer, .load_ma = dmar_bus_dmamap_load_ma, .map_waitok = dmar_bus_dmamap_waitok, .map_complete = dmar_bus_dmamap_complete, .map_unload = dmar_bus_dmamap_unload, .map_sync = dmar_bus_dmamap_sync, }; static void dmar_bus_task_dmamap(void *arg, int pending) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_unit *unit; unit = arg; DMAR_LOCK(unit); while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) { TAILQ_REMOVE(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); tag = map->tag; 
map->cansleep = true; map->locked = false; bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); map->cansleep = false; if (map->locked) { (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_UNLOCK); } else map->locked = true; map->cansleep = false; DMAR_LOCK(unit); } DMAR_UNLOCK(unit); } static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map) { map->locked = false; DMAR_LOCK(unit); TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task); } int dmar_init_busdma(struct dmar_unit *unit) { unit->dma_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled); TAILQ_INIT(&unit->delayed_maps); TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit); unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK, taskqueue_thread_enqueue, &unit->delayed_taskqueue); taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK, "dmar%d busdma taskq", unit->unit); return (0); } void dmar_fini_busdma(struct dmar_unit *unit) { if (unit->delayed_taskqueue == NULL) return; taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task); taskqueue_free(unit->delayed_taskqueue); unit->delayed_taskqueue = NULL; } Index: head/sys/x86/x86/busdma_bounce.c =================================================================== --- head/sys/x86/x86/busdma_bounce.c (revision 347835) +++ head/sys/x86/x86/busdma_bounce.c (revision 347836) @@ -1,1319 +1,1347 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define MAX_BPAGES (Maxmem > atop(0x100000000ULL) ? 
8192 : 512) #else #define MAX_BPAGES 8192 #endif enum { BUS_DMA_COULD_BOUNCE = 0x01, BUS_DMA_MIN_ALLOC_COMP = 0x02, BUS_DMA_KMEM_ALLOC = 0x04, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_offset_t dataoffs; /* page offset of client data */ vm_page_t datapage[2]; /* physical page(s) of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; int domain; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, 
bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); +static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, + bus_size_t buflen, int *pagesneeded); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); static int bounce_bus_dma_zone_setup(bus_dma_tag_t dmat) { struct bounce_zone *bz; int error; /* Must bounce */ if ((error = alloc_bounce_zone(dmat)) != 0) return (error); bz = dmat->bounce_zone; if (ptoa(bz->total_bpages) < dmat->common.maxsize) { int pages; pages = atop(dmat->common.maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); } /* Performed initial allocation */ dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; return (0); } /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? 
&parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if (parent != NULL && (newtag->common.filter != NULL || (parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if ((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 && (flags & BUS_DMA_ALLOCNOW) != 0) error = bounce_bus_dma_zone_setup(newtag); else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } +static bool +bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen) +{ + + if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0) + return (true); + return (!_bus_dmamap_pagesneeded(dmat, buf, buflen, NULL)); +} + /* * Update the domain for the tag. We may need to reallocate the zone and * bounce pages. 
*/ static int bounce_bus_dma_tag_set_domain(bus_dma_tag_t dmat) { KASSERT(dmat->map_count == 0, ("bounce_bus_dma_tag_set_domain: Domain set after use.\n")); if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) == 0 || dmat->bounce_zone == NULL) return (0); dmat->bounce_flags &= ~BUS_DMA_MIN_ALLOC_COMP; return (bounce_bus_dma_zone_setup(dmat)); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free_domain(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc_domainset( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. 
*/ if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; *mapp = (bus_dmamap_t)malloc_domainset(sizeof(**mapp), M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } else { *mapp = NULL; } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (map != NULL && map != &nobounce_dmamap) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free_domain(map, M_DEVBUF); } dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. 
* A dmamap to for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; /* If we succeed, no mapping/bouncing will be required */ *mapp = NULL; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc_domainset( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. 
*/ if (dmat->common.maxsize <= PAGE_SIZE && dmat->common.alignment <= dmat->common.maxsize && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc_domainset(dmat->common.maxsize, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr_domainset( DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig_domainset( DOMAINSET_PREF(dmat->common.domain), dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * dmamem does not need to be bounced, so the map should be * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. 
*/ if (map != NULL) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free_domain(vaddr, M_DEVBUF); else kmem_free((vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } +static bool +_bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen, + int *pagesneeded) +{ + vm_paddr_t curaddr; + bus_size_t sgsize; + int count; + + /* + * Count the number of bounce pages needed in order to + * complete this transfer + */ + count = 0; + curaddr = buf; + while (buflen != 0) { + sgsize = MIN(buflen, dmat->common.maxsegsz); + if (bus_dma_run_filter(&dmat->common, curaddr)) { + sgsize = MIN(sgsize, + PAGE_SIZE - (curaddr & PAGE_MASK)); + if (pagesneeded == NULL) + return (true); + count++; + } + curaddr += sgsize; + buflen -= sgsize; + } + + if (pagesneeded != NULL) + *pagesneeded = count; + return (count != 0); +} + static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { - vm_paddr_t curaddr; - bus_size_t sgsize; if (map != &nobounce_dmamap && map->pagesneeded == 0) { - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - curaddr = buf; - while (buflen != 0) { - sgsize = MIN(buflen, dmat->common.maxsegsz); - if (bus_dma_run_filter(&dmat->common, curaddr)) { - sgsize = MIN(sgsize, - PAGE_SIZE - (curaddr & PAGE_MASK)); - map->pagesneeded++; - } - curaddr += sgsize; - buflen -= sgsize; - } + _bus_dmamap_pagesneeded(dmat, buf, buflen, &map->pagesneeded); CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; vm_paddr_t paddr; bus_size_t sg_len; if (map != &nobounce_dmamap && map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= 
%d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, int ma_offs, bus_size_t buflen, int flags) { bus_size_t sg_len, max_sgsize; int page_index; vm_paddr_t paddr; if (map != &nobounce_dmamap && map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ page_index = 0; while (buflen > 0) { paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; sg_len = PAGE_SIZE - ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sg_len = MIN(sg_len, max_sgsize); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); sg_len = MIN(sg_len, max_sgsize); KASSERT((sg_len & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); map->pagesneeded++; } if (((ma_offs + sg_len) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sg_len) & PAGE_MASK; KASSERT(buflen >= sg_len, ("Segment length overruns original buffer")); buflen -= sg_len; } 
CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; KASSERT(curaddr <= BUS_SPACE_MAXADDR, ("ds_addr %#jx > BUS_SPACE_MAXADDR %#jx; dmat %p fl %#x low %#jx " "hi %#jx", (uintmax_t)curaddr, (uintmax_t)BUS_SPACE_MAXADDR, dmat, dmat->bounce_flags, (uintmax_t)dmat->common.lowaddr, (uintmax_t)dmat->common.highaddr)); /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
*/
/*
 * buf/buflen describe the physical range to load.  A NULL map selects
 * nobounce_dmamap and a NULL segs selects dmat->segments.  Returns 0 when
 * the whole range was placed, EFBIG when _bus_dmamap_addseg() stopped
 * before the range was consumed, or the error returned by
 * _bus_dmamap_reserve_pages().
 */
static int
bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	bus_size_t sgsize;
	vm_paddr_t curaddr;
	int error;

	if (map == NULL)
		map = &nobounce_dmamap;

	if (segs == NULL)
		segs = dmat->segments;

	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
		_bus_dmamap_count_phys(dmat, map, buf, buflen, flags);
		if (map->pagesneeded != 0) {
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	while (buflen > 0) {
		curaddr = buf;
		sgsize = MIN(buflen, dmat->common.maxsegsz);
		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
		    map->pagesneeded != 0 &&
		    bus_dma_run_filter(&dmat->common, curaddr)) {
			/* A bounced chunk never extends past its page. */
			sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
			curaddr = add_bounce_page(dmat, map, 0, curaddr, 0,
			    sgsize);
		}
		/* addseg may place fewer bytes than requested; 0 means full. */
		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
		    segp);
		if (sgsize == 0)
			break;
		buf += sgsize;
		buflen -= sgsize;
	}

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}

/*
 * Utility function to load a linear buffer.  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 *
 * buf is a virtual address translated through pmap.  A NULL map selects
 * nobounce_dmamap and a NULL segs selects dmat->segments.  Returns 0 when
 * the whole buffer was placed, EFBIG when it was not, or the error
 * returned by _bus_dmamap_reserve_pages().
 */
static int
bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	bus_size_t sgsize, max_sgsize;
	vm_paddr_t curaddr;
	vm_offset_t kvaddr, vaddr;
	int error;

	if (map == NULL)
		map = &nobounce_dmamap;

	if (segs == NULL)
		segs = dmat->segments;

	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
		_bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags);
		if (map->pagesneeded != 0) {
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	vaddr = (vm_offset_t)buf;
	while (buflen > 0) {
		/*
		 * Get the physical address for this segment.
		 */
		if (pmap == kernel_pmap) {
			curaddr = pmap_kextract(vaddr);
			kvaddr = vaddr;
		} else {
			curaddr = pmap_extract(pmap, vaddr);
			/* No kernel mapping is recorded for this chunk. */
			kvaddr = 0;
		}

		/*
		 * Compute the segment size, and adjust counts.
		 */
		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
		sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);
		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
		    map->pagesneeded != 0 &&
		    bus_dma_run_filter(&dmat->common, curaddr)) {
			sgsize = roundup2(sgsize, dmat->common.alignment);
			sgsize = MIN(sgsize, max_sgsize);
			curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0,
			    sgsize);
		} else {
			sgsize = MIN(sgsize, max_sgsize);
		}
		sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs,
		    segp);
		if (sgsize == 0)
			break;
		vaddr += sgsize;
		buflen -= sgsize;
	}

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}

/*
 * Load a buffer described by an array of vm_page pointers (ma), starting
 * ma_offs bytes into the first page.  Tags with BUS_DMA_KEEP_PG_OFFSET are
 * handed to bus_dmamap_load_ma_triv() instead.  A NULL map selects
 * nobounce_dmamap and a NULL segs selects dmat->segments.  Returns 0 when
 * the whole buffer was placed, EFBIG when it was not, or the error
 * returned by the delegated/reservation call.
 */
static int
bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	vm_paddr_t paddr, next_paddr;
	int error, page_index;
	bus_size_t sgsize, max_sgsize;

	if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) {
		/*
		 * If we have to keep the offset of each page this function
		 * is not suitable, switch back to bus_dmamap_load_ma_triv
		 * which is going to do the right thing in this case.
		 */
		error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs,
		    flags, segs, segp);
		return (error);
	}

	if (map == NULL)
		map = &nobounce_dmamap;

	if (segs == NULL)
		segs = dmat->segments;

	if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) {
		_bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags);
		if (map->pagesneeded != 0) {
			error = _bus_dmamap_reserve_pages(dmat, map, flags);
			if (error)
				return (error);
		}
	}

	page_index = 0;
	while (buflen > 0) {
		/*
		 * Compute the segment size, and adjust counts.
		 */
		paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs;
		max_sgsize = MIN(buflen, dmat->common.maxsegsz);
		sgsize = PAGE_SIZE - ma_offs;
		if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0 &&
		    map->pagesneeded != 0 &&
		    bus_dma_run_filter(&dmat->common, paddr)) {
			sgsize = roundup2(sgsize, dmat->common.alignment);
			sgsize = MIN(sgsize, max_sgsize);
			KASSERT((sgsize & (dmat->common.alignment - 1)) == 0,
			    ("Segment size is not aligned"));
			/*
			 * Check if two pages of the user provided buffer
			 * are used.
			 */
			if ((ma_offs + sgsize) > PAGE_SIZE)
				next_paddr =
				    VM_PAGE_TO_PHYS(ma[page_index + 1]);
			else
				next_paddr = 0;
			paddr = add_bounce_page(dmat, map, 0, paddr,
			    next_paddr, sgsize);
		} else {
			sgsize = MIN(sgsize, max_sgsize);
		}
		sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs,
		    segp);
		if (sgsize == 0)
			break;
		KASSERT(buflen >= sgsize,
		    ("Segment length overruns original buffer"));
		buflen -= sgsize;
		/* Step to the next page once this chunk reaches a page end. */
		if (((ma_offs + sgsize) & ~PAGE_MASK) != 0)
			page_index++;
		ma_offs = (ma_offs + sgsize) & PAGE_MASK;
	}

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}

/*
 * Record in map the memory descriptor, tag and callback of a load request.
 * busdma_swi() reads these fields back when it re-issues the load.  Does
 * nothing when map is NULL.
 */
static void
bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{

	if (map == NULL)
		return;
	map->mem = *mem;
	map->dmat = dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

/*
 * Return the segment array for a completed load: segs when the caller
 * supplied one, otherwise dmat->segments.  map, nsegs and error are not
 * used here.
 */
static bus_dma_segment_t *
bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dma_segment_t *segs, int nsegs, int error)
{

	if (segs == NULL)
		segs = dmat->segments;
	return (segs);
}

/*
 * Release the mapping held by map.
*/ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (map == NULL) return; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; bus_size_t datacount1, datacount2; if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL) return; /* * Handle data bouncing. We might also want to add support for * invalidating the caches on broken hardware. */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)datavaddr, (void *)bpage->vaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_w; } /* * We are dealing with an unmapped buffer that expands * over two pages. 
*/ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)datavaddr, (void *)(bpage->vaddr + datacount1), datacount2); pmap_quick_remove_page(datavaddr); next_w: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)bpage->vaddr, (void *)datavaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_r; } /* * We are dealing with an unmapped buffer that expands * over two pages. */ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)(bpage->vaddr + datacount1), (void *)datavaddr, datacount2); pmap_quick_remove_page(datavaddr); next_r: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if (dmat->common.alignment <= bz->alignment && 
dmat->common.lowaddr >= bz->lowaddr && dmat->common.domain == bz->domain) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; bz->domain = dmat->common.domain; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); 
SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "domain", CTLFLAG_RD, &bz->domain, 0, "memory domain"); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = malloc_domainset(sizeof(*bpage), M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc_domainset(PAGE_SIZE, M_DEVBUF, DOMAINSET_PREF(dmat->common.domain), M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free_domain(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, vm_paddr_t addr1, vm_paddr_t addr2, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no 
bounce zone in dma tag")); KASSERT(map != NULL && map != &nobounce_dmamap, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr1 & PAGE_MASK; bpage->busaddr |= addr1 & PAGE_MASK; KASSERT(addr2 == 0, ("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET")); } bpage->datavaddr = vaddr; bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1); KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned")); bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2); bpage->dataoffs = addr1 & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. 
*/
		bpage->vaddr &= ~PAGE_MASK;
		bpage->busaddr &= ~PAGE_MASK;
	}

	mtx_lock(&bounce_lock);
	STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links);
	bz->free_bpages++;
	bz->active_bpages--;
	/*
	 * A page just became free: retry the reservation of the first
	 * waiting map and, if it is now complete, move that map to the
	 * callback list and schedule the software interrupt.
	 */
	if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) {
		if (reserve_bounce_pages(map->dmat, map, 1) == 0) {
			STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links);
			STAILQ_INSERT_TAIL(&bounce_map_callbacklist,
			    map, links);
			busdma_swi_pending = 1;
			bz->total_deferred++;
			swi_sched(vm_ih, 0);
		}
	}
	mtx_unlock(&bounce_lock);
}

/*
 * Drain bounce_map_callbacklist: for each queued map, re-issue the load
 * recorded in the map (tag, memory descriptor, callback) with
 * BUS_DMA_WAITOK, bracketed by the tag's lock function.  bounce_lock is
 * dropped around each load and re-taken before the list is examined again.
 */
void
busdma_swi(void)
{
	bus_dma_tag_t dmat;
	struct bus_dmamap *map;

	mtx_lock(&bounce_lock);
	while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) {
		STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links);
		mtx_unlock(&bounce_lock);
		dmat = map->dmat;
		(dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK);
		bus_dmamap_load_mem(map->dmat, map, &map->mem,
		    map->callback, map->callback_arg, BUS_DMA_WAITOK);
		(dmat->common.lockfunc)(dmat->common.lockfuncarg,
		    BUS_DMA_UNLOCK);
		mtx_lock(&bounce_lock);
	}
	mtx_unlock(&bounce_lock);
}

/* Method table exporting the bounce-buffer implementation. */
struct bus_dma_impl bus_dma_bounce_impl = {
	.tag_create = bounce_bus_dma_tag_create,
	.tag_destroy = bounce_bus_dma_tag_destroy,
	.tag_set_domain = bounce_bus_dma_tag_set_domain,
+	.id_mapped = bounce_bus_dma_id_mapped,
	.map_create = bounce_bus_dmamap_create,
	.map_destroy = bounce_bus_dmamap_destroy,
	.mem_alloc = bounce_bus_dmamem_alloc,
	.mem_free = bounce_bus_dmamem_free,
	.load_phys = bounce_bus_dmamap_load_phys,
	.load_buffer = bounce_bus_dmamap_load_buffer,
	.load_ma = bounce_bus_dmamap_load_ma,
	.map_waitok = bounce_bus_dmamap_waitok,
	.map_complete = bounce_bus_dmamap_complete,
	.map_unload = bounce_bus_dmamap_unload,
	.map_sync = bounce_bus_dmamap_sync,
};
Index: head/usr.sbin/camdd/camdd.c
===================================================================
--- head/usr.sbin/camdd/camdd.c (revision 347835)
+++ head/usr.sbin/camdd/camdd.c (revision 347836)
@@ -1,3510 +1,3509 @@
/*-
 * Copyright (c) 1997-2007 Kenneth D.
Merry * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Ken Merry (Spectra Logic Corporation) */ /* * This is eventually intended to be: * - A basic data transfer/copy utility * - A simple benchmark utility * - An example of how to use the asynchronous pass(4) driver interface. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum { CAMDD_CMD_NONE = 0x00000000, CAMDD_CMD_HELP = 0x00000001, CAMDD_CMD_WRITE = 0x00000002, CAMDD_CMD_READ = 0x00000003 } camdd_cmdmask; typedef enum { CAMDD_ARG_NONE = 0x00000000, CAMDD_ARG_VERBOSE = 0x00000001, CAMDD_ARG_DEVICE = 0x00000002, CAMDD_ARG_BUS = 0x00000004, CAMDD_ARG_TARGET = 0x00000008, CAMDD_ARG_LUN = 0x00000010, CAMDD_ARG_UNIT = 0x00000020, CAMDD_ARG_TIMEOUT = 0x00000040, CAMDD_ARG_ERR_RECOVER = 0x00000080, CAMDD_ARG_RETRIES = 0x00000100 } camdd_argmask; typedef enum { CAMDD_DEV_NONE = 0x00, CAMDD_DEV_PASS = 0x01, CAMDD_DEV_FILE = 0x02 } camdd_dev_type; struct camdd_io_opts { camdd_dev_type dev_type; char *dev_name; uint64_t blocksize; uint64_t queue_depth; uint64_t offset; int min_cmd_size; int write_dev; uint64_t debug; }; typedef enum { CAMDD_BUF_NONE, CAMDD_BUF_DATA, CAMDD_BUF_INDIRECT } camdd_buf_type; struct camdd_buf_indirect { /* * Pointer to the source buffer. */ struct camdd_buf *src_buf; /* * Offset into the source buffer, in bytes. */ uint64_t offset; /* * Pointer to the starting point in the source buffer. */ uint8_t *start_ptr; /* * Length of this chunk in bytes. */ size_t len; }; struct camdd_buf_data { /* * Buffer allocated when we allocate this camdd_buf. This should * be the size of the blocksize for this device. */ uint8_t *buf; /* * The amount of backing store allocated in buf. Generally this * will be the blocksize of the device. */ uint32_t alloc_len; /* * The amount of data that was put into the buffer (on reads) or * the amount of data we have put onto the src_list so far (on * writes). 
*/ uint32_t fill_len; /* * The amount of data that was not transferred. */ uint32_t resid; /* * Starting byte offset on the reader. */ uint64_t src_start_offset; /* * CCB used for pass(4) device targets. */ union ccb ccb; /* * Number of scatter/gather segments. */ int sg_count; /* * Set if we had to tack on an extra buffer to round the transfer * up to a sector size. */ int extra_buf; /* * Scatter/gather list used generally when we're the writer for a * pass(4) device. */ bus_dma_segment_t *segs; /* * Scatter/gather list used generally when we're the writer for a * file or block device; */ struct iovec *iovec; }; union camdd_buf_types { struct camdd_buf_indirect indirect; struct camdd_buf_data data; }; typedef enum { CAMDD_STATUS_NONE, CAMDD_STATUS_OK, CAMDD_STATUS_SHORT_IO, CAMDD_STATUS_EOF, CAMDD_STATUS_ERROR } camdd_buf_status; struct camdd_buf { camdd_buf_type buf_type; union camdd_buf_types buf_type_spec; camdd_buf_status status; uint64_t lba; size_t len; /* * A reference count of how many indirect buffers point to this * buffer. */ int refcount; /* * A link back to our parent device. */ struct camdd_dev *dev; STAILQ_ENTRY(camdd_buf) links; STAILQ_ENTRY(camdd_buf) work_links; /* * A count of the buffers on the src_list. */ int src_count; /* * List of buffers from our partner thread that are the components * of this buffer for the I/O. Uses src_links. 
*/ STAILQ_HEAD(,camdd_buf) src_list; STAILQ_ENTRY(camdd_buf) src_links; }; #define NUM_DEV_TYPES 2 struct camdd_dev_pass { int scsi_dev_type; int protocol; struct cam_device *dev; uint64_t max_sector; uint32_t block_len; uint32_t cpi_maxio; }; typedef enum { CAMDD_FILE_NONE, CAMDD_FILE_REG, CAMDD_FILE_STD, CAMDD_FILE_PIPE, CAMDD_FILE_DISK, CAMDD_FILE_TAPE, CAMDD_FILE_TTY, CAMDD_FILE_MEM } camdd_file_type; typedef enum { CAMDD_FF_NONE = 0x00, CAMDD_FF_CAN_SEEK = 0x01 } camdd_file_flags; struct camdd_dev_file { int fd; struct stat sb; char filename[MAXPATHLEN + 1]; camdd_file_type file_type; camdd_file_flags file_flags; uint8_t *tmp_buf; }; struct camdd_dev_block { int fd; uint64_t size_bytes; uint32_t block_len; }; union camdd_dev_spec { struct camdd_dev_pass pass; struct camdd_dev_file file; struct camdd_dev_block block; }; typedef enum { CAMDD_DEV_FLAG_NONE = 0x00, CAMDD_DEV_FLAG_EOF = 0x01, CAMDD_DEV_FLAG_PEER_EOF = 0x02, CAMDD_DEV_FLAG_ACTIVE = 0x04, CAMDD_DEV_FLAG_EOF_SENT = 0x08, CAMDD_DEV_FLAG_EOF_QUEUED = 0x10 } camdd_dev_flags; struct camdd_dev { camdd_dev_type dev_type; union camdd_dev_spec dev_spec; camdd_dev_flags flags; char device_name[MAXPATHLEN+1]; uint32_t blocksize; uint32_t sector_size; uint64_t max_sector; uint64_t sector_io_limit; int min_cmd_size; int write_dev; int retry_count; int io_timeout; int debug; uint64_t start_offset_bytes; uint64_t next_io_pos_bytes; uint64_t next_peer_pos_bytes; uint64_t next_completion_pos_bytes; uint64_t peer_bytes_queued; uint64_t bytes_transferred; uint32_t target_queue_depth; uint32_t cur_active_io; uint8_t *extra_buf; uint32_t extra_buf_len; struct camdd_dev *peer_dev; pthread_mutex_t mutex; pthread_cond_t cond; int kq; int (*run)(struct camdd_dev *dev); int (*fetch)(struct camdd_dev *dev); /* * Buffers that are available for I/O. Uses links. */ STAILQ_HEAD(,camdd_buf) free_queue; /* * Free indirect buffers. These are used for breaking a large * buffer into multiple pieces. 
*/ STAILQ_HEAD(,camdd_buf) free_indirect_queue; /* * Buffers that have been queued to the kernel. Uses links. */ STAILQ_HEAD(,camdd_buf) active_queue; /* * Will generally contain one of our buffers that is waiting for enough * I/O from our partner thread to be able to execute. This will * generally happen when our per-I/O-size is larger than the * partner thread's per-I/O-size. Uses links. */ STAILQ_HEAD(,camdd_buf) pending_queue; /* * Number of buffers on the pending queue */ int num_pending_queue; /* * Buffers that are filled and ready to execute. This is used when * our partner (reader) thread sends us blocks that are larger than * our blocksize, and so we have to split them into multiple pieces. */ STAILQ_HEAD(,camdd_buf) run_queue; /* * Number of buffers on the run queue. */ int num_run_queue; STAILQ_HEAD(,camdd_buf) reorder_queue; int num_reorder_queue; /* * Buffers that have been queued to us by our partner thread * (generally the reader thread) to be written out. Uses * work_links. */ STAILQ_HEAD(,camdd_buf) work_queue; /* * Buffers that have been completed by our partner thread. Uses * work_links. */ STAILQ_HEAD(,camdd_buf) peer_done_queue; /* * Number of buffers on the peer done queue. */ uint32_t num_peer_done_queue; /* * A list of buffers that we have queued to our peer thread. Uses * links. */ STAILQ_HEAD(,camdd_buf) peer_work_queue; /* * Number of buffers on the peer work queue. */ uint32_t num_peer_work_queue; }; static sem_t camdd_sem; static sig_atomic_t need_exit = 0; static sig_atomic_t error_exit = 0; static sig_atomic_t need_status = 0; #ifndef min #define min(a, b) (a < b) ? 
a : b #endif /* Generically useful offsets into the peripheral private area */ #define ppriv_ptr0 periph_priv.entries[0].ptr #define ppriv_ptr1 periph_priv.entries[1].ptr #define ppriv_field0 periph_priv.entries[0].field #define ppriv_field1 periph_priv.entries[1].field #define ccb_buf ppriv_ptr0 #define CAMDD_FILE_DEFAULT_BLOCK 524288 #define CAMDD_FILE_DEFAULT_DEPTH 1 #define CAMDD_PASS_MAX_BLOCK 1048576 #define CAMDD_PASS_DEFAULT_DEPTH 6 #define CAMDD_PASS_RW_TIMEOUT 60 * 1000 static int parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst); void camdd_free_dev(struct camdd_dev *dev); struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke, int retry_count, int timeout); static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type); void camdd_release_buf(struct camdd_buf *buf); struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type); int camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size, uint32_t *num_sectors_used, int *double_buf_needed); uint32_t camdd_buf_get_len(struct camdd_buf *buf); void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf); int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize, uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran); int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb, camdd_argmask arglist, int probe_retry_count, int probe_timeout, uint64_t *maxsector, uint32_t *block_len); struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count, int timeout); struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts, camdd_argmask arglist, int probe_retry_count, int probe_timeout, int io_retry_count, int io_timeout); void *camdd_file_worker(void *arg); camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol); int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd); 
int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf); int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf); void camdd_peer_done(struct camdd_buf *buf); void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf, int *error_count); int camdd_pass_fetch(struct camdd_dev *dev); int camdd_file_run(struct camdd_dev *dev); int camdd_pass_run(struct camdd_dev *dev); int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len); int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf); void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth, uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes); void *camdd_worker(void *arg); void camdd_sig_handler(int sig); void camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, struct timespec *start_time); int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io, int retry_count, int timeout); int camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts); void usage(void); /* * Parse out a bus, or a bus, target and lun in the following * format: * bus * bus:target * bus:target:lun * * Returns the number of parsed components, or 0. */ static int parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst) { char *tmpstr; int convs = 0; while (isspace(*tstr) && (*tstr != '\0')) tstr++; tmpstr = (char *)strtok(tstr, ":"); if ((tmpstr != NULL) && (*tmpstr != '\0')) { *bus = strtol(tmpstr, NULL, 0); *arglst |= CAMDD_ARG_BUS; convs++; tmpstr = (char *)strtok(NULL, ":"); if ((tmpstr != NULL) && (*tmpstr != '\0')) { *target = strtol(tmpstr, NULL, 0); *arglst |= CAMDD_ARG_TARGET; convs++; tmpstr = (char *)strtok(NULL, ":"); if ((tmpstr != NULL) && (*tmpstr != '\0')) { *lun = strtol(tmpstr, NULL, 0); *arglst |= CAMDD_ARG_LUN; convs++; } } } return convs; } /* * XXX KDM clean up and free all of the buffers on the queue! 
*/ void camdd_free_dev(struct camdd_dev *dev) { if (dev == NULL) return; switch (dev->dev_type) { case CAMDD_DEV_FILE: { struct camdd_dev_file *file_dev = &dev->dev_spec.file; if (file_dev->fd != -1) close(file_dev->fd); free(file_dev->tmp_buf); break; } case CAMDD_DEV_PASS: { struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass; if (pass_dev->dev != NULL) cam_close_device(pass_dev->dev); break; } default: break; } free(dev); } struct camdd_dev * camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke, int retry_count, int timeout) { struct camdd_dev *dev = NULL; struct kevent *ke; size_t ke_size; int retval = 0; dev = calloc(1, sizeof(*dev)); if (dev == NULL) { warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev)); goto bailout; } dev->dev_type = dev_type; dev->io_timeout = timeout; dev->retry_count = retry_count; STAILQ_INIT(&dev->free_queue); STAILQ_INIT(&dev->free_indirect_queue); STAILQ_INIT(&dev->active_queue); STAILQ_INIT(&dev->pending_queue); STAILQ_INIT(&dev->run_queue); STAILQ_INIT(&dev->reorder_queue); STAILQ_INIT(&dev->work_queue); STAILQ_INIT(&dev->peer_done_queue); STAILQ_INIT(&dev->peer_work_queue); retval = pthread_mutex_init(&dev->mutex, NULL); if (retval != 0) { warnc(retval, "%s: failed to initialize mutex", __func__); goto bailout; } retval = pthread_cond_init(&dev->cond, NULL); if (retval != 0) { warnc(retval, "%s: failed to initialize condition variable", __func__); goto bailout; } dev->kq = kqueue(); if (dev->kq == -1) { warn("%s: Unable to create kqueue", __func__); goto bailout; } ke_size = sizeof(struct kevent) * (num_ke + 4); ke = calloc(1, ke_size); if (ke == NULL) { warn("%s: unable to malloc %zu bytes", __func__, ke_size); goto bailout; } if (num_ke > 0) bcopy(new_ke, ke, num_ke * sizeof(struct kevent)); EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER, EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0); EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER, EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0); 
EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0); EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0); retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL); if (retval == -1) { warn("%s: Unable to register kevents", __func__); goto bailout; } return (dev); bailout: free(dev); return (NULL); } static struct camdd_buf * camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type) { struct camdd_buf *buf = NULL; uint8_t *data_ptr = NULL; /* * We only need to allocate data space for data buffers. */ switch (buf_type) { case CAMDD_BUF_DATA: data_ptr = malloc(dev->blocksize); if (data_ptr == NULL) { warn("unable to allocate %u bytes", dev->blocksize); goto bailout_error; } break; default: break; } buf = calloc(1, sizeof(*buf)); if (buf == NULL) { warn("unable to allocate %zu bytes", sizeof(*buf)); goto bailout_error; } buf->buf_type = buf_type; buf->dev = dev; switch (buf_type) { case CAMDD_BUF_DATA: { struct camdd_buf_data *data; data = &buf->buf_type_spec.data; data->alloc_len = dev->blocksize; data->buf = data_ptr; break; } case CAMDD_BUF_INDIRECT: break; default: break; } STAILQ_INIT(&buf->src_list); return (buf); bailout_error: free(data_ptr); return (NULL); } void camdd_release_buf(struct camdd_buf *buf) { struct camdd_dev *dev; dev = buf->dev; switch (buf->buf_type) { case CAMDD_BUF_DATA: { struct camdd_buf_data *data; data = &buf->buf_type_spec.data; if (data->segs != NULL) { if (data->extra_buf != 0) { void *extra_buf; extra_buf = (void *) data->segs[data->sg_count - 1].ds_addr; free(extra_buf); data->extra_buf = 0; } free(data->segs); data->segs = NULL; data->sg_count = 0; } else if (data->iovec != NULL) { if (data->extra_buf != 0) { free(data->iovec[data->sg_count - 1].iov_base); data->extra_buf = 0; } free(data->iovec); data->iovec = NULL; data->sg_count = 0; } STAILQ_INSERT_TAIL(&dev->free_queue, buf, links); break; } case CAMDD_BUF_INDIRECT: STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links); break; default: 
err(1, "%s: Invalid buffer type %d for released buffer",
		    __func__, buf->buf_type);
		/*
		 * NOTE(review): err() also appends strerror(errno), which is
		 * not meaningful for this condition; errx() may have been
		 * intended -- confirm.
		 */
		break;
	}
}

/*
 * Get a buffer of the given type for dev.
 *
 * A buffer is taken from the matching free queue when one is available
 * and zeroed for reuse; a recycled data buffer keeps its backing store
 * pointer and allocation length.  When the free queue is empty (or the
 * type is unknown, after a warning) the request falls through to
 * camdd_alloc_buf().  Returns whatever that path yields, which is NULL
 * when camdd_alloc_buf() fails.
 */
struct camdd_buf *
camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
{
	struct camdd_buf *buf = NULL;

	switch (buf_type) {
	case CAMDD_BUF_DATA:
		buf = STAILQ_FIRST(&dev->free_queue);
		if (buf != NULL) {
			struct camdd_buf_data *data;
			uint8_t *data_ptr;
			uint32_t alloc_len;

			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
			data = &buf->buf_type_spec.data;
			/* Save the backing store across the bzero() below. */
			data_ptr = data->buf;
			alloc_len = data->alloc_len;
			bzero(buf, sizeof(*buf));
			data->buf = data_ptr;
			data->alloc_len = alloc_len;
		}
		break;
	case CAMDD_BUF_INDIRECT:
		buf = STAILQ_FIRST(&dev->free_indirect_queue);
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);

			bzero(buf, sizeof(*buf));
		}
		break;
	default:
		warnx("Unknown buffer type %d requested", buf_type);
		break;
	}

	if (buf == NULL)
		return (camdd_alloc_buf(dev, buf_type));
	else {
		/* bzero() wiped these; restore them for the recycled buffer. */
		STAILQ_INIT(&buf->src_list);
		buf->dev = dev;
		buf->buf_type = buf_type;

		return (buf);
	}
}

int
camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
    uint32_t *num_sectors_used, int *double_buf_needed)
{
	struct camdd_buf *tmp_buf;
	struct camdd_buf_data *data;
	uint8_t *extra_buf = NULL;
	size_t extra_buf_len = 0;
	int extra_buf_attached = 0;
	int i, retval = 0;

	data = &buf->buf_type_spec.data;

	data->sg_count = buf->src_count;
	/*
	 * Compose a scatter/gather list from all of the buffers in the list.
	 * If the length of the buffer isn't a multiple of the sector size,
	 * we'll have to add an extra buffer.  This should only happen
	 * at the end of a transfer.
*/ if ((data->fill_len % sector_size) != 0) { extra_buf_len = sector_size - (data->fill_len % sector_size); extra_buf = calloc(extra_buf_len, 1); if (extra_buf == NULL) { warn("%s: unable to allocate %zu bytes for extra " "buffer space", __func__, extra_buf_len); retval = 1; goto bailout; } data->extra_buf = 1; data->sg_count++; } if (iovec == 0) { data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t)); if (data->segs == NULL) { warn("%s: unable to allocate %zu bytes for S/G list", __func__, sizeof(bus_dma_segment_t) * data->sg_count); retval = 1; goto bailout; } } else { data->iovec = calloc(data->sg_count, sizeof(struct iovec)); if (data->iovec == NULL) { warn("%s: unable to allocate %zu bytes for S/G list", __func__, sizeof(struct iovec) * data->sg_count); retval = 1; goto bailout; } } for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list); i < buf->src_count && tmp_buf != NULL; i++, tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) { if (tmp_buf->buf_type == CAMDD_BUF_DATA) { struct camdd_buf_data *tmp_data; tmp_data = &tmp_buf->buf_type_spec.data; if (iovec == 0) { data->segs[i].ds_addr = (bus_addr_t) tmp_data->buf; data->segs[i].ds_len = tmp_data->fill_len - tmp_data->resid; } else { data->iovec[i].iov_base = tmp_data->buf; data->iovec[i].iov_len = tmp_data->fill_len - tmp_data->resid; } if (((tmp_data->fill_len - tmp_data->resid) % sector_size) != 0) *double_buf_needed = 1; } else { struct camdd_buf_indirect *tmp_ind; tmp_ind = &tmp_buf->buf_type_spec.indirect; if (iovec == 0) { data->segs[i].ds_addr = (bus_addr_t)tmp_ind->start_ptr; data->segs[i].ds_len = tmp_ind->len; } else { data->iovec[i].iov_base = tmp_ind->start_ptr; data->iovec[i].iov_len = tmp_ind->len; } if ((tmp_ind->len % sector_size) != 0) *double_buf_needed = 1; } } if (extra_buf != NULL) { if (iovec == 0) { data->segs[i].ds_addr = (bus_addr_t)extra_buf; data->segs[i].ds_len = extra_buf_len; } else { data->iovec[i].iov_base = extra_buf; data->iovec[i].iov_len = extra_buf_len; } extra_buf_attached 
= 1; i++; } if ((tmp_buf != NULL) || (i != data->sg_count)) { warnx("buffer source count does not match " "number of buffers in list!"); retval = 1; goto bailout; } bailout: if (retval == 0) { *num_sectors_used = (data->fill_len + extra_buf_len) / sector_size; } else if (extra_buf_attached == 0) { /* * If extra_buf isn't attached yet, we need to free it * to avoid leaking. */ free(extra_buf); data->extra_buf = 0; data->sg_count--; } return (retval); } uint32_t camdd_buf_get_len(struct camdd_buf *buf) { uint32_t len = 0; if (buf->buf_type != CAMDD_BUF_DATA) { struct camdd_buf_indirect *indirect; indirect = &buf->buf_type_spec.indirect; len = indirect->len; } else { struct camdd_buf_data *data; data = &buf->buf_type_spec.data; len = data->fill_len; } return (len); } void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf) { struct camdd_buf_data *data; assert(buf->buf_type == CAMDD_BUF_DATA); data = &buf->buf_type_spec.data; STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links); buf->src_count++; data->fill_len += camdd_buf_get_len(child_buf); } typedef enum { CAMDD_TS_MAX_BLK, CAMDD_TS_MIN_BLK, CAMDD_TS_BLK_GRAN, CAMDD_TS_EFF_IOSIZE } camdd_status_item_index; static struct camdd_status_items { const char *name; struct mt_status_entry *entry; } req_status_items[] = { { "max_blk", NULL }, { "min_blk", NULL }, { "blk_gran", NULL }, { "max_effective_iosize", NULL } }; int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize, uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran) { struct mt_status_data status_data; char *xml_str = NULL; unsigned int i; int retval = 0; retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str); if (retval != 0) err(1, "Couldn't get XML string from %s", filename); retval = mt_get_status(xml_str, &status_data); if (retval != XML_STATUS_OK) { warn("couldn't get status for %s", filename); retval = 1; goto bailout; } else retval = 0; if (status_data.error != 0) { warnx("%s", status_data.error_str); retval = 1; goto 
bailout; } for (i = 0; i < nitems(req_status_items); i++) { char *name; name = __DECONST(char *, req_status_items[i].name); req_status_items[i].entry = mt_status_entry_find(&status_data, name); if (req_status_items[i].entry == NULL) { errx(1, "Cannot find status entry %s", req_status_items[i].name); } } *max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned; *max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned; *min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned; *blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned; bailout: free(xml_str); mt_status_free(&status_data); return (retval); } struct camdd_dev * camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count, int timeout) { struct camdd_dev *dev = NULL; struct camdd_dev_file *file_dev; uint64_t blocksize = io_opts->blocksize; dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout); if (dev == NULL) goto bailout; file_dev = &dev->dev_spec.file; file_dev->fd = fd; strlcpy(file_dev->filename, io_opts->dev_name, sizeof(file_dev->filename)); strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name)); if (blocksize == 0) dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK; else dev->blocksize = blocksize; if ((io_opts->queue_depth != 0) && (io_opts->queue_depth != 1)) { warnx("Queue depth %ju for %s ignored, only 1 outstanding " "command supported", (uintmax_t)io_opts->queue_depth, io_opts->dev_name); } dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH; dev->run = camdd_file_run; dev->fetch = NULL; /* * We can effectively access files on byte boundaries. We'll reset * this for devices like disks that can be accessed on sector * boundaries. 
*/ dev->sector_size = 1; if ((fd != STDIN_FILENO) && (fd != STDOUT_FILENO)) { int retval; retval = fstat(fd, &file_dev->sb); if (retval != 0) { warn("Cannot stat %s", dev->device_name); goto bailout_error; } if (S_ISREG(file_dev->sb.st_mode)) { file_dev->file_type = CAMDD_FILE_REG; } else if (S_ISCHR(file_dev->sb.st_mode)) { int type; if (ioctl(fd, FIODTYPE, &type) == -1) err(1, "FIODTYPE ioctl failed on %s", dev->device_name); else { if (type & D_TAPE) file_dev->file_type = CAMDD_FILE_TAPE; else if (type & D_DISK) file_dev->file_type = CAMDD_FILE_DISK; else if (type & D_MEM) file_dev->file_type = CAMDD_FILE_MEM; else if (type & D_TTY) file_dev->file_type = CAMDD_FILE_TTY; } } else if (S_ISDIR(file_dev->sb.st_mode)) { errx(1, "cannot operate on directory %s", dev->device_name); } else if (S_ISFIFO(file_dev->sb.st_mode)) { file_dev->file_type = CAMDD_FILE_PIPE; } else errx(1, "Cannot determine file type for %s", dev->device_name); switch (file_dev->file_type) { case CAMDD_FILE_REG: if (file_dev->sb.st_size != 0) dev->max_sector = file_dev->sb.st_size - 1; else dev->max_sector = 0; file_dev->file_flags |= CAMDD_FF_CAN_SEEK; break; case CAMDD_FILE_TAPE: { uint64_t max_iosize, max_blk, min_blk, blk_gran; /* * Check block limits and maximum effective iosize. * Make sure the blocksize is within the block * limits (and a multiple of the minimum blocksize) * and that the blocksize is <= maximum effective * iosize. */ retval = camdd_probe_tape(fd, dev->device_name, &max_iosize, &max_blk, &min_blk, &blk_gran); if (retval != 0) errx(1, "Unable to probe tape %s", dev->device_name); /* * The blocksize needs to be <= the maximum * effective I/O size of the tape device. Note * that this also takes into account the maximum * blocksize reported by READ BLOCK LIMITS. 
*/ if (dev->blocksize > max_iosize) { warnx("Blocksize %u too big for %s, limiting " "to %ju", dev->blocksize, dev->device_name, max_iosize); dev->blocksize = max_iosize; } /* * The blocksize needs to be at least min_blk; */ if (dev->blocksize < min_blk) { warnx("Blocksize %u too small for %s, " "increasing to %ju", dev->blocksize, dev->device_name, min_blk); dev->blocksize = min_blk; } /* * And the blocksize needs to be a multiple of * the block granularity. */ if ((blk_gran != 0) && (dev->blocksize % (1 << blk_gran))) { warnx("Blocksize %u for %s not a multiple of " "%d, adjusting to %d", dev->blocksize, dev->device_name, (1 << blk_gran), dev->blocksize & ~((1 << blk_gran) - 1)); dev->blocksize &= ~((1 << blk_gran) - 1); } if (dev->blocksize == 0) { errx(1, "Unable to derive valid blocksize for " "%s", dev->device_name); } /* * For tape drives, set the sector size to the * blocksize so that we make sure not to write * less than the blocksize out to the drive. */ dev->sector_size = dev->blocksize; break; } case CAMDD_FILE_DISK: { off_t media_size; unsigned int sector_size; file_dev->file_flags |= CAMDD_FF_CAN_SEEK; if (ioctl(fd, DIOCGSECTORSIZE, §or_size) == -1) { err(1, "DIOCGSECTORSIZE ioctl failed on %s", dev->device_name); } if (sector_size == 0) { errx(1, "DIOCGSECTORSIZE ioctl returned " "invalid sector size %u for %s", sector_size, dev->device_name); } if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) { err(1, "DIOCGMEDIASIZE ioctl failed on %s", dev->device_name); } if (media_size == 0) { errx(1, "DIOCGMEDIASIZE ioctl returned " "invalid media size %ju for %s", (uintmax_t)media_size, dev->device_name); } if (dev->blocksize % sector_size) { errx(1, "%s blocksize %u not a multiple of " "sector size %u", dev->device_name, dev->blocksize, sector_size); } dev->sector_size = sector_size; dev->max_sector = (media_size / sector_size) - 1; break; } case CAMDD_FILE_MEM: file_dev->file_flags |= CAMDD_FF_CAN_SEEK; break; default: break; } } if ((io_opts->offset != 0) 
&& ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) { warnx("Offset %ju specified for %s, but we cannot seek on %s", io_opts->offset, io_opts->dev_name, io_opts->dev_name); goto bailout_error; } #if 0 else if ((io_opts->offset != 0) && ((io_opts->offset % dev->sector_size) != 0)) { warnx("Offset %ju for %s is not a multiple of the " "sector size %u", io_opts->offset, io_opts->dev_name, dev->sector_size); goto bailout_error; } else { dev->start_offset_bytes = io_opts->offset; } #endif bailout: return (dev); bailout_error: camdd_free_dev(dev); return (NULL); } /* * Get a get device CCB for the specified device. */ int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd) { union ccb *ccb; int retval = 0; ccb = cam_getccb(device); if (ccb == NULL) { warnx("%s: couldn't allocate CCB", __func__); return -1; } CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd); ccb->ccb_h.func_code = XPT_GDEV_TYPE; if (cam_send_ccb(device, ccb) < 0) { warn("%s: error sending Get Device Information CCB", __func__); cam_error_print(device, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); retval = -1; goto bailout; } if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { cam_error_print(device, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); retval = -1; goto bailout; } bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev)); bailout: cam_freeccb(ccb); return retval; } int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb, camdd_argmask arglist, int probe_retry_count, int probe_timeout, uint64_t *maxsector, uint32_t *block_len) { struct scsi_read_capacity_data rcap; struct scsi_read_capacity_data_long rcaplong; int retval = -1; if (ccb == NULL) { warnx("%s: error passed ccb is NULL", __func__); goto bailout; } CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio); scsi_read_capacity(&ccb->csio, /*retries*/ probe_retry_count, /*cbfcnp*/ NULL, /*tag_action*/ MSG_SIMPLE_Q_TAG, &rcap, SSD_FULL_SIZE, /*timeout*/ probe_timeout ? 
probe_timeout : 5000); /* Disable freezing the device queue */ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; if (arglist & CAMDD_ARG_ERR_RECOVER) ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER; if (cam_send_ccb(cam_dev, ccb) < 0) { warn("error sending READ CAPACITY command"); cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); goto bailout; } if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); goto bailout; } *maxsector = scsi_4btoul(rcap.addr); *block_len = scsi_4btoul(rcap.length); /* * A last block of 2^32-1 means that the true capacity is over 2TB, * and we need to issue the long READ CAPACITY to get the real * capacity. Otherwise, we're all set. */ if (*maxsector != 0xffffffff) { retval = 0; goto bailout; } scsi_read_capacity_16(&ccb->csio, /*retries*/ probe_retry_count, /*cbfcnp*/ NULL, /*tag_action*/ MSG_SIMPLE_Q_TAG, /*lba*/ 0, /*reladdr*/ 0, /*pmi*/ 0, (uint8_t *)&rcaplong, sizeof(rcaplong), /*sense_len*/ SSD_FULL_SIZE, /*timeout*/ probe_timeout ? probe_timeout : 5000); /* Disable freezing the device queue */ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; if (arglist & CAMDD_ARG_ERR_RECOVER) ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER; if (cam_send_ccb(cam_dev, ccb) < 0) { warn("error sending READ CAPACITY (16) command"); cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); goto bailout; } if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); goto bailout; } *maxsector = scsi_8btou64(rcaplong.addr); *block_len = scsi_4btoul(rcaplong.length); retval = 0; bailout: return retval; } /* * Need to implement this. Do a basic probe: * - Check the inquiry data, make sure we're talking to a device that we * can reasonably expect to talk to -- direct, RBC, CD, WORM. * - Send a test unit ready, make sure the device is available. * - Get the capacity and block size. 
*/ struct camdd_dev * camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts, camdd_argmask arglist, int probe_retry_count, int probe_timeout, int io_retry_count, int io_timeout) { union ccb *ccb; uint64_t maxsector = 0; uint32_t cpi_maxio, max_iosize, pass_numblocks; uint32_t block_len = 0; struct camdd_dev *dev = NULL; struct camdd_dev_pass *pass_dev; struct kevent ke; struct ccb_getdev cgd; int retval; int scsi_dev_type; if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) { warnx("%s: error retrieving CGD", __func__); return NULL; } ccb = cam_getccb(cam_dev); if (ccb == NULL) { warnx("%s: error allocating ccb", __func__); goto bailout; } switch (cgd.protocol) { case PROTO_SCSI: scsi_dev_type = SID_TYPE(&cam_dev->inq_data); /* * For devices that support READ CAPACITY, we'll attempt to get the * capacity. Otherwise, we really don't support tape or other * devices via SCSI passthrough, so just return an error in that case. */ switch (scsi_dev_type) { case T_DIRECT: case T_WORM: case T_CDROM: case T_OPTICAL: case T_RBC: case T_ZBC_HM: break; default: errx(1, "Unsupported SCSI device type %d", scsi_dev_type); break; /*NOTREACHED*/ } if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count, arglist, probe_timeout, &maxsector, &block_len))) { goto bailout; } break; default: errx(1, "Unsupported PROTO type %d", cgd.protocol); break; /*NOTREACHED*/ } if (block_len == 0) { warnx("Sector size for %s%u is 0, cannot continue", cam_dev->device_name, cam_dev->dev_unit_num); goto bailout_error; } CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi); ccb->ccb_h.func_code = XPT_PATH_INQ; ccb->ccb_h.flags = CAM_DIR_NONE; ccb->ccb_h.retry_count = 1; if (cam_send_ccb(cam_dev, ccb) < 0) { warn("error sending XPT_PATH_INQ CCB"); cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); goto bailout; } EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0); dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count, io_timeout); if (dev == NULL) goto 
bailout; pass_dev = &dev->dev_spec.pass; pass_dev->scsi_dev_type = scsi_dev_type; pass_dev->protocol = cgd.protocol; pass_dev->dev = cam_dev; pass_dev->max_sector = maxsector; pass_dev->block_len = block_len; pass_dev->cpi_maxio = ccb->cpi.maxio; snprintf(dev->device_name, sizeof(dev->device_name), "%s%u", pass_dev->dev->device_name, pass_dev->dev->dev_unit_num); dev->sector_size = block_len; dev->max_sector = maxsector; /* * Determine the optimal blocksize to use for this device. */ /* * If the controller has not specified a maximum I/O size, * just go with 128K as a somewhat conservative value. */ if (pass_dev->cpi_maxio == 0) cpi_maxio = 131072; else cpi_maxio = pass_dev->cpi_maxio; /* * If the controller has a large maximum I/O size, limit it * to something smaller so that the kernel doesn't have trouble * allocating buffers to copy data in and out for us. * XXX KDM this is until we have unmapped I/O support in the kernel. */ max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK); /* * If we weren't able to get a block size for some reason, * default to 512 bytes. */ block_len = pass_dev->block_len; if (block_len == 0) block_len = 512; /* * Figure out how many blocksize chunks will fit in the * maximum I/O size. 
*/ pass_numblocks = max_iosize / block_len; /* * And finally, multiple the number of blocks by the LBA * length to get our maximum block size; */ dev->blocksize = pass_numblocks * block_len; if (io_opts->blocksize != 0) { if ((io_opts->blocksize % dev->sector_size) != 0) { warnx("Blocksize %ju for %s is not a multiple of " "sector size %u", (uintmax_t)io_opts->blocksize, dev->device_name, dev->sector_size); goto bailout_error; } dev->blocksize = io_opts->blocksize; } dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH; if (io_opts->queue_depth != 0) dev->target_queue_depth = io_opts->queue_depth; if (io_opts->offset != 0) { if (io_opts->offset > (dev->max_sector * dev->sector_size)) { warnx("Offset %ju is past the end of device %s", io_opts->offset, dev->device_name); goto bailout_error; } #if 0 else if ((io_opts->offset % dev->sector_size) != 0) { warnx("Offset %ju for %s is not a multiple of the " "sector size %u", io_opts->offset, dev->device_name, dev->sector_size); goto bailout_error; } dev->start_offset_bytes = io_opts->offset; #endif } dev->min_cmd_size = io_opts->min_cmd_size; dev->run = camdd_pass_run; dev->fetch = camdd_pass_fetch; bailout: cam_freeccb(ccb); return (dev); bailout_error: cam_freeccb(ccb); camdd_free_dev(dev); return (NULL); } void * camdd_worker(void *arg) { struct camdd_dev *dev = arg; struct camdd_buf *buf; struct timespec ts, *kq_ts; ts.tv_sec = 0; ts.tv_nsec = 0; pthread_mutex_lock(&dev->mutex); dev->flags |= CAMDD_DEV_FLAG_ACTIVE; for (;;) { struct kevent ke; int retval = 0; /* * XXX KDM check the reorder queue depth? 
*/ if (dev->write_dev == 0) { uint32_t our_depth, peer_depth, peer_bytes, our_bytes; uint32_t target_depth = dev->target_queue_depth; uint32_t peer_target_depth = dev->peer_dev->target_queue_depth; uint32_t peer_blocksize = dev->peer_dev->blocksize; camdd_get_depth(dev, &our_depth, &peer_depth, &our_bytes, &peer_bytes); #if 0 while (((our_depth < target_depth) && (peer_depth < peer_target_depth)) || ((peer_bytes + our_bytes) < (peer_blocksize * 2))) { #endif while (((our_depth + peer_depth) < (target_depth + peer_target_depth)) || ((peer_bytes + our_bytes) < (peer_blocksize * 3))) { retval = camdd_queue(dev, NULL); if (retval == 1) break; else if (retval != 0) { error_exit = 1; goto bailout; } camdd_get_depth(dev, &our_depth, &peer_depth, &our_bytes, &peer_bytes); } } /* * See if we have any I/O that is ready to execute. */ buf = STAILQ_FIRST(&dev->run_queue); if (buf != NULL) { while (dev->target_queue_depth > dev->cur_active_io) { retval = dev->run(dev); if (retval == -1) { dev->flags |= CAMDD_DEV_FLAG_EOF; error_exit = 1; break; } else if (retval != 0) { break; } } } /* * We've reached EOF, or our partner has reached EOF. */ if ((dev->flags & CAMDD_DEV_FLAG_EOF) || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) { if (dev->write_dev != 0) { if ((STAILQ_EMPTY(&dev->work_queue)) && (dev->num_run_queue == 0) && (dev->cur_active_io == 0)) { goto bailout; } } else { /* * If we're the reader, and the writer * got EOF, he is already done. If we got * the EOF, then we need to wait until * everything is flushed out for the writer. */ if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) { goto bailout; } else if ((dev->num_peer_work_queue == 0) && (dev->num_peer_done_queue == 0) && (dev->cur_active_io == 0) && (dev->num_run_queue == 0)) { goto bailout; } } /* * XXX KDM need to do something about the pending * queue and cleanup resources. 
*/ } if ((dev->write_dev == 0) && (dev->cur_active_io == 0) && (dev->peer_bytes_queued < dev->peer_dev->blocksize)) kq_ts = &ts; else kq_ts = NULL; /* * Run kevent to see if there are events to process. */ pthread_mutex_unlock(&dev->mutex); retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts); pthread_mutex_lock(&dev->mutex); if (retval == -1) { warn("%s: error returned from kevent",__func__); goto bailout; } else if (retval != 0) { switch (ke.filter) { case EVFILT_READ: if (dev->fetch != NULL) { retval = dev->fetch(dev); if (retval == -1) { error_exit = 1; goto bailout; } } break; case EVFILT_SIGNAL: /* * We register for this so we don't get * an error as a result of a SIGINFO or a * SIGINT. It will actually get handled * by the signal handler. If we get a * SIGINT, bail out without printing an * error message. Any other signals * will result in the error message above. */ if (ke.ident == SIGINT) goto bailout; break; case EVFILT_USER: retval = 0; /* * Check to see if the other thread has * queued any I/O for us to do. (In this * case we're the writer.) */ for (buf = STAILQ_FIRST(&dev->work_queue); buf != NULL; buf = STAILQ_FIRST(&dev->work_queue)) { STAILQ_REMOVE_HEAD(&dev->work_queue, work_links); retval = camdd_queue(dev, buf); /* * We keep going unless we get an * actual error. If we get EOF, we * still want to remove the buffers * from the queue and send the back * to the reader thread. */ if (retval == -1) { error_exit = 1; goto bailout; } else retval = 0; } /* * Next check to see if the other thread has * queued any completed buffers back to us. * (In this case we're the reader.) */ for (buf = STAILQ_FIRST(&dev->peer_done_queue); buf != NULL; buf = STAILQ_FIRST(&dev->peer_done_queue)){ STAILQ_REMOVE_HEAD( &dev->peer_done_queue, work_links); dev->num_peer_done_queue--; camdd_peer_done(buf); } break; default: warnx("%s: unknown kevent filter %d", __func__, ke.filter); break; } } } bailout: dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE; /* XXX KDM cleanup resources here? 
*/ pthread_mutex_unlock(&dev->mutex); need_exit = 1; sem_post(&camdd_sem); return (NULL); } /* * Simplistic translation of CCB status to our local status. */ camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol) { camdd_buf_status status = CAMDD_STATUS_NONE; cam_status ccb_status; ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK; switch (protocol) { case PROTO_SCSI: switch (ccb_status) { case CAM_REQ_CMP: { if (ccb->csio.resid == 0) { status = CAMDD_STATUS_OK; } else if (ccb->csio.dxfer_len > ccb->csio.resid) { status = CAMDD_STATUS_SHORT_IO; } else { status = CAMDD_STATUS_EOF; } break; } case CAM_SCSI_STATUS_ERROR: { switch (ccb->csio.scsi_status) { case SCSI_STATUS_OK: case SCSI_STATUS_COND_MET: case SCSI_STATUS_INTERMED: case SCSI_STATUS_INTERMED_COND_MET: status = CAMDD_STATUS_OK; break; case SCSI_STATUS_CMD_TERMINATED: case SCSI_STATUS_CHECK_COND: case SCSI_STATUS_QUEUE_FULL: case SCSI_STATUS_BUSY: case SCSI_STATUS_RESERV_CONFLICT: default: status = CAMDD_STATUS_ERROR; break; } break; } default: status = CAMDD_STATUS_ERROR; break; } break; default: status = CAMDD_STATUS_ERROR; break; } return (status); } /* * Queue a buffer to our peer's work thread for writing. * * Returns 0 for success, -1 for failure, 1 if the other thread exited. */ int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf) { struct kevent ke; STAILQ_HEAD(, camdd_buf) local_queue; struct camdd_buf *buf1, *buf2; struct camdd_buf_data *data = NULL; uint64_t peer_bytes_queued = 0; int active = 1; int retval = 0; STAILQ_INIT(&local_queue); /* * Since we're the reader, we need to queue our I/O to the writer * in sequential order in order to make sure it gets written out * in sequential order. * * Check the next expected I/O starting offset. If this doesn't * match, put it on the reorder queue. */ if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) { /* * If there is nothing on the queue, there is no sorting * needed. 
*/ if (STAILQ_EMPTY(&dev->reorder_queue)) { STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links); dev->num_reorder_queue++; goto bailout; } /* * Sort in ascending order by starting LBA. There should * be no identical LBAs. */ for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL; buf1 = buf2) { buf2 = STAILQ_NEXT(buf1, links); if (buf->lba < buf1->lba) { /* * If we're less than the first one, then * we insert at the head of the list * because this has to be the first element * on the list. */ STAILQ_INSERT_HEAD(&dev->reorder_queue, buf, links); dev->num_reorder_queue++; break; } else if (buf->lba > buf1->lba) { if (buf2 == NULL) { STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links); dev->num_reorder_queue++; break; } else if (buf->lba < buf2->lba) { STAILQ_INSERT_AFTER(&dev->reorder_queue, buf1, buf, links); dev->num_reorder_queue++; break; } } else { errx(1, "Found buffers with duplicate LBA %ju!", buf->lba); } } goto bailout; } else { /* * We're the next expected I/O completion, so put ourselves * on the local queue to be sent to the writer. We use * work_links here so that we can queue this to the * peer_work_queue before taking the buffer off of the * local_queue. */ dev->next_completion_pos_bytes += buf->len; STAILQ_INSERT_TAIL(&local_queue, buf, work_links); /* * Go through the reorder queue looking for more sequential * I/O and add it to the local queue. */ for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL; buf1 = STAILQ_FIRST(&dev->reorder_queue)) { /* * As soon as we see an I/O that is out of sequence, * we're done. */ if ((buf1->lba * dev->sector_size) != dev->next_completion_pos_bytes) break; STAILQ_REMOVE_HEAD(&dev->reorder_queue, links); dev->num_reorder_queue--; STAILQ_INSERT_TAIL(&local_queue, buf1, work_links); dev->next_completion_pos_bytes += buf1->len; } } /* * Setup the event to let the other thread know that it has work * pending. 
*/ EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); /* * Put this on our shadow queue so that we know what we've queued * to the other thread. */ STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) { if (buf1->buf_type != CAMDD_BUF_DATA) { errx(1, "%s: should have a data buffer, not an " "indirect buffer", __func__); } data = &buf1->buf_type_spec.data; /* * We only need to send one EOF to the writer, and don't * need to continue sending EOFs after that. */ if (buf1->status == CAMDD_STATUS_EOF) { if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) { STAILQ_REMOVE(&local_queue, buf1, camdd_buf, work_links); camdd_release_buf(buf1); retval = 1; continue; } dev->flags |= CAMDD_DEV_FLAG_EOF_SENT; } STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links); peer_bytes_queued += (data->fill_len - data->resid); dev->peer_bytes_queued += (data->fill_len - data->resid); dev->num_peer_work_queue++; } if (STAILQ_FIRST(&local_queue) == NULL) goto bailout; /* * Drop our mutex and pick up the other thread's mutex. We need to * do this to avoid deadlocks. */ pthread_mutex_unlock(&dev->mutex); pthread_mutex_lock(&dev->peer_dev->mutex); if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) { /* * Put the buffers on the other thread's incoming work queue. */ for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL; buf1 = STAILQ_FIRST(&local_queue)) { STAILQ_REMOVE_HEAD(&local_queue, work_links); STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1, work_links); } /* * Send an event to the other thread's kqueue to let it know * that there is something on the work queue. */ retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL); if (retval == -1) warn("%s: unable to add peer work_queue kevent", __func__); else retval = 0; } else active = 0; pthread_mutex_unlock(&dev->peer_dev->mutex); pthread_mutex_lock(&dev->mutex); /* * If the other side isn't active, run through the queue and * release all of the buffers. 
*/ if (active == 0) { for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL; buf1 = STAILQ_FIRST(&local_queue)) { STAILQ_REMOVE_HEAD(&local_queue, work_links); STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf, links); dev->num_peer_work_queue--; camdd_release_buf(buf1); } dev->peer_bytes_queued -= peer_bytes_queued; retval = 1; } bailout: return (retval); } /* * Return a buffer to the reader thread when we have completed writing it. */ int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf) { struct kevent ke; int retval = 0; /* * Setup the event to let the other thread know that we have * completed a buffer. */ EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); /* * Drop our lock and acquire the other thread's lock before * manipulating */ pthread_mutex_unlock(&dev->mutex); pthread_mutex_lock(&dev->peer_dev->mutex); /* * Put the buffer on the reader thread's peer done queue now that * we have completed it. */ STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf, work_links); dev->peer_dev->num_peer_done_queue++; /* * Send an event to the peer thread to let it know that we've added * something to its peer done queue. */ retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL); if (retval == -1) warn("%s: unable to add peer_done_queue kevent", __func__); else retval = 0; /* * Drop the other thread's lock and reacquire ours. */ pthread_mutex_unlock(&dev->peer_dev->mutex); pthread_mutex_lock(&dev->mutex); return (retval); } /* * Free a buffer that was written out by the writer thread and returned to * the reader thread. 
*/ void camdd_peer_done(struct camdd_buf *buf) { struct camdd_dev *dev; struct camdd_buf_data *data; dev = buf->dev; if (buf->buf_type != CAMDD_BUF_DATA) { errx(1, "%s: should have a data buffer, not an " "indirect buffer", __func__); } data = &buf->buf_type_spec.data; STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links); dev->num_peer_work_queue--; dev->peer_bytes_queued -= (data->fill_len - data->resid); if (buf->status == CAMDD_STATUS_EOF) dev->flags |= CAMDD_DEV_FLAG_PEER_EOF; STAILQ_INSERT_TAIL(&dev->free_queue, buf, links); } /* * Assumes caller holds the lock for this device. */ void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf, int *error_count) { int retval = 0; /* * If we're the reader, we need to send the completed I/O * to the writer. If we're the writer, we need to just * free up resources, or let the reader know if we've * encountered an error. */ if (dev->write_dev == 0) { retval = camdd_queue_peer_buf(dev, buf); if (retval != 0) (*error_count)++; } else { struct camdd_buf *tmp_buf, *next_buf; STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links, next_buf) { struct camdd_buf *src_buf; struct camdd_buf_indirect *indirect; STAILQ_REMOVE(&buf->src_list, tmp_buf, camdd_buf, src_links); tmp_buf->status = buf->status; if (tmp_buf->buf_type == CAMDD_BUF_DATA) { camdd_complete_peer_buf(dev, tmp_buf); continue; } indirect = &tmp_buf->buf_type_spec.indirect; src_buf = indirect->src_buf; src_buf->refcount--; /* * XXX KDM we probably need to account for * exactly how many bytes we were able to * write. Allocate the residual to the * first N buffers? Or just track the * number of bytes written? Right now the reader * doesn't do anything with a residual. */ src_buf->status = buf->status; if (src_buf->refcount <= 0) camdd_complete_peer_buf(dev, src_buf); STAILQ_INSERT_TAIL(&dev->free_indirect_queue, tmp_buf, links); } STAILQ_INSERT_TAIL(&dev->free_queue, buf, links); } } /* * Fetch all completed commands from the pass(4) device. 
* * Returns the number of commands received, or -1 if any of the commands * completed with an error. Returns 0 if no commands are available. */ int camdd_pass_fetch(struct camdd_dev *dev) { struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass; union ccb ccb; int retval = 0, num_fetched = 0, error_count = 0; pthread_mutex_unlock(&dev->mutex); /* * XXX KDM we don't distinguish between EFAULT and ENOENT. */ while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) { struct camdd_buf *buf; struct camdd_buf_data *data; cam_status ccb_status; union ccb *buf_ccb; buf = ccb.ccb_h.ccb_buf; data = &buf->buf_type_spec.data; buf_ccb = &data->ccb; num_fetched++; /* * Copy the CCB back out so we get status, sense data, etc. */ bcopy(&ccb, buf_ccb, sizeof(ccb)); pthread_mutex_lock(&dev->mutex); /* * We're now done, so take this off the active queue. */ STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links); dev->cur_active_io--; ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK; if (ccb_status != CAM_REQ_CMP) { cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr); } switch (pass_dev->protocol) { case PROTO_SCSI: data->resid = ccb.csio.resid; dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid); break; default: return -1; break; } if (buf->status == CAMDD_STATUS_NONE) buf->status = camdd_ccb_status(&ccb, pass_dev->protocol); if (buf->status == CAMDD_STATUS_ERROR) error_count++; else if (buf->status == CAMDD_STATUS_EOF) { /* * Once we queue this buffer to our partner thread, * he will know that we've hit EOF. */ dev->flags |= CAMDD_DEV_FLAG_EOF; } camdd_complete_buf(dev, buf, &error_count); /* * Unlock in preparation for the ioctl call. */ pthread_mutex_unlock(&dev->mutex); } pthread_mutex_lock(&dev->mutex); if (error_count > 0) return (-1); else return (num_fetched); } /* * Returns -1 for error, 0 for success/continue, and 1 for resource * shortage/stop processing. 
*/
int
camdd_file_run(struct camdd_dev *dev)
{
	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
	struct camdd_buf_data *data;
	struct camdd_buf *buf;
	off_t io_offset;
	/* read(2)/write(2) and friends return ssize_t, not int. */
	ssize_t retval = 0;
	int write_dev = dev->write_dev;
	int error_count = 0, no_resources = 0, double_buf_needed = 0;
	int use_double_buf = 0;
	uint32_t num_sectors = 0, db_len = 0;

	buf = STAILQ_FIRST(&dev->run_queue);
	if (buf == NULL) {
		no_resources = 1;
		goto bailout;
	} else if ((dev->write_dev == 0)
		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
				  CAMDD_DEV_FLAG_EOF_SENT))) {
		/* The reader already saw EOF; just pass the buffer along. */
		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
		dev->num_run_queue--;
		buf->status = CAMDD_STATUS_EOF;
		error_count++;
		goto bailout;
	}

	/*
	 * If we're writing, we need to go through the source buffer list
	 * and create an S/G list.
	 */
	if (write_dev != 0) {
		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
		    dev->sector_size, &num_sectors, &double_buf_needed);
		if (retval != 0) {
			/*
			 * The buffer is still linked on the run queue.  Do
			 * not complete it below: that would put it on the
			 * free queue through the same list linkage.  Leave it
			 * queued and report the resource shortage.
			 */
			buf = NULL;
			no_resources = 1;
			goto bailout;
		}
	}

	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
	dev->num_run_queue--;

	data = &buf->buf_type_spec.data;

	/*
	 * pread(2) and pwrite(2) offsets are byte offsets.
	 */
	io_offset = buf->lba * dev->sector_size;

	/*
	 * Unlock the mutex while we read or write.
	 */
	pthread_mutex_unlock(&dev->mutex);

	/*
	 * Note that we don't need to double buffer if we're the reader
	 * because in that case, we have allocated a single buffer of
	 * sufficient size to do the read.  This copy is necessary on
	 * writes because if one of the components of the S/G list is not
	 * a sector size multiple, the kernel will reject the write.  This
	 * is unfortunate but not surprising.  So this will make sure that
	 * we're using a single buffer that is a multiple of the sector size.
	 *
	 * The same flag drives the copy, the write and the debug output so
	 * that tmp_buf is only ever written out when it has been filled.
	 */
	use_double_buf = (double_buf_needed != 0) && (data->sg_count > 1) &&
	    (write_dev != 0);
	if (use_double_buf) {
		uint32_t cur_offset;
		int i;

		/* The bounce buffer is allocated once and then reused. */
		if (file_dev->tmp_buf == NULL)
			file_dev->tmp_buf = calloc(dev->blocksize, 1);
		if (file_dev->tmp_buf == NULL) {
			buf->status = CAMDD_STATUS_ERROR;
			error_count++;
			pthread_mutex_lock(&dev->mutex);
			goto bailout;
		}
		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
			bcopy(data->iovec[i].iov_base,
			    &file_dev->tmp_buf[cur_offset],
			    data->iovec[i].iov_len);
			cur_offset += data->iovec[i].iov_len;
		}
		db_len = cur_offset;
	}

	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = pread(file_dev->fd, data->buf,
			    buf->len, io_offset);
		} else {
			if (use_double_buf) {
				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
				    db_len, io_offset);
			} else if (data->sg_count == 0) {
				retval = pwrite(file_dev->fd, data->buf,
				    data->fill_len, io_offset);
			} else {
				retval = pwritev(file_dev->fd, data->iovec,
				    data->sg_count, io_offset);
			}
		}
	} else {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = read(file_dev->fd, data->buf, buf->len);
		} else {
			if (use_double_buf) {
				retval = write(file_dev->fd, file_dev->tmp_buf,
				    db_len);
			} else if (data->sg_count == 0) {
				retval = write(file_dev->fd, data->buf,
				    data->fill_len);
			} else {
				retval = writev(file_dev->fd, data->iovec,
				    data->sg_count);
			}
		}
	}

	/* We're done, re-acquire the lock */
	pthread_mutex_lock(&dev->mutex);

	if (retval >= (ssize_t)data->fill_len) {
		/*
		 * If the bytes transferred is more than the request size,
		 * that indicates an overrun, which should only happen at
		 * the end of a transfer if we have to round up to a sector
		 * boundary.
		 */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_OK;
		data->resid = 0;
		dev->bytes_transferred += retval;
	} else if (retval == -1) {
		warn("Error %s %s", (write_dev) ? "writing to" :
		     "reading from", file_dev->filename);

		buf->status = CAMDD_STATUS_ERROR;
		data->resid = data->fill_len;
		error_count++;

		if (dev->debug == 0)
			goto bailout;

		/* With debugging on, describe the request that failed. */
		if (use_double_buf) {
			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
				"offset %ju\n", __func__, file_dev->fd,
				file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
				(uintmax_t)io_offset);
		} else if (data->sg_count == 0) {
			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
				"offset %ju\n", __func__, file_dev->fd, data->buf,
				data->fill_len, (uintmax_t)buf->lba,
				(uintmax_t)io_offset);
		} else {
			int i;

			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
				"offset %ju\n", __func__, file_dev->fd,
				data->fill_len, (uintmax_t)buf->lba,
				(uintmax_t)io_offset);

			for (i = 0; i < data->sg_count; i++) {
				fprintf(stderr, "index %d ptr %p len %zu\n",
					i, data->iovec[i].iov_base,
					data->iovec[i].iov_len);
			}
		}
	} else if (retval == 0) {
		buf->status = CAMDD_STATUS_EOF;
		if (dev->debug != 0)
			printf("%s: got EOF from %s!\n", __func__,
			    file_dev->filename);
		data->resid = data->fill_len;
		error_count++;
	} else if (retval < (ssize_t)data->fill_len) {
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_SHORT_IO;
		data->resid = data->fill_len - retval;
		dev->bytes_transferred += retval;
	}

bailout:
	if (buf != NULL) {
		if (buf->status == CAMDD_STATUS_EOF) {
			struct camdd_buf *buf2;

			/* Everything still waiting to run is past EOF too. */
			dev->flags |= CAMDD_DEV_FLAG_EOF;
			STAILQ_FOREACH(buf2, &dev->run_queue, links)
				buf2->status = CAMDD_STATUS_EOF;
		}

		camdd_complete_buf(dev, buf, &error_count);
	}

	if (error_count != 0)
		return (-1);
	else if (no_resources != 0)
		return (1);
	else
		return (0);
}

/*
 * Execute one command from the run queue.  Returns 0 for success, 1 for
 * stop processing, and -1 for error.
*/ int camdd_pass_run(struct camdd_dev *dev) { struct camdd_buf *buf = NULL; struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass; struct camdd_buf_data *data; uint32_t num_blocks, sectors_used = 0; union ccb *ccb; int retval = 0, is_write = dev->write_dev; int double_buf_needed = 0; buf = STAILQ_FIRST(&dev->run_queue); if (buf == NULL) { retval = 1; goto bailout; } /* * If we're writing, we need to go through the source buffer list * and create an S/G list. */ if (is_write != 0) { retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size, §ors_used, &double_buf_needed); if (retval != 0) { retval = -1; goto bailout; } } STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links); dev->num_run_queue--; data = &buf->buf_type_spec.data; /* * In almost every case the number of blocks should be the device * block size. The exception may be at the end of an I/O stream * for a partial block or at the end of a device. */ if (is_write != 0) num_blocks = sectors_used; else num_blocks = data->fill_len / pass_dev->block_len; ccb = &data->ccb; switch (pass_dev->protocol) { case PROTO_SCSI: CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio); scsi_read_write(&ccb->csio, /*retries*/ dev->retry_count, /*cbfcnp*/ NULL, /*tag_action*/ MSG_SIMPLE_Q_TAG, /*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ : SCSI_RW_WRITE, /*byte2*/ 0, /*minimum_cmd_size*/ dev->min_cmd_size, /*lba*/ buf->lba, /*block_count*/ num_blocks, /*data_ptr*/ (data->sg_count != 0) ? (uint8_t *)data->segs : data->buf, /*dxfer_len*/ (num_blocks * pass_dev->block_len), /*sense_len*/ SSD_FULL_SIZE, /*timeout*/ dev->io_timeout); if (data->sg_count != 0) { ccb->csio.sglist_cnt = data->sg_count; } break; default: retval = -1; goto bailout; } /* Disable freezing the device queue */ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS; if (dev->retry_count != 0) ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER; if (data->sg_count != 0) { ccb->ccb_h.flags |= CAM_DATA_SG; } /* * Store a pointer to the buffer in the CCB. 
The kernel will * restore this when we get it back, and we'll use it to identify * the buffer this CCB came from. */ ccb->ccb_h.ccb_buf = buf; /* * Unlock our mutex in preparation for issuing the ioctl. */ pthread_mutex_unlock(&dev->mutex); /* * Queue the CCB to the pass(4) driver. */ if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) { pthread_mutex_lock(&dev->mutex); warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__, pass_dev->dev->device_name, pass_dev->dev->dev_unit_num); warn("%s: CCB address is %p", __func__, ccb); retval = -1; STAILQ_INSERT_TAIL(&dev->free_queue, buf, links); } else { pthread_mutex_lock(&dev->mutex); dev->cur_active_io++; STAILQ_INSERT_TAIL(&dev->active_queue, buf, links); } bailout: return (retval); } int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len) { struct camdd_dev_pass *pass_dev; uint32_t num_blocks; int retval = 0; pass_dev = &dev->dev_spec.pass; *lba = dev->next_io_pos_bytes / dev->sector_size; *len = dev->blocksize; num_blocks = *len / dev->sector_size; /* * If max_sector is 0, then we have no set limit. This can happen * if we're writing to a file in a filesystem, or reading from * something like /dev/zero. */ if ((dev->max_sector != 0) || (dev->sector_io_limit != 0)) { uint64_t max_sector; if ((dev->max_sector != 0) && (dev->sector_io_limit != 0)) max_sector = min(dev->sector_io_limit, dev->max_sector); else if (dev->max_sector != 0) max_sector = dev->max_sector; else max_sector = dev->sector_io_limit; /* * Check to see whether we're starting off past the end of * the device. If so, we need to just send an EOF * notification to the writer. */ if (*lba > max_sector) { *len = 0; retval = 1; } else if (((*lba + num_blocks) > max_sector + 1) || ((*lba + num_blocks) < *lba)) { /* * If we get here (but pass the first check), we * can trim the request length down to go to the * end of the device. 
*/ num_blocks = (max_sector + 1) - *lba; *len = num_blocks * dev->sector_size; retval = 1; } } dev->next_io_pos_bytes += *len; return (retval); } /* * Returns 0 for success, 1 for EOF detected, and -1 for failure. */ int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf) { struct camdd_buf *buf = NULL; struct camdd_buf_data *data; struct camdd_dev_pass *pass_dev; size_t new_len; struct camdd_buf_data *rb_data; int is_write = dev->write_dev; int eof_flush_needed = 0; int retval = 0; int error; pass_dev = &dev->dev_spec.pass; /* * If we've gotten EOF or our partner has, we should not continue * queueing I/O. If we're a writer, though, we should continue * to write any buffers that don't have EOF status. */ if ((dev->flags & CAMDD_DEV_FLAG_EOF) || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF) && (is_write == 0))) { /* * Tell the worker thread that we have seen EOF. */ retval = 1; /* * If we're the writer, send the buffer back with EOF status. */ if (is_write) { read_buf->status = CAMDD_STATUS_EOF; error = camdd_complete_peer_buf(dev, read_buf); } goto bailout; } if (is_write == 0) { buf = camdd_get_buf(dev, CAMDD_BUF_DATA); if (buf == NULL) { retval = -1; goto bailout; } data = &buf->buf_type_spec.data; retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len); if (retval != 0) { buf->status = CAMDD_STATUS_EOF; if ((buf->len == 0) && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT | CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) { camdd_release_buf(buf); goto bailout; } dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED; } data->fill_len = buf->len; data->src_start_offset = buf->lba * dev->sector_size; /* * Put this on the run queue. */ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links); dev->num_run_queue++; /* We're done. */ goto bailout; } /* * Check for new EOF status from the reader. 
*/ if ((read_buf->status == CAMDD_STATUS_EOF) || (read_buf->status == CAMDD_STATUS_ERROR)) { dev->flags |= CAMDD_DEV_FLAG_PEER_EOF; if ((STAILQ_FIRST(&dev->pending_queue) == NULL) && (read_buf->len == 0)) { camdd_complete_peer_buf(dev, read_buf); retval = 1; goto bailout; } else eof_flush_needed = 1; } /* * See if we have a buffer we're composing with pieces from our * partner thread. */ buf = STAILQ_FIRST(&dev->pending_queue); if (buf == NULL) { uint64_t lba; ssize_t len; retval = camdd_get_next_lba_len(dev, &lba, &len); if (retval != 0) { read_buf->status = CAMDD_STATUS_EOF; if (len == 0) { dev->flags |= CAMDD_DEV_FLAG_EOF; error = camdd_complete_peer_buf(dev, read_buf); goto bailout; } } /* * If we don't have a pending buffer, we need to grab a new * one from the free list or allocate another one. */ buf = camdd_get_buf(dev, CAMDD_BUF_DATA); if (buf == NULL) { retval = 1; goto bailout; } buf->lba = lba; buf->len = len; STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links); dev->num_pending_queue++; } data = &buf->buf_type_spec.data; rb_data = &read_buf->buf_type_spec.data; if ((rb_data->src_start_offset != dev->next_peer_pos_bytes) && (dev->debug != 0)) { printf("%s: WARNING: reader offset %#jx != expected offset " "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset, (uintmax_t)dev->next_peer_pos_bytes); } dev->next_peer_pos_bytes = rb_data->src_start_offset + (rb_data->fill_len - rb_data->resid); new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len; if (new_len < buf->len) { /* * There are three cases here: * 1. We need more data to fill up a block, so we put * this I/O on the queue and wait for more I/O. * 2. We have a pending buffer in the queue that is * smaller than our blocksize, but we got an EOF. So we * need to go ahead and flush the write out. * 3. We got an error. */ /* * Increment our fill length. */ data->fill_len += (rb_data->fill_len - rb_data->resid); /* * Add the new read buffer to the list for writing. 
*/ STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links); /* Increment the count */ buf->src_count++; if (eof_flush_needed == 0) { /* * We need to exit, because we don't have enough * data yet. */ goto bailout; } else { /* * Take the buffer off of the pending queue. */ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links); dev->num_pending_queue--; /* * If we need an EOF flush, but there is no data * to flush, go ahead and return this buffer. */ if (data->fill_len == 0) { camdd_complete_buf(dev, buf, /*error_count*/0); retval = 1; goto bailout; } /* * Put this on the next queue for execution. */ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links); dev->num_run_queue++; } } else if (new_len == buf->len) { /* * We have enough data to completey fill one block, * so we're ready to issue the I/O. */ /* * Take the buffer off of the pending queue. */ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links); dev->num_pending_queue--; /* * Add the new read buffer to the list for writing. */ STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links); /* Increment the count */ buf->src_count++; /* * Increment our fill length. */ data->fill_len += (rb_data->fill_len - rb_data->resid); /* * Put this on the next queue for execution. */ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links); dev->num_run_queue++; } else { struct camdd_buf *idb; struct camdd_buf_indirect *indirect; uint32_t len_to_go, cur_offset; idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT); if (idb == NULL) { retval = 1; goto bailout; } indirect = &idb->buf_type_spec.indirect; indirect->src_buf = read_buf; read_buf->refcount++; indirect->offset = 0; indirect->start_ptr = rb_data->buf; /* * We've already established that there is more * data in read_buf than we have room for in our * current write request. So this particular chunk * of the request should just be the remainder * needed to fill up a block. 
*/ indirect->len = buf->len - (data->fill_len - data->resid); camdd_buf_add_child(buf, idb); /* * This buffer is ready to execute, so we can take * it off the pending queue and put it on the run * queue. */ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links); dev->num_pending_queue--; STAILQ_INSERT_TAIL(&dev->run_queue, buf, links); dev->num_run_queue++; cur_offset = indirect->offset + indirect->len; /* * The resulting I/O would be too large to fit in * one block. We need to split this I/O into * multiple pieces. Allocate as many buffers as needed. */ for (len_to_go = rb_data->fill_len - rb_data->resid - indirect->len; len_to_go > 0;) { struct camdd_buf *new_buf; struct camdd_buf_data *new_data; uint64_t lba; ssize_t len; retval = camdd_get_next_lba_len(dev, &lba, &len); if ((retval != 0) && (len == 0)) { /* * The device has already been marked * as EOF, and there is no space left. */ goto bailout; } new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA); if (new_buf == NULL) { retval = 1; goto bailout; } new_buf->lba = lba; new_buf->len = len; idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT); if (idb == NULL) { retval = 1; goto bailout; } indirect = &idb->buf_type_spec.indirect; indirect->src_buf = read_buf; read_buf->refcount++; indirect->offset = cur_offset; indirect->start_ptr = rb_data->buf + cur_offset; indirect->len = min(len_to_go, new_buf->len); #if 0 if (((indirect->len % dev->sector_size) != 0) || ((indirect->offset % dev->sector_size) != 0)) { warnx("offset %ju len %ju not aligned with " "sector size %u", indirect->offset, (uintmax_t)indirect->len, dev->sector_size); } #endif cur_offset += indirect->len; len_to_go -= indirect->len; camdd_buf_add_child(new_buf, idb); new_data = &new_buf->buf_type_spec.data; if ((new_data->fill_len == new_buf->len) || (eof_flush_needed != 0)) { STAILQ_INSERT_TAIL(&dev->run_queue, new_buf, links); dev->num_run_queue++; } else if (new_data->fill_len < buf->len) { STAILQ_INSERT_TAIL(&dev->pending_queue, new_buf, links); 
dev->num_pending_queue++; } else { warnx("%s: too much data in new " "buffer!", __func__); retval = 1; goto bailout; } } } bailout: return (retval); } void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth, uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes) { *our_depth = dev->cur_active_io + dev->num_run_queue; if (dev->num_peer_work_queue > dev->num_peer_done_queue) *peer_depth = dev->num_peer_work_queue - dev->num_peer_done_queue; else *peer_depth = 0; *our_bytes = *our_depth * dev->blocksize; *peer_bytes = dev->peer_bytes_queued; } void camdd_sig_handler(int sig) { if (sig == SIGINFO) need_status = 1; else { need_exit = 1; error_exit = 1; } sem_post(&camdd_sem); } void camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, struct timespec *start_time) { struct timespec done_time; uint64_t total_ns; long double mb_sec, total_sec; int error = 0; error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time); if (error != 0) { warn("Unable to get done time"); return; } timespecsub(&done_time, start_time, &done_time); total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000); total_sec = total_ns; total_sec /= 1000000000; fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n" "%.4Lf seconds elapsed\n", (uintmax_t)camdd_dev->bytes_transferred, (camdd_dev->write_dev == 0) ? "read from" : "written to", camdd_dev->device_name, (uintmax_t)other_dev->bytes_transferred, (other_dev->write_dev == 0) ? 
"read from" : "written to", other_dev->device_name, total_sec); mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred); mb_sec /= 1024 * 1024; mb_sec *= 1000000000; mb_sec /= total_ns; fprintf(stderr, "%.2Lf MB/sec\n", mb_sec); } int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io, int retry_count, int timeout) { struct cam_device *new_cam_dev = NULL; struct camdd_dev *devs[2]; struct timespec start_time; pthread_t threads[2]; int unit = 0; int error = 0; int i; if (num_io_opts != 2) { warnx("Must have one input and one output path"); error = 1; goto bailout; } bzero(devs, sizeof(devs)); for (i = 0; i < num_io_opts; i++) { switch (io_opts[i].dev_type) { case CAMDD_DEV_PASS: { if (isdigit(io_opts[i].dev_name[0])) { camdd_argmask new_arglist = CAMDD_ARG_NONE; int bus = 0, target = 0, lun = 0; int rv; /* device specified as bus:target[:lun] */ rv = parse_btl(io_opts[i].dev_name, &bus, &target, &lun, &new_arglist); if (rv < 2) { warnx("numeric device specification " "must be either bus:target, or " "bus:target:lun"); error = 1; goto bailout; } /* default to 0 if lun was not specified */ if ((new_arglist & CAMDD_ARG_LUN) == 0) { lun = 0; new_arglist |= CAMDD_ARG_LUN; } new_cam_dev = cam_open_btl(bus, target, lun, O_RDWR, NULL); } else { char name[30]; if (cam_get_device(io_opts[i].dev_name, name, sizeof name, &unit) == -1) { warnx("%s", cam_errbuf); error = 1; goto bailout; } new_cam_dev = cam_open_spec_device(name, unit, O_RDWR, NULL); } if (new_cam_dev == NULL) { warnx("%s", cam_errbuf); error = 1; goto bailout; } devs[i] = camdd_probe_pass(new_cam_dev, /*io_opts*/ &io_opts[i], CAMDD_ARG_ERR_RECOVER, /*probe_retry_count*/ 3, /*probe_timeout*/ 5000, /*io_retry_count*/ retry_count, /*io_timeout*/ timeout); if (devs[i] == NULL) { warn("Unable to probe device %s%u", new_cam_dev->device_name, new_cam_dev->dev_unit_num); error = 1; goto bailout; } break; } case CAMDD_DEV_FILE: { int fd = -1; if (io_opts[i].dev_name[0] == '-') { if 
(io_opts[i].write_dev != 0) fd = STDOUT_FILENO; else fd = STDIN_FILENO; } else { if (io_opts[i].write_dev != 0) { fd = open(io_opts[i].dev_name, O_RDWR | O_CREAT, S_IWUSR |S_IRUSR); } else { fd = open(io_opts[i].dev_name, O_RDONLY); } } if (fd == -1) { warn("error opening file %s", io_opts[i].dev_name); error = 1; goto bailout; } devs[i] = camdd_probe_file(fd, &io_opts[i], retry_count, timeout); if (devs[i] == NULL) { error = 1; goto bailout; } break; } default: warnx("Unknown device type %d (%s)", io_opts[i].dev_type, io_opts[i].dev_name); error = 1; goto bailout; break; /*NOTREACHED */ } devs[i]->write_dev = io_opts[i].write_dev; devs[i]->start_offset_bytes = io_opts[i].offset; if (max_io != 0) { devs[i]->sector_io_limit = (devs[i]->start_offset_bytes / devs[i]->sector_size) + (max_io / devs[i]->sector_size) - 1; } devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes; devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes; } devs[0]->peer_dev = devs[1]; devs[1]->peer_dev = devs[0]; devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes; devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes; sem_init(&camdd_sem, /*pshared*/ 0, 0); signal(SIGINFO, camdd_sig_handler); signal(SIGINT, camdd_sig_handler); error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time); if (error != 0) { warn("Unable to get start time"); goto bailout; } for (i = 0; i < num_io_opts; i++) { error = pthread_create(&threads[i], NULL, camdd_worker, (void *)devs[i]); if (error != 0) { warnc(error, "pthread_create() failed"); goto bailout; } } for (;;) { if ((sem_wait(&camdd_sem) == -1) || (need_exit != 0)) { struct kevent ke; for (i = 0; i < num_io_opts; i++) { EV_SET(&ke, (uintptr_t)&devs[i]->work_queue, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); devs[i]->flags |= CAMDD_DEV_FLAG_EOF; error = kevent(devs[i]->kq, &ke, 1, NULL, 0, NULL); if (error == -1) warn("%s: unable to wake up thread", __func__); error = 0; } break; } else if (need_status != 0) { 
camdd_print_status(devs[0], devs[1], &start_time); need_status = 0; } } for (i = 0; i < num_io_opts; i++) { pthread_join(threads[i], NULL); } camdd_print_status(devs[0], devs[1], &start_time); bailout: for (i = 0; i < num_io_opts; i++) camdd_free_dev(devs[i]); return (error + error_exit); } void usage(void) { fprintf(stderr, "usage: camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n" " <-i|-o file=/tmp/file,bs=512K,offset=1M>\n" " <-i|-o file=/dev/da0,bs=512K,offset=1M>\n" " <-i|-o file=/dev/nsa0,bs=512K>\n" " [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n" "Option description\n" "-i Specify input device/file and parameters\n" "-o Specify output device/file and parameters\n" "Input and Output parameters\n" "pass=name Specify a pass(4) device like pass0 or /dev/pass0\n" "file=name Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n" " or - for stdin/stdout\n" "bs=blocksize Specify blocksize in bytes, or using K, M, G, etc. suffix\n" "offset=len Specify starting offset in bytes or using K, M, G suffix\n" " NOTE: offset cannot be specified on tapes, pipes, stdin/out\n" "depth=N Specify a numeric queue depth. This only applies to pass(4)\n" "mcs=N Specify a minimum cmd size for pass(4) read/write commands\n" "Optional arguments\n" "-C retry_cnt Specify a retry count for pass(4) devices\n" "-E Enable CAM error recovery for pass(4) devices\n" "-m max_io Specify the maximum amount to be transferred in bytes or\n" " using K, G, M, etc. 
suffixes\n" "-t timeout Specify the I/O timeout to use with pass(4) devices\n" "-v Enable verbose error recovery\n" "-h Print this message\n"); } int camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts) { char *tmpstr, *tmpstr2; char *orig_tmpstr = NULL; int retval = 0; io_opts->write_dev = is_write; tmpstr = strdup(args); if (tmpstr == NULL) { warn("strdup failed"); retval = 1; goto bailout; } orig_tmpstr = tmpstr; while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) { char *name, *value; /* * If the user creates an empty parameter by putting in two * commas, skip over it and look for the next field. */ if (*tmpstr2 == '\0') continue; name = strsep(&tmpstr2, "="); if (*name == '\0') { warnx("Got empty I/O parameter name"); retval = 1; goto bailout; } value = strsep(&tmpstr2, "="); if ((value == NULL) || (*value == '\0')) { warnx("Empty I/O parameter value for %s", name); retval = 1; goto bailout; } if (strncasecmp(name, "file", 4) == 0) { io_opts->dev_type = CAMDD_DEV_FILE; io_opts->dev_name = strdup(value); if (io_opts->dev_name == NULL) { warn("Error allocating memory"); retval = 1; goto bailout; } } else if (strncasecmp(name, "pass", 4) == 0) { io_opts->dev_type = CAMDD_DEV_PASS; io_opts->dev_name = strdup(value); if (io_opts->dev_name == NULL) { warn("Error allocating memory"); retval = 1; goto bailout; } } else if ((strncasecmp(name, "bs", 2) == 0) || (strncasecmp(name, "blocksize", 9) == 0)) { retval = expand_number(value, &io_opts->blocksize); if (retval == -1) { warn("expand_number(3) failed on %s=%s", name, value); retval = 1; goto bailout; } } else if (strncasecmp(name, "depth", 5) == 0) { char *endptr; io_opts->queue_depth = strtoull(value, &endptr, 0); if (*endptr != '\0') { warnx("invalid queue depth %s", value); retval = 1; goto bailout; } } else if (strncasecmp(name, "mcs", 3) == 0) { char *endptr; io_opts->min_cmd_size = strtol(value, &endptr, 0); if ((*endptr != '\0') || ((io_opts->min_cmd_size > 16) || (io_opts->min_cmd_size < 
0))) { warnx("invalid minimum cmd size %s", value); retval = 1; goto bailout; } } else if (strncasecmp(name, "offset", 6) == 0) { retval = expand_number(value, &io_opts->offset); if (retval == -1) { warn("expand_number(3) failed on %s=%s", name, value); retval = 1; goto bailout; } } else if (strncasecmp(name, "debug", 5) == 0) { char *endptr; io_opts->debug = strtoull(value, &endptr, 0); if (*endptr != '\0') { warnx("invalid debug level %s", value); retval = 1; goto bailout; } } else { warnx("Unrecognized parameter %s=%s", name, value); } } bailout: free(orig_tmpstr); return (retval); } int main(int argc, char **argv) { int c; camdd_argmask arglist = CAMDD_ARG_NONE; int timeout = 0, retry_count = 1; int error = 0; uint64_t max_io = 0; struct camdd_io_opts *opt_list = NULL; if (argc == 1) { usage(); exit(1); } opt_list = calloc(2, sizeof(struct camdd_io_opts)); if (opt_list == NULL) { warn("Unable to allocate option list"); error = 1; goto bailout; } while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){ switch (c) { case 'C': retry_count = strtol(optarg, NULL, 0); if (retry_count < 0) errx(1, "retry count %d is < 0", retry_count); arglist |= CAMDD_ARG_RETRIES; break; case 'E': arglist |= CAMDD_ARG_ERR_RECOVER; break; case 'i': case 'o': if (((c == 'i') && (opt_list[0].dev_type != CAMDD_DEV_NONE)) || ((c == 'o') && (opt_list[1].dev_type != CAMDD_DEV_NONE))) { errx(1, "Only one input and output path " "allowed"); } error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0, (c == 'o') ? 
&opt_list[1] : &opt_list[0]); if (error != 0) goto bailout; break; case 'm': error = expand_number(optarg, &max_io); if (error == -1) { warn("invalid maximum I/O amount %s", optarg); error = 1; goto bailout; } break; case 't': timeout = strtol(optarg, NULL, 0); if (timeout < 0) errx(1, "invalid timeout %d", timeout); /* Convert the timeout from seconds to ms */ timeout *= 1000; arglist |= CAMDD_ARG_TIMEOUT; break; case 'v': arglist |= CAMDD_ARG_VERBOSE; break; case 'h': default: usage(); exit(1); break; /*NOTREACHED*/ } } if ((opt_list[0].dev_type == CAMDD_DEV_NONE) || (opt_list[1].dev_type == CAMDD_DEV_NONE)) errx(1, "Must specify both -i and -o"); /* * Set the timeout if the user hasn't specified one. */ if (timeout == 0) timeout = CAMDD_PASS_RW_TIMEOUT; error = camdd_rw(opt_list, 2, max_io, retry_count, timeout); bailout: free(opt_list); exit(error); }