diff --git a/sys/arm/arm/busdma_machdep.c b/sys/arm/arm/busdma_machdep.c --- a/sys/arm/arm/busdma_machdep.c +++ b/sys/arm/arm/busdma_machdep.c @@ -318,7 +318,7 @@ alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) { - return (addr & (dmat->alignment - 1)); + return (!vm_addr_align_ok(addr, dmat->alignment)); } /* @@ -1007,18 +1007,13 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ - bmask = ~(dmat->boundary - 1); - if (dmat->boundary > 0) { - baddr = (curaddr + dmat->boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary)) + sgsize = roundup2(curaddr, dmat->boundary) - curaddr; /* * Insert chunk into a segment, coalescing with @@ -1032,8 +1027,8 @@ } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && - (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->boundary)) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c --- a/sys/arm64/arm64/busdma_bounce.c +++ b/sys/arm64/arm64/busdma_bounce.c @@ -197,7 +197,7 @@ alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) { - return ((addr & (dmat->common.alignment - 1)) != 0); + return (!vm_addr_align_ok(addr, dmat->common.alignment)); } static bool @@ -616,7 +616,7 @@ __func__, dmat, dmat->common.flags, ENOMEM); free(*mapp, M_DEVBUF); return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->alloc_alignment - 1)) { + } else if (!vm_addr_align_ok(vtophys(*vaddr), dmat->alloc_alignment)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } dmat->map_count++; @@ -767,18 +767,13 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. 
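* (For example, with boundary 0x1000, curaddr 0xff0 and sgsize 0x20, the * last byte 0x100f lies past the 0x1000 line, so vm_addr_bound_ok() fails * and sgsize is clamped to roundup2(0xff0, 0x1000) - 0xff0 = 0x10, ending * the chunk exactly at the boundary line.)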
*/ - bmask = ~(dmat->common.boundary - 1); - if (dmat->common.boundary > 0) { - baddr = (curaddr + dmat->common.boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->common.boundary)) + sgsize = roundup2(curaddr, dmat->common.boundary) - curaddr; /* * Insert chunk into a segment, coalescing with @@ -792,8 +787,8 @@ } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && - (dmat->common.boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->common.boundary)) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -619,8 +619,8 @@ if (buflen1 > tag->common.maxsegsz) buflen1 = tag->common.maxsegsz; - KASSERT(((entry->start + offset) & (tag->common.alignment - 1)) - == 0, + KASSERT(vm_addr_align_ok(entry->start + offset, + tag->common.alignment), ("alignment failed: ctx %p start 0x%jx offset %x " "align 0x%jx", ctx, (uintmax_t)entry->start, offset, (uintmax_t)tag->common.alignment)); @@ -631,7 +631,7 @@ (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.lowaddr, (uintmax_t)tag->common.highaddr)); - KASSERT(iommu_test_boundary(entry->start + offset, buflen1, + KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1, tag->common.boundary), ("boundary failed: ctx %p start 0x%jx end 0x%jx " "boundary 0x%jx", ctx, (uintmax_t)entry->start, diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -148,16 +148,6 @@ #define IOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->lock) #define IOMMU_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->lock, MA_OWNED) -static inline bool -iommu_test_boundary(iommu_gaddr_t start, iommu_gaddr_t size, - iommu_gaddr_t boundary) -{ - - if (boundary == 0) - return (true); - return (start + size <= ((start + boundary) & ~(boundary - 1))); -} - void iommu_free_ctx(struct iommu_ctx *ctx); void iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ctx); struct iommu_ctx *iommu_get_ctx(struct iommu_unit *, device_t dev, diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -314,7 +314,7 @@ return (false); /* No boundary crossing. */ - if (iommu_test_boundary(a->entry->start + a->offset, a->size, + if (vm_addr_bound_ok(a->entry->start + a->offset, a->size, a->common->boundary)) return (true); diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/mips/mips/busdma_machdep.c copy from sys/powerpc/powerpc/busdma_machdep.c copy to sys/mips/mips/busdma_machdep.c --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/mips/mips/busdma_machdep.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 1997, 1998 Justin T. Gibbs. + * Copyright (c) 2006 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,77 +24,99 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- */ - -/* - * From amd64/busdma_machdep.c, r204214 + * + * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); +/* + * MIPS bus dma support routines + */ + #include #include #include #include +#include #include -#include -#include #include #include #include #include +#include +#include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include +#include #include -#include "iommu_if.h" +#define MAX_BPAGES 64 +#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 +#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 -#define MAX_BPAGES MIN(8192, physmem/40) +/* + * On XBurst cores from Ingenic, cache-line writeback is local + * only, unless accompanied by invalidation. Invalidations force + * dirty line writeout and invalidation requests forwarded to + * other cores if other cores have the cache line dirty. + */ +#if defined(SMP) && defined(CPU_XBURST) +#define BUS_DMA_FORCE_WBINV +#endif struct bounce_zone; struct bus_dma_tag { - bus_dma_tag_t parent; - bus_size_t alignment; - bus_addr_t boundary; - bus_addr_t lowaddr; - bus_addr_t highaddr; - bus_dma_filter_t *filter; - void *filterarg; - bus_size_t maxsize; - bus_size_t maxsegsz; - u_int nsegments; - int flags; - int ref_count; - int map_count; - bus_dma_lock_t *lockfunc; - void *lockfuncarg; + bus_dma_tag_t parent; + bus_size_t alignment; + bus_addr_t boundary; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_dma_filter_t *filter; + void *filterarg; + bus_size_t maxsize; + u_int nsegments; + bus_size_t maxsegsz; + int flags; + int ref_count; + int map_count; + bus_dma_lock_t *lockfunc; + void *lockfuncarg; + bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; - device_t iommu; - void *iommu_cookie; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ + vm_offset_t vaddr_nocache; /* kva of bounce buffer uncached */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ - vm_page_t datapage; /* physical page of client data */ - vm_offset_t dataoffs; /* page offset of client data */ + bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; +struct sync_list { + vm_offset_t vaddr; /* kva of bounce buffer */ + bus_addr_t busaddr; /* Physical address */ + bus_size_t datacount; /* client data count */ +}; + struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; @@ -119,23 +141,27 @@ static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static void *busdma_ih; -static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, +static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); +#define DMAMAP_UNCACHEABLE 0x08 +#define DMAMAP_CACHE_ALIGNED 0x10 + struct bus_dmamap { - struct bp_list bpages; - int pagesneeded; - int pagesreserved; - bus_dma_tag_t dmat; - struct memdesc mem; - bus_dma_segment_t *segments; - int nsegs; - bus_dmamap_callback_t *callback; - void *callback_arg; + struct bp_list bpages; + int pagesneeded; + int pagesreserved; + bus_dma_tag_t dmat; + struct memdesc mem; + int flags; + TAILQ_ENTRY(bus_dmamap) freelist; STAILQ_ENTRY(bus_dmamap) links; - int contigalloc; + bus_dmamap_callback_t *callback; + void *callback_arg; + int sync_count; + struct sync_list *slist; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; @@ 
-150,7 +176,80 @@ vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); -static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); + +/* Default tag, as most drivers provide no parent tag. */ +bus_dma_tag_t mips_root_dma_tag; + +static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */ + +static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ +static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ + +MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); +MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); + +/* + * This is the ctor function passed to uma_zcreate() for the pool of dma maps. + * It'll need platform-specific changes if this code is copied. + */ +static int +dmamap_ctor(void *mem, int size, void *arg, int flags) +{ + bus_dmamap_t map; + bus_dma_tag_t dmat; + + map = (bus_dmamap_t)mem; + dmat = (bus_dma_tag_t)arg; + + dmat->map_count++; + + bzero(map, sizeof(*map)); + map->dmat = dmat; + STAILQ_INIT(&map->bpages); + + return (0); +} + +/* + * This is the dtor function passed to uma_zcreate() for the pool of dma maps. + * It may need platform-specific changes if this code is copied . + */ +static void +dmamap_dtor(void *mem, int size, void *arg) +{ + bus_dmamap_t map; + + map = (bus_dmamap_t)mem; + + map->dmat->map_count--; +} + +static void +busdma_init(void *dummy) +{ + + /* Create a cache of maps for bus_dmamap_create(). */ + dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap), + dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); + + /* Create a cache of buffers in standard (cacheable) memory. */ + standard_allocator = busdma_bufalloc_create("buffer", + mips_dcache_max_linesize, /* minimum_alignment */ + NULL, /* uma_alloc func */ + NULL, /* uma_free func */ + 0); /* uma_zcreate_flags */ + + /* + * Create a cache of buffers in uncacheable memory, to implement the + * BUS_DMA_COHERENT flag. + */ + coherent_allocator = busdma_bufalloc_create("coherent", + mips_dcache_max_linesize, /* minimum_alignment */ + busdma_bufalloc_alloc_uncacheable, + busdma_bufalloc_free_uncacheable, + 0); /* uma_zcreate_flags */ +} +SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_FOURTH, busdma_init, NULL); /* * Return true if a match is made. @@ -160,7 +259,7 @@ * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ -static __inline int +static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; @@ -168,21 +267,34 @@ retval = 0; do { - if (dmat->filter == NULL && dmat->iommu == NULL && - paddr > dmat->lowaddr && paddr <= dmat->highaddr) - retval = 1; - if (dmat->filter == NULL && - (paddr & (dmat->alignment - 1)) != 0) - retval = 1; - if (dmat->filter != NULL && - (*dmat->filter)(dmat->filterarg, paddr) != 0) + if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) + || !vm_addr_align_ok(paddr, dmat->alignment)) + && (dmat->filter == NULL + || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; - dmat = dmat->parent; + dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } +/* + * Check to see if the specified page is in an allowed DMA range. 
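+ * (That is: return 1 when the tag's exclusion window [lowaddr, highaddr] + * overlaps one of the phys_avail[] chunks of real memory, in which case + * some RAM exists that this tag cannot address directly and bounce pages + * may be needed.)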
+ */ + +static __inline int +_bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) +{ + int i; + for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { + if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) + || (lowaddr < phys_avail[i] && + highaddr > phys_avail[i])) + return (1); + } + return (0); +} + /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). @@ -214,38 +326,59 @@ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { +#ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); +#else + printf("DRIVER_ERROR: busdma dflt_lock called\n"); +#endif +} + +static __inline bus_dmamap_t +_busdma_alloc_dmamap(bus_dma_tag_t dmat) +{ + struct sync_list *slist; + bus_dmamap_t map; + + slist = malloc(sizeof(*slist) * dmat->nsegments, M_BUSDMA, M_NOWAIT); + if (slist == NULL) + return (NULL); + map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT); + if (map != NULL) + map->slist = slist; + else + free(slist, M_BUSDMA); + return (map); +} + +static __inline void +_busdma_free_dmamap(bus_dmamap_t map) +{ + + free(map->slist, M_BUSDMA); + uma_zfree(dmamap_zone, map); } -#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 -#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * Allocate a device specific dma_tag. */ +#define SEG_NB 1024 + int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, - bus_addr_t boundary, bus_addr_t lowaddr, - bus_addr_t highaddr, bus_dma_filter_t *filter, - void *filterarg, bus_size_t maxsize, int nsegments, - bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, - void *lockfuncarg, bus_dma_tag_t *dmat) + bus_addr_t boundary, bus_addr_t lowaddr, + bus_addr_t highaddr, bus_dma_filter_t *filter, + void *filterarg, bus_size_t maxsize, int nsegments, + bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, + void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; - - /* Basic sanity checking */ - if (boundary != 0 && boundary < maxsegsz) - maxsegsz = boundary; - - if (maxsegsz == 0) { - return (EINVAL); - } - /* Return a NULL tag on failure */ *dmat = NULL; + if (!parent) + parent = mips_root_dma_tag; - newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, - M_ZERO | M_NOWAIT); + newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); @@ -255,14 +388,16 @@ newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; - newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); - newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); + newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); + newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; + if (cpuinfo.cache_coherent_dma) + newtag->flags |= BUS_DMA_COHERENT; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { @@ -272,35 +407,36 @@ newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } + newtag->segments = NULL; - /* Take into account any restrictions imposed by our parent tag */ + /* + * Take into account any restrictions imposed by our parent tag + */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if 
(newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) - newtag->boundary = MIN(parent->boundary, - newtag->boundary); + newtag->boundary = + MIN(parent->boundary, newtag->boundary); + if ((newtag->filter != NULL) || + ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) + newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* - * Short circuit looking at our parent directly - * since we have encapsulated all of its information - */ + * Short circuit looking at our parent directly + * since we have encapsulated all of its information + */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); - newtag->iommu = parent->iommu; - newtag->iommu_cookie = parent->iommu_cookie; } - - if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL) - newtag->flags |= BUS_DMA_COULD_BOUNCE; - - if (newtag->alignment > 1) + if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) + || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && @@ -310,7 +446,7 @@ /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { - free(newtag, M_DEVBUF); + free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; @@ -326,15 +462,15 @@ } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; - } - - if (error != 0) { - free(newtag, M_DEVBUF); - } else { + } else + newtag->bounce_zone = NULL; + if (error != 0) + free(newtag, M_BUSDMA); + else *dmat = newtag; - } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); + return (error); } @@ -368,17 +504,13 @@ int bus_dma_tag_destroy(bus_dma_tag_t dmat) { - bus_dma_tag_t dmat_copy __unused; - int error; - - error = 0; - dmat_copy = dmat; +#ifdef KTR + bus_dma_tag_t dmat_copy = dmat; +#endif if (dmat != NULL) { - if (dmat->map_count != 0) { - error = EBUSY; - goto out; - } + if (dmat->map_count != 0) + return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; @@ -386,7 +518,9 @@ parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { - free(dmat, M_DEVBUF); + if (dmat->segments != NULL) + free(dmat->segments, M_BUSDMA); + free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference @@ -397,11 +531,12 @@ dmat = NULL; } } -out: - CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); - return (error); + CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); + + return (0); } +#include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
@@ -409,17 +544,26 @@ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { - int error; + bus_dmamap_t newmap; + int error = 0; - error = 0; + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, + M_NOWAIT); + if (dmat->segments == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + } - *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (*mapp == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); + newmap = _busdma_alloc_dmamap(dmat); + if (newmap == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } + *mapp = newmap; /* * Bouncing might be required if the driver asks for an active @@ -432,8 +576,11 @@ int maxpages; if (dmat->bounce_zone == NULL) { - if ((error = alloc_bounce_zone(dmat)) != 0) + if ((error = alloc_bounce_zone(dmat)) != 0) { + _busdma_free_dmamap(newmap); + *mapp = NULL; return (error); + } } bz = dmat->bounce_zone; @@ -444,10 +591,7 @@ * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ - if (dmat->alignment > 1) - maxpages = MAX_BPAGES; - else - maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); + maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; @@ -468,21 +612,10 @@ bz->map_count++; } - (*mapp)->nsegs = 0; - (*mapp)->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - M_NOWAIT); - if ((*mapp)->segments == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - - if (error == 0) - dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); - return (error); + + return (0); } /* @@ -492,20 +625,17 @@ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { - if (dmat->flags & BUS_DMA_COULD_BOUNCE) { - if (STAILQ_FIRST(&map->bpages) != NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, EBUSY); - return (EBUSY); - } - if (dmat->bounce_zone) - dmat->bounce_zone->map_count--; + + if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, EBUSY); + return (EBUSY); } - free(map->segments, M_DEVBUF); - free(map, M_DEVBUF); - dmat->map_count--; + if (dmat->bounce_zone) + dmat->bounce_zone->map_count--; + _busdma_free_dmamap(map); CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); - return (0); + return (0); } /* @@ -514,76 +644,132 @@ * A dmamap to for use with dmamap_load is also allocated. 
*/ int -bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddrp, int flags, + bus_dmamap_t *mapp) { - vm_memattr_t attr; + bus_dmamap_t newmap = NULL; + busdma_bufalloc_t ba; + struct busdma_bufzone *bufzone; + vm_memattr_t memattr; + void *vaddr; + int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; + if (dmat->segments == NULL) { + dmat->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, + mflags); + if (dmat->segments == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, ENOMEM); + return (ENOMEM); + } + } + + newmap = _busdma_alloc_dmamap(dmat); + if (newmap == NULL) { + CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", + __func__, dmat, dmat->flags, ENOMEM); + return (ENOMEM); + } + + /* + * If all the memory is coherent with DMA then we don't need to + * do anything special for a coherent mapping request. + */ + if (dmat->flags & BUS_DMA_COHERENT) + flags &= ~BUS_DMA_COHERENT; - bus_dmamap_create(dmat, flags, mapp); + if (flags & BUS_DMA_COHERENT) { + memattr = VM_MEMATTR_UNCACHEABLE; + ba = coherent_allocator; + newmap->flags |= DMAMAP_UNCACHEABLE; + } else { + memattr = VM_MEMATTR_DEFAULT; + ba = standard_allocator; + } + /* All buffers we allocate are cache-aligned. */ + newmap->flags |= DMAMAP_CACHE_ALIGNED; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; - if (flags & BUS_DMA_NOCACHE) - attr = VM_MEMATTR_UNCACHEABLE; - else - attr = VM_MEMATTR_DEFAULT; - /* - * XXX: - * (dmat->alignment <= dmat->maxsize) is just a quick hack; the exact - * alignment guarantees of malloc need to be nailed down, and the - * code below should be rewritten to take that into account. - * - * In the meantime, we'll warn the user if malloc gets it wrong. + /* + * Try to find a bufzone in the allocator that holds a cache of buffers + * of the right size for this request. If the buffer is too big to be + * held in the allocator cache, this returns NULL. */ - if ((dmat->maxsize <= PAGE_SIZE) && - (dmat->alignment <= dmat->maxsize) && - dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) && - attr == VM_MEMATTR_DEFAULT) { - *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); + bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); + + /* + * Allocate the buffer from the uma(9) allocator if... + * - It's small enough to be in the allocator (bufzone not NULL). + * - The alignment constraint isn't larger than the allocation size + * (the allocator aligns buffers to their size boundaries). + * - There's no need to handle lowaddr/highaddr exclusion zones. + * else allocate non-contiguous pages if... + * - The page count that could get allocated doesn't exceed + * nsegments also when the maximum segment size is less + * than PAGE_SIZE. + * - The alignment constraint isn't larger than a page boundary. + * - There are no boundary-crossing constraints. + * else allocate a block of contiguous pages because one or more of the + * constraints is something that only the contig allocator can fulfill. 
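+ * (A worked case: maxsize of 64 * 1024 with maxsegsz >= PAGE_SIZE needs + * howmany(65536, 4096) == 16 pages, so nsegments >= 16 permits the + * kmem_alloc_attr() path as long as alignment fits in a page and the + * boundary is a page multiple; otherwise the request falls through to + * kmem_alloc_contig().)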
+ */ + if (bufzone != NULL && dmat->alignment <= bufzone->size && + !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { + vaddr = uma_zalloc(bufzone->umazone, mflags); + } else if (dmat->nsegments >= + howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && + dmat->alignment <= PAGE_SIZE && + (dmat->boundary % PAGE_SIZE) == 0) { + vaddr = (void *)kmem_alloc_attr(dmat->maxsize, mflags, 0, + dmat->lowaddr, memattr); } else { - /* - * XXX Use Contigmalloc until it is merged into this facility - * and handles multi-seg allocations. Nobody is doing - * multi-seg allocations yet though. - * XXX Certain AGP hardware does. - */ - *vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0ul, - dmat->lowaddr, dmat->alignment ? dmat->alignment : 1ul, - dmat->boundary, attr); - (*mapp)->contigalloc = 1; + vaddr = (void *)kmem_alloc_contig(dmat->maxsize, mflags, 0, + dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } - if (*vaddr == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { - printf("bus_dmamem_alloc failed to align memory properly.\n"); + if (vaddr == NULL) { + _busdma_free_dmamap(newmap); + newmap = NULL; + } else { + newmap->sync_count = 0; } - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, 0); - return (0); + *vaddrp = vaddr; + *mapp = newmap; + + return (vaddr == NULL ? ENOMEM : 0); } /* - * Free a piece of memory and it's allociated dmamap, that was allocated + * Free a piece of memory and it's allocated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { + struct busdma_bufzone *bufzone; + busdma_bufalloc_t ba; + + if (map->flags & DMAMAP_UNCACHEABLE) + ba = coherent_allocator; + else + ba = standard_allocator; + + free(map->slist, M_BUSDMA); + uma_zfree(dmamap_zone, map); - if (!map->contigalloc) - free(vaddr, M_DEVBUF); + bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); + + if (bufzone != NULL && dmat->alignment <= bufzone->size && + !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) + uma_zfree(bufzone->umazone, vaddr); else kmem_free((vm_offset_t)vaddr, dmat->maxsize); - bus_dmamap_destroy(dmat, map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } @@ -595,10 +781,10 @@ bus_size_t sgsize; if (map->pagesneeded == 0) { - CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " - "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), - dmat->boundary, dmat->alignment); - CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); + CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", + dmat->lowaddr, dmat->boundary, dmat->alignment); + CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", + map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer @@ -607,8 +793,7 @@ while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { - sgsize = MIN(sgsize, - PAGE_SIZE - (curaddr & PAGE_MASK)); + sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; @@ -622,15 +807,15 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { - vm_offset_t vaddr; - vm_offset_t vendaddr; - bus_addr_t paddr; + vm_offset_t vaddr; + vm_offset_t vendaddr; + bus_addr_t paddr; if (map->pagesneeded == 0) { - 
CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " - "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), - dmat->boundary, dmat->alignment); - CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); + CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", + dmat->lowaddr, dmat->boundary, dmat->alignment); + CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", + map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer @@ -641,12 +826,11 @@ while (vaddr < vendaddr) { bus_size_t sg_len; + KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - if (pmap == kernel_pmap) - paddr = pmap_kextract(vaddr); - else - paddr = pmap_extract(pmap, vaddr); - if (run_filter(dmat, paddr) != 0) { + paddr = pmap_kextract(vaddr); + if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && + run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } @@ -657,7 +841,7 @@ } static int -_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) +_bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,int flags) { /* Reserve Necessary Bounce Pages */ @@ -686,42 +870,32 @@ */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, - bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) + bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ - bmask = ~(dmat->boundary - 1); - if (dmat->boundary > 0) { - baddr = (curaddr + dmat->boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary)) + sgsize = roundup2(curaddr, dmat->boundary) - curaddr; /* * Insert chunk into a segment, coalescing with - * previous segment if possible. + * the previous segment if possible. */ seg = *segp; - if (seg == -1) { - seg = 0; + if (seg >= 0 && + curaddr == segs[seg].ds_addr + segs[seg].ds_len && + (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->boundary)) + segs[seg].ds_len += sgsize; + } else { + if (++seg >= dmat->nsegments) + return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; - } else { - if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && - (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && - (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) - segs[seg].ds_len += sgsize; - else { - if (++seg >= dmat->nsegments) - return (0); - segs[seg].ds_addr = curaddr; - segs[seg].ds_len = sgsize; - } } *segp = seg; return (sgsize); @@ -732,19 +906,16 @@ * the starting segment on entrace, and the ending segment on exit. 
*/ int -_bus_dmamap_load_phys(bus_dma_tag_t dmat, - bus_dmamap_t map, - vm_paddr_t buf, bus_size_t buflen, - int flags, - bus_dma_segment_t *segs, - int *segp) +_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, + int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) - segs = map->segments; + segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); @@ -758,8 +929,9 @@ while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); - if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { - sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); + if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && run_filter(dmat, curaddr)) { + sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } @@ -774,7 +946,11 @@ /* * Did we fit? */ - return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ + if (buflen != 0) { + bus_dmamap_unload(dmat, map); + return (EFBIG); /* XXX better return value here? */ + } + return (0); } int @@ -790,23 +966,23 @@ /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. + * first indicates if this is the first invocation of this function. */ int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, - bus_dmamap_t map, - void *buf, bus_size_t buflen, - pmap_t pmap, - int flags, - bus_dma_segment_t *segs, - int *segp) +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, + int *segp) { bus_size_t sgsize; bus_addr_t curaddr; - vm_offset_t kvaddr, vaddr; - int error; + struct sync_list *sl; + vm_offset_t vaddr = (vm_offset_t)buf; + int error = 0; if (segs == NULL) - segs = map->segments; + segs = dmat->segments; + if ((flags & BUS_DMA_LOAD_MBUF) != 0) + map->flags |= DMAMAP_CACHE_ALIGNED; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); @@ -816,37 +992,45 @@ return (error); } } - - vaddr = (vm_offset_t)buf; + CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " + "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { - bus_size_t max_sgsize; - /* * Get the physical address for this segment. + * + * XXX Don't support checking for coherent mappings + * XXX in user address space. */ - if (pmap == kernel_pmap) { - curaddr = pmap_kextract(vaddr); - kvaddr = vaddr; - } else { - curaddr = pmap_extract(pmap, vaddr); - kvaddr = 0; - } + KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); + curaddr = pmap_kextract(vaddr); /* * Compute the segment size, and adjust counts. 
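* (sgsize begins as the remainder of the current page and is then clipped * to maxsegsz and to the remaining buflen; e.g. a curaddr 0xf00 bytes into * a 4 KiB page contributes at most 0x100 bytes on this pass.)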
*/ - max_sgsize = MIN(buflen, dmat->maxsegsz); - sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); - if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { - sgsize = roundup2(sgsize, dmat->alignment); - sgsize = MIN(sgsize, max_sgsize); - curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, + sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); + if (sgsize > dmat->maxsegsz) + sgsize = dmat->maxsegsz; + if (buflen < sgsize) + sgsize = buflen; + + if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && + map->pagesneeded != 0 && run_filter(dmat, curaddr)) { + curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { - sgsize = MIN(sgsize, max_sgsize); + sl = &map->slist[map->sync_count - 1]; + if (map->sync_count == 0 || + vaddr != sl->vaddr + sl->datacount) { + if (++map->sync_count > dmat->nsegments) + goto cleanup; + sl++; + sl->vaddr = vaddr; + sl->datacount = sgsize; + sl->busaddr = curaddr; + } else + sl->datacount += sgsize; } - sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) @@ -855,44 +1039,36 @@ buflen -= sgsize; } +cleanup: /* * Did we fit? */ - return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ + if (buflen != 0) { + bus_dmamap_unload(dmat, map); + error = EFBIG; /* XXX better return value here? */ + } + return (error); } void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, - struct memdesc *mem, bus_dmamap_callback_t *callback, - void *callback_arg) + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { - if (dmat->flags & BUS_DMA_COULD_BOUNCE) { - map->dmat = dmat; - map->mem = *mem; - map->callback = callback; - map->callback_arg = callback_arg; - } + KASSERT(dmat != NULL, ("dmatag is NULL")); + KASSERT(map != NULL, ("dmamap is NULL")); + map->mem = *mem; + map->callback = callback; + map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, - bus_dma_segment_t *segs, int nsegs, int error) + bus_dma_segment_t *segs, int nsegs, int error) { - map->nsegs = nsegs; - if (segs != NULL) - memcpy(map->segments, segs, map->nsegs*sizeof(segs[0])); - if (dmat->iommu != NULL) - IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, - dmat->lowaddr, dmat->highaddr, dmat->alignment, - dmat->boundary, dmat->iommu_cookie); - - if (segs != NULL) - memcpy(segs, map->segments, map->nsegs*sizeof(segs[0])); - else - segs = map->segments; - + if (segs == NULL) + segs = dmat->segments; return (segs); } @@ -904,76 +1080,201 @@ { struct bounce_page *bpage; - if (dmat->iommu) { - IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie); - map->nsegs = 0; - } - while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } + map->sync_count = 0; + return; } -void -bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +static void +bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int aligned) { - struct bounce_page *bpage; - vm_offset_t datavaddr, tempvaddr; + char tmp_cl[mips_dcache_max_linesize], tmp_clend[mips_dcache_max_linesize]; + vm_offset_t buf_cl, buf_clend; + vm_size_t size_cl, size_clend; + int cache_linesize_mask = mips_dcache_max_linesize - 1; + + /* + * dcache invalidation operates on cache line aligned addresses + * and could modify areas of memory that share the same cache line + * at the beginning and the ending of the buffer. 
In order to + * prevent a data loss we save these chunks in temporary buffer + * before invalidation and restore them afer it. + * + * If the aligned flag is set the buffer is either an mbuf or came from + * our allocator caches. In both cases they are always sized and + * aligned to cacheline boundaries, so we can skip preserving nearby + * data if a transfer appears to overlap cachelines. An mbuf in + * particular will usually appear to be overlapped because of offsetting + * within the buffer to align the L3 headers, but we know that the bytes + * preceeding that offset are part of the same mbuf memory and are not + * unrelated adjacent data (and a rule of mbuf handling is that the cpu + * is not allowed to touch the mbuf while dma is in progress, including + * header fields). + */ + if (aligned) { + size_cl = 0; + size_clend = 0; + } else { + buf_cl = buf & ~cache_linesize_mask; + size_cl = buf & cache_linesize_mask; + buf_clend = buf + len; + size_clend = (mips_dcache_max_linesize - + (buf_clend & cache_linesize_mask)) & cache_linesize_mask; + } + + switch (op) { + case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: + case BUS_DMASYNC_POSTREAD: - if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { /* - * Handle data bouncing. We might also - * want to add support for invalidating - * the caches on broken hardware + * Save buffers that might be modified by invalidation */ - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " - "performing bounce", __func__, dmat, dmat->flags, op); + if (size_cl) + memcpy (tmp_cl, (void*)buf_cl, size_cl); + if (size_clend) + memcpy (tmp_clend, (void*)buf_clend, size_clend); + mips_dcache_inv_range(buf, len); + /* + * Restore them + */ + if (size_cl) + memcpy ((void*)buf_cl, tmp_cl, size_cl); + if (size_clend) + memcpy ((void*)buf_clend, tmp_clend, size_clend); + /* + * Copies above have brought corresponding memory + * cache lines back into dirty state. Write them back + * out and invalidate affected cache lines again if + * necessary. + */ + if (size_cl) + mips_dcache_wbinv_range(buf_cl, size_cl); + if (size_clend && (size_cl == 0 || + buf_clend - buf_cl > mips_dcache_max_linesize)) + mips_dcache_wbinv_range(buf_clend, size_clend); + break; + + case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: + mips_dcache_wbinv_range(buf, len); + break; + + case BUS_DMASYNC_PREREAD: + /* + * Save buffers that might be modified by invalidation + */ + if (size_cl) + memcpy (tmp_cl, (void *)buf_cl, size_cl); + if (size_clend) + memcpy (tmp_clend, (void *)buf_clend, size_clend); + mips_dcache_inv_range(buf, len); + /* + * Restore them + */ + if (size_cl) + memcpy ((void *)buf_cl, tmp_cl, size_cl); + if (size_clend) + memcpy ((void *)buf_clend, tmp_clend, size_clend); + /* + * Copies above have brought corresponding memory + * cache lines back into dirty state. Write them back + * out and invalidate affected cache lines again if + * necessary. 
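+ * (Example with 32-byte lines: a transfer at 0x1008 of length 0x30 gives + * size_cl = 8 and size_clend = 8, so bytes 0x1000-0x1007 and + * 0x1038-0x103f are saved before mips_dcache_inv_range() and copied back + * afterwards.)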
*/ + if (size_cl) + mips_dcache_wbinv_range(buf_cl, size_cl); + if (size_clend && (size_cl == 0 || + buf_clend - buf_cl > mips_dcache_max_linesize)) + mips_dcache_wbinv_range(buf_clend, size_clend); + break; + case BUS_DMASYNC_PREWRITE: +#ifdef BUS_DMA_FORCE_WBINV + mips_dcache_wbinv_range(buf, len); +#else + mips_dcache_wb_range(buf, len); +#endif + break; + } +} + +static void +_bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bounce_page *bpage; + + STAILQ_FOREACH(bpage, &map->bpages, links) { if (op & BUS_DMASYNC_PREWRITE) { - while (bpage != NULL) { - tempvaddr = 0; - datavaddr = bpage->datavaddr; - if (datavaddr == 0) { - tempvaddr = pmap_quick_enter_page( - bpage->datapage); - datavaddr = tempvaddr | - bpage->dataoffs; - } - - bcopy((void *)datavaddr, - (void *)bpage->vaddr, bpage->datacount); - - if (tempvaddr != 0) - pmap_quick_remove_page(tempvaddr); - bpage = STAILQ_NEXT(bpage, links); + if (bpage->datavaddr != 0) + bcopy((void *)bpage->datavaddr, + (void *)(bpage->vaddr_nocache != 0 ? + bpage->vaddr_nocache : + bpage->vaddr), + bpage->datacount); + else + physcopyout(bpage->dataaddr, + (void *)(bpage->vaddr_nocache != 0 ? + bpage->vaddr_nocache : + bpage->vaddr), + bpage->datacount); + if (bpage->vaddr_nocache == 0) { +#ifdef BUS_DMA_FORCE_WBINV + mips_dcache_wbinv_range(bpage->vaddr, + bpage->datacount); +#else + mips_dcache_wb_range(bpage->vaddr, + bpage->datacount); +#endif } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { - while (bpage != NULL) { - tempvaddr = 0; - datavaddr = bpage->datavaddr; - if (datavaddr == 0) { - tempvaddr = pmap_quick_enter_page( - bpage->datapage); - datavaddr = tempvaddr | - bpage->dataoffs; - } - - bcopy((void *)bpage->vaddr, - (void *)datavaddr, bpage->datacount); - - if (tempvaddr != 0) - pmap_quick_remove_page(tempvaddr); - bpage = STAILQ_NEXT(bpage, links); + if (bpage->vaddr_nocache == 0) { + mips_dcache_inv_range(bpage->vaddr, + bpage->datacount); } + if (bpage->datavaddr != 0) + bcopy((void *)(bpage->vaddr_nocache != 0 ? + bpage->vaddr_nocache : bpage->vaddr), + (void *)bpage->datavaddr, bpage->datacount); + else + physcopyin((void *)(bpage->vaddr_nocache != 0 ? + bpage->vaddr_nocache : bpage->vaddr), + bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } +} - powerpc_sync(); +void +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct sync_list *sl, *end; + int aligned; + + if (op == BUS_DMASYNC_POSTWRITE) + return; + if (STAILQ_FIRST(&map->bpages)) + _bus_dmamap_sync_bp(dmat, map, op); + + if ((dmat->flags & BUS_DMA_COHERENT) || + (map->flags & DMAMAP_UNCACHEABLE)) { + if (op & BUS_DMASYNC_PREWRITE) + mips_sync(); + return; + } + + aligned = (map->flags & DMAMAP_CACHE_ALIGNED) ?
1 : 0; + + CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); + if (map->sync_count) { + end = &map->slist[map->sync_count]; + for (sl = &map->slist[0]; sl != end; sl++) + bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, + aligned); + } } static void @@ -1014,7 +1315,7 @@ } } - if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, + if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); @@ -1085,21 +1386,23 @@ while (numpages > 0) { struct bounce_page *bpage; - bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, + bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; - bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, + bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { - free(bpage, M_DEVBUF); + free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); + bpage->vaddr_nocache = + (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; @@ -1139,6 +1442,7 @@ struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); + KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) @@ -1165,8 +1469,7 @@ bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; - bpage->datapage = PHYS_TO_VM_PAGE(addr); - bpage->dataoffs = addr & PAGE_MASK; + bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); @@ -1223,9 +1526,8 @@ mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); - bus_dmamap_load_mem(map->dmat, map, &map->mem, - map->callback, map->callback_arg, - BUS_DMA_WAITOK); + bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, + map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } @@ -1241,12 +1543,3 @@ } SYSINIT(start_busdma_swi, SI_SUB_SOFTINTR, SI_ORDER_ANY, start_busdma_swi, NULL); - -int -bus_dma_tag_set_iommu(bus_dma_tag_t tag, device_t iommu, void *cookie) -{ - tag->iommu = iommu; - tag->iommu_cookie = cookie; - - return (0); -} diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/powerpc/powerpc/busdma_machdep.c @@ -172,7 +172,7 @@ paddr > dmat->lowaddr && paddr <= dmat->highaddr) retval = 1; if (dmat->filter == NULL && - (paddr & (dmat->alignment - 1)) != 0) + !vm_addr_align_ok(paddr, dmat->alignment)) retval = 1; if (dmat->filter != NULL && (*dmat->filter)(dmat->filterarg, paddr) != 0) @@ -563,7 +563,7 @@ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { + } else if (!vm_addr_align_ok(vtophys(*vaddr), dmat->alignment)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", @@ -688,18 +688,13 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries.
*/ - bmask = ~(dmat->boundary - 1); - if (dmat->boundary > 0) { - baddr = (curaddr + dmat->boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->boundary)) + sgsize = roundup2(curaddr, dmat->boundary) - curaddr; /* * Insert chunk into a segment, coalescing with @@ -713,8 +708,8 @@ } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && - (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->boundary)) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) diff --git a/sys/riscv/riscv/busdma_bounce.c b/sys/riscv/riscv/busdma_bounce.c --- a/sys/riscv/riscv/busdma_bounce.c +++ b/sys/riscv/riscv/busdma_bounce.c @@ -504,7 +504,7 @@ __func__, dmat, dmat->common.flags, ENOMEM); free(*mapp, M_DEVBUF); return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { + } else if (!vm_addr_align_ok(vtophys(*vaddr), dmat->common.alignment)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } dmat->map_count++; @@ -636,18 +636,13 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ - bmask = ~(dmat->common.boundary - 1); - if (dmat->common.boundary > 0) { - baddr = (curaddr + dmat->common.boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->common.boundary)) + sgsize = roundup2(curaddr, dmat->common.boundary) - curaddr; /* * Insert chunk into a segment, coalescing with @@ -661,8 +656,8 @@ } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && - (dmat->common.boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->common.boundary)) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) diff --git a/sys/riscv/riscv/busdma_machdep.c b/sys/riscv/riscv/busdma_machdep.c --- a/sys/riscv/riscv/busdma_machdep.c +++ b/sys/riscv/riscv/busdma_machdep.c @@ -102,7 +102,7 @@ retval = 0; do { if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || - ((paddr & (tc->alignment - 1)) != 0)) && + !vm_addr_align_ok(paddr, tc->alignment)) && (tc->filter == NULL || (*tc->filter)(tc->filterarg, paddr) != 0)) retval = 1; diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -133,5 +133,32 @@ u_int vm_inactive_count(void); u_int vm_laundry_count(void); u_int vm_wait_count(void); + +/* + * Is pa a multiple of alignment, which is a power-of-two? + */ +static inline bool +vm_addr_align_ok(vm_paddr_t pa, u_long alignment) +{ + return ((pa & (alignment - 1)) == 0); +} + +/* + * Do the first and last addresses of a range match in all bits except the ones + * in -boundary (a power-of-two)? For boundary == 0, all addresses match.
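+ * (Since boundary is a power of two, -boundary == ~(boundary - 1), so the + * test keeps only the bits at or above the boundary bit: e.g. for + * boundary == 0x1000, pa == 0x1ff0 and size == 0x10, 0x1ff0 ^ 0x1fff == + * 0xf, which masks to 0; with size == 0x11 the range reaches 0x2000 and + * the masked xor is nonzero. For boundary == 0, -boundary == 0, so the + * test always passes.)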
+ */ +static inline bool +vm_addr_bound_ok(vm_paddr_t pa, vm_paddr_t size, vm_paddr_t boundary) +{ + return (((pa ^ (pa + size - 1)) & -boundary) == 0); +} + +static inline bool +vm_addr_ok(vm_paddr_t pa, vm_paddr_t size, u_long alignment, + vm_paddr_t boundary) +{ + return (vm_addr_align_ok(pa, alignment) && + vm_addr_bound_ok(pa, size, boundary)); +} #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -2032,10 +2032,8 @@ */ if (alignment == 0) pmap_align_superpage(object, offset, addr, length); - else if ((*addr & (alignment - 1)) != 0) { - *addr &= ~(alignment - 1); - *addr += alignment; - } + else + *addr = roundup2(*addr, alignment); aligned_addr = *addr; if (aligned_addr == free_addr) { /* diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2656,12 +2656,11 @@ if (m + npages > m_end) break; pa = VM_PAGE_TO_PHYS(m); - if ((pa & (alignment - 1)) != 0) { + if (!vm_addr_align_ok(pa, alignment)) { m_inc = atop(roundup2(pa, alignment) - pa); continue; } - if (rounddown2(pa ^ (pa + ptoa(npages) - 1), - boundary) != 0) { + if (!vm_addr_bound_ok(pa, ptoa(npages), boundary)) { m_inc = atop(roundup2(pa, boundary) - pa); continue; } diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -179,6 +179,7 @@ static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order, int tail); + /* * Red-black tree helpers for vm fictitious range management. */ @@ -1465,8 +1466,7 @@ pa = VM_PAGE_TO_PHYS(m_ret); pa_end = pa + size; if (pa >= low && pa_end <= high && - (pa & (alignment - 1)) == 0 && - rounddown2(pa ^ (pa_end - 1), boundary) == 0) + vm_addr_ok(pa, size, alignment, boundary)) goto done; } } diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c --- a/sys/vm/vm_reserv.c +++ b/sys/vm/vm_reserv.c @@ -656,10 +656,8 @@ * possible size satisfy the alignment and boundary requirements? */ pa = VM_RESERV_INDEX(object, pindex) << PAGE_SHIFT; - if ((pa & (alignment - 1)) != 0) - return (NULL); size = npages << PAGE_SHIFT; - if (((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + if (!vm_addr_ok(pa, size, alignment, boundary)) return (NULL); /* @@ -682,8 +680,7 @@ m = &rv->pages[index]; pa = VM_PAGE_TO_PHYS(m); if (pa < low || pa + size > high || - (pa & (alignment - 1)) != 0 || - ((pa ^ (pa + size - 1)) & ~(boundary - 1)) != 0) + !vm_addr_ok(pa, size, alignment, boundary)) goto out; /* Handle vm_page_rename(m, new_object, ...). */ for (i = 0; i < npages; i++) @@ -1333,7 +1330,7 @@ * doesn't include a boundary-multiple within it. Otherwise, * no boundary-constrained allocation is possible. */ - if (size > boundary && boundary > 0) + if (!vm_addr_bound_ok(0, size, boundary)) return (NULL); marker = &vm_rvd[domain].marker; queue = &vm_rvd[domain].partpop; @@ -1360,7 +1357,7 @@ /* This entire reservation is too high; go to next. */ continue; } - if ((pa & (alignment - 1)) != 0) { + if (!vm_addr_align_ok(pa, alignment)) { /* This entire reservation is unaligned; go to next. 
*/ continue; } @@ -1397,12 +1394,10 @@ vm_reserv_unlock(rv); m_ret = &rv->pages[posn]; pa = VM_PAGE_TO_PHYS(m_ret); - KASSERT((pa & (alignment - 1)) == 0, - ("%s: adjusted address does not align to %lx", - __func__, alignment)); - KASSERT(((pa ^ (pa + size - 1)) & -boundary) == 0, - ("%s: adjusted address spans boundary to %jx", - __func__, (uintmax_t)boundary)); + KASSERT(vm_addr_ok(pa, size, alignment, boundary), + ("%s: adjusted address not aligned/bounded to " + "%lx/%jx", + __func__, alignment, (uintmax_t)boundary)); return (m_ret); } vm_reserv_domain_lock(domain); diff --git a/sys/x86/x86/busdma_bounce.c b/sys/x86/x86/busdma_bounce.c --- a/sys/x86/x86/busdma_bounce.c +++ b/sys/x86/x86/busdma_bounce.c @@ -501,7 +501,7 @@ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { + } else if (!vm_addr_align_ok(vtophys(*vaddr), dmat->common.alignment)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", @@ -644,8 +644,9 @@ sg_len = roundup2(sg_len, dmat->common.alignment); sg_len = MIN(sg_len, max_sgsize); - KASSERT((sg_len & (dmat->common.alignment - 1)) - == 0, ("Segment size is not aligned")); + KASSERT(vm_addr_align_ok(sg_len, + dmat->common.alignment), + ("Segment size is not aligned")); map->pagesneeded++; } if (((ma_offs + sg_len) & ~PAGE_MASK) != 0) @@ -690,7 +691,6 @@ _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { - bus_addr_t baddr, bmask; int seg; KASSERT(curaddr <= BUS_SPACE_MAXADDR, @@ -703,12 +703,8 @@ /* * Make sure we don't cross any boundaries. */ - bmask = ~(dmat->common.boundary - 1); - if (dmat->common.boundary > 0) { - baddr = (curaddr + dmat->common.boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } + if (!vm_addr_bound_ok(curaddr, sgsize, dmat->common.boundary)) + sgsize = roundup2(curaddr, dmat->common.boundary) - curaddr; /* * Insert chunk into a segment, coalescing with @@ -722,8 +718,8 @@ } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && - (dmat->common.boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) + vm_addr_bound_ok(segs[seg].ds_addr, segs[seg].ds_len, + dmat->common.boundary)) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) @@ -907,7 +903,8 @@ bus_dma_run_filter(&dmat->common, paddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); - KASSERT((sgsize & (dmat->common.alignment - 1)) == 0, + KASSERT(vm_addr_align_ok(sgsize, + dmat->common.alignment), ("Segment size is not aligned")); /* * Check if two pages of the user provided buffer diff --git a/sys/x86/x86/busdma_machdep.c b/sys/x86/x86/busdma_machdep.c --- a/sys/x86/x86/busdma_machdep.c +++ b/sys/x86/x86/busdma_machdep.c @@ -107,7 +107,7 @@ do { if ((paddr >= BUS_SPACE_MAXADDR || (paddr > tc->lowaddr && paddr <= tc->highaddr) || - (paddr & (tc->alignment - 1)) != 0) && + !vm_addr_align_ok(paddr, tc->alignment)) && (tc->filter == NULL || (*tc->filter)(tc->filterarg, paddr) != 0)) retval = 1;