Index: head/sys/amd64/include/bus_dma.h =================================================================== --- head/sys/amd64/include/bus_dma.h (revision 320527) +++ head/sys/amd64/include/bus_dma.h (revision 320528) @@ -1,34 +1,34 @@ /*- * Copyright (c) 2005 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _AMD64_BUS_DMA_H_ #define _AMD64_BUS_DMA_H_ -#include +#include #endif /* _AMD64_BUS_DMA_H_ */ Index: head/sys/arm/arm/busdma_machdep-v4.c =================================================================== --- head/sys/arm/arm/busdma_machdep-v4.c (revision 320527) +++ head/sys/arm/arm/busdma_machdep-v4.c (revision 320528) @@ -1,1611 +1,1611 @@ /*- * Copyright (c) 2012 Ian Lepore * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2002 Peter Grehan * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * ARM bus dma support routines. * * XXX Things to investigate / fix some day... * - What is the earliest that this API can be called? Could there be any * fallout from changing the SYSINIT() order from SI_SUB_VM to SI_SUB_KMEM? * - The manpage mentions the BUS_DMA_NOWAIT flag only in the context of the * bus_dmamap_load() function. This code has historically (and still does) * honor it in bus_dmamem_alloc(). If we got rid of that we could lose some * error checking because some resource management calls would become WAITOK * and thus "cannot fail." * - The decisions made by _bus_dma_can_bounce() should be made once, at tag * creation time, and the result stored in the tag. * - It should be possible to take some shortcuts when mapping a buffer we know * came from the uma(9) allocators based on what we know about such buffers * (aligned, contiguous, etc). * - The allocation of bounce pages could probably be cleaned up, then we could * retire arm_remap_nocache(). */ #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define MAX_DMA_SEGMENTS 4096 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; /* * DMA range for this tag. If the page doesn't fall within * one of these ranges, an error is returned. The caller * may then decide what to do with the transfer. If the * range pointer is NULL, it is ignored. */ struct arm32_dma_range *ranges; int _nranges; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of client data */ vm_page_t pages; /* starting page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static uint32_t tags_total; static uint32_t maps_total; static uint32_t maps_dmamem; static uint32_t maps_coherent; static counter_u64_t maploads_total; static counter_u64_t maploads_bounced; static counter_u64_t maploads_coherent; static counter_u64_t maploads_dmamem; static counter_u64_t maploads_mbuf; static counter_u64_t maploads_physmem; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, "Number of active tags"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, "Number of active maps"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, "Number of active maps for bus_dmamem_alloc buffers"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, "Number of active maps with BUS_DMA_COHERENT flag set"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, &maploads_total, "Number of load operations performed"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, &maploads_bounced, "Number of load operations that used bounce buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, &maploads_mbuf, "Number of load operations for mbufs"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, &maploads_physmem, "Number of load operations on physical buffers"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; int flags; #define DMAMAP_COHERENT (1 << 0) #define DMAMAP_DMAMEM_ALLOC (1 << 1) #define DMAMAP_MBUF (1 << 2) #define DMAMAP_CACHE_ALIGNED (1 << 3) STAILQ_ENTRY(bus_dmamap) links; bus_dma_segment_t *segments; int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static void bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op, int bufaligned); /* * ---------------------------------------------------------------------------- * Begin block of code useful to transplant to other implementations. */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); static void busdma_init(void *dummy) { maploads_total = counter_u64_alloc(M_WAITOK); maploads_bounced = counter_u64_alloc(M_WAITOK); maploads_coherent = counter_u64_alloc(M_WAITOK); maploads_dmamem = counter_u64_alloc(M_WAITOK); maploads_mbuf = counter_u64_alloc(M_WAITOK); maploads_physmem = counter_u64_alloc(M_WAITOK); /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", arm_dcache_align, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. */ coherent_allocator = busdma_bufalloc_create("coherent", arm_dcache_align, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } /* * This init historically used SI_SUB_VM, but now the init code requires * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by * using SI_SUB_KMEM+1. */ SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL); /* * End block of code useful to transplant to other implementations. * ---------------------------------------------------------------------------- */ /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * This routine checks the exclusion zone constraints from a tag against the * physical RAM available on the machine. If a tag specifies an exclusion zone * but there's no RAM in that zone, then we avoid allocating resources to bounce * a request, and we can use any memory allocator (as opposed to needing * kmem_alloc_contig() just because it can allocate pages in an address range). * * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the * same value on 32-bit architectures) as their lowaddr constraint, and we can't * possibly have RAM at an address higher than the highest address we can * express, so we take a fast out. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; if (lowaddr >= BUS_SPACE_MAXADDR) return (0); for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } static __inline struct arm32_dma_range * _bus_dma_inrange(struct arm32_dma_range *ranges, int nranges, bus_addr_t curaddr) { struct arm32_dma_range *dr; int i; for (i = 0, dr = ranges; i < nranges; i++, dr++) { if (curaddr >= dr->dr_sysbase && round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) return (dr); } return (NULL); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment ? alignment : 1; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; newtag->ranges = bus_dma_get_range(); newtag->_nranges = bus_dma_get_range_nb(); if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) { free(newtag, M_BUSDMA); } else { atomic_add_32(&tags_total, 1); *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { atomic_subtract_32(&tags_total, 1); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t map) { int error; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&(map->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } bz->map_count++; } return (0); } static bus_dmamap_t allocate_map(bus_dma_tag_t dmat, int mflags) { int mapsize, segsize; bus_dmamap_t map; /* * Allocate the map. The map structure ends with an embedded * variable-sized array of sync_list structures. Following that * we allocate enough extra space to hold the array of bus_dma_segments. */ KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, ("cannot allocate %u dma segments (max is %u)", dmat->nsegments, MAX_DMA_SEGMENTS)); segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (NULL); } map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t map; int error = 0; *mapp = map = allocate_map(dmat, M_NOWAIT); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an exclusion * region, a data alignment that is stricter than 1, or DMA that begins * or ends with a partial cacheline. Whether bouncing will actually * happen can't be known until mapping time, but we need to pre-allocate * resources now because we might not be allowed to at mapping time. */ error = allocate_bz_and_pages(dmat, map); if (error != 0) { free(map, M_BUSDMA); *mapp = NULL; return (error); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); free(map, M_BUSDMA); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into bus device * space based on the constraints listed in the dma tag. Returns a pointer to * the allocated memory, and a pointer to an associated bus_dmamap. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, bus_dmamap_t *mapp) { busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; bus_dmamap_t map; vm_memattr_t memattr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; *mapp = map = allocate_map(dmat, mflags); if (map == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } map->flags = DMAMAP_DMAMEM_ALLOC; /* Choose a busdma buffer allocator based on memory type flags. */ if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; map->flags |= DMAMAP_COHERENT; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed nsegments. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { *vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { *vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); free(map, M_BUSDMA); *mapp = NULL; return (ENOMEM); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_dmamem, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory that was allocated via bus_dmamem_alloc, along with * its associated map. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if (map->flags & DMAMAP_COHERENT) ba = coherent_allocator; else ba = standard_allocator; bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); atomic_subtract_32(&maps_dmamem, 1); free(map, M_BUSDMA); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = trunc_page((vm_offset_t)buf); vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { if (__predict_true(pmap == kernel_pmap)) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (run_filter(dmat, paddr) != 0) map->pagesneeded++; vaddr += PAGE_SIZE; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } if (dmat->ranges) { struct arm32_dma_range *dr; dr = _bus_dma_inrange(dmat->ranges, dmat->_nranges, curaddr); if (dr == NULL) return (0); /* * In a valid DMA range. Translate the physical * memory address to an address in the DMA window. */ curaddr = (curaddr - dr->dr_sysbase) + dr->dr_busbase; } seg = *segp; /* * Insert chunk into a segment, coalescing with * the previous segment if possible. */ if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_addr_t sl_end = 0; bus_size_t sgsize; struct sync_list *sl; int error; if (segs == NULL) segs = map->segments; counter_u64_add(maploads_total, 1); counter_u64_add(maploads_physmem, 1); if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else { if (map->sync_count > 0) sl_end = VM_PAGE_TO_PHYS(sl->pages) + sl->dataoffs + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->nsegments) break; sl++; sl->vaddr = 0; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); sl->dataoffs = curaddr & PAGE_MASK; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; bus_addr_t sl_pend = 0; struct sync_list *sl; vm_offset_t kvaddr; vm_offset_t vaddr = (vm_offset_t)buf; vm_offset_t sl_vend = 0; int error = 0; counter_u64_add(maploads_total, 1); if (map->flags & DMAMAP_COHERENT) counter_u64_add(maploads_coherent, 1); if (map->flags & DMAMAP_DMAMEM_ALLOC) counter_u64_add(maploads_dmamem, 1); if (segs == NULL) segs = map->segments; if (flags & BUS_DMA_LOAD_MBUF) { counter_u64_add(maploads_mbuf, 1); map->flags |= DMAMAP_CACHE_ALIGNED; } if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); sl = map->slist + map->sync_count - 1; while (buflen > 0) { /* * Get the physical address for this segment. */ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); map->flags &= ~DMAMAP_COHERENT; kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else { if (map->sync_count > 0) { sl_pend = VM_PAGE_TO_PHYS(sl->pages) + sl->dataoffs + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (kvaddr == 0 && curaddr != sl_pend)) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); sl->dataoffs = curaddr & PAGE_MASK; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = map->segments; return (segs); } /* * Release the mapping held by map. */ void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; struct bounce_zone *bz; if ((bz = dmat->bounce_zone) != NULL) { while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } bz = dmat->bounce_zone; bz->free_bpages += map->pagesreserved; bz->reserved_bpages -= map->pagesreserved; map->pagesreserved = 0; map->pagesneeded = 0; } map->sync_count = 0; map->flags &= ~DMAMAP_MBUF; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int bufaligned) { char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align]; register_t s; int partial; if ((op & BUS_DMASYNC_PREWRITE) && !(op & BUS_DMASYNC_PREREAD)) { cpu_dcache_wb_range(buf, len); cpu_l2cache_wb_range(buf, len); } /* * If the caller promises the buffer is properly aligned to a cache line * (even if the call parms make it look like it isn't) we can avoid * attempting to preserve the non-DMA part of the cache line in the * POSTREAD case, but we MUST still do a writeback in the PREREAD case. * * This covers the case of mbufs, where we know how they're aligned and * know the CPU doesn't touch the header in front of the DMA data area * during the IO, but it may have touched it right before invoking the * sync, so a PREREAD writeback is required. * * It also handles buffers we created in bus_dmamem_alloc(), which are * always aligned and padded to cache line size even if the IO length * isn't a multiple of cache line size. In this case the PREREAD * writeback probably isn't required, but it's harmless. */ partial = (((vm_offset_t)buf) | len) & arm_dcache_align_mask; if (op & BUS_DMASYNC_PREREAD) { if (!(op & BUS_DMASYNC_PREWRITE) && !partial) { cpu_dcache_inv_range(buf, len); cpu_l2cache_inv_range(buf, len); } else { cpu_dcache_wbinv_range(buf, len); cpu_l2cache_wbinv_range(buf, len); } } if (op & BUS_DMASYNC_POSTREAD) { if (partial && !bufaligned) { s = intr_disable(); if (buf & arm_dcache_align_mask) memcpy(_tmp_cl, (void *)(buf & ~arm_dcache_align_mask), buf & arm_dcache_align_mask); if ((buf + len) & arm_dcache_align_mask) memcpy(_tmp_clend, (void *)(buf + len), arm_dcache_align - ((buf + len) & arm_dcache_align_mask)); } cpu_dcache_inv_range(buf, len); cpu_l2cache_inv_range(buf, len); if (partial && !bufaligned) { if (buf & arm_dcache_align_mask) memcpy((void *)(buf & ~arm_dcache_align_mask), _tmp_cl, buf & arm_dcache_align_mask); if ((buf + len) & arm_dcache_align_mask) memcpy((void *)(buf + len), _tmp_clend, arm_dcache_align - ((buf + len) & arm_dcache_align_mask)); intr_restore(s); } } } static void bus_dmamap_sync_sl(struct sync_list *sl, bus_dmasync_op_t op, int bufaligned) { vm_offset_t tempvaddr; vm_page_t curpage; size_t npages; if (sl->vaddr != 0) { bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, bufaligned); return; } tempvaddr = 0; npages = atop(round_page(sl->dataoffs + sl->datacount)); for (curpage = sl->pages; curpage != sl->pages + npages; ++curpage) { /* * If the page is mapped to some other VA that hasn't * been supplied to busdma, then pmap_quick_enter_page() * will find all duplicate mappings and mark them * uncacheable. * That will also do any necessary wb/inv. Otherwise, * if the page is truly unmapped, then we don't actually * need to do cache maintenance. * XXX: May overwrite DMA'ed data in the POSTREAD * case where the CPU has written to a cacheline not * completely covered by the DMA region. */ KASSERT(VM_PAGE_TO_PHYS(curpage) == VM_PAGE_TO_PHYS(sl->pages) + ptoa(curpage - sl->pages), ("unexpected vm_page_t phys: 0x%08x != 0x%08x", VM_PAGE_TO_PHYS(curpage), VM_PAGE_TO_PHYS(sl->pages) + ptoa(curpage - sl->pages))); tempvaddr = pmap_quick_enter_page(curpage); pmap_quick_remove_page(tempvaddr); } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; if ((op & (BUS_DMASYNC_PREWRITE | BUS_DMASYNC_POSTREAD)) == 0) return; STAILQ_FOREACH(bpage, &map->bpages, links) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (op & BUS_DMASYNC_PREWRITE) { if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); cpu_dcache_wb_range(bpage->vaddr, bpage->datacount); cpu_l2cache_wb_range(bpage->vaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { cpu_dcache_inv_range(bpage->vaddr, bpage->datacount); cpu_l2cache_inv_range(bpage->vaddr, bpage->datacount); if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); dmat->bounce_zone->total_bounced++; } } } void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int bufaligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (map->flags & DMAMAP_COHERENT) goto drain; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); bufaligned = (map->flags & DMAMAP_CACHE_ALIGNED); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_sl(sl, op, bufaligned); } drain: cpu_drain_writebuf(); } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests (pages bounced)"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/arm/arm/busdma_machdep-v6.c =================================================================== --- head/sys/arm/arm/busdma_machdep-v6.c (revision 320527) +++ head/sys/arm/arm/busdma_machdep-v6.c (revision 320528) @@ -1,1711 +1,1711 @@ /*- * Copyright (c) 2012-2015 Ian Lepore * Copyright (c) 2010 Mark Tinguely * Copyright (c) 2004 Olivier Houchard * Copyright (c) 2002 Peter Grehan * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c 191438 2009-04-23 20:24:19Z jhb */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define BUSDMA_DCACHE_ALIGN cpuinfo.dcache_line_size #define BUSDMA_DCACHE_MASK cpuinfo.dcache_line_mask #define MAX_BPAGES 64 #define MAX_DMA_SEGMENTS 4096 #define BUS_DMA_EXCL_BOUNCE BUS_DMA_BUS2 #define BUS_DMA_ALIGN_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_COULD_BOUNCE (BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE) #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of client data */ bus_addr_t paddr; /* physical address */ vm_page_t pages; /* starting page of client data */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static uint32_t tags_total; static uint32_t maps_total; static uint32_t maps_dmamem; static uint32_t maps_coherent; static counter_u64_t maploads_total; static counter_u64_t maploads_bounced; static counter_u64_t maploads_coherent; static counter_u64_t maploads_dmamem; static counter_u64_t maploads_mbuf; static counter_u64_t maploads_physmem; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, "Number of active tags"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, "Number of active maps"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, "Number of active maps for bus_dmamem_alloc buffers"); SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, "Number of active maps with BUS_DMA_COHERENT flag set"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, &maploads_total, "Number of load operations performed"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, &maploads_bounced, "Number of load operations that used bounce buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on BUS_DMA_COHERENT memory"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, &maploads_dmamem, "Number of load operations on bus_dmamem_alloc buffers"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, &maploads_mbuf, "Number of load operations for mbufs"); SYSCTL_COUNTER_U64(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, &maploads_physmem, "Number of load operations on physical buffers"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; int flags; #define DMAMAP_COHERENT (1 << 0) #define DMAMAP_DMAMEM_ALLOC (1 << 1) #define DMAMAP_MBUF (1 << 2) STAILQ_ENTRY(bus_dmamap) links; bus_dma_segment_t *segments; int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size); static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op); static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); static void busdma_init(void *dummy) { int uma_flags; maploads_total = counter_u64_alloc(M_WAITOK); maploads_bounced = counter_u64_alloc(M_WAITOK); maploads_coherent = counter_u64_alloc(M_WAITOK); maploads_dmamem = counter_u64_alloc(M_WAITOK); maploads_mbuf = counter_u64_alloc(M_WAITOK); maploads_physmem = counter_u64_alloc(M_WAITOK); uma_flags = 0; /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ uma_flags); /* uma_zcreate_flags */ #ifdef INVARIANTS /* * Force UMA zone to allocate service structures like * slabs using own allocator. uma_debug code performs * atomic ops on uma_slab_t fields and safety of this * operation is not guaranteed for write-back caches */ uma_flags = UMA_ZONE_OFFPAGE; #endif /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag. */ coherent_allocator = busdma_bufalloc_create("coherent", BUSDMA_DCACHE_ALIGN,/* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, uma_flags); /* uma_zcreate_flags */ } /* * This init historically used SI_SUB_VM, but now the init code requires * malloc(9) using M_BUSDMA memory and the pcpu zones for counter(9), which get * set up by SI_SUB_KMEM and SI_ORDER_LAST, so we'll go right after that by * using SI_SUB_KMEM+1. */ SYSINIT(busdma, SI_SUB_KMEM+1, SI_ORDER_FIRST, busdma_init, NULL); /* * This routine checks the exclusion zone constraints from a tag against the * physical RAM available on the machine. If a tag specifies an exclusion zone * but there's no RAM in that zone, then we avoid allocating resources to bounce * a request, and we can use any memory allocator (as opposed to needing * kmem_alloc_contig() just because it can allocate pages in an address range). * * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the * same value on 32-bit architectures) as their lowaddr constraint, and we can't * possibly have RAM at an address higher than the highest address we can * express, so we take a fast out. */ static int exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; if (lowaddr >= BUS_SPACE_MAXADDR) return (0); for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr >= phys_avail[i])) return (1); } return (0); } /* * Return true if the tag has an exclusion zone that could lead to bouncing. */ static __inline int exclusion_bounce(bus_dma_tag_t dmat) { return (dmat->flags & BUS_DMA_EXCL_BOUNCE); } /* * Return true if the given address does not fall on the alignment boundary. */ static __inline int alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) { return (addr & (dmat->alignment - 1)); } /* * Return true if the DMA should bounce because the start or end does not fall * on a cacheline boundary (which would require a partial cacheline flush). * COHERENT memory doesn't trigger cacheline flushes. Memory allocated by * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a * strict rule that such memory cannot be accessed by the CPU while DMA is in * progress (or by multiple DMA engines at once), so that it's always safe to do * full cacheline flushes even if that affects memory outside the range of a * given DMA operation that doesn't involve the full allocated buffer. If we're * mapping an mbuf, that follows the same rules as a buffer we allocated. */ static __inline int cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF)) return (0); return ((addr | size) & BUSDMA_DCACHE_MASK); } /* * Return true if we might need to bounce the DMA described by addr and size. * * This is used to quick-check whether we need to do the more expensive work of * checking the DMA page-by-page looking for alignment and exclusion bounces. * * Note that the addr argument might be either virtual or physical. It doesn't * matter because we only look at the low-order bits, which are the same in both * address spaces. */ static __inline int might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, bus_size_t size) { return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || alignment_bounce(dmat, addr) || cacheline_bounce(map, addr, size)); } /* * Return true if we must bounce the DMA described by paddr and size. * * Bouncing can be triggered by DMA that doesn't begin and end on cacheline * boundaries, or doesn't begin on an alignment boundary, or falls within the * exclusion zone of any tag in the ancestry chain. * * For exclusions, walk the chain of tags comparing paddr to the exclusion zone * within each tag. If the tag has a filter function, use it to decide whether * the DMA needs to bounce, otherwise any DMA within the zone bounces. */ static int must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, bus_size_t size) { if (cacheline_bounce(map, paddr, size)) return (1); /* * The tag already contains ancestors' alignment restrictions so this * check doesn't need to be inside the loop. */ if (alignment_bounce(dmat, paddr)) return (1); /* * Even though each tag has an exclusion zone that is a superset of its * own and all its ancestors' exclusions, the exclusion zone of each tag * up the chain must be checked within the loop, because the busdma * rules say the filter function is called only when the address lies * within the low-highaddr range of the tag that filterfunc belongs to. */ while (dmat != NULL && exclusion_bounce(dmat)) { if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) && (dmat->filter == NULL || dmat->filter(dmat->filterarg, paddr) != 0)) return (1); dmat = dmat->parent; } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Basic sanity checking. */ KASSERT(boundary == 0 || powerof2(boundary), ("dma tag boundary %lu, must be a power of 2", boundary)); KASSERT(boundary == 0 || boundary >= maxsegsz, ("dma tag boundary %lu is < maxsegsz %lu\n", boundary, maxsegsz)); KASSERT(alignment != 0 && powerof2(alignment), ("dma tag alignment %lu, must be non-zero power of 2", alignment)); KASSERT(maxsegsz != 0, ("dma tag maxsegsz must not be zero")); /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); newtag->alignment = MAX(parent->alignment, newtag->alignment); newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE; newtag->flags |= parent->flags & BUS_DMA_COHERENT; if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit to looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr)) newtag->flags |= BUS_DMA_EXCL_BOUNCE; if (alignment_bounce(newtag, 1)) newtag->flags |= BUS_DMA_ALIGN_BOUNCE; /* * Any request can auto-bounce due to cacheline alignment, in addition * to any alignment or boundary specifications in the tag, so if the * ALLOCNOW flag is set, there's always work to do. */ if ((flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* * Round size up to a full page, and add one more page because * there can always be one more boundary crossing than the * number of pages in a transfer. */ maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE; if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) { free(newtag, M_BUSDMA); } else { atomic_add_32(&tags_total, 1); *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { atomic_subtract_32(&tags_total, 1); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp) { struct bounce_zone *bz; int maxpages; int error; if (dmat->bounce_zone == NULL) if ((error = alloc_bounce_zone(dmat)) != 0) return (error); bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&(mapp->bpages)); /* * Attempt to add pages to our pool on a per-instance basis up to a sane * limit. Even if the tag isn't flagged as COULD_BOUNCE due to * alignment and boundary constraints, it could still auto-bounce due to * cacheline alignment, which requires at most two bounce pages. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) maxpages = MAX_BPAGES; else maxpages = 2 * bz->map_count; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1; pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 2); if (alloc_bounce_pages(dmat, pages) < pages) return (ENOMEM); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } bz->map_count++; return (0); } static bus_dmamap_t allocate_map(bus_dma_tag_t dmat, int mflags) { int mapsize, segsize; bus_dmamap_t map; /* * Allocate the map. The map structure ends with an embedded * variable-sized array of sync_list structures. Following that * we allocate enough extra space to hold the array of bus_dma_segments. */ KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, ("cannot allocate %u dma segments (max is %u)", dmat->nsegments, MAX_DMA_SEGMENTS)); segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; map = malloc(mapsize + segsize, M_BUSDMA, mflags | M_ZERO); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (NULL); } map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t map; int error = 0; *mapp = map = allocate_map(dmat, M_NOWAIT); if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an exclusion * region, a data alignment that is stricter than 1, or DMA that begins * or ends with a partial cacheline. Whether bouncing will actually * happen can't be known until mapping time, but we need to pre-allocate * resources now because we might not be allowed to at mapping time. */ error = allocate_bz_and_pages(dmat, map); if (error != 0) { free(map, M_BUSDMA); *mapp = NULL; return (error); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); free(map, M_BUSDMA); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into bus device * space based on the constraints listed in the dma tag. Returns a pointer to * the allocated memory, and a pointer to an associated bus_dmamap. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddr, int flags, bus_dmamap_t *mapp) { busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; bus_dmamap_t map; vm_memattr_t memattr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; *mapp = map = allocate_map(dmat, mflags); if (map == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } map->flags = DMAMAP_DMAMEM_ALLOC; /* For coherent memory, set the map flag that disables sync ops. */ if (flags & BUS_DMA_COHERENT) map->flags |= DMAMAP_COHERENT; /* * Choose a busdma buffer allocator based on memory type flags. * If the tag's COHERENT flag is set, that means normal memory * is already coherent, use the normal allocator. */ if ((flags & BUS_DMA_COHERENT) && ((dmat->flags & BUS_DMA_COHERENT) == 0)) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !exclusion_bounce(dmat)) { *vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { *vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); free(map, M_BUSDMA); *mapp = NULL; return (ENOMEM); } if (map->flags & DMAMAP_COHERENT) atomic_add_32(&maps_coherent, 1); atomic_add_32(&maps_dmamem, 1); atomic_add_32(&maps_total, 1); dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory that was allocated via bus_dmamem_alloc, along with * its associated map. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if ((map->flags & DMAMAP_COHERENT) && ((dmat->flags & BUS_DMA_COHERENT) == 0)) ba = coherent_allocator; else ba = standard_allocator; bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !exclusion_bounce(dmat)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; if (map->flags & DMAMAP_COHERENT) atomic_subtract_32(&maps_coherent, 1); atomic_subtract_32(&maps_total, 1); atomic_subtract_32(&maps_dmamem, 1); free(map, M_BUSDMA); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" " map= %p, pagesneeded= %d", dmat->lowaddr, dmat->boundary, dmat->alignment, map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (must_bounce(dmat, map, curaddr, sgsize) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, pmap_t pmap, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR5(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d" " map= %p, pagesneeded= %d", dmat->lowaddr, dmat->boundary, dmat->alignment, map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { if (__predict_true(pmap == kernel_pmap)) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (must_bounce(dmat, map, paddr, min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)))) != 0) { map->pagesneeded++; } vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); } CTR1(KTR_BUSDMA, "pagesneeded= %d", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { map->pagesneeded = 0; mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_addr_t sl_end = 0; bus_size_t sgsize; struct sync_list *sl; int error; if (segs == NULL) segs = map->segments; counter_u64_add(maploads_total, 1); counter_u64_add(maploads_physmem, 1); if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->nsegments) break; sl++; sl->vaddr = 0; sl->paddr = curaddr; sl->datacount = sgsize; sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not in " "vm_page_array", __func__, curaddr)); } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; bus_addr_t sl_pend = 0; vm_offset_t kvaddr, vaddr, sl_vend = 0; struct sync_list *sl; int error; counter_u64_add(maploads_total, 1); if (map->flags & DMAMAP_COHERENT) counter_u64_add(maploads_coherent, 1); if (map->flags & DMAMAP_DMAMEM_ALLOC) counter_u64_add(maploads_dmamem, 1); if (segs == NULL) segs = map->segments; if (flags & BUS_DMA_LOAD_MBUF) { counter_u64_add(maploads_mbuf, 1); map->flags |= DMAMAP_MBUF; } if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_pages(dmat, pmap, map, buf, buflen, flags); if (map->pagesneeded != 0) { counter_u64_add(maploads_bounced, 1); error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment. */ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((dmat->flags & BUS_DMA_COHERENT) == 0) { if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (curaddr != sl_pend)) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = kvaddr; sl->paddr = curaddr; if (kvaddr != 0) { sl->pages = NULL; } else { sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not " "in vm_page_array", __func__, curaddr)); } sl->datacount = sgsize; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = map->segments; return (segs); } /* * Release the mapping held by map. */ void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; struct bounce_zone *bz; if ((bz = dmat->bounce_zone) != NULL) { while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } bz = dmat->bounce_zone; bz->free_bpages += map->pagesreserved; bz->reserved_bpages -= map->pagesreserved; map->pagesreserved = 0; map->pagesneeded = 0; } map->sync_count = 0; map->flags &= ~DMAMAP_MBUF; } static void dma_preread_safe(vm_offset_t va, vm_paddr_t pa, vm_size_t size) { /* * Write back any partial cachelines immediately before and * after the DMA region. We don't need to round the address * down to the nearest cacheline or specify the exact size, * as dcache_wb_poc() will do the rounding for us and works * at cacheline granularity. */ if (va & BUSDMA_DCACHE_MASK) dcache_wb_poc(va, pa, 1); if ((va + size) & BUSDMA_DCACHE_MASK) dcache_wb_poc(va + size, pa + size, 1); dcache_inv_poc_dma(va, pa, size); } static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) { uint32_t len, offset; vm_page_t m; vm_paddr_t pa; vm_offset_t va, tempva; bus_size_t size; offset = sl->paddr & PAGE_MASK; m = sl->pages; size = sl->datacount; pa = sl->paddr; for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { tempva = 0; if (sl->vaddr == 0) { len = min(PAGE_SIZE - offset, size); tempva = pmap_quick_enter_page(m); va = tempva | offset; KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), ("unexpected vm_page_t phys: 0x%08x != 0x%08x", VM_PAGE_TO_PHYS(m) | offset, pa)); } else { len = sl->datacount; va = sl->vaddr; } switch (op) { case BUS_DMASYNC_PREWRITE: case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: dcache_wb_poc(va, pa, len); break; case BUS_DMASYNC_PREREAD: /* * An mbuf may start in the middle of a cacheline. There * will be no cpu writes to the beginning of that line * (which contains the mbuf header) while dma is in * progress. Handle that case by doing a writeback of * just the first cacheline before invalidating the * overall buffer. Any mbuf in a chain may have this * misalignment. Buffers which are not mbufs bounce if * they are not aligned to a cacheline. */ dma_preread_safe(va, pa, len); break; case BUS_DMASYNC_POSTREAD: case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: dcache_inv_poc(va, pa, len); break; default: panic("unsupported combination of sync operations: " "0x%08x\n", op); } if (tempva != 0) pmap_quick_remove_page(tempva); } } void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; struct sync_list *sl, *end; vm_offset_t datavaddr, tempvaddr; if (op == BUS_DMASYNC_POSTWRITE) return; /* * If the buffer was from user space, it is possible that this is not * the same vm map, especially on a POST operation. It's not clear that * dma on userland buffers can work at all right now. To be safe, until * we're able to test direct userland dma, panic on a map mismatch. */ if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->flags, op); /* * For PREWRITE do a writeback. Clean the caches from the * innermost to the outermost levels. */ if (op & BUS_DMASYNC_PREWRITE) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_wb_poc(bpage->vaddr, bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } /* * Do an invalidate for PREREAD unless a writeback was already * done above due to PREWRITE also being set. The reason for a * PREREAD invalidate is to prevent dirty lines currently in the * cache from being evicted during the DMA. If a writeback was * done due to PREWRITE also being set there will be no dirty * lines and the POSTREAD invalidate handles the rest. The * invalidate is done from the innermost to outermost level. If * L2 were done first, a dirty cacheline could be automatically * evicted from L1 before we invalidated it, re-dirtying the L2. */ if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) { bpage = STAILQ_FIRST(&map->bpages); while (bpage != NULL) { if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_inv_poc_dma(bpage->vaddr, bpage->busaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } /* * Re-invalidate the caches on a POSTREAD, even though they were * already invalidated at PREREAD time. Aggressive prefetching * due to accesses to other data near the dma buffer could have * brought buffer data into the caches which is now stale. The * caches are invalidated from the outermost to innermost; the * prefetches could be happening right now, and if L1 were * invalidated first, stale L2 data could be prefetched into L1. */ if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { if ((dmat->flags & BUS_DMA_COHERENT) == 0) dcache_inv_poc(bpage->vaddr, bpage->busaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } /* * For COHERENT memory no cache maintenance is necessary, but ensure all * writes have reached memory for the PREWRITE case. No action is * needed for a PREREAD without PREWRITE also set, because that would * imply that the cpu had written to the COHERENT buffer and expected * the dma device to see that change, and by definition a PREWRITE sync * is required to make that happen. */ if (map->flags & DMAMAP_COHERENT) { if (op & BUS_DMASYNC_PREWRITE) { dsb(); if ((dmat->flags & BUS_DMA_COHERENT) == 0) cpu_l2cache_drain_writebuf(); } return; } /* * Cache maintenance for normal (non-COHERENT non-bounce) buffers. All * the comments about the sequences for flushing cache levels in the * bounce buffer code above apply here as well. In particular, the fact * that the sequence is inner-to-outer for PREREAD invalidation and * outer-to-inner for POSTREAD invalidation is not a mistake. */ if (map->sync_count != 0) { sl = &map->slist[0]; end = &map->slist[map->sync_count]; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing sync", __func__, dmat, dmat->flags, op); for ( ; sl != end; ++sl) dma_dcache_sync(sl, op); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests (pages bounced)"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_ULONG(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/arm/include/bus_dma.h =================================================================== --- head/sys/arm/include/bus_dma.h (revision 320527) +++ head/sys/arm/include/bus_dma.h (revision 320528) @@ -1,99 +1,100 @@ /* $NetBSD: bus.h,v 1.11 2003/07/28 17:35:54 thorpej Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ARM_BUS_DMA_H #define _ARM_BUS_DMA_H #include +#include /* Bus Space DMA macros */ #define BUS_DMA_TAG_VALID(t) ((t) != (bus_dma_tag_t)0) #if defined(_ARM32_BUS_DMA_PRIVATE) && __ARM_ARCH < 6 /* * arm32_dma_range * * This structure describes a valid DMA range. */ struct arm32_dma_range { bus_addr_t dr_sysbase; /* system base address */ bus_addr_t dr_busbase; /* appears here on bus */ bus_size_t dr_len; /* length of range */ }; /* _dm_buftype */ #define ARM32_BUFTYPE_INVALID 0 #define ARM32_BUFTYPE_LINEAR 1 #define ARM32_BUFTYPE_MBUF 2 #define ARM32_BUFTYPE_UIO 3 #define ARM32_BUFTYPE_RAW 4 struct arm32_dma_range *bus_dma_get_range(void); int bus_dma_get_range_nb(void); #endif /* _ARM32_BUS_DMA_PRIVATE */ #endif /* _ARM_BUS_DMA_H */ Index: head/sys/arm64/arm64/busdma_machdep.c =================================================================== --- head/sys/arm64/arm64/busdma_machdep.c (revision 320527) +++ head/sys/arm64/arm64/busdma_machdep.c (revision 320528) @@ -1,355 +1,225 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2013, 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ int bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || ((paddr & (tc->alignment - 1)) != 0)) && (tc->filter == NULL || (*tc->filter)(tc->filterarg, paddr) != 0)) retval = 1; tc = tc->parent; } while (retval == 0 && tc != NULL); return (retval); } int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat) { void *newtag; struct bus_dma_tag_common *common; KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz")); /* Return a NULL tag on failure */ *dmat = NULL; /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) return (EINVAL); newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, ENOMEM); return (ENOMEM); } common = newtag; common->impl = &bus_dma_bounce_impl; common->parent = parent; common->alignment = alignment; common->boundary = boundary; common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); common->filter = filter; common->filterarg = filterarg; common->maxsize = maxsize; common->nsegments = nsegments; common->maxsegsz = maxsegsz; common->flags = flags; common->ref_count = 1; /* Count ourself */ if (lockfunc != NULL) { common->lockfunc = lockfunc; common->lockfuncarg = lockfuncarg; } else { common->lockfunc = bus_dma_dflt_lock; common->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { common->impl = parent->impl; common->lowaddr = MIN(parent->lowaddr, common->lowaddr); common->highaddr = MAX(parent->highaddr, common->highaddr); if (common->boundary == 0) common->boundary = parent->boundary; else if (parent->boundary != 0) { common->boundary = MIN(parent->boundary, common->boundary); } if (common->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ common->filter = parent->filter; common->filterarg = parent->filterarg; common->parent = parent->parent; } atomic_add_int(&parent->ref_count, 1); } *dmat = common; return (0); } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_common *tc; int error; if (parent == NULL) { error = bus_dma_bounce_impl.tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } else { tc = (struct bus_dma_tag_common *)parent; error = tc->impl->tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->tag_destroy(dmat)); } -/* - * Allocate a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_create(dmat, flags, mapp)); -} - -/* - * Destroy a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_destroy(dmat, map)); -} - - -/* - * Allocate a piece of memory that can be efficiently mapped into - * bus device space based on the constraints listed in the dma tag. - * A dmamap to for use with dmamap_load is also allocated. - */ -int -bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); -} - -/* - * Free a piece of memory and it's allociated dmamap, that was allocated - * via bus_dmamem_alloc. Make the same choice for free/contigfree. - */ -void -bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->mem_free(dmat, vaddr, map); -} - -int -_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, - bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, - segp)); -} - -int -_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, - bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, - int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, - segs, segp)); -} - -int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, - bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, - int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, - segp)); -} - -void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, - struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); -} - -bus_dma_segment_t * -_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, - bus_dma_segment_t *segs, int nsegs, int error) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); -} - -/* - * Release the mapping held by map. - */ -void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_unload(dmat, map); -} - -void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_sync(dmat, map, op); -} Index: head/sys/arm64/include/bus_dma.h =================================================================== --- head/sys/arm64/include/bus_dma.h (revision 320527) +++ head/sys/arm64/include/bus_dma.h (revision 320528) @@ -1,8 +1,141 @@ /* $FreeBSD$ */ #ifndef _MACHINE_BUS_DMA_H_ #define _MACHINE_BUS_DMA_H_ +#define WANT_INLINE_DMAMAP #include + +#include + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static inline int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_create(dmat, flags, mapp)); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static inline int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_destroy(dmat, map)); +} + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints listed in the dma tag. + * A dmamap to for use with dmamap_load is also allocated. + */ +static inline int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. Make the same choice for free/contigfree. + */ +static inline void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->mem_free(dmat, vaddr, map); +} + +/* + * Release the mapping held by map. + */ +static inline void +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_unload(dmat, map); +} + +static inline void +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_sync(dmat, map, op); +} + +static inline int +_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, + bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, + segp)); +} + +static inline int +_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, + bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, + segs, segp)); +} + +static inline int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, + segp)); +} + +static inline void +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); +} + +static inline bus_dma_segment_t * +_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); +} #endif /* !_MACHINE_BUS_DMA_H_ */ Index: head/sys/arm64/include/bus_dma_impl.h =================================================================== --- head/sys/arm64/include/bus_dma_impl.h (revision 320527) +++ head/sys/arm64/include/bus_dma_impl.h (revision 320528) @@ -1,96 +1,96 @@ /*- * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_BUS_DMA_IMPL_H_ #define _MACHINE_BUS_DMA_IMPL_H_ struct bus_dma_tag_common { struct bus_dma_impl *impl; struct bus_dma_tag_common *parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; bus_dma_lock_t *lockfunc; void *lockfuncarg; int ref_count; }; struct bus_dma_impl { int (*tag_create)(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat); int (*tag_destroy)(bus_dma_tag_t dmat); int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map); int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp); void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp); int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map, - void *buf, bus_size_t buflen, pmap_t pmap, int flags, + void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp); void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg); bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error); void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map); void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op); }; void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op); int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr); int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat); extern struct bus_dma_impl bus_dma_bounce_impl; #endif Index: head/sys/dev/aac/aac.c =================================================================== --- head/sys/dev/aac/aac.c (revision 320527) +++ head/sys/dev/aac/aac.c (revision 320528) @@ -1,3803 +1,3802 @@ /*- * Copyright (c) 2000 Michael Smith * Copyright (c) 2001 Scott Long * Copyright (c) 2000 BSDi * Copyright (c) 2001 Adaptec, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Adaptec 'FSA' family of PCI/SCSI RAID adapters. */ #define AAC_DRIVERNAME "aac" #include "opt_aac.h" /* #include */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include static void aac_startup(void *arg); static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f); static void aac_get_bus_info(struct aac_softc *sc); static void aac_daemon(void *arg); /* Command Processing */ static void aac_timeout(struct aac_softc *sc); static void aac_complete(void *context, int pending); static int aac_bio_command(struct aac_softc *sc, struct aac_command **cmp); static void aac_bio_complete(struct aac_command *cm); static int aac_wait_command(struct aac_command *cm); static void aac_command_thread(struct aac_softc *sc); /* Command Buffer Management */ static void aac_map_command_sg(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_alloc_commands(struct aac_softc *sc); static void aac_free_commands(struct aac_softc *sc); static void aac_unmap_command(struct aac_command *cm); /* Hardware Interface */ static int aac_alloc(struct aac_softc *sc); static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_check_firmware(struct aac_softc *sc); static int aac_init(struct aac_softc *sc); static int aac_sync_command(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3, u_int32_t *sp); static int aac_setup_intr(struct aac_softc *sc); static int aac_enqueue_fib(struct aac_softc *sc, int queue, struct aac_command *cm); static int aac_dequeue_fib(struct aac_softc *sc, int queue, u_int32_t *fib_size, struct aac_fib **fib_addr); static int aac_enqueue_response(struct aac_softc *sc, int queue, struct aac_fib *fib); /* StrongARM interface */ static int aac_sa_get_fwstatus(struct aac_softc *sc); static void aac_sa_qnotify(struct aac_softc *sc, int qbit); static int aac_sa_get_istatus(struct aac_softc *sc); static void aac_sa_clear_istatus(struct aac_softc *sc, int mask); static void aac_sa_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_sa_get_mailbox(struct aac_softc *sc, int mb); static void aac_sa_set_interrupts(struct aac_softc *sc, int enable); const struct aac_interface aac_sa_interface = { aac_sa_get_fwstatus, aac_sa_qnotify, aac_sa_get_istatus, aac_sa_clear_istatus, aac_sa_set_mailbox, aac_sa_get_mailbox, aac_sa_set_interrupts, NULL, NULL, NULL }; /* i960Rx interface */ static int aac_rx_get_fwstatus(struct aac_softc *sc); static void aac_rx_qnotify(struct aac_softc *sc, int qbit); static int aac_rx_get_istatus(struct aac_softc *sc); static void aac_rx_clear_istatus(struct aac_softc *sc, int mask); static void aac_rx_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_rx_get_mailbox(struct aac_softc *sc, int mb); static void aac_rx_set_interrupts(struct aac_softc *sc, int enable); static int aac_rx_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_rx_get_outb_queue(struct aac_softc *sc); static void aac_rx_set_outb_queue(struct aac_softc *sc, int index); const struct aac_interface aac_rx_interface = { aac_rx_get_fwstatus, aac_rx_qnotify, aac_rx_get_istatus, aac_rx_clear_istatus, aac_rx_set_mailbox, aac_rx_get_mailbox, aac_rx_set_interrupts, aac_rx_send_command, aac_rx_get_outb_queue, aac_rx_set_outb_queue }; /* Rocket/MIPS interface */ static int aac_rkt_get_fwstatus(struct aac_softc *sc); static void aac_rkt_qnotify(struct aac_softc *sc, int qbit); static int aac_rkt_get_istatus(struct aac_softc *sc); static void aac_rkt_clear_istatus(struct aac_softc *sc, int mask); static void aac_rkt_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_rkt_get_mailbox(struct aac_softc *sc, int mb); static void aac_rkt_set_interrupts(struct aac_softc *sc, int enable); static int aac_rkt_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_rkt_get_outb_queue(struct aac_softc *sc); static void aac_rkt_set_outb_queue(struct aac_softc *sc, int index); const struct aac_interface aac_rkt_interface = { aac_rkt_get_fwstatus, aac_rkt_qnotify, aac_rkt_get_istatus, aac_rkt_clear_istatus, aac_rkt_set_mailbox, aac_rkt_get_mailbox, aac_rkt_set_interrupts, aac_rkt_send_command, aac_rkt_get_outb_queue, aac_rkt_set_outb_queue }; /* Debugging and Diagnostics */ static void aac_describe_controller(struct aac_softc *sc); static const char *aac_describe_code(const struct aac_code_lookup *table, u_int32_t code); /* Management Interface */ static d_open_t aac_open; static d_ioctl_t aac_ioctl; static d_poll_t aac_poll; static void aac_cdevpriv_dtor(void *arg); static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib); static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg); static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib); static int aac_rev_check(struct aac_softc *sc, caddr_t udata); static int aac_open_aif(struct aac_softc *sc, caddr_t arg); static int aac_close_aif(struct aac_softc *sc, caddr_t arg); static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg); static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr); static int aac_query_disk(struct aac_softc *sc, caddr_t uptr); static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr); static int aac_supported_features(struct aac_softc *sc, caddr_t uptr); static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg); static struct aac_mntinforesp * aac_get_container_info(struct aac_softc *sc, struct aac_fib *fib, int cid); static struct cdevsw aac_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = aac_open, .d_ioctl = aac_ioctl, .d_poll = aac_poll, .d_name = "aac", }; static MALLOC_DEFINE(M_AACBUF, "aacbuf", "Buffers for the AAC driver"); /* sysctl node */ SYSCTL_NODE(_hw, OID_AUTO, aac, CTLFLAG_RD, 0, "AAC driver parameters"); /* * Device Interface */ /* * Initialize the controller and softc */ int aac_attach(struct aac_softc *sc) { int error, unit; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Initialize per-controller queues. */ aac_initq_free(sc); aac_initq_ready(sc); aac_initq_busy(sc); aac_initq_bio(sc); /* * Initialize command-completion task. */ TASK_INIT(&sc->aac_task_complete, 0, aac_complete, sc); /* mark controller as suspended until we get ourselves organised */ sc->aac_state |= AAC_STATE_SUSPEND; /* * Check that the firmware on the card is supported. */ if ((error = aac_check_firmware(sc)) != 0) return(error); /* * Initialize locks */ mtx_init(&sc->aac_aifq_lock, "AAC AIF lock", NULL, MTX_DEF); mtx_init(&sc->aac_io_lock, "AAC I/O lock", NULL, MTX_DEF); mtx_init(&sc->aac_container_lock, "AAC container lock", NULL, MTX_DEF); TAILQ_INIT(&sc->aac_container_tqh); TAILQ_INIT(&sc->aac_ev_cmfree); /* Initialize the clock daemon callout. */ callout_init_mtx(&sc->aac_daemontime, &sc->aac_io_lock, 0); /* * Initialize the adapter. */ if ((error = aac_alloc(sc)) != 0) return(error); if ((error = aac_init(sc)) != 0) return(error); /* * Allocate and connect our interrupt. */ if ((error = aac_setup_intr(sc)) != 0) return(error); /* * Print a little information about the controller. */ aac_describe_controller(sc); /* * Add sysctls. */ SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->aac_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->aac_dev)), OID_AUTO, "firmware_build", CTLFLAG_RD, &sc->aac_revision.buildNumber, 0, "firmware build number"); /* * Register to probe our containers later. */ sc->aac_ich.ich_func = aac_startup; sc->aac_ich.ich_arg = sc; if (config_intrhook_establish(&sc->aac_ich) != 0) { device_printf(sc->aac_dev, "can't establish configuration hook\n"); return(ENXIO); } /* * Make the control device. */ unit = device_get_unit(sc->aac_dev); sc->aac_dev_t = make_dev(&aac_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "aac%d", unit); (void)make_dev_alias(sc->aac_dev_t, "afa%d", unit); (void)make_dev_alias(sc->aac_dev_t, "hpn%d", unit); sc->aac_dev_t->si_drv1 = sc; /* Create the AIF thread */ if (kproc_create((void(*)(void *))aac_command_thread, sc, &sc->aifthread, 0, 0, "aac%daif", unit)) panic("Could not create AIF thread"); /* Register the shutdown method to only be called post-dump */ if ((sc->eh = EVENTHANDLER_REGISTER(shutdown_final, aac_shutdown, sc->aac_dev, SHUTDOWN_PRI_DEFAULT)) == NULL) device_printf(sc->aac_dev, "shutdown event registration failed\n"); /* Register with CAM for the non-DASD devices */ if ((sc->flags & AAC_FLAGS_ENABLE_CAM) != 0) { TAILQ_INIT(&sc->aac_sim_tqh); aac_get_bus_info(sc); } mtx_lock(&sc->aac_io_lock); callout_reset(&sc->aac_daemontime, 60 * hz, aac_daemon, sc); mtx_unlock(&sc->aac_io_lock); return(0); } static void aac_daemon(void *arg) { struct timeval tv; struct aac_softc *sc; struct aac_fib *fib; sc = arg; mtx_assert(&sc->aac_io_lock, MA_OWNED); if (callout_pending(&sc->aac_daemontime) || callout_active(&sc->aac_daemontime) == 0) return; getmicrotime(&tv); aac_alloc_sync_fib(sc, &fib); *(uint32_t *)fib->data = tv.tv_sec; aac_sync_fib(sc, SendHostTime, 0, fib, sizeof(uint32_t)); aac_release_sync_fib(sc); callout_schedule(&sc->aac_daemontime, 30 * 60 * hz); } void aac_add_event(struct aac_softc *sc, struct aac_event *event) { switch (event->ev_type & AAC_EVENT_MASK) { case AAC_EVENT_CMFREE: TAILQ_INSERT_TAIL(&sc->aac_ev_cmfree, event, ev_links); break; default: device_printf(sc->aac_dev, "aac_add event: unknown event %d\n", event->ev_type); break; } } /* * Request information of container #cid */ static struct aac_mntinforesp * aac_get_container_info(struct aac_softc *sc, struct aac_fib *fib, int cid) { struct aac_mntinfo *mi; mi = (struct aac_mntinfo *)&fib->data[0]; /* use 64-bit LBA if enabled */ mi->Command = (sc->flags & AAC_FLAGS_LBA_64BIT) ? VM_NameServe64 : VM_NameServe; mi->MntType = FT_FILESYS; mi->MntCount = cid; if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_mntinfo))) { device_printf(sc->aac_dev, "Error probing container %d\n", cid); return (NULL); } return ((struct aac_mntinforesp *)&fib->data[0]); } /* * Probe for containers, create disks. */ static void aac_startup(void *arg) { struct aac_softc *sc; struct aac_fib *fib; struct aac_mntinforesp *mir; int count = 0, i = 0; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* disconnect ourselves from the intrhook chain */ config_intrhook_disestablish(&sc->aac_ich); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); /* loop over possible containers */ do { if ((mir = aac_get_container_info(sc, fib, i)) == NULL) continue; if (i == 0) count = mir->MntRespCount; aac_add_container(sc, mir, 0); i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); /* poke the bus to actually attach the child devices */ if (bus_generic_attach(sc->aac_dev)) device_printf(sc->aac_dev, "bus_generic_attach failed\n"); /* mark the controller up */ sc->aac_state &= ~AAC_STATE_SUSPEND; /* enable interrupts now */ AAC_UNMASK_INTERRUPTS(sc); } /* * Create a device to represent a new container */ static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f) { struct aac_container *co; device_t child; /* * Check container volume type for validity. Note that many of * the possible types may never show up. */ if ((mir->Status == ST_OK) && (mir->MntTable[0].VolType != CT_NONE)) { co = (struct aac_container *)malloc(sizeof *co, M_AACBUF, M_NOWAIT | M_ZERO); if (co == NULL) panic("Out of memory?!"); fwprintf(sc, HBA_FLAGS_DBG_INIT_B, "id %x name '%.16s' size %u type %d", mir->MntTable[0].ObjectId, mir->MntTable[0].FileSystemName, mir->MntTable[0].Capacity, mir->MntTable[0].VolType); if ((child = device_add_child(sc->aac_dev, "aacd", -1)) == NULL) device_printf(sc->aac_dev, "device_add_child failed\n"); else device_set_ivars(child, co); device_set_desc(child, aac_describe_code(aac_container_types, mir->MntTable[0].VolType)); co->co_disk = child; co->co_found = f; bcopy(&mir->MntTable[0], &co->co_mntobj, sizeof(struct aac_mntobj)); mtx_lock(&sc->aac_container_lock); TAILQ_INSERT_TAIL(&sc->aac_container_tqh, co, co_link); mtx_unlock(&sc->aac_container_lock); } } /* * Allocate resources associated with (sc) */ static int aac_alloc(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Create DMA tag for mapping buffers into controller-addressable space. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_SG_64BIT) ? BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->aac_max_sectors << 9, /* maxsize */ sc->aac_sg_tablesize, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->aac_io_lock, /* lockfuncarg */ &sc->aac_buffer_dmat)) { device_printf(sc->aac_dev, "can't allocate buffer DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for mapping FIBs into controller-addressable space.. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->aac_max_fibs_alloc * sc->aac_max_fib_size, /* maxsize */ 1, /* nsegments */ sc->aac_max_fibs_alloc * sc->aac_max_fib_size, /* maxsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_fib_dmat)) { device_printf(sc->aac_dev, "can't allocate FIB DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for the common structure and allocate it. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ 8192 + sizeof(struct aac_common), /* maxsize */ 1, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_common_dmat)) { device_printf(sc->aac_dev, "can't allocate common structure DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->aac_common_dmat, (void **)&sc->aac_common, BUS_DMA_NOWAIT, &sc->aac_common_dmamap)) { device_printf(sc->aac_dev, "can't allocate common structure\n"); return (ENOMEM); } /* * Work around a bug in the 2120 and 2200 that cannot DMA commands * below address 8192 in physical memory. * XXX If the padding is not needed, can it be put to use instead * of ignored? */ (void)bus_dmamap_load(sc->aac_common_dmat, sc->aac_common_dmamap, sc->aac_common, 8192 + sizeof(*sc->aac_common), aac_common_map, sc, 0); if (sc->aac_common_busaddr < 8192) { sc->aac_common = (struct aac_common *) ((uint8_t *)sc->aac_common + 8192); sc->aac_common_busaddr += 8192; } bzero(sc->aac_common, sizeof(*sc->aac_common)); /* Allocate some FIBs and associated command structs */ TAILQ_INIT(&sc->aac_fibmap_tqh); sc->aac_commands = malloc(sc->aac_max_fibs * sizeof(struct aac_command), M_AACBUF, M_WAITOK|M_ZERO); while (sc->total_fibs < sc->aac_max_fibs) { if (aac_alloc_commands(sc) != 0) break; } if (sc->total_fibs == 0) return (ENOMEM); return (0); } /* * Free all of the resources associated with (sc) * * Should not be called if the controller is active. */ void aac_free(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* remove the control device */ if (sc->aac_dev_t != NULL) destroy_dev(sc->aac_dev_t); /* throw away any FIB buffers, discard the FIB DMA tag */ aac_free_commands(sc); if (sc->aac_fib_dmat) bus_dma_tag_destroy(sc->aac_fib_dmat); free(sc->aac_commands, M_AACBUF); /* destroy the common area */ if (sc->aac_common) { bus_dmamap_unload(sc->aac_common_dmat, sc->aac_common_dmamap); bus_dmamem_free(sc->aac_common_dmat, sc->aac_common, sc->aac_common_dmamap); } if (sc->aac_common_dmat) bus_dma_tag_destroy(sc->aac_common_dmat); /* disconnect the interrupt handler */ if (sc->aac_intr) bus_teardown_intr(sc->aac_dev, sc->aac_irq, sc->aac_intr); if (sc->aac_irq != NULL) { bus_release_resource(sc->aac_dev, SYS_RES_IRQ, rman_get_rid(sc->aac_irq), sc->aac_irq); pci_release_msi(sc->aac_dev); } /* destroy data-transfer DMA tag */ if (sc->aac_buffer_dmat) bus_dma_tag_destroy(sc->aac_buffer_dmat); /* destroy the parent DMA tag */ if (sc->aac_parent_dmat) bus_dma_tag_destroy(sc->aac_parent_dmat); /* release the register window mapping */ if (sc->aac_regs_res0 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, rman_get_rid(sc->aac_regs_res0), sc->aac_regs_res0); if (sc->aac_hwif == AAC_HWIF_NARK && sc->aac_regs_res1 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, rman_get_rid(sc->aac_regs_res1), sc->aac_regs_res1); } /* * Disconnect from the controller completely, in preparation for unload. */ int aac_detach(device_t dev) { struct aac_softc *sc; struct aac_container *co; struct aac_sim *sim; int error; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); callout_drain(&sc->aac_daemontime); mtx_lock(&sc->aac_io_lock); while (sc->aifflags & AAC_AIFFLAGS_RUNNING) { sc->aifflags |= AAC_AIFFLAGS_EXIT; wakeup(sc->aifthread); msleep(sc->aac_dev, &sc->aac_io_lock, PUSER, "aacdch", 0); } mtx_unlock(&sc->aac_io_lock); KASSERT((sc->aifflags & AAC_AIFFLAGS_RUNNING) == 0, ("%s: invalid detach state", __func__)); /* Remove the child containers */ while ((co = TAILQ_FIRST(&sc->aac_container_tqh)) != NULL) { error = device_delete_child(dev, co->co_disk); if (error) return (error); TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); free(co, M_AACBUF); } /* Remove the CAM SIMs */ while ((sim = TAILQ_FIRST(&sc->aac_sim_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_sim_tqh, sim, sim_link); error = device_delete_child(dev, sim->sim_dev); if (error) return (error); free(sim, M_AACBUF); } if ((error = aac_shutdown(dev))) return(error); EVENTHANDLER_DEREGISTER(shutdown_final, sc->eh); aac_free(sc); mtx_destroy(&sc->aac_aifq_lock); mtx_destroy(&sc->aac_io_lock); mtx_destroy(&sc->aac_container_lock); return(0); } /* * Bring the controller down to a dormant state and detach all child devices. * * This function is called before detach or system shutdown. * * Note that we can assume that the bioq on the controller is empty, as we won't * allow shutdown if any device is open. */ int aac_shutdown(device_t dev) { struct aac_softc *sc; struct aac_fib *fib; struct aac_close_command *cc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; /* * Send a Container shutdown followed by a HostShutdown FIB to the * controller to convince it that we don't want to talk to it anymore. * We've been closed and all I/O completed already */ device_printf(sc->aac_dev, "shutting down controller..."); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); cc = (struct aac_close_command *)&fib->data[0]; bzero(cc, sizeof(struct aac_close_command)); cc->Command = VM_CloseAll; cc->ContainerId = 0xffffffff; if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_close_command))) printf("FAILED.\n"); else printf("done\n"); #if 0 else { fib->data[0] = 0; /* * XXX Issuing this command to the controller makes it shut down * but also keeps it from coming back up without a reset of the * PCI bus. This is not desirable if you are just unloading the * driver module with the intent to reload it later. */ if (aac_sync_fib(sc, FsaHostShutdown, AAC_FIBSTATE_SHUTDOWN, fib, 1)) { printf("FAILED.\n"); } else { printf("done.\n"); } } #endif AAC_MASK_INTERRUPTS(sc); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return(0); } /* * Bring the controller to a quiescent state, ready for system suspend. */ int aac_suspend(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; AAC_MASK_INTERRUPTS(sc); return(0); } /* * Bring the controller back to a state ready for operation. */ int aac_resume(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state &= ~AAC_STATE_SUSPEND; AAC_UNMASK_INTERRUPTS(sc); return(0); } /* * Interrupt handler for NEW_COMM interface. */ void aac_new_intr(void *arg) { struct aac_softc *sc; u_int32_t index, fast; struct aac_command *cm; struct aac_fib *fib; int i; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); while (1) { index = AAC_GET_OUTB_QUEUE(sc); if (index == 0xffffffff) index = AAC_GET_OUTB_QUEUE(sc); if (index == 0xffffffff) break; if (index & 2) { if (index == 0xfffffffe) { /* XXX This means that the controller wants * more work. Ignore it for now. */ continue; } /* AIF */ fib = (struct aac_fib *)malloc(sizeof *fib, M_AACBUF, M_NOWAIT | M_ZERO); if (fib == NULL) { /* If we're really this short on memory, * hopefully breaking out of the handler will * allow something to get freed. This * actually sucks a whole lot. */ break; } index &= ~2; for (i = 0; i < sizeof(struct aac_fib)/4; ++i) ((u_int32_t *)fib)[i] = AAC_MEM1_GETREG4(sc, index + i*4); aac_handle_aif(sc, fib); free(fib, M_AACBUF); /* * AIF memory is owned by the adapter, so let it * know that we are done with it. */ AAC_SET_OUTB_QUEUE(sc, index); AAC_CLEAR_ISTATUS(sc, AAC_DB_RESPONSE_READY); } else { fast = index & 1; cm = sc->aac_commands + (index >> 2); fib = cm->cm_fib; if (fast) { fib->Header.XferState |= AAC_FIBSTATE_DONEADAP; *((u_int32_t *)(fib->data)) = AAC_ERROR_NORMAL; } aac_remove_busy(cm); aac_unmap_command(cm); cm->cm_flags |= AAC_CMD_COMPLETED; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this * command */ wakeup(cm); } sc->flags &= ~AAC_QUEUE_FRZN; } } /* see if we can start some more I/O */ if ((sc->flags & AAC_QUEUE_FRZN) == 0) aac_startio(sc); mtx_unlock(&sc->aac_io_lock); } /* * Interrupt filter for !NEW_COMM interface. */ int aac_filter(void *arg) { struct aac_softc *sc; u_int16_t reason; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Read the status register directly. This is faster than taking the * driver lock and reading the queues directly. It also saves having * to turn parts of the driver lock into a spin mutex, which would be * ugly. */ reason = AAC_GET_ISTATUS(sc); AAC_CLEAR_ISTATUS(sc, reason); /* handle completion processing */ if (reason & AAC_DB_RESPONSE_READY) taskqueue_enqueue(taskqueue_fast, &sc->aac_task_complete); /* controller wants to talk to us */ if (reason & (AAC_DB_PRINTF | AAC_DB_COMMAND_READY)) { /* * XXX Make sure that we don't get fooled by strange messages * that start with a NULL. */ if ((reason & AAC_DB_PRINTF) && (sc->aac_common->ac_printf[0] == 0)) sc->aac_common->ac_printf[0] = 32; /* * This might miss doing the actual wakeup. However, the * msleep that this is waking up has a timeout, so it will * wake up eventually. AIFs and printfs are low enough * priority that they can handle hanging out for a few seconds * if needed. */ wakeup(sc->aifthread); } return (FILTER_HANDLED); } /* * Command Processing */ /* * Start as much queued I/O as possible on the controller */ void aac_startio(struct aac_softc *sc) { struct aac_command *cm; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); for (;;) { /* * This flag might be set if the card is out of resources. * Checking it here prevents an infinite loop of deferrals. */ if (sc->flags & AAC_QUEUE_FRZN) break; /* * Try to get a command that's been put off for lack of * resources */ cm = aac_dequeue_ready(sc); /* * Try to build a command off the bio queue (ignore error * return) */ if (cm == NULL) aac_bio_command(sc, &cm); /* nothing to do? */ if (cm == NULL) break; /* don't map more than once */ if (cm->cm_flags & AAC_CMD_MAPPED) panic("aac: command %p already mapped", cm); /* * Set up the command to go to the controller. If there are no * data buffers associated with the command then it can bypass * busdma. */ if (cm->cm_datalen != 0) { if (cm->cm_flags & AAC_REQ_BIO) error = bus_dmamap_load_bio( sc->aac_buffer_dmat, cm->cm_datamap, (struct bio *)cm->cm_private, aac_map_command_sg, cm, 0); else error = bus_dmamap_load(sc->aac_buffer_dmat, cm->cm_datamap, cm->cm_data, cm->cm_datalen, aac_map_command_sg, cm, 0); if (error == EINPROGRESS) { fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "freezing queue\n"); sc->flags |= AAC_QUEUE_FRZN; } else if (error != 0) panic("aac_startio: unexpected error %d from " "busdma", error); } else aac_map_command_sg(cm, NULL, 0, 0); } } /* * Handle notification of one or more FIBs coming from the controller. */ static void aac_command_thread(struct aac_softc *sc) { struct aac_fib *fib; u_int32_t fib_size; int size, retval; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); sc->aifflags = AAC_AIFFLAGS_RUNNING; while ((sc->aifflags & AAC_AIFFLAGS_EXIT) == 0) { retval = 0; if ((sc->aifflags & AAC_AIFFLAGS_PENDING) == 0) retval = msleep(sc->aifthread, &sc->aac_io_lock, PRIBIO, "aifthd", AAC_PERIODIC_INTERVAL * hz); /* * First see if any FIBs need to be allocated. This needs * to be called without the driver lock because contigmalloc * can sleep. */ if ((sc->aifflags & AAC_AIFFLAGS_ALLOCFIBS) != 0) { mtx_unlock(&sc->aac_io_lock); aac_alloc_commands(sc); mtx_lock(&sc->aac_io_lock); sc->aifflags &= ~AAC_AIFFLAGS_ALLOCFIBS; aac_startio(sc); } /* * While we're here, check to see if any commands are stuck. * This is pretty low-priority, so it's ok if it doesn't * always fire. */ if (retval == EWOULDBLOCK) aac_timeout(sc); /* Check the hardware printf message buffer */ if (sc->aac_common->ac_printf[0] != 0) aac_print_printf(sc); /* Also check to see if the adapter has a command for us. */ if (sc->flags & AAC_FLAGS_NEW_COMM) continue; for (;;) { if (aac_dequeue_fib(sc, AAC_HOST_NORM_CMD_QUEUE, &fib_size, &fib)) break; AAC_PRINT_FIB(sc, fib); switch (fib->Header.Command) { case AifRequest: aac_handle_aif(sc, fib); break; default: device_printf(sc->aac_dev, "unknown command " "from controller\n"); break; } if ((fib->Header.XferState == 0) || (fib->Header.StructType != AAC_FIBTYPE_TFIB)) { break; } /* Return the AIF to the controller. */ if (fib->Header.XferState & AAC_FIBSTATE_FROMADAP) { fib->Header.XferState |= AAC_FIBSTATE_DONEHOST; *(AAC_FSAStatus*)fib->data = ST_OK; /* XXX Compute the Size field? */ size = fib->Header.Size; if (size > sizeof(struct aac_fib)) { size = sizeof(struct aac_fib); fib->Header.Size = size; } /* * Since we did not generate this command, it * cannot go through the normal * enqueue->startio chain. */ aac_enqueue_response(sc, AAC_ADAP_NORM_RESP_QUEUE, fib); } } } sc->aifflags &= ~AAC_AIFFLAGS_RUNNING; mtx_unlock(&sc->aac_io_lock); wakeup(sc->aac_dev); kproc_exit(0); } /* * Process completed commands. */ static void aac_complete(void *context, int pending) { struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; u_int32_t fib_size; sc = (struct aac_softc *)context; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); /* pull completed commands off the queue */ for (;;) { /* look for completed FIBs on our queue */ if (aac_dequeue_fib(sc, AAC_HOST_NORM_RESP_QUEUE, &fib_size, &fib)) break; /* nothing to do */ /* get the command, unmap and hand off for processing */ cm = sc->aac_commands + fib->Header.SenderData; if (cm == NULL) { AAC_PRINT_FIB(sc, fib); break; } if ((cm->cm_flags & AAC_CMD_TIMEDOUT) != 0) device_printf(sc->aac_dev, "COMMAND %p COMPLETED AFTER %d SECONDS\n", cm, (int)(time_uptime-cm->cm_timestamp)); aac_remove_busy(cm); aac_unmap_command(cm); cm->cm_flags |= AAC_CMD_COMPLETED; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this command */ wakeup(cm); } } /* see if we can start some more I/O */ sc->flags &= ~AAC_QUEUE_FRZN; aac_startio(sc); mtx_unlock(&sc->aac_io_lock); } /* * Handle a bio submitted from a disk device. */ void aac_submit_bio(struct bio *bp) { struct aac_disk *ad; struct aac_softc *sc; ad = (struct aac_disk *)bp->bio_disk->d_drv1; sc = ad->ad_controller; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* queue the BIO and try to get some work done */ aac_enqueue_bio(sc, bp); aac_startio(sc); } /* * Get a bio and build a command to go with it. */ static int aac_bio_command(struct aac_softc *sc, struct aac_command **cmp) { struct aac_command *cm; struct aac_fib *fib; struct aac_disk *ad; struct bio *bp; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* get the resources we will need */ cm = NULL; bp = NULL; if (aac_alloc_command(sc, &cm)) /* get a command */ goto fail; if ((bp = aac_dequeue_bio(sc)) == NULL) goto fail; /* fill out the command */ cm->cm_datalen = bp->bio_bcount; cm->cm_complete = aac_bio_complete; cm->cm_flags = AAC_REQ_BIO; cm->cm_private = bp; cm->cm_timestamp = time_uptime; /* build the FIB */ fib = cm->cm_fib; fib->Header.Size = sizeof(struct aac_fib_header); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; /* build the read/write request */ ad = (struct aac_disk *)bp->bio_disk->d_drv1; if (sc->flags & AAC_FLAGS_RAW_IO) { struct aac_raw_io *raw; raw = (struct aac_raw_io *)&fib->data[0]; fib->Header.Command = RawIo; raw->BlockNumber = (u_int64_t)bp->bio_pblkno; raw->ByteCount = bp->bio_bcount; raw->ContainerId = ad->ad_container->co_mntobj.ObjectId; raw->BpTotal = 0; raw->BpComplete = 0; fib->Header.Size += sizeof(struct aac_raw_io); cm->cm_sgtable = (struct aac_sg_table *)&raw->SgMapRaw; if (bp->bio_cmd == BIO_READ) { raw->Flags = 1; cm->cm_flags |= AAC_CMD_DATAIN; } else { raw->Flags = 0; cm->cm_flags |= AAC_CMD_DATAOUT; } } else if ((sc->flags & AAC_FLAGS_SG_64BIT) == 0) { fib->Header.Command = ContainerCommand; if (bp->bio_cmd == BIO_READ) { struct aac_blockread *br; br = (struct aac_blockread *)&fib->data[0]; br->Command = VM_CtBlockRead; br->ContainerId = ad->ad_container->co_mntobj.ObjectId; br->BlockNumber = bp->bio_pblkno; br->ByteCount = bp->bio_bcount; fib->Header.Size += sizeof(struct aac_blockread); cm->cm_sgtable = &br->SgMap; cm->cm_flags |= AAC_CMD_DATAIN; } else { struct aac_blockwrite *bw; bw = (struct aac_blockwrite *)&fib->data[0]; bw->Command = VM_CtBlockWrite; bw->ContainerId = ad->ad_container->co_mntobj.ObjectId; bw->BlockNumber = bp->bio_pblkno; bw->ByteCount = bp->bio_bcount; bw->Stable = CUNSTABLE; fib->Header.Size += sizeof(struct aac_blockwrite); cm->cm_flags |= AAC_CMD_DATAOUT; cm->cm_sgtable = &bw->SgMap; } } else { fib->Header.Command = ContainerCommand64; if (bp->bio_cmd == BIO_READ) { struct aac_blockread64 *br; br = (struct aac_blockread64 *)&fib->data[0]; br->Command = VM_CtHostRead64; br->ContainerId = ad->ad_container->co_mntobj.ObjectId; br->SectorCount = bp->bio_bcount / AAC_BLOCK_SIZE; br->BlockNumber = bp->bio_pblkno; br->Pad = 0; br->Flags = 0; fib->Header.Size += sizeof(struct aac_blockread64); cm->cm_flags |= AAC_CMD_DATAIN; cm->cm_sgtable = (struct aac_sg_table *)&br->SgMap64; } else { struct aac_blockwrite64 *bw; bw = (struct aac_blockwrite64 *)&fib->data[0]; bw->Command = VM_CtHostWrite64; bw->ContainerId = ad->ad_container->co_mntobj.ObjectId; bw->SectorCount = bp->bio_bcount / AAC_BLOCK_SIZE; bw->BlockNumber = bp->bio_pblkno; bw->Pad = 0; bw->Flags = 0; fib->Header.Size += sizeof(struct aac_blockwrite64); cm->cm_flags |= AAC_CMD_DATAOUT; cm->cm_sgtable = (struct aac_sg_table *)&bw->SgMap64; } } *cmp = cm; return(0); fail: if (bp != NULL) aac_enqueue_bio(sc, bp); if (cm != NULL) aac_release_command(cm); return(ENOMEM); } /* * Handle a bio-instigated command that has been completed. */ static void aac_bio_complete(struct aac_command *cm) { struct aac_blockread_response *brr; struct aac_blockwrite_response *bwr; struct bio *bp; AAC_FSAStatus status; /* fetch relevant status and then release the command */ bp = (struct bio *)cm->cm_private; if (bp->bio_cmd == BIO_READ) { brr = (struct aac_blockread_response *)&cm->cm_fib->data[0]; status = brr->Status; } else { bwr = (struct aac_blockwrite_response *)&cm->cm_fib->data[0]; status = bwr->Status; } aac_release_command(cm); /* fix up the bio based on status */ if (status == ST_OK) { bp->bio_resid = 0; } else { bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; } aac_biodone(bp); } /* * Submit a command to the controller, return when it completes. * XXX This is very dangerous! If the card has gone out to lunch, we could * be stuck here forever. At the same time, signals are not caught * because there is a risk that a signal could wakeup the sleep before * the card has a chance to complete the command. Since there is no way * to cancel a command that is in progress, we can't protect against the * card completing a command late and spamming the command and data * memory. So, we are held hostage until the command completes. */ static int aac_wait_command(struct aac_command *cm) { struct aac_softc *sc; int error; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* Put the command on the ready queue and get things going */ aac_enqueue_ready(cm); aac_startio(sc); error = msleep(cm, &sc->aac_io_lock, PRIBIO, "aacwait", 0); return(error); } /* *Command Buffer Management */ /* * Allocate a command. */ int aac_alloc_command(struct aac_softc *sc, struct aac_command **cmp) { struct aac_command *cm; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((cm = aac_dequeue_free(sc)) == NULL) { if (sc->total_fibs < sc->aac_max_fibs) { mtx_lock(&sc->aac_io_lock); sc->aifflags |= AAC_AIFFLAGS_ALLOCFIBS; mtx_unlock(&sc->aac_io_lock); wakeup(sc->aifthread); } return (EBUSY); } *cmp = cm; return(0); } /* * Release a command back to the freelist. */ void aac_release_command(struct aac_command *cm) { struct aac_event *event; struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* (re)initialize the command/FIB */ cm->cm_datalen = 0; cm->cm_sgtable = NULL; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_private = NULL; cm->cm_queue = AAC_ADAP_NORM_CMD_QUEUE; cm->cm_fib->Header.XferState = AAC_FIBSTATE_EMPTY; cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB; cm->cm_fib->Header.Flags = 0; cm->cm_fib->Header.SenderSize = cm->cm_sc->aac_max_fib_size; /* * These are duplicated in aac_start to cover the case where an * intermediate stage may have destroyed them. They're left * initialized here for debugging purposes only. */ cm->cm_fib->Header.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; cm->cm_fib->Header.SenderData = 0; aac_enqueue_free(cm); if ((event = TAILQ_FIRST(&sc->aac_ev_cmfree)) != NULL) { TAILQ_REMOVE(&sc->aac_ev_cmfree, event, ev_links); event->ev_callback(sc, event, event->ev_arg); } } /* * Map helper for command/FIB allocation. */ static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error) { uint64_t *fibphys; fibphys = (uint64_t *)arg; *fibphys = segs[0].ds_addr; } /* * Allocate and initialize commands/FIBs for this adapter. */ static int aac_alloc_commands(struct aac_softc *sc) { struct aac_command *cm; struct aac_fibmap *fm; uint64_t fibphys; int i, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (sc->total_fibs + sc->aac_max_fibs_alloc > sc->aac_max_fibs) return (ENOMEM); fm = malloc(sizeof(struct aac_fibmap), M_AACBUF, M_NOWAIT|M_ZERO); if (fm == NULL) return (ENOMEM); /* allocate the FIBs in DMAable memory and load them */ if (bus_dmamem_alloc(sc->aac_fib_dmat, (void **)&fm->aac_fibs, BUS_DMA_NOWAIT, &fm->aac_fibmap)) { device_printf(sc->aac_dev, "Not enough contiguous memory available.\n"); free(fm, M_AACBUF); return (ENOMEM); } /* Ignore errors since this doesn't bounce */ (void)bus_dmamap_load(sc->aac_fib_dmat, fm->aac_fibmap, fm->aac_fibs, sc->aac_max_fibs_alloc * sc->aac_max_fib_size, aac_map_command_helper, &fibphys, 0); /* initialize constant fields in the command structure */ bzero(fm->aac_fibs, sc->aac_max_fibs_alloc * sc->aac_max_fib_size); for (i = 0; i < sc->aac_max_fibs_alloc; i++) { cm = sc->aac_commands + sc->total_fibs; fm->aac_commands = cm; cm->cm_sc = sc; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)fm->aac_fibs + i*sc->aac_max_fib_size); cm->cm_fibphys = fibphys + i*sc->aac_max_fib_size; cm->cm_index = sc->total_fibs; if ((error = bus_dmamap_create(sc->aac_buffer_dmat, 0, &cm->cm_datamap)) != 0) break; mtx_lock(&sc->aac_io_lock); aac_release_command(cm); sc->total_fibs++; mtx_unlock(&sc->aac_io_lock); } if (i > 0) { mtx_lock(&sc->aac_io_lock); TAILQ_INSERT_TAIL(&sc->aac_fibmap_tqh, fm, fm_link); fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "total_fibs= %d\n", sc->total_fibs); mtx_unlock(&sc->aac_io_lock); return (0); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACBUF); return (ENOMEM); } /* * Free FIBs owned by this adapter. */ static void aac_free_commands(struct aac_softc *sc) { struct aac_fibmap *fm; struct aac_command *cm; int i; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); while ((fm = TAILQ_FIRST(&sc->aac_fibmap_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_fibmap_tqh, fm, fm_link); /* * We check against total_fibs to handle partially * allocated blocks. */ for (i = 0; i < sc->aac_max_fibs_alloc && sc->total_fibs--; i++) { cm = fm->aac_commands + i; bus_dmamap_destroy(sc->aac_buffer_dmat, cm->cm_datamap); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACBUF); } } /* * Command-mapping helper function - populate this command's s/g table. */ static void aac_map_command_sg(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; int i; cm = (struct aac_command *)arg; sc = cm->cm_sc; fib = cm->cm_fib; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* copy into the FIB */ if (cm->cm_sgtable != NULL) { if (fib->Header.Command == RawIo) { struct aac_sg_tableraw *sg; sg = (struct aac_sg_tableraw *)cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntryRaw[i].SgAddress = segs[i].ds_addr; sg->SgEntryRaw[i].SgByteCount = segs[i].ds_len; sg->SgEntryRaw[i].Next = 0; sg->SgEntryRaw[i].Prev = 0; sg->SgEntryRaw[i].Flags = 0; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entryraw); } else if ((cm->cm_sc->flags & AAC_FLAGS_SG_64BIT) == 0) { struct aac_sg_table *sg; sg = cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntry[i].SgAddress = segs[i].ds_addr; sg->SgEntry[i].SgByteCount = segs[i].ds_len; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry); } else { struct aac_sg_table64 *sg; sg = (struct aac_sg_table64 *)cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntry64[i].SgAddress = segs[i].ds_addr; sg->SgEntry64[i].SgByteCount = segs[i].ds_len; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry64); } } /* Fix up the address values in the FIB. Use the command array index * instead of a pointer since these fields are only 32 bits. Shift * the SenderFibAddress over to make room for the fast response bit * and for the AIF bit */ cm->cm_fib->Header.SenderFibAddress = (cm->cm_index << 2); cm->cm_fib->Header.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; /* save a pointer to the command for speedy reverse-lookup */ cm->cm_fib->Header.SenderData = cm->cm_index; if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREWRITE); cm->cm_flags |= AAC_CMD_MAPPED; if (sc->flags & AAC_FLAGS_NEW_COMM) { int count = 10000000L; while (AAC_SEND_COMMAND(sc, cm) != 0) { if (--count == 0) { aac_unmap_command(cm); sc->flags |= AAC_QUEUE_FRZN; aac_requeue_ready(cm); } DELAY(5); /* wait 5 usec. */ } } else { /* Put the FIB on the outbound queue */ if (aac_enqueue_fib(sc, cm->cm_queue, cm) == EBUSY) { aac_unmap_command(cm); sc->flags |= AAC_QUEUE_FRZN; aac_requeue_ready(cm); } } } /* * Unmap a command from controller-visible space. */ static void aac_unmap_command(struct aac_command *cm) { struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (!(cm->cm_flags & AAC_CMD_MAPPED)) return; if (cm->cm_datalen != 0) { if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->aac_buffer_dmat, cm->cm_datamap); } cm->cm_flags &= ~AAC_CMD_MAPPED; } /* * Hardware Interface */ /* * Initialize the adapter. */ static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_common_busaddr = segs[0].ds_addr; } static int aac_check_firmware(struct aac_softc *sc) { u_int32_t code, major, minor, options = 0, atu_size = 0; int rid, status; time_t then; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Wait for the adapter to come ready. */ then = time_uptime; do { code = AAC_GET_FWSTATUS(sc); if (code & AAC_SELF_TEST_FAILED) { device_printf(sc->aac_dev, "FATAL: selftest failed\n"); return(ENXIO); } if (code & AAC_KERNEL_PANIC) { device_printf(sc->aac_dev, "FATAL: controller kernel panic"); return(ENXIO); } if (time_uptime > (then + AAC_BOOT_TIMEOUT)) { device_printf(sc->aac_dev, "FATAL: controller not coming ready, " "status %x\n", code); return(ENXIO); } } while (!(code & AAC_UP_AND_RUNNING)); /* * Retrieve the firmware version numbers. Dell PERC2/QC cards with * firmware version 1.x are not compatible with this driver. */ if (sc->flags & AAC_FLAGS_PERC2QC) { if (aac_sync_command(sc, AAC_MONKER_GETKERNVER, 0, 0, 0, 0, NULL)) { device_printf(sc->aac_dev, "Error reading firmware version\n"); return (EIO); } /* These numbers are stored as ASCII! */ major = (AAC_GET_MAILBOX(sc, 1) & 0xff) - 0x30; minor = (AAC_GET_MAILBOX(sc, 2) & 0xff) - 0x30; if (major == 1) { device_printf(sc->aac_dev, "Firmware version %d.%d is not supported.\n", major, minor); return (EINVAL); } } /* * Retrieve the capabilities/supported options word so we know what * work-arounds to enable. Some firmware revs don't support this * command. */ if (aac_sync_command(sc, AAC_MONKER_GETINFO, 0, 0, 0, 0, &status)) { if (status != AAC_SRB_STS_INVALID_REQUEST) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); return (EIO); } } else { options = AAC_GET_MAILBOX(sc, 1); atu_size = AAC_GET_MAILBOX(sc, 2); sc->supported_options = options; if ((options & AAC_SUPPORTED_4GB_WINDOW) != 0 && (sc->flags & AAC_FLAGS_NO4GB) == 0) sc->flags |= AAC_FLAGS_4GB_WINDOW; if (options & AAC_SUPPORTED_NONDASD) sc->flags |= AAC_FLAGS_ENABLE_CAM; if ((options & AAC_SUPPORTED_SGMAP_HOST64) != 0 && (sizeof(bus_addr_t) > 4)) { device_printf(sc->aac_dev, "Enabling 64-bit address support\n"); sc->flags |= AAC_FLAGS_SG_64BIT; } if ((options & AAC_SUPPORTED_NEW_COMM) && sc->aac_if->aif_send_command) sc->flags |= AAC_FLAGS_NEW_COMM; if (options & AAC_SUPPORTED_64BIT_ARRAYSIZE) sc->flags |= AAC_FLAGS_ARRAY_64BIT; } /* Check for broken hardware that does a lower number of commands */ sc->aac_max_fibs = (sc->flags & AAC_FLAGS_256FIBS ? 256:512); /* Remap mem. resource, if required */ if ((sc->flags & AAC_FLAGS_NEW_COMM) && atu_size > rman_get_size(sc->aac_regs_res1)) { rid = rman_get_rid(sc->aac_regs_res1); bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, rid, sc->aac_regs_res1); sc->aac_regs_res1 = bus_alloc_resource_anywhere(sc->aac_dev, SYS_RES_MEMORY, &rid, atu_size, RF_ACTIVE); if (sc->aac_regs_res1 == NULL) { sc->aac_regs_res1 = bus_alloc_resource_any( sc->aac_dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->aac_regs_res1 == NULL) { device_printf(sc->aac_dev, "couldn't allocate register window\n"); return (ENXIO); } sc->flags &= ~AAC_FLAGS_NEW_COMM; } sc->aac_btag1 = rman_get_bustag(sc->aac_regs_res1); sc->aac_bhandle1 = rman_get_bushandle(sc->aac_regs_res1); if (sc->aac_hwif == AAC_HWIF_NARK) { sc->aac_regs_res0 = sc->aac_regs_res1; sc->aac_btag0 = sc->aac_btag1; sc->aac_bhandle0 = sc->aac_bhandle1; } } /* Read preferred settings */ sc->aac_max_fib_size = sizeof(struct aac_fib); sc->aac_max_sectors = 128; /* 64KB */ if (sc->flags & AAC_FLAGS_SG_64BIT) sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite64)) / sizeof(struct aac_sg_entry64); else sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite)) / sizeof(struct aac_sg_entry); if (!aac_sync_command(sc, AAC_MONKER_GETCOMMPREF, 0, 0, 0, 0, NULL)) { options = AAC_GET_MAILBOX(sc, 1); sc->aac_max_fib_size = (options & 0xFFFF); sc->aac_max_sectors = (options >> 16) << 1; options = AAC_GET_MAILBOX(sc, 2); sc->aac_sg_tablesize = (options >> 16); options = AAC_GET_MAILBOX(sc, 3); sc->aac_max_fibs = (options & 0xFFFF); } if (sc->aac_max_fib_size > PAGE_SIZE) sc->aac_max_fib_size = PAGE_SIZE; sc->aac_max_fibs_alloc = PAGE_SIZE / sc->aac_max_fib_size; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { sc->flags |= AAC_FLAGS_RAW_IO; device_printf(sc->aac_dev, "Enable Raw I/O\n"); } if ((sc->flags & AAC_FLAGS_RAW_IO) && (sc->flags & AAC_FLAGS_ARRAY_64BIT)) { sc->flags |= AAC_FLAGS_LBA_64BIT; device_printf(sc->aac_dev, "Enable 64-bit array\n"); } return (0); } static int aac_init(struct aac_softc *sc) { struct aac_adapter_init *ip; u_int32_t qoffset; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Fill in the init structure. This tells the adapter about the * physical location of various important shared data structures. */ ip = &sc->aac_common->ac_init; ip->InitStructRevision = AAC_INIT_STRUCT_REVISION; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_4; sc->flags |= AAC_FLAGS_RAW_IO; } ip->MiniPortRevision = AAC_INIT_STRUCT_MINIPORT_REVISION; ip->AdapterFibsPhysicalAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_fibs); ip->AdapterFibsVirtualAddress = 0; ip->AdapterFibsSize = AAC_ADAPTER_FIBS * sizeof(struct aac_fib); ip->AdapterFibAlign = sizeof(struct aac_fib); ip->PrintfBufferAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_printf); ip->PrintfBufferSize = AAC_PRINTF_BUFSIZE; /* * The adapter assumes that pages are 4K in size, except on some * broken firmware versions that do the page->byte conversion twice, * therefore 'assuming' that this value is in 16MB units (2^24). * Round up since the granularity is so high. */ ip->HostPhysMemPages = ctob(physmem) / AAC_PAGE_SIZE; if (sc->flags & AAC_FLAGS_BROKEN_MEMMAP) { ip->HostPhysMemPages = (ip->HostPhysMemPages + AAC_PAGE_SIZE) / AAC_PAGE_SIZE; } ip->HostElapsedSeconds = time_uptime; /* reset later if invalid */ ip->InitFlags = 0; if (sc->flags & AAC_FLAGS_NEW_COMM) { ip->InitFlags |= AAC_INITFLAGS_NEW_COMM_SUPPORTED; device_printf(sc->aac_dev, "New comm. interface enabled\n"); } ip->MaxIoCommands = sc->aac_max_fibs; ip->MaxIoSize = sc->aac_max_sectors << 9; ip->MaxFibSize = sc->aac_max_fib_size; /* * Initialize FIB queues. Note that it appears that the layout of the * indexes and the segmentation of the entries may be mandated by the * adapter, which is only told about the base of the queue index fields. * * The initial values of the indices are assumed to inform the adapter * of the sizes of the respective queues, and theoretically it could * work out the entire layout of the queue structures from this. We * take the easy route and just lay this area out like everyone else * does. * * The Linux driver uses a much more complex scheme whereby several * header records are kept for each queue. We use a couple of generic * list manipulation functions which 'know' the size of each list by * virtue of a table. */ qoffset = offsetof(struct aac_common, ac_qbuf) + AAC_QUEUE_ALIGN; qoffset &= ~(AAC_QUEUE_ALIGN - 1); sc->aac_queues = (struct aac_queue_table *)((uintptr_t)sc->aac_common + qoffset); ip->CommHeaderAddress = sc->aac_common_busaddr + qoffset; sc->aac_queues->qt_qindex[AAC_HOST_NORM_CMD_QUEUE][AAC_PRODUCER_INDEX] = AAC_HOST_NORM_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_NORM_CMD_QUEUE][AAC_CONSUMER_INDEX] = AAC_HOST_NORM_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_HIGH_CMD_QUEUE][AAC_PRODUCER_INDEX] = AAC_HOST_HIGH_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_HIGH_CMD_QUEUE][AAC_CONSUMER_INDEX] = AAC_HOST_HIGH_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_NORM_CMD_QUEUE][AAC_PRODUCER_INDEX] = AAC_ADAP_NORM_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_NORM_CMD_QUEUE][AAC_CONSUMER_INDEX] = AAC_ADAP_NORM_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_HIGH_CMD_QUEUE][AAC_PRODUCER_INDEX] = AAC_ADAP_HIGH_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_HIGH_CMD_QUEUE][AAC_CONSUMER_INDEX] = AAC_ADAP_HIGH_CMD_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_NORM_RESP_QUEUE][AAC_PRODUCER_INDEX]= AAC_HOST_NORM_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_NORM_RESP_QUEUE][AAC_CONSUMER_INDEX]= AAC_HOST_NORM_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_HIGH_RESP_QUEUE][AAC_PRODUCER_INDEX]= AAC_HOST_HIGH_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_HOST_HIGH_RESP_QUEUE][AAC_CONSUMER_INDEX]= AAC_HOST_HIGH_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_NORM_RESP_QUEUE][AAC_PRODUCER_INDEX]= AAC_ADAP_NORM_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_NORM_RESP_QUEUE][AAC_CONSUMER_INDEX]= AAC_ADAP_NORM_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_HIGH_RESP_QUEUE][AAC_PRODUCER_INDEX]= AAC_ADAP_HIGH_RESP_ENTRIES; sc->aac_queues->qt_qindex[AAC_ADAP_HIGH_RESP_QUEUE][AAC_CONSUMER_INDEX]= AAC_ADAP_HIGH_RESP_ENTRIES; sc->aac_qentries[AAC_HOST_NORM_CMD_QUEUE] = &sc->aac_queues->qt_HostNormCmdQueue[0]; sc->aac_qentries[AAC_HOST_HIGH_CMD_QUEUE] = &sc->aac_queues->qt_HostHighCmdQueue[0]; sc->aac_qentries[AAC_ADAP_NORM_CMD_QUEUE] = &sc->aac_queues->qt_AdapNormCmdQueue[0]; sc->aac_qentries[AAC_ADAP_HIGH_CMD_QUEUE] = &sc->aac_queues->qt_AdapHighCmdQueue[0]; sc->aac_qentries[AAC_HOST_NORM_RESP_QUEUE] = &sc->aac_queues->qt_HostNormRespQueue[0]; sc->aac_qentries[AAC_HOST_HIGH_RESP_QUEUE] = &sc->aac_queues->qt_HostHighRespQueue[0]; sc->aac_qentries[AAC_ADAP_NORM_RESP_QUEUE] = &sc->aac_queues->qt_AdapNormRespQueue[0]; sc->aac_qentries[AAC_ADAP_HIGH_RESP_QUEUE] = &sc->aac_queues->qt_AdapHighRespQueue[0]; /* * Do controller-type-specific initialisation */ switch (sc->aac_hwif) { case AAC_HWIF_I960RX: AAC_MEM0_SETREG4(sc, AAC_RX_ODBR, ~0); break; case AAC_HWIF_RKT: AAC_MEM0_SETREG4(sc, AAC_RKT_ODBR, ~0); break; default: break; } /* * Give the init structure to the controller. */ if (aac_sync_command(sc, AAC_MONKER_INITSTRUCT, sc->aac_common_busaddr + offsetof(struct aac_common, ac_init), 0, 0, 0, NULL)) { device_printf(sc->aac_dev, "error establishing init structure\n"); error = EIO; goto out; } error = 0; out: return(error); } static int aac_setup_intr(struct aac_softc *sc) { if (sc->flags & AAC_FLAGS_NEW_COMM) { if (bus_setup_intr(sc->aac_dev, sc->aac_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, aac_new_intr, sc, &sc->aac_intr)) { device_printf(sc->aac_dev, "can't set up interrupt\n"); return (EINVAL); } } else { if (bus_setup_intr(sc->aac_dev, sc->aac_irq, INTR_TYPE_BIO, aac_filter, NULL, sc, &sc->aac_intr)) { device_printf(sc->aac_dev, "can't set up interrupt filter\n"); return (EINVAL); } } return (0); } /* * Send a synchronous command to the controller and wait for a result. * Indicate if the controller completed the command with an error status. */ static int aac_sync_command(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3, u_int32_t *sp) { time_t then; u_int32_t status; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* populate the mailbox */ AAC_SET_MAILBOX(sc, command, arg0, arg1, arg2, arg3); /* ensure the sync command doorbell flag is cleared */ AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* then set it to signal the adapter */ AAC_QNOTIFY(sc, AAC_DB_SYNC_COMMAND); /* spin waiting for the command to complete */ then = time_uptime; do { if (time_uptime > (then + AAC_IMMEDIATE_TIMEOUT)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "timed out"); return(EIO); } } while (!(AAC_GET_ISTATUS(sc) & AAC_DB_SYNC_COMMAND)); /* clear the completion flag */ AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* get the command status */ status = AAC_GET_MAILBOX(sc, 0); if (sp != NULL) *sp = status; if (status != AAC_SRB_STS_SUCCESS) return (-1); return(0); } int aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, struct aac_fib *fib, u_int16_t datasize) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (datasize > AAC_FIB_DATASIZE) return(EINVAL); /* * Set up the sync FIB */ fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY; fib->Header.XferState |= xferstate; fib->Header.Command = command; fib->Header.StructType = AAC_FIBTYPE_TFIB; fib->Header.Size = sizeof(struct aac_fib_header) + datasize; fib->Header.SenderSize = sizeof(struct aac_fib); fib->Header.SenderFibAddress = 0; /* Not needed */ fib->Header.ReceiverFibAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_sync_fib); /* * Give the FIB to the controller, wait for a response. */ if (aac_sync_command(sc, AAC_MONKER_SYNCFIB, fib->Header.ReceiverFibAddress, 0, 0, 0, NULL)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "IO error"); return(EIO); } return (0); } /* * Adapter-space FIB queue manipulation * * Note that the queue implementation here is a little funky; neither the PI or * CI will ever be zero. This behaviour is a controller feature. */ static const struct { int size; int notify; } aac_qinfo[] = { {AAC_HOST_NORM_CMD_ENTRIES, AAC_DB_COMMAND_NOT_FULL}, {AAC_HOST_HIGH_CMD_ENTRIES, 0}, {AAC_ADAP_NORM_CMD_ENTRIES, AAC_DB_COMMAND_READY}, {AAC_ADAP_HIGH_CMD_ENTRIES, 0}, {AAC_HOST_NORM_RESP_ENTRIES, AAC_DB_RESPONSE_NOT_FULL}, {AAC_HOST_HIGH_RESP_ENTRIES, 0}, {AAC_ADAP_NORM_RESP_ENTRIES, AAC_DB_RESPONSE_READY}, {AAC_ADAP_HIGH_RESP_ENTRIES, 0} }; /* * Atomically insert an entry into the nominated queue, returns 0 on success or * EBUSY if the queue is full. * * Note: it would be more efficient to defer notifying the controller in * the case where we may be inserting several entries in rapid succession, * but implementing this usefully may be difficult (it would involve a * separate queue/notify interface). */ static int aac_enqueue_fib(struct aac_softc *sc, int queue, struct aac_command *cm) { u_int32_t pi, ci; int error; u_int32_t fib_size; u_int32_t fib_addr; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); fib_size = cm->cm_fib->Header.Size; fib_addr = cm->cm_fib->Header.ReceiverFibAddress; /* get the producer/consumer indices */ pi = sc->aac_queues->qt_qindex[queue][AAC_PRODUCER_INDEX]; ci = sc->aac_queues->qt_qindex[queue][AAC_CONSUMER_INDEX]; /* wrap the queue? */ if (pi >= aac_qinfo[queue].size) pi = 0; /* check for queue full */ if ((pi + 1) == ci) { error = EBUSY; goto out; } /* * To avoid a race with its completion interrupt, place this command on * the busy queue prior to advertising it to the controller. */ aac_enqueue_busy(cm); /* populate queue entry */ (sc->aac_qentries[queue] + pi)->aq_fib_size = fib_size; (sc->aac_qentries[queue] + pi)->aq_fib_addr = fib_addr; /* update producer index */ sc->aac_queues->qt_qindex[queue][AAC_PRODUCER_INDEX] = pi + 1; /* notify the adapter if we know how */ if (aac_qinfo[queue].notify != 0) AAC_QNOTIFY(sc, aac_qinfo[queue].notify); error = 0; out: return(error); } /* * Atomically remove one entry from the nominated queue, returns 0 on * success or ENOENT if the queue is empty. */ static int aac_dequeue_fib(struct aac_softc *sc, int queue, u_int32_t *fib_size, struct aac_fib **fib_addr) { u_int32_t pi, ci; u_int32_t fib_index; int error; int notify; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* get the producer/consumer indices */ pi = sc->aac_queues->qt_qindex[queue][AAC_PRODUCER_INDEX]; ci = sc->aac_queues->qt_qindex[queue][AAC_CONSUMER_INDEX]; /* check for queue empty */ if (ci == pi) { error = ENOENT; goto out; } /* wrap the pi so the following test works */ if (pi >= aac_qinfo[queue].size) pi = 0; notify = 0; if (ci == pi + 1) notify++; /* wrap the queue? */ if (ci >= aac_qinfo[queue].size) ci = 0; /* fetch the entry */ *fib_size = (sc->aac_qentries[queue] + ci)->aq_fib_size; switch (queue) { case AAC_HOST_NORM_CMD_QUEUE: case AAC_HOST_HIGH_CMD_QUEUE: /* * The aq_fib_addr is only 32 bits wide so it can't be counted * on to hold an address. For AIF's, the adapter assumes * that it's giving us an address into the array of AIF fibs. * Therefore, we have to convert it to an index. */ fib_index = (sc->aac_qentries[queue] + ci)->aq_fib_addr / sizeof(struct aac_fib); *fib_addr = &sc->aac_common->ac_fibs[fib_index]; break; case AAC_HOST_NORM_RESP_QUEUE: case AAC_HOST_HIGH_RESP_QUEUE: { struct aac_command *cm; /* * As above, an index is used instead of an actual address. * Gotta shift the index to account for the fast response * bit. No other correction is needed since this value was * originally provided by the driver via the SenderFibAddress * field. */ fib_index = (sc->aac_qentries[queue] + ci)->aq_fib_addr; cm = sc->aac_commands + (fib_index >> 2); *fib_addr = cm->cm_fib; /* * Is this a fast response? If it is, update the fib fields in * local memory since the whole fib isn't DMA'd back up. */ if (fib_index & 0x01) { (*fib_addr)->Header.XferState |= AAC_FIBSTATE_DONEADAP; *((u_int32_t*)((*fib_addr)->data)) = AAC_ERROR_NORMAL; } break; } default: panic("Invalid queue in aac_dequeue_fib()"); break; } /* update consumer index */ sc->aac_queues->qt_qindex[queue][AAC_CONSUMER_INDEX] = ci + 1; /* if we have made the queue un-full, notify the adapter */ if (notify && (aac_qinfo[queue].notify != 0)) AAC_QNOTIFY(sc, aac_qinfo[queue].notify); error = 0; out: return(error); } /* * Put our response to an Adapter Initialed Fib on the response queue */ static int aac_enqueue_response(struct aac_softc *sc, int queue, struct aac_fib *fib) { u_int32_t pi, ci; int error; u_int32_t fib_size; u_int32_t fib_addr; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* Tell the adapter where the FIB is */ fib_size = fib->Header.Size; fib_addr = fib->Header.SenderFibAddress; fib->Header.ReceiverFibAddress = fib_addr; /* get the producer/consumer indices */ pi = sc->aac_queues->qt_qindex[queue][AAC_PRODUCER_INDEX]; ci = sc->aac_queues->qt_qindex[queue][AAC_CONSUMER_INDEX]; /* wrap the queue? */ if (pi >= aac_qinfo[queue].size) pi = 0; /* check for queue full */ if ((pi + 1) == ci) { error = EBUSY; goto out; } /* populate queue entry */ (sc->aac_qentries[queue] + pi)->aq_fib_size = fib_size; (sc->aac_qentries[queue] + pi)->aq_fib_addr = fib_addr; /* update producer index */ sc->aac_queues->qt_qindex[queue][AAC_PRODUCER_INDEX] = pi + 1; /* notify the adapter if we know how */ if (aac_qinfo[queue].notify != 0) AAC_QNOTIFY(sc, aac_qinfo[queue].notify); error = 0; out: return(error); } /* * Check for commands that have been outstanding for a suspiciously long time, * and complain about them. */ static void aac_timeout(struct aac_softc *sc) { struct aac_command *cm; time_t deadline; int timedout, code; /* * Traverse the busy command list, bitch about late commands once * only. */ timedout = 0; deadline = time_uptime - AAC_CMD_TIMEOUT; TAILQ_FOREACH(cm, &sc->aac_busy, cm_link) { if ((cm->cm_timestamp < deadline) && !(cm->cm_flags & AAC_CMD_TIMEDOUT)) { cm->cm_flags |= AAC_CMD_TIMEDOUT; device_printf(sc->aac_dev, "COMMAND %p (TYPE %d) TIMEOUT AFTER %d SECONDS\n", cm, cm->cm_fib->Header.Command, (int)(time_uptime-cm->cm_timestamp)); AAC_PRINT_FIB(sc, cm->cm_fib); timedout++; } } if (timedout) { code = AAC_GET_FWSTATUS(sc); if (code != AAC_UP_AND_RUNNING) { device_printf(sc->aac_dev, "WARNING! Controller is no " "longer running! code= 0x%x\n", code); } } } /* * Interface Function Vectors */ /* * Read the current firmware status word. */ static int aac_sa_get_fwstatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SA_FWSTATUS)); } static int aac_rx_get_fwstatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, sc->flags & AAC_FLAGS_NEW_COMM ? AAC_RX_OMR0 : AAC_RX_FWSTATUS)); } static int aac_rkt_get_fwstatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, sc->flags & AAC_FLAGS_NEW_COMM ? AAC_RKT_OMR0 : AAC_RKT_FWSTATUS)); } /* * Notify the controller of a change in a given queue */ static void aac_sa_qnotify(struct aac_softc *sc, int qbit) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG2(sc, AAC_SA_DOORBELL1_SET, qbit); } static void aac_rx_qnotify(struct aac_softc *sc, int qbit) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RX_IDBR, qbit); } static void aac_rkt_qnotify(struct aac_softc *sc, int qbit) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RKT_IDBR, qbit); } /* * Get the interrupt reason bits */ static int aac_sa_get_istatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG2(sc, AAC_SA_DOORBELL0)); } static int aac_rx_get_istatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_RX_ODBR)); } static int aac_rkt_get_istatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_RKT_ODBR)); } /* * Clear some interrupt reason bits */ static void aac_sa_clear_istatus(struct aac_softc *sc, int mask) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG2(sc, AAC_SA_DOORBELL0_CLEAR, mask); } static void aac_rx_clear_istatus(struct aac_softc *sc, int mask) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RX_ODBR, mask); } static void aac_rkt_clear_istatus(struct aac_softc *sc, int mask) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RKT_ODBR, mask); } /* * Populate the mailbox and set the command word */ static void aac_sa_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM1_SETREG4(sc, AAC_SA_MAILBOX, command); AAC_MEM1_SETREG4(sc, AAC_SA_MAILBOX + 4, arg0); AAC_MEM1_SETREG4(sc, AAC_SA_MAILBOX + 8, arg1); AAC_MEM1_SETREG4(sc, AAC_SA_MAILBOX + 12, arg2); AAC_MEM1_SETREG4(sc, AAC_SA_MAILBOX + 16, arg3); } static void aac_rx_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM1_SETREG4(sc, AAC_RX_MAILBOX, command); AAC_MEM1_SETREG4(sc, AAC_RX_MAILBOX + 4, arg0); AAC_MEM1_SETREG4(sc, AAC_RX_MAILBOX + 8, arg1); AAC_MEM1_SETREG4(sc, AAC_RX_MAILBOX + 12, arg2); AAC_MEM1_SETREG4(sc, AAC_RX_MAILBOX + 16, arg3); } static void aac_rkt_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM1_SETREG4(sc, AAC_RKT_MAILBOX, command); AAC_MEM1_SETREG4(sc, AAC_RKT_MAILBOX + 4, arg0); AAC_MEM1_SETREG4(sc, AAC_RKT_MAILBOX + 8, arg1); AAC_MEM1_SETREG4(sc, AAC_RKT_MAILBOX + 12, arg2); AAC_MEM1_SETREG4(sc, AAC_RKT_MAILBOX + 16, arg3); } /* * Fetch the immediate command status word */ static int aac_sa_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM1_GETREG4(sc, AAC_SA_MAILBOX + (mb * 4))); } static int aac_rx_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM1_GETREG4(sc, AAC_RX_MAILBOX + (mb * 4))); } static int aac_rkt_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM1_GETREG4(sc, AAC_RKT_MAILBOX + (mb * 4))); } /* * Set/clear interrupt masks */ static void aac_sa_set_interrupts(struct aac_softc *sc, int enable) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "%sable interrupts", enable ? "en" : "dis"); if (enable) { AAC_MEM0_SETREG2((sc), AAC_SA_MASK0_CLEAR, AAC_DB_INTERRUPTS); } else { AAC_MEM0_SETREG2((sc), AAC_SA_MASK0_SET, ~0); } } static void aac_rx_set_interrupts(struct aac_softc *sc, int enable) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "%sable interrupts", enable ? "en" : "dis"); if (enable) { if (sc->flags & AAC_FLAGS_NEW_COMM) AAC_MEM0_SETREG4(sc, AAC_RX_OIMR, ~AAC_DB_INT_NEW_COMM); else AAC_MEM0_SETREG4(sc, AAC_RX_OIMR, ~AAC_DB_INTERRUPTS); } else { AAC_MEM0_SETREG4(sc, AAC_RX_OIMR, ~0); } } static void aac_rkt_set_interrupts(struct aac_softc *sc, int enable) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "%sable interrupts", enable ? "en" : "dis"); if (enable) { if (sc->flags & AAC_FLAGS_NEW_COMM) AAC_MEM0_SETREG4(sc, AAC_RKT_OIMR, ~AAC_DB_INT_NEW_COMM); else AAC_MEM0_SETREG4(sc, AAC_RKT_OIMR, ~AAC_DB_INTERRUPTS); } else { AAC_MEM0_SETREG4(sc, AAC_RKT_OIMR, ~0); } } /* * New comm. interface: Send command functions */ static int aac_rx_send_command(struct aac_softc *sc, struct aac_command *cm) { u_int32_t index, device; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "send command (new comm.)"); index = AAC_MEM0_GETREG4(sc, AAC_RX_IQUE); if (index == 0xffffffffL) index = AAC_MEM0_GETREG4(sc, AAC_RX_IQUE); if (index == 0xffffffffL) return index; aac_enqueue_busy(cm); device = index; AAC_MEM1_SETREG4(sc, device, (u_int32_t)(cm->cm_fibphys & 0xffffffffUL)); device += 4; AAC_MEM1_SETREG4(sc, device, (u_int32_t)(cm->cm_fibphys >> 32)); device += 4; AAC_MEM1_SETREG4(sc, device, cm->cm_fib->Header.Size); AAC_MEM0_SETREG4(sc, AAC_RX_IQUE, index); return 0; } static int aac_rkt_send_command(struct aac_softc *sc, struct aac_command *cm) { u_int32_t index, device; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "send command (new comm.)"); index = AAC_MEM0_GETREG4(sc, AAC_RKT_IQUE); if (index == 0xffffffffL) index = AAC_MEM0_GETREG4(sc, AAC_RKT_IQUE); if (index == 0xffffffffL) return index; aac_enqueue_busy(cm); device = index; AAC_MEM1_SETREG4(sc, device, (u_int32_t)(cm->cm_fibphys & 0xffffffffUL)); device += 4; AAC_MEM1_SETREG4(sc, device, (u_int32_t)(cm->cm_fibphys >> 32)); device += 4; AAC_MEM1_SETREG4(sc, device, cm->cm_fib->Header.Size); AAC_MEM0_SETREG4(sc, AAC_RKT_IQUE, index); return 0; } /* * New comm. interface: get, set outbound queue index */ static int aac_rx_get_outb_queue(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_RX_OQUE)); } static int aac_rkt_get_outb_queue(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_RKT_OQUE)); } static void aac_rx_set_outb_queue(struct aac_softc *sc, int index) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RX_OQUE, index); } static void aac_rkt_set_outb_queue(struct aac_softc *sc, int index) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_RKT_OQUE, index); } /* * Debugging and Diagnostics */ /* * Print some information about the controller. */ static void aac_describe_controller(struct aac_softc *sc) { struct aac_fib *fib; struct aac_adapter_info *info; char *adapter_type = "Adaptec RAID controller"; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); fib->data[0] = 0; if (aac_sync_fib(sc, RequestAdapterInfo, 0, fib, 1)) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } /* save the kernel revision structure for later use */ info = (struct aac_adapter_info *)&fib->data[0]; sc->aac_revision = info->KernelRevision; if (bootverbose) { device_printf(sc->aac_dev, "%s %dMHz, %dMB memory " "(%dMB cache, %dMB execution), %s\n", aac_describe_code(aac_cpu_variant, info->CpuVariant), info->ClockSpeed, info->TotalMem / (1024 * 1024), info->BufferMem / (1024 * 1024), info->ExecutionMem / (1024 * 1024), aac_describe_code(aac_battery_platform, info->batteryPlatform)); device_printf(sc->aac_dev, "Kernel %d.%d-%d, Build %d, S/N %6X\n", info->KernelRevision.external.comp.major, info->KernelRevision.external.comp.minor, info->KernelRevision.external.comp.dash, info->KernelRevision.buildNumber, (u_int32_t)(info->SerialNumber & 0xffffff)); device_printf(sc->aac_dev, "Supported Options=%b\n", sc->supported_options, "\20" "\1SNAPSHOT" "\2CLUSTERS" "\3WCACHE" "\4DATA64" "\5HOSTTIME" "\6RAID50" "\7WINDOW4GB" "\10SCSIUPGD" "\11SOFTERR" "\12NORECOND" "\13SGMAP64" "\14ALARM" "\15NONDASD" "\16SCSIMGT" "\17RAIDSCSI" "\21ADPTINFO" "\22NEWCOMM" "\23ARRAY64BIT" "\24HEATSENSOR"); } if (sc->supported_options & AAC_SUPPORTED_SUPPLEMENT_ADAPTER_INFO) { fib->data[0] = 0; if (aac_sync_fib(sc, RequestSupplementAdapterInfo, 0, fib, 1)) device_printf(sc->aac_dev, "RequestSupplementAdapterInfo failed\n"); else adapter_type = ((struct aac_supplement_adapter_info *) &fib->data[0])->AdapterTypeText; } device_printf(sc->aac_dev, "%s, aac driver %d.%d.%d-%d\n", adapter_type, AAC_DRIVER_MAJOR_VERSION, AAC_DRIVER_MINOR_VERSION, AAC_DRIVER_BUGFIX_LEVEL, AAC_DRIVER_BUILD); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); } /* * Look up a text description of a numeric error code and return a pointer to * same. */ static const char * aac_describe_code(const struct aac_code_lookup *table, u_int32_t code) { int i; for (i = 0; table[i].string != NULL; i++) if (table[i].code == code) return(table[i].string); return(table[i + 1].string); } /* * Management Interface */ static int aac_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct aac_softc *sc; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); device_busy(sc->aac_dev); devfs_set_cdevpriv(sc, aac_cdevpriv_dtor); return 0; } static int aac_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { union aac_statrequest *as; struct aac_softc *sc; int error = 0; as = (union aac_statrequest *)arg; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); switch (cmd) { case AACIO_STATS: switch (as->as_item) { case AACQ_FREE: case AACQ_BIO: case AACQ_READY: case AACQ_BUSY: bcopy(&sc->aac_qstat[as->as_item], &as->as_qstat, sizeof(struct aac_qstat)); break; default: error = ENOENT; break; } break; case FSACTL_SENDFIB: case FSACTL_SEND_LARGE_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_SENDFIB: case FSACTL_LNX_SEND_LARGE_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SENDFIB"); error = aac_ioctl_sendfib(sc, arg); break; case FSACTL_SEND_RAW_SRB: arg = *(caddr_t*)arg; case FSACTL_LNX_SEND_RAW_SRB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SEND_RAW_SRB"); error = aac_ioctl_send_raw_srb(sc, arg); break; case FSACTL_AIF_THREAD: case FSACTL_LNX_AIF_THREAD: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_AIF_THREAD"); error = EINVAL; break; case FSACTL_OPEN_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_OPEN_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_OPEN_GET_ADAPTER_FIB"); error = aac_open_aif(sc, arg); break; case FSACTL_GET_NEXT_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_NEXT_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_NEXT_ADAPTER_FIB"); error = aac_getnext_aif(sc, arg); break; case FSACTL_CLOSE_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_CLOSE_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_CLOSE_GET_ADAPTER_FIB"); error = aac_close_aif(sc, arg); break; case FSACTL_MINIPORT_REV_CHECK: arg = *(caddr_t*)arg; case FSACTL_LNX_MINIPORT_REV_CHECK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_MINIPORT_REV_CHECK"); error = aac_rev_check(sc, arg); break; case FSACTL_QUERY_DISK: arg = *(caddr_t*)arg; case FSACTL_LNX_QUERY_DISK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_QUERY_DISK"); error = aac_query_disk(sc, arg); break; case FSACTL_DELETE_DISK: case FSACTL_LNX_DELETE_DISK: /* * We don't trust the underland to tell us when to delete a * container, rather we rely on an AIF coming from the * controller */ error = 0; break; case FSACTL_GET_PCI_INFO: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_PCI_INFO: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_PCI_INFO"); error = aac_get_pci_info(sc, arg); break; case FSACTL_GET_FEATURES: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_FEATURES: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_FEATURES"); error = aac_supported_features(sc, arg); break; default: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "unsupported cmd 0x%lx\n", cmd); error = EINVAL; break; } return(error); } static int aac_poll(struct cdev *dev, int poll_events, struct thread *td) { struct aac_softc *sc; struct aac_fib_context *ctx; int revents; sc = dev->si_drv1; revents = 0; mtx_lock(&sc->aac_aifq_lock); if ((poll_events & (POLLRDNORM | POLLIN)) != 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->ctx_idx != sc->aifq_idx || ctx->ctx_wrap) { revents |= poll_events & (POLLIN | POLLRDNORM); break; } } } mtx_unlock(&sc->aac_aifq_lock); if (revents == 0) { if (poll_events & (POLLIN | POLLRDNORM)) selrecord(td, &sc->rcv_select); } return (revents); } static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg) { switch (event->ev_type) { case AAC_EVENT_CMFREE: mtx_assert(&sc->aac_io_lock, MA_OWNED); if (aac_alloc_command(sc, (struct aac_command **)arg)) { aac_add_event(sc, event); return; } free(event, M_AACBUF); wakeup(arg); break; default: break; } } /* * Send a FIB supplied from userspace */ static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib) { struct aac_command *cm; int size, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; /* * Get a command */ mtx_lock(&sc->aac_io_lock); if (aac_alloc_command(sc, &cm)) { struct aac_event *event; event = malloc(sizeof(struct aac_event), M_AACBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aac_add_event(sc, event); msleep(&cm, &sc->aac_io_lock, 0, "sendfib", 0); } mtx_unlock(&sc->aac_io_lock); /* * Fetch the FIB header, then re-copy to get data as well. */ if ((error = copyin(ufib, cm->cm_fib, sizeof(struct aac_fib_header))) != 0) goto out; size = cm->cm_fib->Header.Size + sizeof(struct aac_fib_header); if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "incoming FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } if ((error = copyin(ufib, cm->cm_fib, size)) != 0) goto out; cm->cm_fib->Header.Size = size; cm->cm_timestamp = time_uptime; /* * Pass the FIB to the controller, wait for it to complete. */ mtx_lock(&sc->aac_io_lock); error = aac_wait_command(cm); mtx_unlock(&sc->aac_io_lock); if (error != 0) { device_printf(sc->aac_dev, "aac_wait_command return %d\n", error); goto out; } /* * Copy the FIB and data back out to the caller. */ size = cm->cm_fib->Header.Size; if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "outbound FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } error = copyout(cm->cm_fib, ufib, size); out: if (cm != NULL) { mtx_lock(&sc->aac_io_lock); aac_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * Send a passthrough FIB supplied from userspace */ static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg) { struct aac_command *cm; struct aac_event *event; struct aac_fib *fib; struct aac_srb *srbcmd, *user_srb; struct aac_sg_entry *sge; struct aac_sg_entry64 *sge64; void *srb_sg_address, *ureply; uint32_t fibsize, srb_sg_bytecount; int error, transfer_data; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; transfer_data = 0; fibsize = 0; user_srb = (struct aac_srb *)arg; mtx_lock(&sc->aac_io_lock); if (aac_alloc_command(sc, &cm)) { event = malloc(sizeof(struct aac_event), M_AACBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aac_add_event(sc, event); msleep(cm, &sc->aac_io_lock, 0, "aacraw", 0); } mtx_unlock(&sc->aac_io_lock); cm->cm_data = NULL; fib = cm->cm_fib; srbcmd = (struct aac_srb *)fib->data; error = copyin(&user_srb->data_len, &fibsize, sizeof(uint32_t)); if (error != 0) goto out; if (fibsize > (sc->aac_max_fib_size - sizeof(struct aac_fib_header))) { error = EINVAL; goto out; } error = copyin(user_srb, srbcmd, fibsize); if (error != 0) goto out; srbcmd->function = 0; srbcmd->retry_limit = 0; if (srbcmd->sg_map.SgCount > 1) { error = EINVAL; goto out; } /* Retrieve correct SG entries. */ if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry))) { struct aac_sg_entry sg; sge = srbcmd->sg_map.SgEntry; sge64 = NULL; if ((error = copyin(sge, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = (void *)(uintptr_t)sg.SgAddress; } #ifdef __amd64__ else if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry64))) { struct aac_sg_entry64 sg; sge = NULL; sge64 = (struct aac_sg_entry64 *)srbcmd->sg_map.SgEntry; if ((error = copyin(sge64, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = (void *)sg.SgAddress; if (sge64->SgAddress > 0xffffffffull && (sc->flags & AAC_FLAGS_SG_64BIT) == 0) { error = EINVAL; goto out; } } #endif else { error = EINVAL; goto out; } ureply = (char *)arg + fibsize; srbcmd->data_len = srb_sg_bytecount; if (srbcmd->sg_map.SgCount == 1) transfer_data = 1; cm->cm_sgtable = (struct aac_sg_table *)&srbcmd->sg_map; if (transfer_data) { cm->cm_datalen = srb_sg_bytecount; cm->cm_data = malloc(cm->cm_datalen, M_AACBUF, M_NOWAIT); if (cm->cm_data == NULL) { error = ENOMEM; goto out; } if (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN) cm->cm_flags |= AAC_CMD_DATAIN; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_OUT) { cm->cm_flags |= AAC_CMD_DATAOUT; error = copyin(srb_sg_address, cm->cm_data, cm->cm_datalen); if (error != 0) goto out; } } fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_srb); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = (sc->flags & AAC_FLAGS_SG_64BIT) != 0 ? ScsiPortCommandU64 : ScsiPortCommand; mtx_lock(&sc->aac_io_lock); aac_wait_command(cm); mtx_unlock(&sc->aac_io_lock); if (transfer_data && (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN) != 0) { error = copyout(cm->cm_data, srb_sg_address, cm->cm_datalen); if (error != 0) goto out; } error = copyout(fib->data, ureply, sizeof(struct aac_srb_response)); out: if (cm != NULL) { if (cm->cm_data != NULL) free(cm->cm_data, M_AACBUF); mtx_lock(&sc->aac_io_lock); aac_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * cdevpriv interface private destructor. */ static void aac_cdevpriv_dtor(void *arg) { struct aac_softc *sc; sc = arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&Giant); device_unbusy(sc->aac_dev); mtx_unlock(&Giant); } /* * Handle an AIF sent to us by the controller; queue it for later reference. * If the queue fills up, then drop the older entries. */ static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib) { struct aac_aif_command *aif; struct aac_container *co, *co_next; struct aac_fib_context *ctx; struct aac_mntinforesp *mir; int next, current, found; int count = 0, added = 0, i = 0; uint32_t channel; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); aif = (struct aac_aif_command*)&fib->data[0]; aac_print_aif(sc, aif); /* Is it an event that we should care about? */ switch (aif->command) { case AifCmdEventNotify: switch (aif->data.EN.type) { case AifEnAddContainer: case AifEnDeleteContainer: /* * A container was added or deleted, but the message * doesn't tell us anything else! Re-enumerate the * containers and sort things out. */ aac_alloc_sync_fib(sc, &fib); do { /* * Ask the controller for its containers one at * a time. * XXX What if the controller's list changes * midway through this enumaration? * XXX This should be done async. */ if ((mir = aac_get_container_info(sc, fib, i)) == NULL) continue; if (i == 0) count = mir->MntRespCount; /* * Check the container against our list. * co->co_found was already set to 0 in a * previous run. */ if ((mir->Status == ST_OK) && (mir->MntTable[0].VolType != CT_NONE)) { found = 0; TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == mir->MntTable[0].ObjectId) { co->co_found = 1; found = 1; break; } } /* * If the container matched, continue * in the list. */ if (found) { i++; continue; } /* * This is a new container. Do all the * appropriate things to set it up. */ aac_add_container(sc, mir, 1); added = 1; } i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); /* * Go through our list of containers and see which ones * were not marked 'found'. Since the controller didn't * list them they must have been deleted. Do the * appropriate steps to destroy the device. Also reset * the co->co_found field. */ co = TAILQ_FIRST(&sc->aac_container_tqh); while (co != NULL) { if (co->co_found == 0) { mtx_unlock(&sc->aac_io_lock); mtx_lock(&Giant); device_delete_child(sc->aac_dev, co->co_disk); mtx_unlock(&Giant); mtx_lock(&sc->aac_io_lock); co_next = TAILQ_NEXT(co, co_link); mtx_lock(&sc->aac_container_lock); TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); mtx_unlock(&sc->aac_container_lock); free(co, M_AACBUF); co = co_next; } else { co->co_found = 0; co = TAILQ_NEXT(co, co_link); } } /* Attach the newly created containers */ if (added) { mtx_unlock(&sc->aac_io_lock); mtx_lock(&Giant); bus_generic_attach(sc->aac_dev); mtx_unlock(&Giant); mtx_lock(&sc->aac_io_lock); } break; case AifEnEnclosureManagement: switch (aif->data.EN.data.EEE.eventType) { case AIF_EM_DRIVE_INSERTION: case AIF_EM_DRIVE_REMOVAL: channel = aif->data.EN.data.EEE.unitID; if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, (channel >> 24) & 0xF, (channel & 0xFFFF)); break; } break; case AifEnAddJBOD: case AifEnDeleteJBOD: channel = aif->data.EN.data.ECE.container; if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, (channel >> 24) & 0xF, AAC_CAM_TARGET_WILDCARD); break; default: break; } default: break; } /* Copy the AIF data to the AIF queue for ioctl retrieval */ mtx_lock(&sc->aac_aifq_lock); current = sc->aifq_idx; next = (current + 1) % AAC_AIFQ_LENGTH; if (next == 0) sc->aifq_filled = 1; bcopy(fib, &sc->aac_aifq[current], sizeof(struct aac_fib)); /* modify AIF contexts */ if (sc->aifq_filled) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (next == ctx->ctx_idx) ctx->ctx_wrap = 1; else if (current == ctx->ctx_idx && ctx->ctx_wrap) ctx->ctx_idx = next; } } sc->aifq_idx = next; /* On the off chance that someone is sleeping for an aif... */ if (sc->aac_state & AAC_STATE_AIF_SLEEPER) wakeup(sc->aac_aifq); /* Wakeup any poll()ers */ selwakeuppri(&sc->rcv_select, PRIBIO); mtx_unlock(&sc->aac_aifq_lock); } /* * Return the Revision of the driver to userspace and check to see if the * userspace app is possibly compatible. This is extremely bogus since * our driver doesn't follow Adaptec's versioning system. Cheat by just * returning what the card reported. */ static int aac_rev_check(struct aac_softc *sc, caddr_t udata) { struct aac_rev_check rev_check; struct aac_rev_check_resp rev_check_resp; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Copyin the revision struct from userspace */ if ((error = copyin(udata, (caddr_t)&rev_check, sizeof(struct aac_rev_check))) != 0) { return error; } fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "Userland revision= %d\n", rev_check.callingRevision.buildNumber); /* * Doctor up the response struct. */ rev_check_resp.possiblyCompatible = 1; rev_check_resp.adapterSWRevision.external.comp.major = AAC_DRIVER_MAJOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.minor = AAC_DRIVER_MINOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.type = AAC_DRIVER_TYPE; rev_check_resp.adapterSWRevision.external.comp.dash = AAC_DRIVER_BUGFIX_LEVEL; rev_check_resp.adapterSWRevision.buildNumber = AAC_DRIVER_BUILD; return(copyout((caddr_t)&rev_check_resp, udata, sizeof(struct aac_rev_check_resp))); } /* * Pass the fib context to the caller */ static int aac_open_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *fibctx, *ctx; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); fibctx = malloc(sizeof(struct aac_fib_context), M_AACBUF, M_NOWAIT|M_ZERO); if (fibctx == NULL) return (ENOMEM); mtx_lock(&sc->aac_aifq_lock); /* all elements are already 0, add to queue */ if (sc->fibctx == NULL) sc->fibctx = fibctx; else { for (ctx = sc->fibctx; ctx->next; ctx = ctx->next) ; ctx->next = fibctx; fibctx->prev = ctx; } /* evaluate unique value */ fibctx->unique = (*(u_int32_t *)&fibctx & 0xffffffff); ctx = sc->fibctx; while (ctx != fibctx) { if (ctx->unique == fibctx->unique) { fibctx->unique++; ctx = sc->fibctx; } else { ctx = ctx->next; } } mtx_unlock(&sc->aac_aifq_lock); error = copyout(&fibctx->unique, (void *)arg, sizeof(u_int32_t)); if (error) aac_close_aif(sc, (caddr_t)ctx); return error; } /* * Close the caller's fib context */ static int aac_close_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *ctx; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_aifq_lock); for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->unique == *(uint32_t *)&arg) { if (ctx == sc->fibctx) sc->fibctx = NULL; else { ctx->prev->next = ctx->next; if (ctx->next) ctx->next->prev = ctx->prev; } break; } } mtx_unlock(&sc->aac_aifq_lock); if (ctx) free(ctx, M_AACBUF); return 0; } /* * Pass the caller the next AIF in their queue */ static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg) { struct get_adapter_fib_ioctl agf; struct aac_fib_context *ctx; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((error = copyin(arg, &agf, sizeof(agf))) == 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (agf.AdapterFibContext == ctx->unique) break; } if (!ctx) return (EFAULT); error = aac_return_aif(sc, ctx, agf.AifFib); if (error == EAGAIN && agf.Wait) { fwprintf(sc, HBA_FLAGS_DBG_AIF_B, "aac_getnext_aif(): waiting for AIF"); sc->aac_state |= AAC_STATE_AIF_SLEEPER; while (error == EAGAIN) { error = tsleep(sc->aac_aifq, PRIBIO | PCATCH, "aacaif", 0); if (error == 0) error = aac_return_aif(sc, ctx, agf.AifFib); } sc->aac_state &= ~AAC_STATE_AIF_SLEEPER; } } return(error); } /* * Hand the next AIF off the top of the queue out to userspace. */ static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr) { int current, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_aifq_lock); current = ctx->ctx_idx; if (current == sc->aifq_idx && !ctx->ctx_wrap) { /* empty */ mtx_unlock(&sc->aac_aifq_lock); return (EAGAIN); } error = copyout(&sc->aac_aifq[current], (void *)uptr, sizeof(struct aac_fib)); if (error) device_printf(sc->aac_dev, "aac_return_aif: copyout returned %d\n", error); else { ctx->ctx_wrap = 0; ctx->ctx_idx = (current + 1) % AAC_AIFQ_LENGTH; } mtx_unlock(&sc->aac_aifq_lock); return(error); } static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr) { struct aac_pci_info { u_int32_t bus; u_int32_t slot; } pciinf; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); pciinf.bus = pci_get_bus(sc->aac_dev); pciinf.slot = pci_get_slot(sc->aac_dev); error = copyout((caddr_t)&pciinf, uptr, sizeof(struct aac_pci_info)); return (error); } static int aac_supported_features(struct aac_softc *sc, caddr_t uptr) { struct aac_features f; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((error = copyin(uptr, &f, sizeof (f))) != 0) return (error); /* * When the management driver receives FSACTL_GET_FEATURES ioctl with * ALL zero in the featuresState, the driver will return the current * state of all the supported features, the data field will not be * valid. * When the management driver receives FSACTL_GET_FEATURES ioctl with * a specific bit set in the featuresState, the driver will return the * current state of this specific feature and whatever data that are * associated with the feature in the data field or perform whatever * action needed indicates in the data field. */ if (f.feat.fValue == 0) { f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; /* TODO: In the future, add other features state here as well */ } else { if (f.feat.fBits.largeLBA) f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; /* TODO: Add other features state and data in the future */ } error = copyout(&f, uptr, sizeof (f)); return (error); } /* * Give the userland some information about the container. The AAC arch * expects the driver to be a SCSI passthrough type driver, so it expects * the containers to have b:t:l numbers. Fake it. */ static int aac_query_disk(struct aac_softc *sc, caddr_t uptr) { struct aac_query_disk query_disk; struct aac_container *co; struct aac_disk *disk; int error, id; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); disk = NULL; error = copyin(uptr, (caddr_t)&query_disk, sizeof(struct aac_query_disk)); if (error) return (error); id = query_disk.ContainerNumber; if (id == -1) return (EINVAL); mtx_lock(&sc->aac_container_lock); TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == id) break; } if (co == NULL) { query_disk.Valid = 0; query_disk.Locked = 0; query_disk.Deleted = 1; /* XXX is this right? */ } else { disk = device_get_softc(co->co_disk); query_disk.Valid = 1; query_disk.Locked = (disk->ad_flags & AAC_DISK_OPEN) ? 1 : 0; query_disk.Deleted = 0; query_disk.Bus = device_get_unit(sc->aac_dev); query_disk.Target = disk->unit; query_disk.Lun = 0; query_disk.UnMapped = 0; sprintf(&query_disk.diskDeviceName[0], "%s%d", disk->ad_disk->d_name, disk->ad_disk->d_unit); } mtx_unlock(&sc->aac_container_lock); error = copyout((caddr_t)&query_disk, uptr, sizeof(struct aac_query_disk)); return (error); } static void aac_get_bus_info(struct aac_softc *sc) { struct aac_fib *fib; struct aac_ctcfg *c_cmd; struct aac_ctcfg_resp *c_resp; struct aac_vmioctl *vmi; struct aac_vmi_businf_resp *vmi_resp; struct aac_getbusinf businfo; struct aac_sim *caminf; device_t child; int i, found, error; mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); c_cmd = (struct aac_ctcfg *)&fib->data[0]; bzero(c_cmd, sizeof(struct aac_ctcfg)); c_cmd->Command = VM_ContainerConfig; c_cmd->cmd = CT_GET_SCSI_METHOD; c_cmd->param = 0; error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_ctcfg)); if (error) { device_printf(sc->aac_dev, "Error %d sending " "VM_ContainerConfig command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } c_resp = (struct aac_ctcfg_resp *)&fib->data[0]; if (c_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_ContainerConfig returned 0x%x\n", c_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } sc->scsi_method_id = c_resp->param; vmi = (struct aac_vmioctl *)&fib->data[0]; bzero(vmi, sizeof(struct aac_vmioctl)); vmi->Command = VM_Ioctl; vmi->ObjType = FT_DRIVE; vmi->MethId = sc->scsi_method_id; vmi->ObjId = 0; vmi->IoctlCmd = GetBusInfo; error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_vmi_businf_resp)); if (error) { device_printf(sc->aac_dev, "Error %d sending VMIoctl command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } vmi_resp = (struct aac_vmi_businf_resp *)&fib->data[0]; if (vmi_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_Ioctl returned %d\n", vmi_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } bcopy(&vmi_resp->BusInf, &businfo, sizeof(struct aac_getbusinf)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); found = 0; for (i = 0; i < businfo.BusCount; i++) { if (businfo.BusValid[i] != AAC_BUS_VALID) continue; caminf = (struct aac_sim *)malloc( sizeof(struct aac_sim), M_AACBUF, M_NOWAIT | M_ZERO); if (caminf == NULL) { device_printf(sc->aac_dev, "No memory to add passthrough bus %d\n", i); break; } child = device_add_child(sc->aac_dev, "aacp", -1); if (child == NULL) { device_printf(sc->aac_dev, "device_add_child failed for passthrough bus %d\n", i); free(caminf, M_AACBUF); break; } caminf->TargetsPerBus = businfo.TargetsPerBus; caminf->BusNumber = i; caminf->InitiatorBusId = businfo.InitiatorBusId[i]; caminf->aac_sc = sc; caminf->sim_dev = child; device_set_ivars(child, caminf); device_set_desc(child, "SCSI Passthrough Bus"); TAILQ_INSERT_TAIL(&sc->aac_sim_tqh, caminf, sim_link); found = 1; } if (found) bus_generic_attach(sc->aac_dev); } Index: head/sys/dev/aacraid/aacraid.c =================================================================== --- head/sys/dev/aacraid/aacraid.c (revision 320527) +++ head/sys/dev/aacraid/aacraid.c (revision 320528) @@ -1,3863 +1,3862 @@ /*- * Copyright (c) 2000 Michael Smith * Copyright (c) 2001 Scott Long * Copyright (c) 2000 BSDi * Copyright (c) 2001-2010 Adaptec, Inc. * Copyright (c) 2010-2012 PMC-Sierra, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Adaptec by PMC Series 6,7,8,... families of RAID controllers */ #define AAC_DRIVERNAME "aacraid" #include "opt_aacraid.h" /* #include */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifndef FILTER_HANDLED #define FILTER_HANDLED 0x02 #endif static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f, u_int32_t uid); static void aac_get_bus_info(struct aac_softc *sc); static void aac_container_bus(struct aac_softc *sc); static void aac_daemon(void *arg); static int aac_convert_sgraw2(struct aac_softc *sc, struct aac_raw_io2 *raw, int pages, int nseg, int nseg_new); /* Command Processing */ static void aac_timeout(struct aac_softc *sc); static void aac_command_thread(struct aac_softc *sc); static int aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, struct aac_fib *fib, u_int16_t datasize); /* Command Buffer Management */ static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_alloc_commands(struct aac_softc *sc); static void aac_free_commands(struct aac_softc *sc); static void aac_unmap_command(struct aac_command *cm); /* Hardware Interface */ static int aac_alloc(struct aac_softc *sc); static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error); static int aac_check_firmware(struct aac_softc *sc); static void aac_define_int_mode(struct aac_softc *sc); static int aac_init(struct aac_softc *sc); static int aac_find_pci_capability(struct aac_softc *sc, int cap); static int aac_setup_intr(struct aac_softc *sc); static int aac_check_config(struct aac_softc *sc); /* PMC SRC interface */ static int aac_src_get_fwstatus(struct aac_softc *sc); static void aac_src_qnotify(struct aac_softc *sc, int qbit); static int aac_src_get_istatus(struct aac_softc *sc); static void aac_src_clear_istatus(struct aac_softc *sc, int mask); static void aac_src_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_src_get_mailbox(struct aac_softc *sc, int mb); static void aac_src_access_devreg(struct aac_softc *sc, int mode); static int aac_src_send_command(struct aac_softc *sc, struct aac_command *cm); static int aac_src_get_outb_queue(struct aac_softc *sc); static void aac_src_set_outb_queue(struct aac_softc *sc, int index); struct aac_interface aacraid_src_interface = { aac_src_get_fwstatus, aac_src_qnotify, aac_src_get_istatus, aac_src_clear_istatus, aac_src_set_mailbox, aac_src_get_mailbox, aac_src_access_devreg, aac_src_send_command, aac_src_get_outb_queue, aac_src_set_outb_queue }; /* PMC SRCv interface */ static void aac_srcv_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3); static int aac_srcv_get_mailbox(struct aac_softc *sc, int mb); struct aac_interface aacraid_srcv_interface = { aac_src_get_fwstatus, aac_src_qnotify, aac_src_get_istatus, aac_src_clear_istatus, aac_srcv_set_mailbox, aac_srcv_get_mailbox, aac_src_access_devreg, aac_src_send_command, aac_src_get_outb_queue, aac_src_set_outb_queue }; /* Debugging and Diagnostics */ static struct aac_code_lookup aac_cpu_variant[] = { {"i960JX", CPUI960_JX}, {"i960CX", CPUI960_CX}, {"i960HX", CPUI960_HX}, {"i960RX", CPUI960_RX}, {"i960 80303", CPUI960_80303}, {"StrongARM SA110", CPUARM_SA110}, {"PPC603e", CPUPPC_603e}, {"XScale 80321", CPU_XSCALE_80321}, {"MIPS 4KC", CPU_MIPS_4KC}, {"MIPS 5KC", CPU_MIPS_5KC}, {"Unknown StrongARM", CPUARM_xxx}, {"Unknown PowerPC", CPUPPC_xxx}, {NULL, 0}, {"Unknown processor", 0} }; static struct aac_code_lookup aac_battery_platform[] = { {"required battery present", PLATFORM_BAT_REQ_PRESENT}, {"REQUIRED BATTERY NOT PRESENT", PLATFORM_BAT_REQ_NOTPRESENT}, {"optional battery present", PLATFORM_BAT_OPT_PRESENT}, {"optional battery not installed", PLATFORM_BAT_OPT_NOTPRESENT}, {"no battery support", PLATFORM_BAT_NOT_SUPPORTED}, {NULL, 0}, {"unknown battery platform", 0} }; static void aac_describe_controller(struct aac_softc *sc); static char *aac_describe_code(struct aac_code_lookup *table, u_int32_t code); /* Management Interface */ static d_open_t aac_open; static d_ioctl_t aac_ioctl; static d_poll_t aac_poll; #if __FreeBSD_version >= 702000 static void aac_cdevpriv_dtor(void *arg); #else static d_close_t aac_close; #endif static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib); static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg); static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib); static void aac_request_aif(struct aac_softc *sc); static int aac_rev_check(struct aac_softc *sc, caddr_t udata); static int aac_open_aif(struct aac_softc *sc, caddr_t arg); static int aac_close_aif(struct aac_softc *sc, caddr_t arg); static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg); static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr); static int aac_query_disk(struct aac_softc *sc, caddr_t uptr); static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr); static int aac_supported_features(struct aac_softc *sc, caddr_t uptr); static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg); static int aac_reset_adapter(struct aac_softc *sc); static int aac_get_container_info(struct aac_softc *sc, struct aac_fib *fib, int cid, struct aac_mntinforesp *mir, u_int32_t *uid); static u_int32_t aac_check_adapter_health(struct aac_softc *sc, u_int8_t *bled); static struct cdevsw aacraid_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = aac_open, #if __FreeBSD_version < 702000 .d_close = aac_close, #endif .d_ioctl = aac_ioctl, .d_poll = aac_poll, .d_name = "aacraid", }; MALLOC_DEFINE(M_AACRAIDBUF, "aacraid_buf", "Buffers for the AACRAID driver"); /* sysctl node */ SYSCTL_NODE(_hw, OID_AUTO, aacraid, CTLFLAG_RD, 0, "AACRAID driver parameters"); /* * Device Interface */ /* * Initialize the controller and softc */ int aacraid_attach(struct aac_softc *sc) { int error, unit; struct aac_fib *fib; struct aac_mntinforesp mir; int count = 0, i = 0; u_int32_t uid; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->hint_flags = device_get_flags(sc->aac_dev); /* * Initialize per-controller queues. */ aac_initq_free(sc); aac_initq_ready(sc); aac_initq_busy(sc); /* mark controller as suspended until we get ourselves organised */ sc->aac_state |= AAC_STATE_SUSPEND; /* * Check that the firmware on the card is supported. */ sc->msi_enabled = FALSE; if ((error = aac_check_firmware(sc)) != 0) return(error); /* * Initialize locks */ mtx_init(&sc->aac_io_lock, "AACRAID I/O lock", NULL, MTX_DEF); TAILQ_INIT(&sc->aac_container_tqh); TAILQ_INIT(&sc->aac_ev_cmfree); #if __FreeBSD_version >= 800000 /* Initialize the clock daemon callout. */ callout_init_mtx(&sc->aac_daemontime, &sc->aac_io_lock, 0); #endif /* * Initialize the adapter. */ if ((error = aac_alloc(sc)) != 0) return(error); if (!(sc->flags & AAC_FLAGS_SYNC_MODE)) { aac_define_int_mode(sc); if ((error = aac_init(sc)) != 0) return(error); } /* * Allocate and connect our interrupt. */ if ((error = aac_setup_intr(sc)) != 0) return(error); /* * Print a little information about the controller. */ aac_describe_controller(sc); /* * Make the control device. */ unit = device_get_unit(sc->aac_dev); sc->aac_dev_t = make_dev(&aacraid_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "aacraid%d", unit); sc->aac_dev_t->si_drv1 = sc; /* Create the AIF thread */ if (aac_kthread_create((void(*)(void *))aac_command_thread, sc, &sc->aifthread, 0, 0, "aacraid%daif", unit)) panic("Could not create AIF thread"); /* Register the shutdown method to only be called post-dump */ if ((sc->eh = EVENTHANDLER_REGISTER(shutdown_final, aacraid_shutdown, sc->aac_dev, SHUTDOWN_PRI_DEFAULT)) == NULL) device_printf(sc->aac_dev, "shutdown event registration failed\n"); /* Find containers */ mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); /* loop over possible containers */ do { if ((aac_get_container_info(sc, fib, i, &mir, &uid)) != 0) continue; if (i == 0) count = mir.MntRespCount; aac_add_container(sc, &mir, 0, uid); i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); /* Register with CAM for the containers */ TAILQ_INIT(&sc->aac_sim_tqh); aac_container_bus(sc); /* Register with CAM for the non-DASD devices */ if ((sc->flags & AAC_FLAGS_ENABLE_CAM) != 0) aac_get_bus_info(sc); /* poke the bus to actually attach the child devices */ bus_generic_attach(sc->aac_dev); /* mark the controller up */ sc->aac_state &= ~AAC_STATE_SUSPEND; /* enable interrupts now */ AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); #if __FreeBSD_version >= 800000 mtx_lock(&sc->aac_io_lock); callout_reset(&sc->aac_daemontime, 60 * hz, aac_daemon, sc); mtx_unlock(&sc->aac_io_lock); #else { struct timeval tv; tv.tv_sec = 60; tv.tv_usec = 0; sc->timeout_id = timeout(aac_daemon, (void *)sc, tvtohz(&tv)); } #endif return(0); } static void aac_daemon(void *arg) { struct aac_softc *sc; struct timeval tv; struct aac_command *cm; struct aac_fib *fib; sc = arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); #if __FreeBSD_version >= 800000 mtx_assert(&sc->aac_io_lock, MA_OWNED); if (callout_pending(&sc->aac_daemontime) || callout_active(&sc->aac_daemontime) == 0) return; #else mtx_lock(&sc->aac_io_lock); #endif getmicrotime(&tv); if (!aacraid_alloc_command(sc, &cm)) { fib = cm->cm_fib; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; cm->cm_flags |= AAC_CMD_WAIT; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(u_int32_t); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = SendHostTime; *(uint32_t *)fib->data = tv.tv_sec; aacraid_map_command_sg(cm, NULL, 0, 0); aacraid_release_command(cm); } #if __FreeBSD_version >= 800000 callout_schedule(&sc->aac_daemontime, 30 * 60 * hz); #else mtx_unlock(&sc->aac_io_lock); tv.tv_sec = 30 * 60; tv.tv_usec = 0; sc->timeout_id = timeout(aac_daemon, (void *)sc, tvtohz(&tv)); #endif } void aacraid_add_event(struct aac_softc *sc, struct aac_event *event) { switch (event->ev_type & AAC_EVENT_MASK) { case AAC_EVENT_CMFREE: TAILQ_INSERT_TAIL(&sc->aac_ev_cmfree, event, ev_links); break; default: device_printf(sc->aac_dev, "aac_add event: unknown event %d\n", event->ev_type); break; } return; } /* * Request information of container #cid */ static int aac_get_container_info(struct aac_softc *sc, struct aac_fib *sync_fib, int cid, struct aac_mntinforesp *mir, u_int32_t *uid) { struct aac_command *cm; struct aac_fib *fib; struct aac_mntinfo *mi; struct aac_cnt_config *ccfg; int rval; if (sync_fib == NULL) { if (aacraid_alloc_command(sc, &cm)) { device_printf(sc->aac_dev, "Warning, no free command available\n"); return (-1); } fib = cm->cm_fib; } else { fib = sync_fib; } mi = (struct aac_mntinfo *)&fib->data[0]; /* 4KB support?, 64-bit LBA? */ if (sc->aac_support_opt2 & AAC_SUPPORTED_VARIABLE_BLOCK_SIZE) mi->Command = VM_NameServeAllBlk; else if (sc->flags & AAC_FLAGS_LBA_64BIT) mi->Command = VM_NameServe64; else mi->Command = VM_NameServe; mi->MntType = FT_FILESYS; mi->MntCount = cid; if (sync_fib) { if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_mntinfo))) { device_printf(sc->aac_dev, "Error probing container %d\n", cid); return (-1); } } else { cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_mntinfo); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; if (aacraid_wait_command(cm) != 0) { device_printf(sc->aac_dev, "Error probing container %d\n", cid); aacraid_release_command(cm); return (-1); } } bcopy(&fib->data[0], mir, sizeof(struct aac_mntinforesp)); /* UID */ *uid = cid; if (mir->MntTable[0].VolType != CT_NONE && !(mir->MntTable[0].ContentState & AAC_FSCS_HIDDEN)) { if (!(sc->aac_support_opt2 & AAC_SUPPORTED_VARIABLE_BLOCK_SIZE)) { mir->MntTable[0].ObjExtension.BlockDevice.BlockSize = 0x200; mir->MntTable[0].ObjExtension.BlockDevice.bdLgclPhysMap = 0; } ccfg = (struct aac_cnt_config *)&fib->data[0]; bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_CID_TO_32BITS_UID; ccfg->CTCommand.param[0] = cid; if (sync_fib) { rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_cnt_config)); if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK && mir->MntTable[0].VolType != CT_PASSTHRU) *uid = ccfg->CTCommand.param[1]; } else { fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_cnt_config); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC | AAC_FIBSTATE_FAST_RESPONSE; fib->Header.Command = ContainerCommand; rval = aacraid_wait_command(cm); if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK && mir->MntTable[0].VolType != CT_PASSTHRU) *uid = ccfg->CTCommand.param[1]; aacraid_release_command(cm); } } return (0); } /* * Create a device to represent a new container */ static void aac_add_container(struct aac_softc *sc, struct aac_mntinforesp *mir, int f, u_int32_t uid) { struct aac_container *co; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Check container volume type for validity. Note that many of * the possible types may never show up. */ if ((mir->Status == ST_OK) && (mir->MntTable[0].VolType != CT_NONE)) { co = (struct aac_container *)malloc(sizeof *co, M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (co == NULL) { panic("Out of memory?!"); } co->co_found = f; bcopy(&mir->MntTable[0], &co->co_mntobj, sizeof(struct aac_mntobj)); co->co_uid = uid; TAILQ_INSERT_TAIL(&sc->aac_container_tqh, co, co_link); } } /* * Allocate resources associated with (sc) */ static int aac_alloc(struct aac_softc *sc) { bus_size_t maxsize; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Create DMA tag for mapping buffers into controller-addressable space. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_SG_64BIT) ? BUS_SPACE_MAXADDR : BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->aac_max_sectors << 9, /* maxsize */ sc->aac_sg_tablesize, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->aac_io_lock, /* lockfuncarg */ &sc->aac_buffer_dmat)) { device_printf(sc->aac_dev, "can't allocate buffer DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for mapping FIBs into controller-addressable space.. */ if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize = sc->aac_max_fibs_alloc * (sc->aac_max_fib_size + sizeof(struct aac_fib_xporthdr) + 31); else maxsize = sc->aac_max_fibs_alloc * (sc->aac_max_fib_size + 31); if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ maxsize, /* maxsize */ 1, /* nsegments */ maxsize, /* maxsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_fib_dmat)) { device_printf(sc->aac_dev, "can't allocate FIB DMA tag\n"); return (ENOMEM); } /* * Create DMA tag for the common structure and allocate it. */ maxsize = sizeof(struct aac_common); maxsize += sc->aac_max_fibs * sizeof(u_int32_t); if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_4GB_WINDOW) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ maxsize, /* maxsize */ 1, /* nsegments */ maxsize, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &sc->aac_common_dmat)) { device_printf(sc->aac_dev, "can't allocate common structure DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->aac_common_dmat, (void **)&sc->aac_common, BUS_DMA_NOWAIT, &sc->aac_common_dmamap)) { device_printf(sc->aac_dev, "can't allocate common structure\n"); return (ENOMEM); } (void)bus_dmamap_load(sc->aac_common_dmat, sc->aac_common_dmamap, sc->aac_common, maxsize, aac_common_map, sc, 0); bzero(sc->aac_common, maxsize); /* Allocate some FIBs and associated command structs */ TAILQ_INIT(&sc->aac_fibmap_tqh); sc->aac_commands = malloc(sc->aac_max_fibs * sizeof(struct aac_command), M_AACRAIDBUF, M_WAITOK|M_ZERO); mtx_lock(&sc->aac_io_lock); while (sc->total_fibs < sc->aac_max_fibs) { if (aac_alloc_commands(sc) != 0) break; } mtx_unlock(&sc->aac_io_lock); if (sc->total_fibs == 0) return (ENOMEM); return (0); } /* * Free all of the resources associated with (sc) * * Should not be called if the controller is active. */ void aacraid_free(struct aac_softc *sc) { int i; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* remove the control device */ if (sc->aac_dev_t != NULL) destroy_dev(sc->aac_dev_t); /* throw away any FIB buffers, discard the FIB DMA tag */ aac_free_commands(sc); if (sc->aac_fib_dmat) bus_dma_tag_destroy(sc->aac_fib_dmat); free(sc->aac_commands, M_AACRAIDBUF); /* destroy the common area */ if (sc->aac_common) { bus_dmamap_unload(sc->aac_common_dmat, sc->aac_common_dmamap); bus_dmamem_free(sc->aac_common_dmat, sc->aac_common, sc->aac_common_dmamap); } if (sc->aac_common_dmat) bus_dma_tag_destroy(sc->aac_common_dmat); /* disconnect the interrupt handler */ for (i = 0; i < AAC_MAX_MSIX; ++i) { if (sc->aac_intr[i]) bus_teardown_intr(sc->aac_dev, sc->aac_irq[i], sc->aac_intr[i]); if (sc->aac_irq[i]) bus_release_resource(sc->aac_dev, SYS_RES_IRQ, sc->aac_irq_rid[i], sc->aac_irq[i]); else break; } if (sc->msi_enabled) pci_release_msi(sc->aac_dev); /* destroy data-transfer DMA tag */ if (sc->aac_buffer_dmat) bus_dma_tag_destroy(sc->aac_buffer_dmat); /* destroy the parent DMA tag */ if (sc->aac_parent_dmat) bus_dma_tag_destroy(sc->aac_parent_dmat); /* release the register window mapping */ if (sc->aac_regs_res0 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid0, sc->aac_regs_res0); if (sc->aac_regs_res1 != NULL) bus_release_resource(sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid1, sc->aac_regs_res1); } /* * Disconnect from the controller completely, in preparation for unload. */ int aacraid_detach(device_t dev) { struct aac_softc *sc; struct aac_container *co; struct aac_sim *sim; int error; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); #if __FreeBSD_version >= 800000 callout_drain(&sc->aac_daemontime); #else untimeout(aac_daemon, (void *)sc, sc->timeout_id); #endif /* Remove the child containers */ while ((co = TAILQ_FIRST(&sc->aac_container_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); free(co, M_AACRAIDBUF); } /* Remove the CAM SIMs */ while ((sim = TAILQ_FIRST(&sc->aac_sim_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_sim_tqh, sim, sim_link); error = device_delete_child(dev, sim->sim_dev); if (error) return (error); free(sim, M_AACRAIDBUF); } if (sc->aifflags & AAC_AIFFLAGS_RUNNING) { sc->aifflags |= AAC_AIFFLAGS_EXIT; wakeup(sc->aifthread); tsleep(sc->aac_dev, PUSER | PCATCH, "aac_dch", 30 * hz); } if (sc->aifflags & AAC_AIFFLAGS_RUNNING) panic("Cannot shutdown AIF thread"); if ((error = aacraid_shutdown(dev))) return(error); EVENTHANDLER_DEREGISTER(shutdown_final, sc->eh); aacraid_free(sc); mtx_destroy(&sc->aac_io_lock); return(0); } /* * Bring the controller down to a dormant state and detach all child devices. * * This function is called before detach or system shutdown. * * Note that we can assume that the bioq on the controller is empty, as we won't * allow shutdown if any device is open. */ int aacraid_shutdown(device_t dev) { struct aac_softc *sc; struct aac_fib *fib; struct aac_close_command *cc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; /* * Send a Container shutdown followed by a HostShutdown FIB to the * controller to convince it that we don't want to talk to it anymore. * We've been closed and all I/O completed already */ device_printf(sc->aac_dev, "shutting down controller..."); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); cc = (struct aac_close_command *)&fib->data[0]; bzero(cc, sizeof(struct aac_close_command)); cc->Command = VM_CloseAll; cc->ContainerId = 0xfffffffe; if (aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_close_command))) printf("FAILED.\n"); else printf("done\n"); AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return(0); } /* * Bring the controller to a quiescent state, ready for system suspend. */ int aacraid_suspend(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state |= AAC_STATE_SUSPEND; AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); return(0); } /* * Bring the controller back to a state ready for operation. */ int aacraid_resume(device_t dev) { struct aac_softc *sc; sc = device_get_softc(dev); fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_state &= ~AAC_STATE_SUSPEND; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); return(0); } /* * Interrupt handler for NEW_COMM_TYPE1, NEW_COMM_TYPE2, NEW_COMM_TYPE34 interface. */ void aacraid_new_intr_type1(void *arg) { struct aac_msix_ctx *ctx; struct aac_softc *sc; int vector_no; struct aac_command *cm; struct aac_fib *fib; u_int32_t bellbits, bellbits_shifted, index, handle; int isFastResponse, isAif, noMoreAif, mode; ctx = (struct aac_msix_ctx *)arg; sc = ctx->sc; vector_no = ctx->vector_no; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); if (sc->msi_enabled) { mode = AAC_INT_MODE_MSI; if (vector_no == 0) { bellbits = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_MSI); if (bellbits & 0x40000) mode |= AAC_INT_MODE_AIF; else if (bellbits & 0x1000) mode |= AAC_INT_MODE_SYNC; } } else { mode = AAC_INT_MODE_INTX; bellbits = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R); if (bellbits & AAC_DB_RESPONSE_SENT_NS) { bellbits = AAC_DB_RESPONSE_SENT_NS; AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, bellbits); } else { bellbits_shifted = (bellbits >> AAC_SRC_ODR_SHIFT); AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, bellbits); if (bellbits_shifted & AAC_DB_AIF_PENDING) mode |= AAC_INT_MODE_AIF; else if (bellbits_shifted & AAC_DB_SYNC_COMMAND) mode |= AAC_INT_MODE_SYNC; } /* ODR readback, Prep #238630 */ AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R); } if (mode & AAC_INT_MODE_SYNC) { if (sc->aac_sync_cm) { cm = sc->aac_sync_cm; cm->cm_flags |= AAC_CMD_COMPLETED; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this command */ wakeup(cm); } sc->flags &= ~AAC_QUEUE_FRZN; sc->aac_sync_cm = NULL; } mode = 0; } if (mode & AAC_INT_MODE_AIF) { if (mode & AAC_INT_MODE_INTX) { aac_request_aif(sc); mode = 0; } } if (mode) { /* handle async. status */ index = sc->aac_host_rrq_idx[vector_no]; for (;;) { isFastResponse = isAif = noMoreAif = 0; /* remove toggle bit (31) */ handle = (sc->aac_common->ac_host_rrq[index] & 0x7fffffff); /* check fast response bit (30) */ if (handle & 0x40000000) isFastResponse = 1; /* check AIF bit (23) */ else if (handle & 0x00800000) isAif = TRUE; handle &= 0x0000ffff; if (handle == 0) break; cm = sc->aac_commands + (handle - 1); fib = cm->cm_fib; sc->aac_rrq_outstanding[vector_no]--; if (isAif) { noMoreAif = (fib->Header.XferState & AAC_FIBSTATE_NOMOREAIF) ? 1:0; if (!noMoreAif) aac_handle_aif(sc, fib); aac_remove_busy(cm); aacraid_release_command(cm); } else { if (isFastResponse) { fib->Header.XferState |= AAC_FIBSTATE_DONEADAP; *((u_int32_t *)(fib->data)) = ST_OK; cm->cm_flags |= AAC_CMD_FASTRESP; } aac_remove_busy(cm); aac_unmap_command(cm); cm->cm_flags |= AAC_CMD_COMPLETED; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this command */ wakeup(cm); } sc->flags &= ~AAC_QUEUE_FRZN; } sc->aac_common->ac_host_rrq[index++] = 0; if (index == (vector_no + 1) * sc->aac_vector_cap) index = vector_no * sc->aac_vector_cap; sc->aac_host_rrq_idx[vector_no] = index; if ((isAif && !noMoreAif) || sc->aif_pending) aac_request_aif(sc); } } if (mode & AAC_INT_MODE_AIF) { aac_request_aif(sc); AAC_ACCESS_DEVREG(sc, AAC_CLEAR_AIF_BIT); mode = 0; } /* see if we can start some more I/O */ if ((sc->flags & AAC_QUEUE_FRZN) == 0) aacraid_startio(sc); mtx_unlock(&sc->aac_io_lock); } /* * Handle notification of one or more FIBs coming from the controller. */ static void aac_command_thread(struct aac_softc *sc) { int retval; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); sc->aifflags = AAC_AIFFLAGS_RUNNING; while ((sc->aifflags & AAC_AIFFLAGS_EXIT) == 0) { retval = 0; if ((sc->aifflags & AAC_AIFFLAGS_PENDING) == 0) retval = msleep(sc->aifthread, &sc->aac_io_lock, PRIBIO, "aacraid_aifthd", AAC_PERIODIC_INTERVAL * hz); /* * First see if any FIBs need to be allocated. This needs * to be called without the driver lock because contigmalloc * will grab Giant, and would result in an LOR. */ if ((sc->aifflags & AAC_AIFFLAGS_ALLOCFIBS) != 0) { aac_alloc_commands(sc); sc->aifflags &= ~AAC_AIFFLAGS_ALLOCFIBS; aacraid_startio(sc); } /* * While we're here, check to see if any commands are stuck. * This is pretty low-priority, so it's ok if it doesn't * always fire. */ if (retval == EWOULDBLOCK) aac_timeout(sc); /* Check the hardware printf message buffer */ if (sc->aac_common->ac_printf[0] != 0) aac_print_printf(sc); } sc->aifflags &= ~AAC_AIFFLAGS_RUNNING; mtx_unlock(&sc->aac_io_lock); wakeup(sc->aac_dev); aac_kthread_exit(0); } /* * Submit a command to the controller, return when it completes. * XXX This is very dangerous! If the card has gone out to lunch, we could * be stuck here forever. At the same time, signals are not caught * because there is a risk that a signal could wakeup the sleep before * the card has a chance to complete the command. Since there is no way * to cancel a command that is in progress, we can't protect against the * card completing a command late and spamming the command and data * memory. So, we are held hostage until the command completes. */ int aacraid_wait_command(struct aac_command *cm) { struct aac_softc *sc; int error; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* Put the command on the ready queue and get things going */ aac_enqueue_ready(cm); aacraid_startio(sc); error = msleep(cm, &sc->aac_io_lock, PRIBIO, "aacraid_wait", 0); return(error); } /* *Command Buffer Management */ /* * Allocate a command. */ int aacraid_alloc_command(struct aac_softc *sc, struct aac_command **cmp) { struct aac_command *cm; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((cm = aac_dequeue_free(sc)) == NULL) { if (sc->total_fibs < sc->aac_max_fibs) { sc->aifflags |= AAC_AIFFLAGS_ALLOCFIBS; wakeup(sc->aifthread); } return (EBUSY); } *cmp = cm; return(0); } /* * Release a command back to the freelist. */ void aacraid_release_command(struct aac_command *cm) { struct aac_event *event; struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* (re)initialize the command/FIB */ cm->cm_sgtable = NULL; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_ccb = NULL; cm->cm_passthr_dmat = 0; cm->cm_fib->Header.XferState = AAC_FIBSTATE_EMPTY; cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB; cm->cm_fib->Header.Unused = 0; cm->cm_fib->Header.SenderSize = cm->cm_sc->aac_max_fib_size; /* * These are duplicated in aac_start to cover the case where an * intermediate stage may have destroyed them. They're left * initialized here for debugging purposes only. */ cm->cm_fib->Header.u.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; cm->cm_fib->Header.Handle = 0; aac_enqueue_free(cm); /* * Dequeue all events so that there's no risk of events getting * stranded. */ while ((event = TAILQ_FIRST(&sc->aac_ev_cmfree)) != NULL) { TAILQ_REMOVE(&sc->aac_ev_cmfree, event, ev_links); event->ev_callback(sc, event, event->ev_arg); } } /* * Map helper for command/FIB allocation. */ static void aac_map_command_helper(void *arg, bus_dma_segment_t *segs, int nseg, int error) { uint64_t *fibphys; fibphys = (uint64_t *)arg; *fibphys = segs[0].ds_addr; } /* * Allocate and initialize commands/FIBs for this adapter. */ static int aac_alloc_commands(struct aac_softc *sc) { struct aac_command *cm; struct aac_fibmap *fm; uint64_t fibphys; int i, error; u_int32_t maxsize; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (sc->total_fibs + sc->aac_max_fibs_alloc > sc->aac_max_fibs) return (ENOMEM); fm = malloc(sizeof(struct aac_fibmap), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (fm == NULL) return (ENOMEM); mtx_unlock(&sc->aac_io_lock); /* allocate the FIBs in DMAable memory and load them */ if (bus_dmamem_alloc(sc->aac_fib_dmat, (void **)&fm->aac_fibs, BUS_DMA_NOWAIT, &fm->aac_fibmap)) { device_printf(sc->aac_dev, "Not enough contiguous memory available.\n"); free(fm, M_AACRAIDBUF); mtx_lock(&sc->aac_io_lock); return (ENOMEM); } maxsize = sc->aac_max_fib_size + 31; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize += sizeof(struct aac_fib_xporthdr); /* Ignore errors since this doesn't bounce */ (void)bus_dmamap_load(sc->aac_fib_dmat, fm->aac_fibmap, fm->aac_fibs, sc->aac_max_fibs_alloc * maxsize, aac_map_command_helper, &fibphys, 0); mtx_lock(&sc->aac_io_lock); /* initialize constant fields in the command structure */ bzero(fm->aac_fibs, sc->aac_max_fibs_alloc * maxsize); for (i = 0; i < sc->aac_max_fibs_alloc; i++) { cm = sc->aac_commands + sc->total_fibs; fm->aac_commands = cm; cm->cm_sc = sc; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)fm->aac_fibs + i * maxsize); cm->cm_fibphys = fibphys + i * maxsize; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) { u_int64_t fibphys_aligned; fibphys_aligned = (cm->cm_fibphys + sizeof(struct aac_fib_xporthdr) + 31) & ~31; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)cm->cm_fib + (fibphys_aligned - cm->cm_fibphys)); cm->cm_fibphys = fibphys_aligned; } else { u_int64_t fibphys_aligned; fibphys_aligned = (cm->cm_fibphys + 31) & ~31; cm->cm_fib = (struct aac_fib *) ((u_int8_t *)cm->cm_fib + (fibphys_aligned - cm->cm_fibphys)); cm->cm_fibphys = fibphys_aligned; } cm->cm_index = sc->total_fibs; if ((error = bus_dmamap_create(sc->aac_buffer_dmat, 0, &cm->cm_datamap)) != 0) break; if (sc->aac_max_fibs <= 1 || sc->aac_max_fibs - sc->total_fibs > 1) aacraid_release_command(cm); sc->total_fibs++; } if (i > 0) { TAILQ_INSERT_TAIL(&sc->aac_fibmap_tqh, fm, fm_link); fwprintf(sc, HBA_FLAGS_DBG_COMM_B, "total_fibs= %d\n", sc->total_fibs); return (0); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACRAIDBUF); return (ENOMEM); } /* * Free FIBs owned by this adapter. */ static void aac_free_commands(struct aac_softc *sc) { struct aac_fibmap *fm; struct aac_command *cm; int i; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); while ((fm = TAILQ_FIRST(&sc->aac_fibmap_tqh)) != NULL) { TAILQ_REMOVE(&sc->aac_fibmap_tqh, fm, fm_link); /* * We check against total_fibs to handle partially * allocated blocks. */ for (i = 0; i < sc->aac_max_fibs_alloc && sc->total_fibs--; i++) { cm = fm->aac_commands + i; bus_dmamap_destroy(sc->aac_buffer_dmat, cm->cm_datamap); } bus_dmamap_unload(sc->aac_fib_dmat, fm->aac_fibmap); bus_dmamem_free(sc->aac_fib_dmat, fm->aac_fibs, fm->aac_fibmap); free(fm, M_AACRAIDBUF); } } /* * Command-mapping helper function - populate this command's s/g table. */ void aacraid_map_command_sg(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; struct aac_command *cm; struct aac_fib *fib; int i; cm = (struct aac_command *)arg; sc = cm->cm_sc; fib = cm->cm_fib; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "nseg %d", nseg); mtx_assert(&sc->aac_io_lock, MA_OWNED); /* copy into the FIB */ if (cm->cm_sgtable != NULL) { if (fib->Header.Command == RawIo2) { struct aac_raw_io2 *raw; struct aac_sge_ieee1212 *sg; u_int32_t min_size = PAGE_SIZE, cur_size; int conformable = TRUE; raw = (struct aac_raw_io2 *)&fib->data[0]; sg = (struct aac_sge_ieee1212 *)cm->cm_sgtable; raw->sgeCnt = nseg; for (i = 0; i < nseg; i++) { cur_size = segs[i].ds_len; sg[i].addrHigh = 0; *(bus_addr_t *)&sg[i].addrLow = segs[i].ds_addr; sg[i].length = cur_size; sg[i].flags = 0; if (i == 0) { raw->sgeFirstSize = cur_size; } else if (i == 1) { raw->sgeNominalSize = cur_size; min_size = cur_size; } else if ((i+1) < nseg && cur_size != raw->sgeNominalSize) { conformable = FALSE; if (cur_size < min_size) min_size = cur_size; } } /* not conformable: evaluate required sg elements */ if (!conformable) { int j, err_found, nseg_new = nseg; for (i = min_size / PAGE_SIZE; i >= 1; --i) { err_found = FALSE; nseg_new = 2; for (j = 1; j < nseg - 1; ++j) { if (sg[j].length % (i*PAGE_SIZE)) { err_found = TRUE; break; } nseg_new += (sg[j].length / (i*PAGE_SIZE)); } if (!err_found) break; } if (i>0 && nseg_new<=sc->aac_sg_tablesize && !(sc->hint_flags & 4)) nseg = aac_convert_sgraw2(sc, raw, i, nseg, nseg_new); } else { raw->flags |= RIO2_SGL_CONFORMANT; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg * sizeof(struct aac_sge_ieee1212); } else if (fib->Header.Command == RawIo) { struct aac_sg_tableraw *sg; sg = (struct aac_sg_tableraw *)cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntryRaw[i].SgAddress = segs[i].ds_addr; sg->SgEntryRaw[i].SgByteCount = segs[i].ds_len; sg->SgEntryRaw[i].Next = 0; sg->SgEntryRaw[i].Prev = 0; sg->SgEntryRaw[i].Flags = 0; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entryraw); } else if ((cm->cm_sc->flags & AAC_FLAGS_SG_64BIT) == 0) { struct aac_sg_table *sg; sg = cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntry[i].SgAddress = segs[i].ds_addr; sg->SgEntry[i].SgByteCount = segs[i].ds_len; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry); } else { struct aac_sg_table64 *sg; sg = (struct aac_sg_table64 *)cm->cm_sgtable; sg->SgCount = nseg; for (i = 0; i < nseg; i++) { sg->SgEntry64[i].SgAddress = segs[i].ds_addr; sg->SgEntry64[i].SgByteCount = segs[i].ds_len; } /* update the FIB size for the s/g count */ fib->Header.Size += nseg*sizeof(struct aac_sg_entry64); } } /* Fix up the address values in the FIB. Use the command array index * instead of a pointer since these fields are only 32 bits. Shift * the SenderFibAddress over to make room for the fast response bit * and for the AIF bit */ cm->cm_fib->Header.SenderFibAddress = (cm->cm_index << 2); cm->cm_fib->Header.u.ReceiverFibAddress = (u_int32_t)cm->cm_fibphys; /* save a pointer to the command for speedy reverse-lookup */ cm->cm_fib->Header.Handle += cm->cm_index + 1; if (cm->cm_passthr_dmat == 0) { if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_PREWRITE); } cm->cm_flags |= AAC_CMD_MAPPED; if (sc->flags & AAC_FLAGS_SYNC_MODE) { u_int32_t wait = 0; aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, cm->cm_fibphys, 0, 0, 0, &wait, NULL); } else if (cm->cm_flags & AAC_CMD_WAIT) { aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, cm->cm_fibphys, 0, 0, 0, NULL, NULL); } else { int count = 10000000L; while (AAC_SEND_COMMAND(sc, cm) != 0) { if (--count == 0) { aac_unmap_command(cm); sc->flags |= AAC_QUEUE_FRZN; aac_requeue_ready(cm); } DELAY(5); /* wait 5 usec. */ } } } static int aac_convert_sgraw2(struct aac_softc *sc, struct aac_raw_io2 *raw, int pages, int nseg, int nseg_new) { struct aac_sge_ieee1212 *sge; int i, j, pos; u_int32_t addr_low; sge = malloc(nseg_new * sizeof(struct aac_sge_ieee1212), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (sge == NULL) return nseg; for (i = 1, pos = 1; i < nseg - 1; ++i) { for (j = 0; j < raw->sge[i].length / (pages*PAGE_SIZE); ++j) { addr_low = raw->sge[i].addrLow + j * pages * PAGE_SIZE; sge[pos].addrLow = addr_low; sge[pos].addrHigh = raw->sge[i].addrHigh; if (addr_low < raw->sge[i].addrLow) sge[pos].addrHigh++; sge[pos].length = pages * PAGE_SIZE; sge[pos].flags = 0; pos++; } } sge[pos] = raw->sge[nseg-1]; for (i = 1; i < nseg_new; ++i) raw->sge[i] = sge[i]; free(sge, M_AACRAIDBUF); raw->sgeCnt = nseg_new; raw->flags |= RIO2_SGL_CONFORMANT; raw->sgeNominalSize = pages * PAGE_SIZE; return nseg_new; } /* * Unmap a command from controller-visible space. */ static void aac_unmap_command(struct aac_command *cm) { struct aac_softc *sc; sc = cm->cm_sc; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (!(cm->cm_flags & AAC_CMD_MAPPED)) return; if (cm->cm_datalen != 0 && cm->cm_passthr_dmat == 0) { if (cm->cm_flags & AAC_CMD_DATAIN) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTREAD); if (cm->cm_flags & AAC_CMD_DATAOUT) bus_dmamap_sync(sc->aac_buffer_dmat, cm->cm_datamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->aac_buffer_dmat, cm->cm_datamap); } cm->cm_flags &= ~AAC_CMD_MAPPED; } /* * Hardware Interface */ /* * Initialize the adapter. */ static void aac_common_map(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct aac_softc *sc; sc = (struct aac_softc *)arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); sc->aac_common_busaddr = segs[0].ds_addr; } static int aac_check_firmware(struct aac_softc *sc) { u_int32_t code, major, minor, maxsize; u_int32_t options = 0, atu_size = 0, status, waitCount; time_t then; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* check if flash update is running */ if (AAC_GET_FWSTATUS(sc) & AAC_FLASH_UPD_PENDING) { then = time_uptime; do { code = AAC_GET_FWSTATUS(sc); if (time_uptime > (then + AAC_FWUPD_TIMEOUT)) { device_printf(sc->aac_dev, "FATAL: controller not coming ready, " "status %x\n", code); return(ENXIO); } } while (!(code & AAC_FLASH_UPD_SUCCESS) && !(code & AAC_FLASH_UPD_FAILED)); /* * Delay 10 seconds. Because right now FW is doing a soft reset, * do not read scratch pad register at this time */ waitCount = 10 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } /* * Wait for the adapter to come ready. */ then = time_uptime; do { code = AAC_GET_FWSTATUS(sc); if (time_uptime > (then + AAC_BOOT_TIMEOUT)) { device_printf(sc->aac_dev, "FATAL: controller not coming ready, " "status %x\n", code); return(ENXIO); } } while (!(code & AAC_UP_AND_RUNNING) || code == 0xffffffff); /* * Retrieve the firmware version numbers. Dell PERC2/QC cards with * firmware version 1.x are not compatible with this driver. */ if (sc->flags & AAC_FLAGS_PERC2QC) { if (aacraid_sync_command(sc, AAC_MONKER_GETKERNVER, 0, 0, 0, 0, NULL, NULL)) { device_printf(sc->aac_dev, "Error reading firmware version\n"); return (EIO); } /* These numbers are stored as ASCII! */ major = (AAC_GET_MAILBOX(sc, 1) & 0xff) - 0x30; minor = (AAC_GET_MAILBOX(sc, 2) & 0xff) - 0x30; if (major == 1) { device_printf(sc->aac_dev, "Firmware version %d.%d is not supported.\n", major, minor); return (EINVAL); } } /* * Retrieve the capabilities/supported options word so we know what * work-arounds to enable. Some firmware revs don't support this * command. */ if (aacraid_sync_command(sc, AAC_MONKER_GETINFO, 0, 0, 0, 0, &status, NULL)) { if (status != AAC_SRB_STS_INVALID_REQUEST) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); return (EIO); } } else { options = AAC_GET_MAILBOX(sc, 1); atu_size = AAC_GET_MAILBOX(sc, 2); sc->supported_options = options; if ((options & AAC_SUPPORTED_4GB_WINDOW) != 0 && (sc->flags & AAC_FLAGS_NO4GB) == 0) sc->flags |= AAC_FLAGS_4GB_WINDOW; if (options & AAC_SUPPORTED_NONDASD) sc->flags |= AAC_FLAGS_ENABLE_CAM; if ((options & AAC_SUPPORTED_SGMAP_HOST64) != 0 && (sizeof(bus_addr_t) > 4) && (sc->hint_flags & 0x1)) { device_printf(sc->aac_dev, "Enabling 64-bit address support\n"); sc->flags |= AAC_FLAGS_SG_64BIT; } if (sc->aac_if.aif_send_command) { if ((options & AAC_SUPPORTED_NEW_COMM_TYPE3) || (options & AAC_SUPPORTED_NEW_COMM_TYPE4)) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE34; else if (options & AAC_SUPPORTED_NEW_COMM_TYPE1) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE1; else if (options & AAC_SUPPORTED_NEW_COMM_TYPE2) sc->flags |= AAC_FLAGS_NEW_COMM | AAC_FLAGS_NEW_COMM_TYPE2; } if (options & AAC_SUPPORTED_64BIT_ARRAYSIZE) sc->flags |= AAC_FLAGS_ARRAY_64BIT; } if (!(sc->flags & AAC_FLAGS_NEW_COMM)) { device_printf(sc->aac_dev, "Communication interface not supported!\n"); return (ENXIO); } if (sc->hint_flags & 2) { device_printf(sc->aac_dev, "Sync. mode enforced by driver parameter. This will cause a significant performance decrease!\n"); sc->flags |= AAC_FLAGS_SYNC_MODE; } else if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE34) { device_printf(sc->aac_dev, "Async. mode not supported by current driver, sync. mode enforced.\nPlease update driver to get full performance.\n"); sc->flags |= AAC_FLAGS_SYNC_MODE; } /* Check for broken hardware that does a lower number of commands */ sc->aac_max_fibs = (sc->flags & AAC_FLAGS_256FIBS ? 256:512); /* Remap mem. resource, if required */ if (atu_size > rman_get_size(sc->aac_regs_res0)) { bus_release_resource( sc->aac_dev, SYS_RES_MEMORY, sc->aac_regs_rid0, sc->aac_regs_res0); sc->aac_regs_res0 = bus_alloc_resource_anywhere( sc->aac_dev, SYS_RES_MEMORY, &sc->aac_regs_rid0, atu_size, RF_ACTIVE); if (sc->aac_regs_res0 == NULL) { sc->aac_regs_res0 = bus_alloc_resource_any( sc->aac_dev, SYS_RES_MEMORY, &sc->aac_regs_rid0, RF_ACTIVE); if (sc->aac_regs_res0 == NULL) { device_printf(sc->aac_dev, "couldn't allocate register window\n"); return (ENXIO); } } sc->aac_btag0 = rman_get_bustag(sc->aac_regs_res0); sc->aac_bhandle0 = rman_get_bushandle(sc->aac_regs_res0); } /* Read preferred settings */ sc->aac_max_fib_size = sizeof(struct aac_fib); sc->aac_max_sectors = 128; /* 64KB */ sc->aac_max_aif = 1; if (sc->flags & AAC_FLAGS_SG_64BIT) sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite64)) / sizeof(struct aac_sg_entry64); else sc->aac_sg_tablesize = (AAC_FIB_DATASIZE - sizeof(struct aac_blockwrite)) / sizeof(struct aac_sg_entry); if (!aacraid_sync_command(sc, AAC_MONKER_GETCOMMPREF, 0, 0, 0, 0, NULL, NULL)) { options = AAC_GET_MAILBOX(sc, 1); sc->aac_max_fib_size = (options & 0xFFFF); sc->aac_max_sectors = (options >> 16) << 1; options = AAC_GET_MAILBOX(sc, 2); sc->aac_sg_tablesize = (options >> 16); options = AAC_GET_MAILBOX(sc, 3); sc->aac_max_fibs = ((options >> 16) & 0xFFFF); if (sc->aac_max_fibs == 0 || sc->aac_hwif != AAC_HWIF_SRCV) sc->aac_max_fibs = (options & 0xFFFF); options = AAC_GET_MAILBOX(sc, 4); sc->aac_max_aif = (options & 0xFFFF); options = AAC_GET_MAILBOX(sc, 5); sc->aac_max_msix =(sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) ? options : 0; } maxsize = sc->aac_max_fib_size + 31; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) maxsize += sizeof(struct aac_fib_xporthdr); if (maxsize > PAGE_SIZE) { sc->aac_max_fib_size -= (maxsize - PAGE_SIZE); maxsize = PAGE_SIZE; } sc->aac_max_fibs_alloc = PAGE_SIZE / maxsize; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { sc->flags |= AAC_FLAGS_RAW_IO; device_printf(sc->aac_dev, "Enable Raw I/O\n"); } if ((sc->flags & AAC_FLAGS_RAW_IO) && (sc->flags & AAC_FLAGS_ARRAY_64BIT)) { sc->flags |= AAC_FLAGS_LBA_64BIT; device_printf(sc->aac_dev, "Enable 64-bit array\n"); } #ifdef AACRAID_DEBUG aacraid_get_fw_debug_buffer(sc); #endif return (0); } static int aac_init(struct aac_softc *sc) { struct aac_adapter_init *ip; int i, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* reset rrq index */ sc->aac_fibs_pushed_no = 0; for (i = 0; i < sc->aac_max_msix; i++) sc->aac_host_rrq_idx[i] = i * sc->aac_vector_cap; /* * Fill in the init structure. This tells the adapter about the * physical location of various important shared data structures. */ ip = &sc->aac_common->ac_init; ip->InitStructRevision = AAC_INIT_STRUCT_REVISION; if (sc->aac_max_fib_size > sizeof(struct aac_fib)) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_4; sc->flags |= AAC_FLAGS_RAW_IO; } ip->NoOfMSIXVectors = sc->aac_max_msix; ip->AdapterFibsPhysicalAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_fibs); ip->AdapterFibsVirtualAddress = 0; ip->AdapterFibsSize = AAC_ADAPTER_FIBS * sizeof(struct aac_fib); ip->AdapterFibAlign = sizeof(struct aac_fib); ip->PrintfBufferAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_printf); ip->PrintfBufferSize = AAC_PRINTF_BUFSIZE; /* * The adapter assumes that pages are 4K in size, except on some * broken firmware versions that do the page->byte conversion twice, * therefore 'assuming' that this value is in 16MB units (2^24). * Round up since the granularity is so high. */ ip->HostPhysMemPages = ctob(physmem) / AAC_PAGE_SIZE; if (sc->flags & AAC_FLAGS_BROKEN_MEMMAP) { ip->HostPhysMemPages = (ip->HostPhysMemPages + AAC_PAGE_SIZE) / AAC_PAGE_SIZE; } ip->HostElapsedSeconds = time_uptime; /* reset later if invalid */ ip->InitFlags = AAC_INITFLAGS_NEW_COMM_SUPPORTED; if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE1) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_6; ip->InitFlags |= (AAC_INITFLAGS_NEW_COMM_TYPE1_SUPPORTED | AAC_INITFLAGS_FAST_JBOD_SUPPORTED); device_printf(sc->aac_dev, "New comm. interface type1 enabled\n"); } else if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) { ip->InitStructRevision = AAC_INIT_STRUCT_REVISION_7; ip->InitFlags |= (AAC_INITFLAGS_NEW_COMM_TYPE2_SUPPORTED | AAC_INITFLAGS_FAST_JBOD_SUPPORTED); device_printf(sc->aac_dev, "New comm. interface type2 enabled\n"); } ip->MaxNumAif = sc->aac_max_aif; ip->HostRRQ_AddrLow = sc->aac_common_busaddr + offsetof(struct aac_common, ac_host_rrq); /* always 32-bit address */ ip->HostRRQ_AddrHigh = 0; if (sc->aac_support_opt2 & AAC_SUPPORTED_POWER_MANAGEMENT) { ip->InitFlags |= AAC_INITFLAGS_DRIVER_SUPPORTS_PM; ip->InitFlags |= AAC_INITFLAGS_DRIVER_USES_UTC_TIME; device_printf(sc->aac_dev, "Power Management enabled\n"); } ip->MaxIoCommands = sc->aac_max_fibs; ip->MaxIoSize = sc->aac_max_sectors << 9; ip->MaxFibSize = sc->aac_max_fib_size; /* * Do controller-type-specific initialisation */ AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, ~0); /* * Give the init structure to the controller. */ if (aacraid_sync_command(sc, AAC_MONKER_INITSTRUCT, sc->aac_common_busaddr + offsetof(struct aac_common, ac_init), 0, 0, 0, NULL, NULL)) { device_printf(sc->aac_dev, "error establishing init structure\n"); error = EIO; goto out; } /* * Check configuration issues */ if ((error = aac_check_config(sc)) != 0) goto out; error = 0; out: return(error); } static void aac_define_int_mode(struct aac_softc *sc) { device_t dev; int cap, msi_count, error = 0; uint32_t val; dev = sc->aac_dev; /* max. vectors from AAC_MONKER_GETCOMMPREF */ if (sc->aac_max_msix == 0) { sc->aac_max_msix = 1; sc->aac_vector_cap = sc->aac_max_fibs; return; } /* OS capability */ msi_count = pci_msix_count(dev); if (msi_count > AAC_MAX_MSIX) msi_count = AAC_MAX_MSIX; if (msi_count > sc->aac_max_msix) msi_count = sc->aac_max_msix; if (msi_count == 0 || (error = pci_alloc_msix(dev, &msi_count)) != 0) { device_printf(dev, "alloc msix failed - msi_count=%d, err=%d; " "will try MSI\n", msi_count, error); pci_release_msi(dev); } else { sc->msi_enabled = TRUE; device_printf(dev, "using MSI-X interrupts (%u vectors)\n", msi_count); } if (!sc->msi_enabled) { msi_count = 1; if ((error = pci_alloc_msi(dev, &msi_count)) != 0) { device_printf(dev, "alloc msi failed - err=%d; " "will use INTx\n", error); pci_release_msi(dev); } else { sc->msi_enabled = TRUE; device_printf(dev, "using MSI interrupts\n"); } } if (sc->msi_enabled) { /* now read controller capability from PCI config. space */ cap = aac_find_pci_capability(sc, PCIY_MSIX); val = (cap != 0 ? pci_read_config(dev, cap + 2, 2) : 0); if (!(val & AAC_PCI_MSI_ENABLE)) { pci_release_msi(dev); sc->msi_enabled = FALSE; } } if (!sc->msi_enabled) { device_printf(dev, "using legacy interrupts\n"); sc->aac_max_msix = 1; } else { AAC_ACCESS_DEVREG(sc, AAC_ENABLE_MSIX); if (sc->aac_max_msix > msi_count) sc->aac_max_msix = msi_count; } sc->aac_vector_cap = sc->aac_max_fibs / sc->aac_max_msix; fwprintf(sc, HBA_FLAGS_DBG_DEBUG_B, "msi_enabled %d vector_cap %d max_fibs %d max_msix %d", sc->msi_enabled,sc->aac_vector_cap, sc->aac_max_fibs, sc->aac_max_msix); } static int aac_find_pci_capability(struct aac_softc *sc, int cap) { device_t dev; uint32_t status; uint8_t ptr; dev = sc->aac_dev; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); status = pci_read_config(dev, PCIR_HDRTYPE, 1); switch (status & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { int next, val; next = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); val = pci_read_config(dev, ptr + PCICAP_ID, 1); if (val == cap) return (ptr); ptr = next; } return (0); } static int aac_setup_intr(struct aac_softc *sc) { int i, msi_count, rid; struct resource *res; void *tag; msi_count = sc->aac_max_msix; rid = (sc->msi_enabled ? 1:0); for (i = 0; i < msi_count; i++, rid++) { if ((res = bus_alloc_resource_any(sc->aac_dev,SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(sc->aac_dev,"can't allocate interrupt\n"); return (EINVAL); } sc->aac_irq_rid[i] = rid; sc->aac_irq[i] = res; if (aac_bus_setup_intr(sc->aac_dev, res, INTR_MPSAFE | INTR_TYPE_BIO, NULL, aacraid_new_intr_type1, &sc->aac_msix[i], &tag)) { device_printf(sc->aac_dev, "can't set up interrupt\n"); return (EINVAL); } sc->aac_msix[i].vector_no = i; sc->aac_msix[i].sc = sc; sc->aac_intr[i] = tag; } return (0); } static int aac_check_config(struct aac_softc *sc) { struct aac_fib *fib; struct aac_cnt_config *ccfg; struct aac_cf_status_hdr *cf_shdr; int rval; mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); ccfg = (struct aac_cnt_config *)&fib->data[0]; bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_GET_CONFIG_STATUS; ccfg->CTCommand.param[CNT_SIZE] = sizeof(struct aac_cf_status_hdr); rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_cnt_config)); cf_shdr = (struct aac_cf_status_hdr *)ccfg->CTCommand.data; if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK) { if (cf_shdr->action <= CFACT_PAUSE) { bzero(ccfg, sizeof (*ccfg) - CT_PACKET_SIZE); ccfg->Command = VM_ContainerConfig; ccfg->CTCommand.command = CT_COMMIT_CONFIG; rval = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_cnt_config)); if (rval == 0 && ccfg->Command == ST_OK && ccfg->CTCommand.param[0] == CT_OK) { /* successful completion */ rval = 0; } else { /* auto commit aborted due to error(s) */ rval = -2; } } else { /* auto commit aborted due to adapter indicating config. issues too dangerous to auto commit */ rval = -3; } } else { /* error */ rval = -1; } aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return(rval); } /* * Send a synchronous command to the controller and wait for a result. * Indicate if the controller completed the command with an error status. */ int aacraid_sync_command(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3, u_int32_t *sp, u_int32_t *r1) { time_t then; u_int32_t status; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* populate the mailbox */ AAC_SET_MAILBOX(sc, command, arg0, arg1, arg2, arg3); /* ensure the sync command doorbell flag is cleared */ if (!sc->msi_enabled) AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* then set it to signal the adapter */ AAC_QNOTIFY(sc, AAC_DB_SYNC_COMMAND); if ((command != AAC_MONKER_SYNCFIB) || (sp == NULL) || (*sp != 0)) { /* spin waiting for the command to complete */ then = time_uptime; do { if (time_uptime > (then + AAC_SYNC_TIMEOUT)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "timed out"); return(EIO); } } while (!(AAC_GET_ISTATUS(sc) & AAC_DB_SYNC_COMMAND)); /* clear the completion flag */ AAC_CLEAR_ISTATUS(sc, AAC_DB_SYNC_COMMAND); /* get the command status */ status = AAC_GET_MAILBOX(sc, 0); if (sp != NULL) *sp = status; /* return parameter */ if (r1 != NULL) *r1 = AAC_GET_MAILBOX(sc, 1); if (status != AAC_SRB_STS_SUCCESS) return (-1); } return(0); } static int aac_sync_fib(struct aac_softc *sc, u_int32_t command, u_int32_t xferstate, struct aac_fib *fib, u_int16_t datasize) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (datasize > AAC_FIB_DATASIZE) return(EINVAL); /* * Set up the sync FIB */ fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY; fib->Header.XferState |= xferstate; fib->Header.Command = command; fib->Header.StructType = AAC_FIBTYPE_TFIB; fib->Header.Size = sizeof(struct aac_fib_header) + datasize; fib->Header.SenderSize = sizeof(struct aac_fib); fib->Header.SenderFibAddress = 0; /* Not needed */ fib->Header.u.ReceiverFibAddress = sc->aac_common_busaddr + offsetof(struct aac_common, ac_sync_fib); /* * Give the FIB to the controller, wait for a response. */ if (aacraid_sync_command(sc, AAC_MONKER_SYNCFIB, fib->Header.u.ReceiverFibAddress, 0, 0, 0, NULL, NULL)) { fwprintf(sc, HBA_FLAGS_DBG_ERROR_B, "IO error"); return(EIO); } return (0); } /* * Check for commands that have been outstanding for a suspiciously long time, * and complain about them. */ static void aac_timeout(struct aac_softc *sc) { struct aac_command *cm; time_t deadline; int timedout; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Traverse the busy command list, bitch about late commands once * only. */ timedout = 0; deadline = time_uptime - AAC_CMD_TIMEOUT; TAILQ_FOREACH(cm, &sc->aac_busy, cm_link) { if (cm->cm_timestamp < deadline) { device_printf(sc->aac_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime-cm->cm_timestamp)); AAC_PRINT_FIB(sc, cm->cm_fib); timedout++; } } if (timedout) aac_reset_adapter(sc); aacraid_print_queues(sc); } /* * Interface Function Vectors */ /* * Read the current firmware status word. */ static int aac_src_get_fwstatus(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRC_OMR)); } /* * Notify the controller of a change in a given queue */ static void aac_src_qnotify(struct aac_softc *sc, int qbit) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, qbit << AAC_SRC_IDR_SHIFT); } /* * Get the interrupt reason bits */ static int aac_src_get_istatus(struct aac_softc *sc) { int val; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (sc->msi_enabled) { val = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_MSI); if (val & AAC_MSI_SYNC_STATUS) val = AAC_DB_SYNC_COMMAND; else val = 0; } else { val = AAC_MEM0_GETREG4(sc, AAC_SRC_ODBR_R) >> AAC_SRC_ODR_SHIFT; } return(val); } /* * Clear some interrupt reason bits */ static void aac_src_clear_istatus(struct aac_softc *sc, int mask) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (sc->msi_enabled) { if (mask == AAC_DB_SYNC_COMMAND) AAC_ACCESS_DEVREG(sc, AAC_CLEAR_SYNC_BIT); } else { AAC_MEM0_SETREG4(sc, AAC_SRC_ODBR_C, mask << AAC_SRC_ODR_SHIFT); } } /* * Populate the mailbox and set the command word */ static void aac_src_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX, command); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 4, arg0); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 8, arg1); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 12, arg2); AAC_MEM0_SETREG4(sc, AAC_SRC_MAILBOX + 16, arg3); } static void aac_srcv_set_mailbox(struct aac_softc *sc, u_int32_t command, u_int32_t arg0, u_int32_t arg1, u_int32_t arg2, u_int32_t arg3) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX, command); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 4, arg0); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 8, arg1); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 12, arg2); AAC_MEM0_SETREG4(sc, AAC_SRCV_MAILBOX + 16, arg3); } /* * Fetch the immediate command status word */ static int aac_src_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRC_MAILBOX + (mb * 4))); } static int aac_srcv_get_mailbox(struct aac_softc *sc, int mb) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(AAC_MEM0_GETREG4(sc, AAC_SRCV_MAILBOX + (mb * 4))); } /* * Set/clear interrupt masks */ static void aac_src_access_devreg(struct aac_softc *sc, int mode) { u_int32_t val; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); switch (mode) { case AAC_ENABLE_INTERRUPT: AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, (sc->msi_enabled ? AAC_INT_ENABLE_TYPE1_MSIX : AAC_INT_ENABLE_TYPE1_INTX)); break; case AAC_DISABLE_INTERRUPT: AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, AAC_INT_DISABLE_ALL); break; case AAC_ENABLE_MSIX: /* set bit 6 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x40; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); /* unmask int. */ val = PMC_ALL_INTERRUPT_BITS; AAC_MEM0_SETREG4(sc, AAC_SRC_IOAR, val); val = AAC_MEM0_GETREG4(sc, AAC_SRC_OIMR); AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, val & (~(PMC_GLOBAL_INT_BIT2 | PMC_GLOBAL_INT_BIT0))); break; case AAC_DISABLE_MSIX: /* reset bit 6 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val &= ~0x40; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_CLEAR_AIF_BIT: /* set bit 5 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x20; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_CLEAR_SYNC_BIT: /* set bit 4 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x10; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); break; case AAC_ENABLE_INTX: /* set bit 7 */ val = AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); val |= 0x80; AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, val); AAC_MEM0_GETREG4(sc, AAC_SRC_IDBR); /* unmask int. */ val = PMC_ALL_INTERRUPT_BITS; AAC_MEM0_SETREG4(sc, AAC_SRC_IOAR, val); val = AAC_MEM0_GETREG4(sc, AAC_SRC_OIMR); AAC_MEM0_SETREG4(sc, AAC_SRC_OIMR, val & (~(PMC_GLOBAL_INT_BIT2))); break; default: break; } } /* * New comm. interface: Send command functions */ static int aac_src_send_command(struct aac_softc *sc, struct aac_command *cm) { struct aac_fib_xporthdr *pFibX; u_int32_t fibsize, high_addr; u_int64_t address; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, "send command (new comm. type1)"); if (sc->msi_enabled && cm->cm_fib->Header.Command != AifRequest && sc->aac_max_msix > 1) { u_int16_t vector_no, first_choice = 0xffff; vector_no = sc->aac_fibs_pushed_no % sc->aac_max_msix; do { vector_no += 1; if (vector_no == sc->aac_max_msix) vector_no = 1; if (sc->aac_rrq_outstanding[vector_no] < sc->aac_vector_cap) break; if (0xffff == first_choice) first_choice = vector_no; else if (vector_no == first_choice) break; } while (1); if (vector_no == first_choice) vector_no = 0; sc->aac_rrq_outstanding[vector_no]++; if (sc->aac_fibs_pushed_no == 0xffffffff) sc->aac_fibs_pushed_no = 0; else sc->aac_fibs_pushed_no++; cm->cm_fib->Header.Handle += (vector_no << 16); } if (sc->flags & AAC_FLAGS_NEW_COMM_TYPE2) { /* Calculate the amount to the fibsize bits */ fibsize = (cm->cm_fib->Header.Size + 127) / 128 - 1; /* Fill new FIB header */ address = cm->cm_fibphys; high_addr = (u_int32_t)(address >> 32); if (high_addr == 0L) { cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB2; cm->cm_fib->Header.u.TimeStamp = 0L; } else { cm->cm_fib->Header.StructType = AAC_FIBTYPE_TFIB2_64; cm->cm_fib->Header.u.SenderFibAddressHigh = high_addr; } cm->cm_fib->Header.SenderFibAddress = (u_int32_t)address; } else { /* Calculate the amount to the fibsize bits */ fibsize = (sizeof(struct aac_fib_xporthdr) + cm->cm_fib->Header.Size + 127) / 128 - 1; /* Fill XPORT header */ pFibX = (struct aac_fib_xporthdr *) ((unsigned char *)cm->cm_fib - sizeof(struct aac_fib_xporthdr)); pFibX->Handle = cm->cm_fib->Header.Handle; pFibX->HostAddress = cm->cm_fibphys; pFibX->Size = cm->cm_fib->Header.Size; address = cm->cm_fibphys - sizeof(struct aac_fib_xporthdr); high_addr = (u_int32_t)(address >> 32); } if (fibsize > 31) fibsize = 31; aac_enqueue_busy(cm); if (high_addr) { AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE64_H, high_addr); AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE64_L, (u_int32_t)address + fibsize); } else { AAC_MEM0_SETREG4(sc, AAC_SRC_IQUE32, (u_int32_t)address + fibsize); } return 0; } /* * New comm. interface: get, set outbound queue index */ static int aac_src_get_outb_queue(struct aac_softc *sc) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return(-1); } static void aac_src_set_outb_queue(struct aac_softc *sc, int index) { fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); } /* * Debugging and Diagnostics */ /* * Print some information about the controller. */ static void aac_describe_controller(struct aac_softc *sc) { struct aac_fib *fib; struct aac_adapter_info *info; char *adapter_type = "Adaptec RAID controller"; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); if (sc->supported_options & AAC_SUPPORTED_SUPPLEMENT_ADAPTER_INFO) { fib->data[0] = 0; if (aac_sync_fib(sc, RequestSupplementAdapterInfo, 0, fib, 1)) device_printf(sc->aac_dev, "RequestSupplementAdapterInfo failed\n"); else { struct aac_supplement_adapter_info *supp_info; supp_info = ((struct aac_supplement_adapter_info *)&fib->data[0]); adapter_type = (char *)supp_info->AdapterTypeText; sc->aac_feature_bits = supp_info->FeatureBits; sc->aac_support_opt2 = supp_info->SupportedOptions2; } } device_printf(sc->aac_dev, "%s, aacraid driver %d.%d.%d-%d\n", adapter_type, AAC_DRIVER_MAJOR_VERSION, AAC_DRIVER_MINOR_VERSION, AAC_DRIVER_BUGFIX_LEVEL, AAC_DRIVER_BUILD); fib->data[0] = 0; if (aac_sync_fib(sc, RequestAdapterInfo, 0, fib, 1)) { device_printf(sc->aac_dev, "RequestAdapterInfo failed\n"); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } /* save the kernel revision structure for later use */ info = (struct aac_adapter_info *)&fib->data[0]; sc->aac_revision = info->KernelRevision; if (bootverbose) { device_printf(sc->aac_dev, "%s %dMHz, %dMB memory " "(%dMB cache, %dMB execution), %s\n", aac_describe_code(aac_cpu_variant, info->CpuVariant), info->ClockSpeed, info->TotalMem / (1024 * 1024), info->BufferMem / (1024 * 1024), info->ExecutionMem / (1024 * 1024), aac_describe_code(aac_battery_platform, info->batteryPlatform)); device_printf(sc->aac_dev, "Kernel %d.%d-%d, Build %d, S/N %6X\n", info->KernelRevision.external.comp.major, info->KernelRevision.external.comp.minor, info->KernelRevision.external.comp.dash, info->KernelRevision.buildNumber, (u_int32_t)(info->SerialNumber & 0xffffff)); device_printf(sc->aac_dev, "Supported Options=%b\n", sc->supported_options, "\20" "\1SNAPSHOT" "\2CLUSTERS" "\3WCACHE" "\4DATA64" "\5HOSTTIME" "\6RAID50" "\7WINDOW4GB" "\10SCSIUPGD" "\11SOFTERR" "\12NORECOND" "\13SGMAP64" "\14ALARM" "\15NONDASD" "\16SCSIMGT" "\17RAIDSCSI" "\21ADPTINFO" "\22NEWCOMM" "\23ARRAY64BIT" "\24HEATSENSOR"); } aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); } /* * Look up a text description of a numeric error code and return a pointer to * same. */ static char * aac_describe_code(struct aac_code_lookup *table, u_int32_t code) { int i; for (i = 0; table[i].string != NULL; i++) if (table[i].code == code) return(table[i].string); return(table[i + 1].string); } /* * Management Interface */ static int aac_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct aac_softc *sc; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); #if __FreeBSD_version >= 702000 device_busy(sc->aac_dev); devfs_set_cdevpriv(sc, aac_cdevpriv_dtor); #endif return 0; } static int aac_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { union aac_statrequest *as; struct aac_softc *sc; int error = 0; as = (union aac_statrequest *)arg; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); switch (cmd) { case AACIO_STATS: switch (as->as_item) { case AACQ_FREE: case AACQ_READY: case AACQ_BUSY: bcopy(&sc->aac_qstat[as->as_item], &as->as_qstat, sizeof(struct aac_qstat)); break; default: error = ENOENT; break; } break; case FSACTL_SENDFIB: case FSACTL_SEND_LARGE_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_SENDFIB: case FSACTL_LNX_SEND_LARGE_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SENDFIB"); error = aac_ioctl_sendfib(sc, arg); break; case FSACTL_SEND_RAW_SRB: arg = *(caddr_t*)arg; case FSACTL_LNX_SEND_RAW_SRB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_SEND_RAW_SRB"); error = aac_ioctl_send_raw_srb(sc, arg); break; case FSACTL_AIF_THREAD: case FSACTL_LNX_AIF_THREAD: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_AIF_THREAD"); error = EINVAL; break; case FSACTL_OPEN_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_OPEN_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_OPEN_GET_ADAPTER_FIB"); error = aac_open_aif(sc, arg); break; case FSACTL_GET_NEXT_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_NEXT_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_NEXT_ADAPTER_FIB"); error = aac_getnext_aif(sc, arg); break; case FSACTL_CLOSE_GET_ADAPTER_FIB: arg = *(caddr_t*)arg; case FSACTL_LNX_CLOSE_GET_ADAPTER_FIB: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_CLOSE_GET_ADAPTER_FIB"); error = aac_close_aif(sc, arg); break; case FSACTL_MINIPORT_REV_CHECK: arg = *(caddr_t*)arg; case FSACTL_LNX_MINIPORT_REV_CHECK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_MINIPORT_REV_CHECK"); error = aac_rev_check(sc, arg); break; case FSACTL_QUERY_DISK: arg = *(caddr_t*)arg; case FSACTL_LNX_QUERY_DISK: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_QUERY_DISK"); error = aac_query_disk(sc, arg); break; case FSACTL_DELETE_DISK: case FSACTL_LNX_DELETE_DISK: /* * We don't trust the underland to tell us when to delete a * container, rather we rely on an AIF coming from the * controller */ error = 0; break; case FSACTL_GET_PCI_INFO: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_PCI_INFO: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_PCI_INFO"); error = aac_get_pci_info(sc, arg); break; case FSACTL_GET_FEATURES: arg = *(caddr_t*)arg; case FSACTL_LNX_GET_FEATURES: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "FSACTL_GET_FEATURES"); error = aac_supported_features(sc, arg); break; default: fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "unsupported cmd 0x%lx\n", cmd); error = EINVAL; break; } return(error); } static int aac_poll(struct cdev *dev, int poll_events, struct thread *td) { struct aac_softc *sc; struct aac_fib_context *ctx; int revents; sc = dev->si_drv1; revents = 0; mtx_lock(&sc->aac_io_lock); if ((poll_events & (POLLRDNORM | POLLIN)) != 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->ctx_idx != sc->aifq_idx || ctx->ctx_wrap) { revents |= poll_events & (POLLIN | POLLRDNORM); break; } } } mtx_unlock(&sc->aac_io_lock); if (revents == 0) { if (poll_events & (POLLIN | POLLRDNORM)) selrecord(td, &sc->rcv_select); } return (revents); } static void aac_ioctl_event(struct aac_softc *sc, struct aac_event *event, void *arg) { switch (event->ev_type) { case AAC_EVENT_CMFREE: mtx_assert(&sc->aac_io_lock, MA_OWNED); if (aacraid_alloc_command(sc, (struct aac_command **)arg)) { aacraid_add_event(sc, event); return; } free(event, M_AACRAIDBUF); wakeup(arg); break; default: break; } } /* * Send a FIB supplied from userspace */ static int aac_ioctl_sendfib(struct aac_softc *sc, caddr_t ufib) { struct aac_command *cm; int size, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; /* * Get a command */ mtx_lock(&sc->aac_io_lock); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; event = malloc(sizeof(struct aac_event), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aacraid_add_event(sc, event); msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsfib", 0); } mtx_unlock(&sc->aac_io_lock); /* * Fetch the FIB header, then re-copy to get data as well. */ if ((error = copyin(ufib, cm->cm_fib, sizeof(struct aac_fib_header))) != 0) goto out; size = cm->cm_fib->Header.Size + sizeof(struct aac_fib_header); if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "incoming FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } if ((error = copyin(ufib, cm->cm_fib, size)) != 0) goto out; cm->cm_fib->Header.Size = size; cm->cm_timestamp = time_uptime; cm->cm_datalen = 0; /* * Pass the FIB to the controller, wait for it to complete. */ mtx_lock(&sc->aac_io_lock); error = aacraid_wait_command(cm); mtx_unlock(&sc->aac_io_lock); if (error != 0) { device_printf(sc->aac_dev, "aacraid_wait_command return %d\n", error); goto out; } /* * Copy the FIB and data back out to the caller. */ size = cm->cm_fib->Header.Size; if (size > sc->aac_max_fib_size) { device_printf(sc->aac_dev, "outbound FIB oversized (%d > %d)\n", size, sc->aac_max_fib_size); size = sc->aac_max_fib_size; } error = copyout(cm->cm_fib, ufib, size); out: if (cm != NULL) { mtx_lock(&sc->aac_io_lock); aacraid_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * Send a passthrough FIB supplied from userspace */ static int aac_ioctl_send_raw_srb(struct aac_softc *sc, caddr_t arg) { struct aac_command *cm; struct aac_fib *fib; struct aac_srb *srbcmd; struct aac_srb *user_srb = (struct aac_srb *)arg; void *user_reply; int error, transfer_data = 0; bus_dmamap_t orig_map = 0; u_int32_t fibsize = 0; u_int64_t srb_sg_address; u_int32_t srb_sg_bytecount; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); cm = NULL; mtx_lock(&sc->aac_io_lock); if (aacraid_alloc_command(sc, &cm)) { struct aac_event *event; event = malloc(sizeof(struct aac_event), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (event == NULL) { error = EBUSY; mtx_unlock(&sc->aac_io_lock); goto out; } event->ev_type = AAC_EVENT_CMFREE; event->ev_callback = aac_ioctl_event; event->ev_arg = &cm; aacraid_add_event(sc, event); msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsraw", 0); } mtx_unlock(&sc->aac_io_lock); cm->cm_data = NULL; /* save original dma map */ orig_map = cm->cm_datamap; fib = cm->cm_fib; srbcmd = (struct aac_srb *)fib->data; if ((error = copyin((void *)&user_srb->data_len, &fibsize, sizeof (u_int32_t)) != 0)) goto out; if (fibsize > (sc->aac_max_fib_size-sizeof(struct aac_fib_header))) { error = EINVAL; goto out; } if ((error = copyin((void *)user_srb, srbcmd, fibsize) != 0)) goto out; srbcmd->function = 0; /* SRBF_ExecuteScsi */ srbcmd->retry_limit = 0; /* obsolete */ /* only one sg element from userspace supported */ if (srbcmd->sg_map.SgCount > 1) { error = EINVAL; goto out; } /* check fibsize */ if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry))) { struct aac_sg_entry *sgp = srbcmd->sg_map.SgEntry; struct aac_sg_entry sg; if ((error = copyin(sgp, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = (u_int64_t)sg.SgAddress; } else if (fibsize == (sizeof(struct aac_srb) + srbcmd->sg_map.SgCount * sizeof(struct aac_sg_entry64))) { #ifdef __LP64__ struct aac_sg_entry64 *sgp = (struct aac_sg_entry64 *)srbcmd->sg_map.SgEntry; struct aac_sg_entry64 sg; if ((error = copyin(sgp, &sg, sizeof(sg))) != 0) goto out; srb_sg_bytecount = sg.SgByteCount; srb_sg_address = sg.SgAddress; if (srb_sg_address > 0xffffffffull && !(sc->flags & AAC_FLAGS_SG_64BIT)) #endif { error = EINVAL; goto out; } } else { error = EINVAL; goto out; } user_reply = (char *)arg + fibsize; srbcmd->data_len = srb_sg_bytecount; if (srbcmd->sg_map.SgCount == 1) transfer_data = 1; if (transfer_data) { /* * Create DMA tag for the passthr. data buffer and allocate it. */ if (bus_dma_tag_create(sc->aac_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ (sc->flags & AAC_FLAGS_SG_64BIT) ? BUS_SPACE_MAXADDR_32BIT : 0x7fffffff, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ srb_sg_bytecount, /* size */ sc->aac_sg_tablesize, /* nsegments */ srb_sg_bytecount, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* No locking needed */ &cm->cm_passthr_dmat)) { error = ENOMEM; goto out; } if (bus_dmamem_alloc(cm->cm_passthr_dmat, (void **)&cm->cm_data, BUS_DMA_NOWAIT, &cm->cm_datamap)) { error = ENOMEM; goto out; } /* fill some cm variables */ cm->cm_datalen = srb_sg_bytecount; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN) cm->cm_flags |= AAC_CMD_DATAIN; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_OUT) cm->cm_flags |= AAC_CMD_DATAOUT; if (srbcmd->flags & AAC_SRB_FLAGS_DATA_OUT) { if ((error = copyin((void *)(uintptr_t)srb_sg_address, cm->cm_data, cm->cm_datalen)) != 0) goto out; /* sync required for bus_dmamem_alloc() alloc. mem.? */ bus_dmamap_sync(cm->cm_passthr_dmat, cm->cm_datamap, BUS_DMASYNC_PREWRITE); } } /* build the FIB */ fib->Header.Size = sizeof(struct aac_fib_header) + sizeof(struct aac_srb); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC; fib->Header.Command = (sc->flags & AAC_FLAGS_SG_64BIT) ? ScsiPortCommandU64 : ScsiPortCommand; cm->cm_sgtable = (struct aac_sg_table *)&srbcmd->sg_map; /* send command */ if (transfer_data) { bus_dmamap_load(cm->cm_passthr_dmat, cm->cm_datamap, cm->cm_data, cm->cm_datalen, aacraid_map_command_sg, cm, 0); } else { aacraid_map_command_sg(cm, NULL, 0, 0); } /* wait for completion */ mtx_lock(&sc->aac_io_lock); while (!(cm->cm_flags & AAC_CMD_COMPLETED)) msleep(cm, &sc->aac_io_lock, 0, "aacraid_ctlsrw2", 0); mtx_unlock(&sc->aac_io_lock); /* copy data */ if (transfer_data && (srbcmd->flags & AAC_SRB_FLAGS_DATA_IN)) { if ((error = copyout(cm->cm_data, (void *)(uintptr_t)srb_sg_address, cm->cm_datalen)) != 0) goto out; /* sync required for bus_dmamem_alloc() allocated mem.? */ bus_dmamap_sync(cm->cm_passthr_dmat, cm->cm_datamap, BUS_DMASYNC_POSTREAD); } /* status */ error = copyout(fib->data, user_reply, sizeof(struct aac_srb_response)); out: if (cm && cm->cm_data) { if (transfer_data) bus_dmamap_unload(cm->cm_passthr_dmat, cm->cm_datamap); bus_dmamem_free(cm->cm_passthr_dmat, cm->cm_data, cm->cm_datamap); cm->cm_datamap = orig_map; } if (cm && cm->cm_passthr_dmat) bus_dma_tag_destroy(cm->cm_passthr_dmat); if (cm) { mtx_lock(&sc->aac_io_lock); aacraid_release_command(cm); mtx_unlock(&sc->aac_io_lock); } return(error); } /* * Request an AIF from the controller (new comm. type1) */ static void aac_request_aif(struct aac_softc *sc) { struct aac_command *cm; struct aac_fib *fib; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if (aacraid_alloc_command(sc, &cm)) { sc->aif_pending = 1; return; } sc->aif_pending = 0; /* build the FIB */ fib = cm->cm_fib; fib->Header.Size = sizeof(struct aac_fib); fib->Header.XferState = AAC_FIBSTATE_HOSTOWNED | AAC_FIBSTATE_INITIALISED | AAC_FIBSTATE_EMPTY | AAC_FIBSTATE_FROMHOST | AAC_FIBSTATE_REXPECTED | AAC_FIBSTATE_NORM | AAC_FIBSTATE_ASYNC; /* set AIF marker */ fib->Header.Handle = 0x00800000; fib->Header.Command = AifRequest; ((struct aac_aif_command *)fib->data)->command = AifReqEvent; aacraid_map_command_sg(cm, NULL, 0, 0); } #if __FreeBSD_version >= 702000 /* * cdevpriv interface private destructor. */ static void aac_cdevpriv_dtor(void *arg) { struct aac_softc *sc; sc = arg; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&Giant); device_unbusy(sc->aac_dev); mtx_unlock(&Giant); } #else static int aac_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct aac_softc *sc; sc = dev->si_drv1; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); return 0; } #endif /* * Handle an AIF sent to us by the controller; queue it for later reference. * If the queue fills up, then drop the older entries. */ static void aac_handle_aif(struct aac_softc *sc, struct aac_fib *fib) { struct aac_aif_command *aif; struct aac_container *co, *co_next; struct aac_fib_context *ctx; struct aac_fib *sync_fib; struct aac_mntinforesp mir; int next, current, found; int count = 0, changed = 0, i = 0; u_int32_t channel, uid; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); aif = (struct aac_aif_command*)&fib->data[0]; aacraid_print_aif(sc, aif); /* Is it an event that we should care about? */ switch (aif->command) { case AifCmdEventNotify: switch (aif->data.EN.type) { case AifEnAddContainer: case AifEnDeleteContainer: /* * A container was added or deleted, but the message * doesn't tell us anything else! Re-enumerate the * containers and sort things out. */ aac_alloc_sync_fib(sc, &sync_fib); do { /* * Ask the controller for its containers one at * a time. * XXX What if the controller's list changes * midway through this enumaration? * XXX This should be done async. */ if (aac_get_container_info(sc, sync_fib, i, &mir, &uid) != 0) continue; if (i == 0) count = mir.MntRespCount; /* * Check the container against our list. * co->co_found was already set to 0 in a * previous run. */ if ((mir.Status == ST_OK) && (mir.MntTable[0].VolType != CT_NONE)) { found = 0; TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == mir.MntTable[0].ObjectId) { co->co_found = 1; found = 1; break; } } /* * If the container matched, continue * in the list. */ if (found) { i++; continue; } /* * This is a new container. Do all the * appropriate things to set it up. */ aac_add_container(sc, &mir, 1, uid); changed = 1; } i++; } while ((i < count) && (i < AAC_MAX_CONTAINERS)); aac_release_sync_fib(sc); /* * Go through our list of containers and see which ones * were not marked 'found'. Since the controller didn't * list them they must have been deleted. Do the * appropriate steps to destroy the device. Also reset * the co->co_found field. */ co = TAILQ_FIRST(&sc->aac_container_tqh); while (co != NULL) { if (co->co_found == 0) { co_next = TAILQ_NEXT(co, co_link); TAILQ_REMOVE(&sc->aac_container_tqh, co, co_link); free(co, M_AACRAIDBUF); changed = 1; co = co_next; } else { co->co_found = 0; co = TAILQ_NEXT(co, co_link); } } /* Attach the newly created containers */ if (changed) { if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, 0, AAC_CAM_TARGET_WILDCARD); } break; case AifEnEnclosureManagement: switch (aif->data.EN.data.EEE.eventType) { case AIF_EM_DRIVE_INSERTION: case AIF_EM_DRIVE_REMOVAL: channel = aif->data.EN.data.EEE.unitID; if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, ((channel>>24) & 0xF) + 1, (channel & 0xFFFF)); break; } break; case AifEnAddJBOD: case AifEnDeleteJBOD: case AifRawDeviceRemove: channel = aif->data.EN.data.ECE.container; if (sc->cam_rescan_cb != NULL) sc->cam_rescan_cb(sc, ((channel>>24) & 0xF) + 1, AAC_CAM_TARGET_WILDCARD); break; default: break; } default: break; } /* Copy the AIF data to the AIF queue for ioctl retrieval */ current = sc->aifq_idx; next = (current + 1) % AAC_AIFQ_LENGTH; if (next == 0) sc->aifq_filled = 1; bcopy(fib, &sc->aac_aifq[current], sizeof(struct aac_fib)); /* modify AIF contexts */ if (sc->aifq_filled) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (next == ctx->ctx_idx) ctx->ctx_wrap = 1; else if (current == ctx->ctx_idx && ctx->ctx_wrap) ctx->ctx_idx = next; } } sc->aifq_idx = next; /* On the off chance that someone is sleeping for an aif... */ if (sc->aac_state & AAC_STATE_AIF_SLEEPER) wakeup(sc->aac_aifq); /* Wakeup any poll()ers */ selwakeuppri(&sc->rcv_select, PRIBIO); return; } /* * Return the Revision of the driver to userspace and check to see if the * userspace app is possibly compatible. This is extremely bogus since * our driver doesn't follow Adaptec's versioning system. Cheat by just * returning what the card reported. */ static int aac_rev_check(struct aac_softc *sc, caddr_t udata) { struct aac_rev_check rev_check; struct aac_rev_check_resp rev_check_resp; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); /* * Copyin the revision struct from userspace */ if ((error = copyin(udata, (caddr_t)&rev_check, sizeof(struct aac_rev_check))) != 0) { return error; } fwprintf(sc, HBA_FLAGS_DBG_IOCTL_COMMANDS_B, "Userland revision= %d\n", rev_check.callingRevision.buildNumber); /* * Doctor up the response struct. */ rev_check_resp.possiblyCompatible = 1; rev_check_resp.adapterSWRevision.external.comp.major = AAC_DRIVER_MAJOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.minor = AAC_DRIVER_MINOR_VERSION; rev_check_resp.adapterSWRevision.external.comp.type = AAC_DRIVER_TYPE; rev_check_resp.adapterSWRevision.external.comp.dash = AAC_DRIVER_BUGFIX_LEVEL; rev_check_resp.adapterSWRevision.buildNumber = AAC_DRIVER_BUILD; return(copyout((caddr_t)&rev_check_resp, udata, sizeof(struct aac_rev_check_resp))); } /* * Pass the fib context to the caller */ static int aac_open_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *fibctx, *ctx; int error = 0; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); fibctx = malloc(sizeof(struct aac_fib_context), M_AACRAIDBUF, M_NOWAIT|M_ZERO); if (fibctx == NULL) return (ENOMEM); mtx_lock(&sc->aac_io_lock); /* all elements are already 0, add to queue */ if (sc->fibctx == NULL) sc->fibctx = fibctx; else { for (ctx = sc->fibctx; ctx->next; ctx = ctx->next) ; ctx->next = fibctx; fibctx->prev = ctx; } /* evaluate unique value */ fibctx->unique = (*(u_int32_t *)&fibctx & 0xffffffff); ctx = sc->fibctx; while (ctx != fibctx) { if (ctx->unique == fibctx->unique) { fibctx->unique++; ctx = sc->fibctx; } else { ctx = ctx->next; } } error = copyout(&fibctx->unique, (void *)arg, sizeof(u_int32_t)); mtx_unlock(&sc->aac_io_lock); if (error) aac_close_aif(sc, (caddr_t)ctx); return error; } /* * Close the caller's fib context */ static int aac_close_aif(struct aac_softc *sc, caddr_t arg) { struct aac_fib_context *ctx; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (ctx->unique == *(uint32_t *)&arg) { if (ctx == sc->fibctx) sc->fibctx = NULL; else { ctx->prev->next = ctx->next; if (ctx->next) ctx->next->prev = ctx->prev; } break; } } if (ctx) free(ctx, M_AACRAIDBUF); mtx_unlock(&sc->aac_io_lock); return 0; } /* * Pass the caller the next AIF in their queue */ static int aac_getnext_aif(struct aac_softc *sc, caddr_t arg) { struct get_adapter_fib_ioctl agf; struct aac_fib_context *ctx; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); if ((error = copyin(arg, &agf, sizeof(agf))) == 0) { for (ctx = sc->fibctx; ctx; ctx = ctx->next) { if (agf.AdapterFibContext == ctx->unique) break; } if (!ctx) { mtx_unlock(&sc->aac_io_lock); return (EFAULT); } error = aac_return_aif(sc, ctx, agf.AifFib); if (error == EAGAIN && agf.Wait) { fwprintf(sc, HBA_FLAGS_DBG_AIF_B, "aac_getnext_aif(): waiting for AIF"); sc->aac_state |= AAC_STATE_AIF_SLEEPER; while (error == EAGAIN) { mtx_unlock(&sc->aac_io_lock); error = tsleep(sc->aac_aifq, PRIBIO | PCATCH, "aacaif", 0); mtx_lock(&sc->aac_io_lock); if (error == 0) error = aac_return_aif(sc, ctx, agf.AifFib); } sc->aac_state &= ~AAC_STATE_AIF_SLEEPER; } } mtx_unlock(&sc->aac_io_lock); return(error); } /* * Hand the next AIF off the top of the queue out to userspace. */ static int aac_return_aif(struct aac_softc *sc, struct aac_fib_context *ctx, caddr_t uptr) { int current, error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); current = ctx->ctx_idx; if (current == sc->aifq_idx && !ctx->ctx_wrap) { /* empty */ return (EAGAIN); } error = copyout(&sc->aac_aifq[current], (void *)uptr, sizeof(struct aac_fib)); if (error) device_printf(sc->aac_dev, "aac_return_aif: copyout returned %d\n", error); else { ctx->ctx_wrap = 0; ctx->ctx_idx = (current + 1) % AAC_AIFQ_LENGTH; } return(error); } static int aac_get_pci_info(struct aac_softc *sc, caddr_t uptr) { struct aac_pci_info { u_int32_t bus; u_int32_t slot; } pciinf; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); pciinf.bus = pci_get_bus(sc->aac_dev); pciinf.slot = pci_get_slot(sc->aac_dev); error = copyout((caddr_t)&pciinf, uptr, sizeof(struct aac_pci_info)); return (error); } static int aac_supported_features(struct aac_softc *sc, caddr_t uptr) { struct aac_features f; int error; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); if ((error = copyin(uptr, &f, sizeof (f))) != 0) return (error); /* * When the management driver receives FSACTL_GET_FEATURES ioctl with * ALL zero in the featuresState, the driver will return the current * state of all the supported features, the data field will not be * valid. * When the management driver receives FSACTL_GET_FEATURES ioctl with * a specific bit set in the featuresState, the driver will return the * current state of this specific feature and whatever data that are * associated with the feature in the data field or perform whatever * action needed indicates in the data field. */ if (f.feat.fValue == 0) { f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; f.feat.fBits.JBODSupport = 1; /* TODO: In the future, add other features state here as well */ } else { if (f.feat.fBits.largeLBA) f.feat.fBits.largeLBA = (sc->flags & AAC_FLAGS_LBA_64BIT) ? 1 : 0; /* TODO: Add other features state and data in the future */ } error = copyout(&f, uptr, sizeof (f)); return (error); } /* * Give the userland some information about the container. The AAC arch * expects the driver to be a SCSI passthrough type driver, so it expects * the containers to have b:t:l numbers. Fake it. */ static int aac_query_disk(struct aac_softc *sc, caddr_t uptr) { struct aac_query_disk query_disk; struct aac_container *co; int error, id; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_lock(&sc->aac_io_lock); error = copyin(uptr, (caddr_t)&query_disk, sizeof(struct aac_query_disk)); if (error) { mtx_unlock(&sc->aac_io_lock); return (error); } id = query_disk.ContainerNumber; if (id == -1) { mtx_unlock(&sc->aac_io_lock); return (EINVAL); } TAILQ_FOREACH(co, &sc->aac_container_tqh, co_link) { if (co->co_mntobj.ObjectId == id) break; } if (co == NULL) { query_disk.Valid = 0; query_disk.Locked = 0; query_disk.Deleted = 1; /* XXX is this right? */ } else { query_disk.Valid = 1; query_disk.Locked = 1; query_disk.Deleted = 0; query_disk.Bus = device_get_unit(sc->aac_dev); query_disk.Target = 0; query_disk.Lun = 0; query_disk.UnMapped = 0; } error = copyout((caddr_t)&query_disk, uptr, sizeof(struct aac_query_disk)); mtx_unlock(&sc->aac_io_lock); return (error); } static void aac_container_bus(struct aac_softc *sc) { struct aac_sim *sim; device_t child; sim =(struct aac_sim *)malloc(sizeof(struct aac_sim), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (sim == NULL) { device_printf(sc->aac_dev, "No memory to add container bus\n"); panic("Out of memory?!"); } child = device_add_child(sc->aac_dev, "aacraidp", -1); if (child == NULL) { device_printf(sc->aac_dev, "device_add_child failed for container bus\n"); free(sim, M_AACRAIDBUF); panic("Out of memory?!"); } sim->TargetsPerBus = AAC_MAX_CONTAINERS; sim->BusNumber = 0; sim->BusType = CONTAINER_BUS; sim->InitiatorBusId = -1; sim->aac_sc = sc; sim->sim_dev = child; sim->aac_cam = NULL; device_set_ivars(child, sim); device_set_desc(child, "Container Bus"); TAILQ_INSERT_TAIL(&sc->aac_sim_tqh, sim, sim_link); /* device_set_desc(child, aac_describe_code(aac_container_types, mir->MntTable[0].VolType)); */ bus_generic_attach(sc->aac_dev); } static void aac_get_bus_info(struct aac_softc *sc) { struct aac_fib *fib; struct aac_ctcfg *c_cmd; struct aac_ctcfg_resp *c_resp; struct aac_vmioctl *vmi; struct aac_vmi_businf_resp *vmi_resp; struct aac_getbusinf businfo; struct aac_sim *caminf; device_t child; int i, error; mtx_lock(&sc->aac_io_lock); aac_alloc_sync_fib(sc, &fib); c_cmd = (struct aac_ctcfg *)&fib->data[0]; bzero(c_cmd, sizeof(struct aac_ctcfg)); c_cmd->Command = VM_ContainerConfig; c_cmd->cmd = CT_GET_SCSI_METHOD; c_cmd->param = 0; error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_ctcfg)); if (error) { device_printf(sc->aac_dev, "Error %d sending " "VM_ContainerConfig command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } c_resp = (struct aac_ctcfg_resp *)&fib->data[0]; if (c_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_ContainerConfig returned 0x%x\n", c_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } sc->scsi_method_id = c_resp->param; vmi = (struct aac_vmioctl *)&fib->data[0]; bzero(vmi, sizeof(struct aac_vmioctl)); vmi->Command = VM_Ioctl; vmi->ObjType = FT_DRIVE; vmi->MethId = sc->scsi_method_id; vmi->ObjId = 0; vmi->IoctlCmd = GetBusInfo; error = aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof(struct aac_vmi_businf_resp)); if (error) { device_printf(sc->aac_dev, "Error %d sending VMIoctl command\n", error); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } vmi_resp = (struct aac_vmi_businf_resp *)&fib->data[0]; if (vmi_resp->Status != ST_OK) { device_printf(sc->aac_dev, "VM_Ioctl returned %d\n", vmi_resp->Status); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); return; } bcopy(&vmi_resp->BusInf, &businfo, sizeof(struct aac_getbusinf)); aac_release_sync_fib(sc); mtx_unlock(&sc->aac_io_lock); for (i = 0; i < businfo.BusCount; i++) { if (businfo.BusValid[i] != AAC_BUS_VALID) continue; caminf = (struct aac_sim *)malloc( sizeof(struct aac_sim), M_AACRAIDBUF, M_NOWAIT | M_ZERO); if (caminf == NULL) { device_printf(sc->aac_dev, "No memory to add passthrough bus %d\n", i); break; } child = device_add_child(sc->aac_dev, "aacraidp", -1); if (child == NULL) { device_printf(sc->aac_dev, "device_add_child failed for passthrough bus %d\n", i); free(caminf, M_AACRAIDBUF); break; } caminf->TargetsPerBus = businfo.TargetsPerBus; caminf->BusNumber = i+1; caminf->BusType = PASSTHROUGH_BUS; caminf->InitiatorBusId = businfo.InitiatorBusId[i]; caminf->aac_sc = sc; caminf->sim_dev = child; caminf->aac_cam = NULL; device_set_ivars(child, caminf); device_set_desc(child, "SCSI Passthrough Bus"); TAILQ_INSERT_TAIL(&sc->aac_sim_tqh, caminf, sim_link); } } /* * Check to see if the kernel is up and running. If we are in a * BlinkLED state, return the BlinkLED code. */ static u_int32_t aac_check_adapter_health(struct aac_softc *sc, u_int8_t *bled) { u_int32_t ret; ret = AAC_GET_FWSTATUS(sc); if (ret & AAC_UP_AND_RUNNING) ret = 0; else if (ret & AAC_KERNEL_PANIC && bled) *bled = (ret >> 16) & 0xff; return (ret); } /* * Once do an IOP reset, basically have to re-initialize the card as * if coming up from a cold boot, and the driver is responsible for * any IO that was outstanding to the adapter at the time of the IOP * RESET. And prepare the driver for IOP RESET by making the init code * modular with the ability to call it from multiple places. */ static int aac_reset_adapter(struct aac_softc *sc) { struct aac_command *cm; struct aac_fib *fib; struct aac_pause_command *pc; u_int32_t status, reset_mask, waitCount, max_msix_orig; int msi_enabled_orig; fwprintf(sc, HBA_FLAGS_DBG_FUNCTION_ENTRY_B, ""); mtx_assert(&sc->aac_io_lock, MA_OWNED); if (sc->aac_state & AAC_STATE_RESET) { device_printf(sc->aac_dev, "aac_reset_adapter() already in progress\n"); return (EINVAL); } sc->aac_state |= AAC_STATE_RESET; /* disable interrupt */ AAC_ACCESS_DEVREG(sc, AAC_DISABLE_INTERRUPT); /* * Abort all pending commands: * a) on the controller */ while ((cm = aac_dequeue_busy(sc)) != NULL) { cm->cm_flags |= AAC_CMD_RESET; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this * command */ wakeup(cm); } } /* b) in the waiting queues */ while ((cm = aac_dequeue_ready(sc)) != NULL) { cm->cm_flags |= AAC_CMD_RESET; /* is there a completion handler? */ if (cm->cm_complete != NULL) { cm->cm_complete(cm); } else { /* assume that someone is sleeping on this * command */ wakeup(cm); } } /* flush drives */ if (aac_check_adapter_health(sc, NULL) == 0) { mtx_unlock(&sc->aac_io_lock); (void) aacraid_shutdown(sc->aac_dev); mtx_lock(&sc->aac_io_lock); } /* execute IOP reset */ if (sc->aac_support_opt2 & AAC_SUPPORTED_MU_RESET) { AAC_MEM0_SETREG4(sc, AAC_IRCSR, AAC_IRCSR_CORES_RST); /* We need to wait for 5 seconds before accessing the MU again * 10000 * 100us = 1000,000us = 1000ms = 1s */ waitCount = 5 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } else if ((aacraid_sync_command(sc, AAC_IOP_RESET_ALWAYS, 0, 0, 0, 0, &status, &reset_mask)) != 0) { /* call IOP_RESET for older firmware */ if ((aacraid_sync_command(sc, AAC_IOP_RESET, 0, 0, 0, 0, &status, NULL)) != 0) { if (status == AAC_SRB_STS_INVALID_REQUEST) device_printf(sc->aac_dev, "IOP_RESET not supported\n"); else /* probably timeout */ device_printf(sc->aac_dev, "IOP_RESET failed\n"); /* unwind aac_shutdown() */ aac_alloc_sync_fib(sc, &fib); pc = (struct aac_pause_command *)&fib->data[0]; pc->Command = VM_ContainerConfig; pc->Type = CT_PAUSE_IO; pc->Timeout = 1; pc->Min = 1; pc->NoRescan = 1; (void) aac_sync_fib(sc, ContainerCommand, 0, fib, sizeof (struct aac_pause_command)); aac_release_sync_fib(sc); goto finish; } } else if (sc->aac_support_opt2 & AAC_SUPPORTED_DOORBELL_RESET) { AAC_MEM0_SETREG4(sc, AAC_SRC_IDBR, reset_mask); /* * We need to wait for 5 seconds before accessing the doorbell * again, 10000 * 100us = 1000,000us = 1000ms = 1s */ waitCount = 5 * 10000; while (waitCount) { DELAY(100); /* delay 100 microseconds */ waitCount--; } } /* * Initialize the adapter. */ max_msix_orig = sc->aac_max_msix; msi_enabled_orig = sc->msi_enabled; sc->msi_enabled = FALSE; if (aac_check_firmware(sc) != 0) goto finish; if (!(sc->flags & AAC_FLAGS_SYNC_MODE)) { sc->aac_max_msix = max_msix_orig; if (msi_enabled_orig) { sc->msi_enabled = msi_enabled_orig; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_MSIX); } mtx_unlock(&sc->aac_io_lock); aac_init(sc); mtx_lock(&sc->aac_io_lock); } finish: sc->aac_state &= ~AAC_STATE_RESET; AAC_ACCESS_DEVREG(sc, AAC_ENABLE_INTERRUPT); aacraid_startio(sc); return (0); } Index: head/sys/dev/bnxt/bnxt.h =================================================================== --- head/sys/dev/bnxt/bnxt.h (revision 320527) +++ head/sys/dev/bnxt/bnxt.h (revision 320528) @@ -1,606 +1,606 @@ /*- * Broadcom NetXtreme-C/E network driver. * * Copyright (c) 2016 Broadcom, All Rights Reserved. * The term Broadcom refers to Broadcom Limited and/or its subsidiaries * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef _BNXT_H #define _BNXT_H -#include -#include -#include +#include #include #include #include + +#include #include #include #include #include #include "hsi_struct_def.h" /* PCI IDs */ #define BROADCOM_VENDOR_ID 0x14E4 #define BCM57301 0x16c8 #define BCM57302 0x16c9 #define BCM57304 0x16ca #define BCM57311 0x16ce #define BCM57312 0x16cf #define BCM57314 0x16df #define BCM57402 0x16d0 #define BCM57402_NPAR 0x16d4 #define BCM57404 0x16d1 #define BCM57404_NPAR 0x16e7 #define BCM57406 0x16d2 #define BCM57406_NPAR 0x16e8 #define BCM57407 0x16d5 #define BCM57407_NPAR 0x16ea #define BCM57407_SFP 0x16e9 #define BCM57412 0x16d6 #define BCM57412_NPAR1 0x16de #define BCM57412_NPAR2 0x16eb #define BCM57414 0x16d7 #define BCM57414_NPAR1 0x16ec #define BCM57414_NPAR2 0x16ed #define BCM57416 0x16d8 #define BCM57416_NPAR1 0x16ee #define BCM57416_NPAR2 0x16ef #define BCM57416_SFP 0x16e3 #define BCM57417 0x16d9 #define BCM57417_NPAR1 0x16c0 #define BCM57417_NPAR2 0x16cc #define BCM57417_SFP 0x16e2 #define BCM57454 0x1614 #define BCM58700 0x16cd #define NETXTREME_C_VF1 0x16cb #define NETXTREME_C_VF2 0x16e1 #define NETXTREME_C_VF3 0x16e5 #define NETXTREME_E_VF1 0x16c1 #define NETXTREME_E_VF2 0x16d3 #define NETXTREME_E_VF3 0x16dc #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) #define BNXT_MAX_MTU 9000 #define BNXT_RSS_HASH_TYPE_TCPV4 0 #define BNXT_RSS_HASH_TYPE_UDPV4 1 #define BNXT_RSS_HASH_TYPE_IPV4 2 #define BNXT_RSS_HASH_TYPE_TCPV6 3 #define BNXT_RSS_HASH_TYPE_UDPV6 4 #define BNXT_RSS_HASH_TYPE_IPV6 5 #define BNXT_GET_RSS_PROFILE_ID(rss_hash_type) ((rss_hash_type >> 1) & 0x1F) #define BNXT_NO_MORE_WOL_FILTERS 0xFFFF #define bnxt_wol_supported(softc) ((softc)->flags & BNXT_FLAG_WOL_CAP) /* Completion related defines */ #define CMP_VALID(cmp, v_bit) \ ((!!(((struct cmpl_base *)(cmp))->info3_v & htole32(CMPL_BASE_V))) == !!(v_bit) ) #define NEXT_CP_CONS_V(ring, cons, v_bit) do { \ if (__predict_false(++(cons) == (ring)->ring_size)) \ ((cons) = 0, (v_bit) = !v_bit); \ } while (0) #define RING_NEXT(ring, idx) (__predict_false(idx + 1 == (ring)->ring_size) ? \ 0 : idx + 1) #define CMPL_PREFETCH_NEXT(cpr, idx) \ __builtin_prefetch(&((struct cmpl_base *)(cpr)->ring.vaddr)[((idx) +\ (CACHE_LINE_SIZE / sizeof(struct cmpl_base))) & \ ((cpr)->ring.ring_size - 1)]) /* * If we update the index, a write barrier is needed after the write to ensure * the completion ring has space before the RX/TX ring does. Since we can't * make the RX and AG doorbells covered by the same barrier without remapping * MSI-X vectors, we create the barrier over the enture doorbell bar. * TODO: Remap the MSI-X vectors to allow a barrier to only cover the doorbells * for a single ring group. * * A barrier of just the size of the write is used to ensure the ordering * remains correct and no writes are lost. */ #define BNXT_CP_DISABLE_DB(ring) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, 0, \ (ring)->softc->doorbell_bar.size, BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, \ htole32(CMPL_DOORBELL_KEY_CMPL | CMPL_DOORBELL_MASK)); \ } while (0) #define BNXT_CP_ENABLE_DB(ring) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, 0, \ (ring)->softc->doorbell_bar.size, BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, \ htole32(CMPL_DOORBELL_KEY_CMPL)); \ } while (0) #define BNXT_CP_IDX_ENABLE_DB(ring, cons) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, \ htole32(CMPL_DOORBELL_KEY_CMPL | CMPL_DOORBELL_IDX_VALID | \ (cons))); \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, 0, \ (ring)->softc->doorbell_bar.size, BUS_SPACE_BARRIER_WRITE); \ } while (0) #define BNXT_CP_IDX_DISABLE_DB(ring, cons) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, \ htole32(CMPL_DOORBELL_KEY_CMPL | CMPL_DOORBELL_IDX_VALID | \ CMPL_DOORBELL_MASK | (cons))); \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, 0, \ (ring)->softc->doorbell_bar.size, BUS_SPACE_BARRIER_WRITE); \ } while (0) #define BNXT_TX_DB(ring, idx) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4( \ (ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, \ (ring)->doorbell, htole32(TX_DOORBELL_KEY_TX | (idx))); \ } while (0) #define BNXT_RX_DB(ring, idx) do { \ bus_space_barrier((ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, (ring)->doorbell, 4, \ BUS_SPACE_BARRIER_WRITE); \ bus_space_write_4( \ (ring)->softc->doorbell_bar.tag, \ (ring)->softc->doorbell_bar.handle, \ (ring)->doorbell, htole32(RX_DOORBELL_KEY_RX | (idx))); \ } while (0) /* Lock macros */ #define BNXT_HWRM_LOCK_INIT(_softc, _name) \ mtx_init(&(_softc)->hwrm_lock, _name, "BNXT HWRM Lock", MTX_DEF) #define BNXT_HWRM_LOCK(_softc) mtx_lock(&(_softc)->hwrm_lock) #define BNXT_HWRM_UNLOCK(_softc) mtx_unlock(&(_softc)->hwrm_lock) #define BNXT_HWRM_LOCK_DESTROY(_softc) mtx_destroy(&(_softc)->hwrm_lock) #define BNXT_HWRM_LOCK_ASSERT(_softc) mtx_assert(&(_softc)->hwrm_lock, \ MA_OWNED) /* Chip info */ #define BNXT_TSO_SIZE UINT16_MAX /* NVRAM access */ enum bnxt_nvm_directory_type { BNX_DIR_TYPE_UNUSED = 0, BNX_DIR_TYPE_PKG_LOG = 1, BNX_DIR_TYPE_UPDATE = 2, BNX_DIR_TYPE_CHIMP_PATCH = 3, BNX_DIR_TYPE_BOOTCODE = 4, BNX_DIR_TYPE_VPD = 5, BNX_DIR_TYPE_EXP_ROM_MBA = 6, BNX_DIR_TYPE_AVS = 7, BNX_DIR_TYPE_PCIE = 8, BNX_DIR_TYPE_PORT_MACRO = 9, BNX_DIR_TYPE_APE_FW = 10, BNX_DIR_TYPE_APE_PATCH = 11, BNX_DIR_TYPE_KONG_FW = 12, BNX_DIR_TYPE_KONG_PATCH = 13, BNX_DIR_TYPE_BONO_FW = 14, BNX_DIR_TYPE_BONO_PATCH = 15, BNX_DIR_TYPE_TANG_FW = 16, BNX_DIR_TYPE_TANG_PATCH = 17, BNX_DIR_TYPE_BOOTCODE_2 = 18, BNX_DIR_TYPE_CCM = 19, BNX_DIR_TYPE_PCI_CFG = 20, BNX_DIR_TYPE_TSCF_UCODE = 21, BNX_DIR_TYPE_ISCSI_BOOT = 22, BNX_DIR_TYPE_ISCSI_BOOT_IPV6 = 24, BNX_DIR_TYPE_ISCSI_BOOT_IPV4N6 = 25, BNX_DIR_TYPE_ISCSI_BOOT_CFG6 = 26, BNX_DIR_TYPE_EXT_PHY = 27, BNX_DIR_TYPE_SHARED_CFG = 40, BNX_DIR_TYPE_PORT_CFG = 41, BNX_DIR_TYPE_FUNC_CFG = 42, BNX_DIR_TYPE_MGMT_CFG = 48, BNX_DIR_TYPE_MGMT_DATA = 49, BNX_DIR_TYPE_MGMT_WEB_DATA = 50, BNX_DIR_TYPE_MGMT_WEB_META = 51, BNX_DIR_TYPE_MGMT_EVENT_LOG = 52, BNX_DIR_TYPE_MGMT_AUDIT_LOG = 53 }; enum bnxnvm_pkglog_field_index { BNX_PKG_LOG_FIELD_IDX_INSTALLED_TIMESTAMP = 0, BNX_PKG_LOG_FIELD_IDX_PKG_DESCRIPTION = 1, BNX_PKG_LOG_FIELD_IDX_PKG_VERSION = 2, BNX_PKG_LOG_FIELD_IDX_PKG_TIMESTAMP = 3, BNX_PKG_LOG_FIELD_IDX_PKG_CHECKSUM = 4, BNX_PKG_LOG_FIELD_IDX_INSTALLED_ITEMS = 5, BNX_PKG_LOG_FIELD_IDX_INSTALLED_MASK = 6 }; #define BNX_DIR_ORDINAL_FIRST 0 #define BNX_DIR_EXT_NONE 0 struct bnxt_bar_info { struct resource *res; bus_space_tag_t tag; bus_space_handle_t handle; bus_size_t size; int rid; }; struct bnxt_link_info { uint8_t media_type; uint8_t transceiver; uint8_t phy_addr; uint8_t phy_link_status; uint8_t wire_speed; uint8_t loop_back; uint8_t link_up; uint8_t last_link_up; uint8_t duplex; uint8_t last_duplex; uint8_t pause; uint8_t last_pause; uint8_t auto_pause; uint8_t force_pause; uint8_t duplex_setting; uint8_t auto_mode; #define PHY_VER_LEN 3 uint8_t phy_ver[PHY_VER_LEN]; uint8_t phy_type; uint16_t link_speed; uint16_t support_speeds; uint16_t auto_link_speeds; uint16_t auto_link_speed; uint16_t force_link_speed; uint32_t preemphasis; /* copy of requested setting */ uint8_t autoneg; #define BNXT_AUTONEG_SPEED 1 #define BNXT_AUTONEG_FLOW_CTRL 2 uint8_t req_duplex; uint8_t req_flow_ctrl; uint16_t req_link_speed; }; enum bnxt_cp_type { BNXT_DEFAULT, BNXT_TX, BNXT_RX, BNXT_SHARED }; struct bnxt_cos_queue { uint8_t id; uint8_t profile; }; struct bnxt_func_info { uint32_t fw_fid; uint8_t mac_addr[ETHER_ADDR_LEN]; uint16_t max_rsscos_ctxs; uint16_t max_cp_rings; uint16_t max_tx_rings; uint16_t max_rx_rings; uint16_t max_hw_ring_grps; uint16_t max_irqs; uint16_t max_l2_ctxs; uint16_t max_vnics; uint16_t max_stat_ctxs; }; struct bnxt_pf_info { #define BNXT_FIRST_PF_FID 1 #define BNXT_FIRST_VF_FID 128 uint8_t port_id; uint32_t first_vf_id; uint16_t active_vfs; uint16_t max_vfs; uint32_t max_encap_records; uint32_t max_decap_records; uint32_t max_tx_em_flows; uint32_t max_tx_wm_flows; uint32_t max_rx_em_flows; uint32_t max_rx_wm_flows; unsigned long *vf_event_bmap; uint16_t hwrm_cmd_req_pages; void *hwrm_cmd_req_addr[4]; bus_addr_t hwrm_cmd_req_dma_addr[4]; }; struct bnxt_vf_info { uint16_t fw_fid; uint8_t mac_addr[ETHER_ADDR_LEN]; uint16_t max_rsscos_ctxs; uint16_t max_cp_rings; uint16_t max_tx_rings; uint16_t max_rx_rings; uint16_t max_hw_ring_grps; uint16_t max_l2_ctxs; uint16_t max_irqs; uint16_t max_vnics; uint16_t max_stat_ctxs; uint32_t vlan; #define BNXT_VF_QOS 0x1 #define BNXT_VF_SPOOFCHK 0x2 #define BNXT_VF_LINK_FORCED 0x4 #define BNXT_VF_LINK_UP 0x8 uint32_t flags; uint32_t func_flags; /* func cfg flags */ uint32_t min_tx_rate; uint32_t max_tx_rate; void *hwrm_cmd_req_addr; bus_addr_t hwrm_cmd_req_dma_addr; }; #define BNXT_FLAG_VF (1<<1) #define BNXT_PF(softc) (!((softc)->flags & BNXT_FLAG_VF)) #define BNXT_VF(softc) ((softc)->flags & BNXT_FLAG_VF) struct bnxt_vlan_tag { SLIST_ENTRY(bnxt_vlan_tag) next; uint16_t tpid; uint16_t tag; }; struct bnxt_vnic_info { uint16_t id; uint16_t def_ring_grp; uint16_t cos_rule; uint16_t lb_rule; uint16_t mru; uint32_t rx_mask; bool vlan_only; struct iflib_dma_info mc_list; int mc_list_count; #define BNXT_MAX_MC_ADDRS 16 uint32_t flags; #define BNXT_VNIC_FLAG_DEFAULT 0x01 #define BNXT_VNIC_FLAG_BD_STALL 0x02 #define BNXT_VNIC_FLAG_VLAN_STRIP 0x04 uint64_t filter_id; uint32_t flow_id; uint16_t rss_id; uint32_t rss_hash_type; uint8_t rss_hash_key[HW_HASH_KEY_SIZE]; struct iflib_dma_info rss_hash_key_tbl; struct iflib_dma_info rss_grp_tbl; SLIST_HEAD(vlan_head, bnxt_vlan_tag) vlan_tags; struct iflib_dma_info vlan_tag_list; }; struct bnxt_grp_info { uint16_t stats_ctx; uint16_t grp_id; uint16_t rx_ring_id; uint16_t cp_ring_id; uint16_t ag_ring_id; }; struct bnxt_ring { uint64_t paddr; vm_offset_t doorbell; caddr_t vaddr; struct bnxt_softc *softc; uint32_t ring_size; /* Must be a power of two */ uint16_t id; /* Logical ID */ uint16_t phys_id; }; struct bnxt_cp_ring { struct bnxt_ring ring; struct if_irq irq; uint32_t cons; bool v_bit; /* Value of valid bit */ struct ctx_hw_stats *stats; uint32_t stats_ctx_id; uint32_t last_idx; /* Used by RX rings only * set to the last read pidx */ }; struct bnxt_full_tpa_start { struct rx_tpa_start_cmpl low; struct rx_tpa_start_cmpl_hi high; }; /* All the version information for the part */ #define BNXT_VERSTR_SIZE (3*3+2+1) /* ie: "255.255.255\0" */ #define BNXT_NAME_SIZE 17 struct bnxt_ver_info { uint8_t hwrm_if_major; uint8_t hwrm_if_minor; uint8_t hwrm_if_update; char hwrm_if_ver[BNXT_VERSTR_SIZE]; char driver_hwrm_if_ver[BNXT_VERSTR_SIZE]; char hwrm_fw_ver[BNXT_VERSTR_SIZE]; char mgmt_fw_ver[BNXT_VERSTR_SIZE]; char netctrl_fw_ver[BNXT_VERSTR_SIZE]; char roce_fw_ver[BNXT_VERSTR_SIZE]; char phy_ver[BNXT_VERSTR_SIZE]; char pkg_ver[64]; char hwrm_fw_name[BNXT_NAME_SIZE]; char mgmt_fw_name[BNXT_NAME_SIZE]; char netctrl_fw_name[BNXT_NAME_SIZE]; char roce_fw_name[BNXT_NAME_SIZE]; char phy_vendor[BNXT_NAME_SIZE]; char phy_partnumber[BNXT_NAME_SIZE]; uint16_t chip_num; uint8_t chip_rev; uint8_t chip_metal; uint8_t chip_bond_id; uint8_t chip_type; uint8_t hwrm_min_major; uint8_t hwrm_min_minor; uint8_t hwrm_min_update; struct sysctl_ctx_list ver_ctx; struct sysctl_oid *ver_oid; }; struct bnxt_nvram_info { uint16_t mfg_id; uint16_t device_id; uint32_t sector_size; uint32_t size; uint32_t reserved_size; uint32_t available_size; struct sysctl_ctx_list nvm_ctx; struct sysctl_oid *nvm_oid; }; struct bnxt_softc { device_t dev; if_ctx_t ctx; if_softc_ctx_t scctx; if_shared_ctx_t sctx; struct ifmedia *media; struct bnxt_bar_info hwrm_bar; struct bnxt_bar_info doorbell_bar; struct bnxt_link_info link_info; #define BNXT_FLAG_NPAR 0x1 #define BNXT_FLAG_WOL_CAP 0x2 uint32_t flags; uint32_t total_msix; struct bnxt_func_info func; struct bnxt_pf_info pf; struct bnxt_vf_info vf; uint16_t hwrm_cmd_seq; uint32_t hwrm_cmd_timeo; /* milliseconds */ struct iflib_dma_info hwrm_cmd_resp; /* Interrupt info for HWRM */ struct if_irq irq; struct mtx hwrm_lock; uint16_t hwrm_max_req_len; #define BNXT_MAX_QUEUE 8 uint8_t max_tc; struct bnxt_cos_queue q_info[BNXT_MAX_QUEUE]; struct iflib_dma_info hw_rx_port_stats; struct iflib_dma_info hw_tx_port_stats; struct rx_port_stats *rx_port_stats; struct tx_port_stats *tx_port_stats; int num_cp_rings; struct bnxt_ring *tx_rings; struct bnxt_cp_ring *tx_cp_rings; struct iflib_dma_info tx_stats; int ntxqsets; struct bnxt_vnic_info vnic_info; struct bnxt_ring *ag_rings; struct bnxt_ring *rx_rings; struct bnxt_cp_ring *rx_cp_rings; struct bnxt_grp_info *grp_info; struct iflib_dma_info rx_stats; int nrxqsets; struct bnxt_cp_ring def_cp_ring; struct iflib_dma_info def_cp_ring_mem; struct grouptask def_cp_task; struct sysctl_ctx_list hw_stats; struct sysctl_oid *hw_stats_oid; struct bnxt_full_tpa_start *tpa_start; struct bnxt_ver_info *ver_info; struct bnxt_nvram_info *nvm_info; bool wol; uint8_t wol_filter_id; }; struct bnxt_filter_info { STAILQ_ENTRY(bnxt_filter_info) next; uint64_t fw_l2_filter_id; #define INVALID_MAC_INDEX ((uint16_t)-1) uint16_t mac_index; /* Filter Characteristics */ uint32_t flags; uint32_t enables; uint8_t l2_addr[ETHER_ADDR_LEN]; uint8_t l2_addr_mask[ETHER_ADDR_LEN]; uint16_t l2_ovlan; uint16_t l2_ovlan_mask; uint16_t l2_ivlan; uint16_t l2_ivlan_mask; uint8_t t_l2_addr[ETHER_ADDR_LEN]; uint8_t t_l2_addr_mask[ETHER_ADDR_LEN]; uint16_t t_l2_ovlan; uint16_t t_l2_ovlan_mask; uint16_t t_l2_ivlan; uint16_t t_l2_ivlan_mask; uint8_t tunnel_type; uint16_t mirror_vnic_id; uint32_t vni; uint8_t pri_hint; uint64_t l2_filter_id_hint; }; /* Function declarations */ void bnxt_report_link(struct bnxt_softc *softc); bool bnxt_check_hwrm_version(struct bnxt_softc *softc); #endif /* _BNXT_H */ Index: head/sys/dev/cxgb/cxgb_adapter.h =================================================================== --- head/sys/dev/cxgb/cxgb_adapter.h (revision 320527) +++ head/sys/dev/cxgb/cxgb_adapter.h (revision 320528) @@ -1,579 +1,578 @@ /************************************************************************** Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. $FreeBSD$ ***************************************************************************/ #ifndef _CXGB_ADAPTER_H_ #define _CXGB_ADAPTER_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include struct adapter; struct sge_qset; extern int cxgb_debug; #ifdef DEBUG_LOCKING #define MTX_INIT(lock, lockname, class, flags) \ do { \ printf("initializing %s at %s:%d\n", lockname, __FILE__, __LINE__); \ mtx_init((lock), lockname, class, flags); \ } while (0) #define MTX_DESTROY(lock) \ do { \ printf("destroying %s at %s:%d\n", (lock)->lock_object.lo_name, __FILE__, __LINE__); \ mtx_destroy((lock)); \ } while (0) #else #define MTX_INIT mtx_init #define MTX_DESTROY mtx_destroy #endif enum { LF_NO = 0, LF_MAYBE, LF_YES }; struct port_info { struct adapter *adapter; struct ifnet *ifp; int if_flags; int flags; const struct port_type_info *port_type; struct cphy phy; struct cmac mac; struct timeval last_refreshed; struct link_config link_config; struct ifmedia media; struct mtx lock; uint32_t port_id; uint32_t tx_chan; uint32_t txpkt_intf; uint32_t first_qset; uint32_t nqsets; int link_fault; uint8_t hw_addr[ETHER_ADDR_LEN]; struct callout link_check_ch; struct task link_check_task; struct task timer_reclaim_task; struct cdev *port_cdev; #define PORT_LOCK_NAME_LEN 32 #define PORT_NAME_LEN 32 char lockbuf[PORT_LOCK_NAME_LEN]; char namebuf[PORT_NAME_LEN]; } __aligned(L1_CACHE_BYTES); enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), USING_MSI = (1 << 1), USING_MSIX = (1 << 2), QUEUES_BOUND = (1 << 3), FW_UPTODATE = (1 << 4), TPS_UPTODATE = (1 << 5), CXGB_SHUTDOWN = (1 << 6), CXGB_OFLD_INIT = (1 << 7), TP_PARITY_INIT = (1 << 8), CXGB_BUSY = (1 << 9), TOM_INIT_DONE = (1 << 10), /* port flags */ DOOMED = (1 << 0), }; #define IS_DOOMED(p) (p->flags & DOOMED) #define SET_DOOMED(p) do {p->flags |= DOOMED;} while (0) #define IS_BUSY(sc) (sc->flags & CXGB_BUSY) #define SET_BUSY(sc) do {sc->flags |= CXGB_BUSY;} while (0) #define CLR_BUSY(sc) do {sc->flags &= ~CXGB_BUSY;} while (0) #define FL_Q_SIZE 4096 #define JUMBO_Q_SIZE 1024 #define RSPQ_Q_SIZE 2048 #define TX_ETH_Q_SIZE 1024 #define TX_OFLD_Q_SIZE 1024 #define TX_CTRL_Q_SIZE 256 enum { TXQ_ETH = 0, TXQ_OFLD = 1, TXQ_CTRL = 2, }; /* * work request size in bytes */ #define WR_LEN (WR_FLITS * 8) #define PIO_LEN (WR_LEN - sizeof(struct cpl_tx_pkt_lso)) struct lro_state { unsigned short enabled; struct lro_ctrl ctrl; }; #define RX_BUNDLE_SIZE 8 struct rsp_desc; struct sge_rspq { uint32_t credits; uint32_t size; uint32_t cidx; uint32_t gen; uint32_t polling; uint32_t holdoff_tmr; uint32_t next_holdoff; uint32_t imm_data; uint32_t async_notif; uint32_t cntxt_id; uint32_t offload_pkts; uint32_t pure_rsps; uint32_t unhandled_irqs; uint32_t starved; bus_addr_t phys_addr; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; struct t3_mbuf_hdr rspq_mh; struct rsp_desc *desc; struct mtx lock; #define RSPQ_NAME_LEN 32 char lockbuf[RSPQ_NAME_LEN]; uint32_t rspq_dump_start; uint32_t rspq_dump_count; }; struct rx_desc; struct rx_sw_desc; struct sge_fl { uint32_t buf_size; uint32_t credits; uint32_t size; uint32_t cidx; uint32_t pidx; uint32_t gen; uint32_t db_pending; bus_addr_t phys_addr; uint32_t cntxt_id; uint32_t empty; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; uma_zone_t zone; struct rx_desc *desc; struct rx_sw_desc *sdesc; int type; }; struct tx_desc; struct tx_sw_desc; #define TXQ_TRANSMITTING 0x1 struct sge_txq { uint64_t flags; uint32_t in_use; uint32_t size; uint32_t processed; uint32_t cleaned; uint32_t stop_thres; uint32_t cidx; uint32_t pidx; uint32_t gen; uint32_t unacked; uint32_t db_pending; struct tx_desc *desc; struct tx_sw_desc *sdesc; uint32_t token; bus_addr_t phys_addr; struct task qresume_task; struct task qreclaim_task; uint32_t cntxt_id; uint64_t stops; uint64_t restarts; bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; struct mbufq sendq; struct buf_ring *txq_mr; struct ifaltq *txq_ifq; struct callout txq_timer; struct callout txq_watchdog; uint64_t txq_coalesced; uint32_t txq_skipped; uint32_t txq_enqueued; uint32_t txq_dump_start; uint32_t txq_dump_count; uint64_t txq_direct_packets; uint64_t txq_direct_bytes; uint64_t txq_frees; struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1]; }; #define SGE_PSTAT_MAX (SGE_PSTAT_VLANINS+1) #define QS_EXITING 0x1 #define QS_RUNNING 0x2 #define QS_BOUND 0x4 #define QS_FLUSHING 0x8 #define QS_TIMEOUT 0x10 struct sge_qset { struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct lro_state lro; struct sge_txq txq[SGE_TXQ_PER_SET]; uint32_t txq_stopped; /* which Tx queues are stopped */ struct port_info *port; struct adapter *adap; int idx; /* qset # */ int qs_flags; int coalescing; struct cv qs_cv; struct mtx lock; #define QS_NAME_LEN 32 char namebuf[QS_NAME_LEN]; }; struct sge { struct sge_qset qs[SGE_QSETS]; struct mtx reg_lock; }; struct filter_info; typedef int (*cpl_handler_t)(struct sge_qset *, struct rsp_desc *, struct mbuf *); struct adapter { SLIST_ENTRY(adapter) link; device_t dev; int flags; /* PCI register resources */ int regs_rid; struct resource *regs_res; int udbs_rid; struct resource *udbs_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; uint32_t link_width; /* DMA resources */ bus_dma_tag_t parent_dmat; bus_dma_tag_t rx_dmat; bus_dma_tag_t rx_jumbo_dmat; bus_dma_tag_t tx_dmat; /* Interrupt resources */ struct resource *irq_res; int irq_rid; void *intr_tag; uint32_t msix_regs_rid; struct resource *msix_regs_res; struct resource *msix_irq_res[SGE_QSETS]; int msix_irq_rid[SGE_QSETS]; void *msix_intr_tag[SGE_QSETS]; uint8_t rxpkt_map[8]; /* maps RX_PKT interface values to port ids */ uint8_t rrss_map[SGE_QSETS]; /* revers RSS map table */ uint16_t rspq_map[RSS_TABLE_SIZE]; /* maps 7-bit cookie to qidx */ union { uint8_t fill[SGE_QSETS]; uint64_t coalesce; } u; #define tunq_fill u.fill #define tunq_coalesce u.coalesce struct filter_info *filters; /* Tasks */ struct task slow_intr_task; struct task tick_task; struct taskqueue *tq; struct callout cxgb_tick_ch; struct callout sge_timer_ch; /* Register lock for use by the hardware layer */ struct mtx mdio_lock; struct mtx elmer_lock; /* Bookkeeping for the hardware layer */ struct adapter_params params; unsigned int slow_intr_mask; unsigned long irq_stats[IRQ_NUM_STATS]; struct sge sge; struct mc7 pmrx; struct mc7 pmtx; struct mc7 cm; struct mc5 mc5; struct port_info port[MAX_NPORTS]; device_t portdev[MAX_NPORTS]; #ifdef TCP_OFFLOAD void *tom_softc; void *iwarp_softc; #endif char fw_version[64]; char port_types[MAX_NPORTS + 1]; uint32_t open_device_map; #ifdef TCP_OFFLOAD int offload_map; #endif struct mtx lock; driver_intr_t *cxgb_intr; int msi_count; #define ADAPTER_LOCK_NAME_LEN 32 char lockbuf[ADAPTER_LOCK_NAME_LEN]; char reglockbuf[ADAPTER_LOCK_NAME_LEN]; char mdiolockbuf[ADAPTER_LOCK_NAME_LEN]; char elmerlockbuf[ADAPTER_LOCK_NAME_LEN]; int timestamp; #ifdef TCP_OFFLOAD #define NUM_CPL_HANDLERS 0xa7 cpl_handler_t cpl_handler[NUM_CPL_HANDLERS] __aligned(CACHE_LINE_SIZE); #endif }; struct t3_rx_mode { uint32_t idx; struct port_info *port; }; #define MDIO_LOCK(adapter) mtx_lock(&(adapter)->mdio_lock) #define MDIO_UNLOCK(adapter) mtx_unlock(&(adapter)->mdio_lock) #define ELMR_LOCK(adapter) mtx_lock(&(adapter)->elmer_lock) #define ELMR_UNLOCK(adapter) mtx_unlock(&(adapter)->elmer_lock) #define PORT_LOCK(port) mtx_lock(&(port)->lock); #define PORT_UNLOCK(port) mtx_unlock(&(port)->lock); #define PORT_LOCK_INIT(port, name) mtx_init(&(port)->lock, name, 0, MTX_DEF) #define PORT_LOCK_DEINIT(port) mtx_destroy(&(port)->lock) #define PORT_LOCK_ASSERT_NOTOWNED(port) mtx_assert(&(port)->lock, MA_NOTOWNED) #define PORT_LOCK_ASSERT_OWNED(port) mtx_assert(&(port)->lock, MA_OWNED) #define ADAPTER_LOCK(adap) mtx_lock(&(adap)->lock); #define ADAPTER_UNLOCK(adap) mtx_unlock(&(adap)->lock); #define ADAPTER_LOCK_INIT(adap, name) mtx_init(&(adap)->lock, name, 0, MTX_DEF) #define ADAPTER_LOCK_DEINIT(adap) mtx_destroy(&(adap)->lock) #define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) mtx_assert(&(adap)->lock, MA_NOTOWNED) #define ADAPTER_LOCK_ASSERT_OWNED(adap) mtx_assert(&(adap)->lock, MA_OWNED) static __inline uint32_t t3_read_reg(adapter_t *adapter, uint32_t reg_addr) { return (bus_space_read_4(adapter->bt, adapter->bh, reg_addr)); } static __inline void t3_write_reg(adapter_t *adapter, uint32_t reg_addr, uint32_t val) { bus_space_write_4(adapter->bt, adapter->bh, reg_addr, val); } static __inline void t3_os_pci_read_config_4(adapter_t *adapter, int reg, uint32_t *val) { *val = pci_read_config(adapter->dev, reg, 4); } static __inline void t3_os_pci_write_config_4(adapter_t *adapter, int reg, uint32_t val) { pci_write_config(adapter->dev, reg, val, 4); } static __inline void t3_os_pci_read_config_2(adapter_t *adapter, int reg, uint16_t *val) { *val = pci_read_config(adapter->dev, reg, 2); } static __inline void t3_os_pci_write_config_2(adapter_t *adapter, int reg, uint16_t val) { pci_write_config(adapter->dev, reg, val, 2); } static __inline uint8_t * t3_get_next_mcaddr(struct t3_rx_mode *rm) { uint8_t *macaddr = NULL; struct ifnet *ifp = rm->port->ifp; struct ifmultiaddr *ifma; int i = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (i == rm->idx) { macaddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); break; } i++; } if_maddr_runlock(ifp); rm->idx++; return (macaddr); } static __inline void t3_init_rx_mode(struct t3_rx_mode *rm, struct port_info *port) { rm->idx = 0; rm->port = port; } static __inline struct port_info * adap2pinfo(struct adapter *adap, int idx) { return &adap->port[idx]; } int t3_os_find_pci_capability(adapter_t *adapter, int cap); int t3_os_pci_save_state(struct adapter *adapter); int t3_os_pci_restore_state(struct adapter *adapter); void t3_os_link_intr(struct port_info *); void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc, int mac_was_reset); void t3_os_phymod_changed(struct adapter *adap, int port_id); void t3_sge_err_intr_handler(adapter_t *adapter); #ifdef TCP_OFFLOAD int t3_offload_tx(struct adapter *, struct mbuf *); #endif void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]); int t3_mgmt_tx(adapter_t *adap, struct mbuf *m); int t3_register_cpl_handler(struct adapter *, int, cpl_handler_t); int t3_sge_alloc(struct adapter *); int t3_sge_free(struct adapter *); int t3_sge_alloc_qset(adapter_t *, uint32_t, int, int, const struct qset_params *, int, struct port_info *); void t3_free_sge_resources(adapter_t *, int); void t3_sge_start(adapter_t *); void t3_sge_stop(adapter_t *); void t3b_intr(void *data); void t3_intr_msi(void *data); void t3_intr_msix(void *data); int t3_sge_init_adapter(adapter_t *); int t3_sge_reset_adapter(adapter_t *); int t3_sge_init_port(struct port_info *); void t3_free_tx_desc(struct sge_qset *qs, int n, int qid); void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad); void t3_add_attach_sysctls(adapter_t *sc); void t3_add_configured_sysctls(adapter_t *sc); int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); /* * XXX figure out how we can return this to being private to sge */ #define desc_reclaimable(q) ((int)((q)->processed - (q)->cleaned - TX_MAX_DESC)) #define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) static __inline struct sge_qset * fl_to_qset(struct sge_fl *q, int qidx) { return container_of(q, struct sge_qset, fl[qidx]); } static __inline struct sge_qset * rspq_to_qset(struct sge_rspq *q) { return container_of(q, struct sge_qset, rspq); } static __inline struct sge_qset * txq_to_qset(struct sge_txq *q, int qidx) { return container_of(q, struct sge_qset, txq[qidx]); } #undef container_of #define OFFLOAD_DEVMAP_BIT (1 << MAX_NPORTS) static inline int offload_running(adapter_t *adapter) { return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); } void cxgb_tx_watchdog(void *arg); int cxgb_transmit(struct ifnet *ifp, struct mbuf *m); void cxgb_qflush(struct ifnet *ifp); void t3_iterate(void (*)(struct adapter *, void *), void *); void cxgb_refresh_stats(struct port_info *); #endif Index: head/sys/dev/cxgb/cxgb_main.c =================================================================== --- head/sys/dev/cxgb/cxgb_main.c (revision 320527) +++ head/sys/dev/cxgb/cxgb_main.c (revision 320528) @@ -1,3574 +1,3573 @@ /************************************************************************** Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PRIV_SUPPORTED #include #endif static int cxgb_setup_interrupts(adapter_t *); static void cxgb_teardown_interrupts(adapter_t *); static void cxgb_init(void *); static int cxgb_init_locked(struct port_info *); static int cxgb_uninit_locked(struct port_info *); static int cxgb_uninit_synchronized(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgb_media_change(struct ifnet *); static int cxgb_ifm_type(int); static void cxgb_build_medialist(struct port_info *); static void cxgb_media_status(struct ifnet *, struct ifmediareq *); static uint64_t cxgb_get_counter(struct ifnet *, ift_counter); static int setup_sge_qsets(adapter_t *); static void cxgb_async_intr(void *); static void cxgb_tick_handler(void *, int); static void cxgb_tick(void *); static void link_check_callout(void *); static void check_link_status(void *, int); static void setup_rss(adapter_t *sc); static int alloc_filters(struct adapter *); static int setup_hw_filters(struct adapter *); static int set_filter(struct adapter *, int, const struct filter_info *); static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int, unsigned int, u64, u64); #ifdef TCP_OFFLOAD static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *); #endif /* Attachment glue for the PCI controller end of the device. Each port of * the device is attached separately, as defined later. */ static int cxgb_controller_probe(device_t); static int cxgb_controller_attach(device_t); static int cxgb_controller_detach(device_t); static void cxgb_free(struct adapter *); static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end); static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf); static int cxgb_get_regs_len(void); static void touch_bars(device_t dev); static void cxgb_update_mac_settings(struct port_info *p); #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *, int); #endif static device_method_t cxgb_controller_methods[] = { DEVMETHOD(device_probe, cxgb_controller_probe), DEVMETHOD(device_attach, cxgb_controller_attach), DEVMETHOD(device_detach, cxgb_controller_detach), DEVMETHOD_END }; static driver_t cxgb_controller_driver = { "cxgbc", cxgb_controller_methods, sizeof(struct adapter) }; static int cxgbc_mod_event(module_t, int, void *); static devclass_t cxgb_controller_devclass; DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass, cxgbc_mod_event, 0); MODULE_VERSION(cxgbc, 1); MODULE_DEPEND(cxgbc, firmware, 1, 1, 1); /* * Attachment glue for the ports. Attachment is done directly to the * controller device. */ static int cxgb_port_probe(device_t); static int cxgb_port_attach(device_t); static int cxgb_port_detach(device_t); static device_method_t cxgb_port_methods[] = { DEVMETHOD(device_probe, cxgb_port_probe), DEVMETHOD(device_attach, cxgb_port_attach), DEVMETHOD(device_detach, cxgb_port_detach), { 0, 0 } }; static driver_t cxgb_port_driver = { "cxgb", cxgb_port_methods, 0 }; static d_ioctl_t cxgb_extension_ioctl; static d_open_t cxgb_extension_open; static d_close_t cxgb_extension_close; static struct cdevsw cxgb_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = cxgb_extension_open, .d_close = cxgb_extension_close, .d_ioctl = cxgb_extension_ioctl, .d_name = "cxgb", }; static devclass_t cxgb_port_devclass; DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0); MODULE_VERSION(cxgb, 1); static struct mtx t3_list_lock; static SLIST_HEAD(, adapter) t3_list; #ifdef TCP_OFFLOAD static struct mtx t3_uld_list_lock; static SLIST_HEAD(, uld_info) t3_uld_list; #endif /* * The driver uses the best interrupt scheme available on a platform in the * order MSI-X, MSI, legacy pin interrupts. This parameter determines which * of these schemes the driver may consider as follows: * * msi = 2: choose from among all three options * msi = 1 : only consider MSI and pin interrupts * msi = 0: force pin interrupts */ static int msi_allowed = 2; SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "CXGB driver parameters"); SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0, "MSI-X, MSI, INTx selector"); /* * The driver uses an auto-queue algorithm by default. * To disable it and force a single queue-set per port, use multiq = 0 */ static int multiq = 1; SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0, "use min(ncpus/ports, 8) queue-sets per port"); /* * By default the driver will not update the firmware unless * it was compiled against a newer version * */ static int force_fw_update = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0, "update firmware even if up to date"); int cxgb_use_16k_clusters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN, &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue "); static int nfilters = -1; SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN, &nfilters, 0, "max number of entries in the filter table"); enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, MAX_RSPQ_ENTRIES = 16384, MAX_RX_BUFFERS = 16384, MAX_RX_JUMBO_BUFFERS = 16384, MIN_TXQ_ENTRIES = 4, MIN_CTRL_TXQ_ENTRIES = 4, MIN_RSPQ_ENTRIES = 32, MIN_FL_ENTRIES = 32, MIN_FL_JUMBO_ENTRIES = 32 }; struct filter_info { u32 sip; u32 sip_mask; u32 dip; u16 sport; u16 dport; u32 vlan:12; u32 vlan_prio:3; u32 mac_hit:1; u32 mac_idx:4; u32 mac_vld:1; u32 pkt_type:2; u32 report_filter_id:1; u32 pass:1; u32 rss:1; u32 qset:3; u32 locked:1; u32 valid:1; }; enum { FILTER_NO_VLAN_PRI = 7 }; #define EEPROM_MAGIC 0x38E2F10C #define PORT_MASK ((1 << MAX_NPORTS) - 1) /* Table for probing the cards. The desc field isn't actually used */ struct cxgb_ident { uint16_t vendor; uint16_t device; int index; char *desc; } cxgb_identifiers[] = { {PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"}, {PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"}, {PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"}, {PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"}, {PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"}, {PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"}, {PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"}, {PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"}, {PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"}, {PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"}, {PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"}, {PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"}, {PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"}, {PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"}, {0, 0, 0, NULL} }; static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset); static __inline char t3rev2char(struct adapter *adapter) { char rev = 'z'; switch(adapter->params.rev) { case T3_REV_A: rev = 'a'; break; case T3_REV_B: case T3_REV_B2: rev = 'b'; break; case T3_REV_C: rev = 'c'; break; } return rev; } static struct cxgb_ident * cxgb_get_ident(device_t dev) { struct cxgb_ident *id; for (id = cxgb_identifiers; id->desc != NULL; id++) { if ((id->vendor == pci_get_vendor(dev)) && (id->device == pci_get_device(dev))) { return (id); } } return (NULL); } static const struct adapter_info * cxgb_get_adapter_info(device_t dev) { struct cxgb_ident *id; const struct adapter_info *ai; id = cxgb_get_ident(dev); if (id == NULL) return (NULL); ai = t3_get_adapter_info(id->index); return (ai); } static int cxgb_controller_probe(device_t dev) { const struct adapter_info *ai; char *ports, buf[80]; int nports; ai = cxgb_get_adapter_info(dev); if (ai == NULL) return (ENXIO); nports = ai->nports0 + ai->nports1; if (nports == 1) ports = "port"; else ports = "ports"; snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define FW_FNAME "cxgb_t3fw" #define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom" #define TPSRAM_NAME "cxgb_t3%c_protocol_sram" static int upgrade_fw(adapter_t *sc) { const struct firmware *fw; int status; u32 vers; if ((fw = firmware_get(FW_FNAME)) == NULL) { device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME); return (ENOENT); } else device_printf(sc->dev, "installing firmware on card\n"); status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize); if (status != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", status); } else { t3_get_fw_version(sc, &vers); snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } firmware_put(fw, FIRMWARE_UNLOAD); return (status); } /* * The cxgb_controller_attach function is responsible for the initial * bringup of the device. Its responsibilities include: * * 1. Determine if the device supports MSI or MSI-X. * 2. Allocate bus resources so that we can access the Base Address Register * 3. Create and initialize mutexes for the controller and its control * logic such as SGE and MDIO. * 4. Call hardware specific setup routine for the adapter as a whole. * 5. Allocate the BAR for doing MSI-X. * 6. Setup the line interrupt iff MSI-X is not supported. * 7. Create the driver's taskq. * 8. Start one task queue service thread. * 9. Check if the firmware and SRAM are up-to-date. They will be * auto-updated later (before FULL_INIT_DONE), if required. * 10. Create a child device for each MAC (port) * 11. Initialize T3 private state. * 12. Trigger the LED * 13. Setup offload iff supported. * 14. Reset/restart the tick callout. * 15. Attach sysctls * * NOTE: Any modification or deviation from this list MUST be reflected in * the above comment. Failure to do so will result in problems on various * error conditions including link flapping. */ static int cxgb_controller_attach(device_t dev) { device_t child; const struct adapter_info *ai; struct adapter *sc; int i, error = 0; uint32_t vers; int port_qsets = 1; int msi_needed, reg; char buf[80]; sc = device_get_softc(dev); sc->dev = dev; sc->msi_count = 0; ai = cxgb_get_adapter_info(dev); snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d", device_get_unit(dev)); ADAPTER_LOCK_INIT(sc, sc->lockbuf); snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d", device_get_unit(dev)); snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d", device_get_unit(dev)); snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d", device_get_unit(dev)); MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN); MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF); MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF); mtx_lock(&t3_list_lock); SLIST_INSERT_HEAD(&t3_list, sc, link); mtx_unlock(&t3_list_lock); /* find the PCIe link width and set max read request to 4KB*/ if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { uint16_t lnk; lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2); sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4; if (sc->link_width < 8 && (ai->caps & SUPPORTED_10000baseT_Full)) { device_printf(sc->dev, "PCIe x%d Link, expect reduced performance\n", sc->link_width); } pci_set_max_read_req(dev, 4096); } touch_bars(dev); pci_enable_busmaster(dev); /* * Allocate the registers and make them available to the driver. * The registers that we care about for NIC mode are in BAR 0 */ sc->regs_rid = PCIR_BAR(0); if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE)) == NULL) { device_printf(dev, "Cannot allocate BAR region 0\n"); error = ENXIO; goto out; } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); for (i = 0; i < MAX_NPORTS; i++) sc->port[i].adapter = sc; if (t3_prep_adapter(sc, ai, 1) < 0) { printf("prep adapter failed\n"); error = ENODEV; goto out; } sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = NULL; if (is_offload(sc) && ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE)) == NULL)) { device_printf(dev, "Cannot allocate BAR region 1\n"); error = ENXIO; goto out; } /* Allocate the BAR for doing MSI-X. If it succeeds, try to allocate * enough messages for the queue sets. If that fails, try falling * back to MSI. If that fails, then try falling back to the legacy * interrupt pin model. */ sc->msix_regs_rid = 0x20; if ((msi_allowed >= 2) && (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->msix_regs_rid, RF_ACTIVE)) != NULL) { if (multiq) port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus); msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1; if (pci_msix_count(dev) == 0 || (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 || sc->msi_count != msi_needed) { device_printf(dev, "alloc msix failed - " "msi_count=%d, msi_needed=%d, err=%d; " "will try MSI\n", sc->msi_count, msi_needed, error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); sc->msix_regs_res = NULL; } else { sc->flags |= USING_MSIX; sc->cxgb_intr = cxgb_async_intr; device_printf(dev, "using MSI-X interrupts (%u vectors)\n", sc->msi_count); } } if ((msi_allowed >= 1) && (sc->msi_count == 0)) { sc->msi_count = 1; if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) { device_printf(dev, "alloc msi failed - " "err=%d; will try INTx\n", error); sc->msi_count = 0; port_qsets = 1; pci_release_msi(dev); } else { sc->flags |= USING_MSI; sc->cxgb_intr = t3_intr_msi; device_printf(dev, "using MSI interrupts\n"); } } if (sc->msi_count == 0) { device_printf(dev, "using line interrupts\n"); sc->cxgb_intr = t3b_intr; } /* Create a private taskqueue thread for handling driver events */ sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq); if (sc->tq == NULL) { device_printf(dev, "failed to allocate controller task queue\n"); goto out; } taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", device_get_nameunit(dev)); TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc); /* Create a periodic callout for checking adapter status */ callout_init(&sc->cxgb_tick_ch, 1); if (t3_check_fw_version(sc) < 0 || force_fw_update) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n", FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO); sc->flags &= ~FW_UPTODATE; } else { sc->flags |= FW_UPTODATE; } if (t3_check_tpsram_version(sc) < 0) { /* * Warn user that a firmware update will be attempted in init. */ device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n", t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); sc->flags &= ~TPS_UPTODATE; } else { sc->flags |= TPS_UPTODATE; } /* * Create a child device for each MAC. The ethernet attachment * will be done in these children. */ for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi; if ((child = device_add_child(dev, "cxgb", -1)) == NULL) { device_printf(dev, "failed to add child port\n"); error = EINVAL; goto out; } pi = &sc->port[i]; pi->adapter = sc; pi->nqsets = port_qsets; pi->first_qset = i*port_qsets; pi->port_id = i; pi->tx_chan = i >= ai->nports0; pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i; sc->rxpkt_map[pi->txpkt_intf] = i; sc->port[i].tx_chan = i >= ai->nports0; sc->portdev[i] = child; device_set_softc(child, pi); } if ((error = bus_generic_attach(dev)) != 0) goto out; /* initialize sge private state */ t3_sge_init_adapter(sc); t3_led_ready(sc); error = t3_get_fw_version(sc, &vers); if (error) goto out; snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s", ai->desc, is_offload(sc) ? "R" : "", sc->params.vpd.ec, sc->params.vpd.sn); device_set_desc_copy(dev, buf); snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x", sc->params.vpd.port_type[0], sc->params.vpd.port_type[1], sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]); device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); t3_add_attach_sysctls(sc); #ifdef TCP_OFFLOAD for (i = 0; i < NUM_CPL_HANDLERS; i++) sc->cpl_handler[i] = cpl_not_handled; #endif t3_intr_clear(sc); error = cxgb_setup_interrupts(sc); out: if (error) cxgb_free(sc); return (error); } /* * The cxgb_controller_detach routine is called with the device is * unloaded from the system. */ static int cxgb_controller_detach(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); cxgb_free(sc); return (0); } /* * The cxgb_free() is called by the cxgb_controller_detach() routine * to tear down the structures that were built up in * cxgb_controller_attach(), and should be the final piece of work * done when fully unloading the driver. * * * 1. Shutting down the threads started by the cxgb_controller_attach() * routine. * 2. Stopping the lower level device and all callouts (cxgb_down_locked()). * 3. Detaching all of the port devices created during the * cxgb_controller_attach() routine. * 4. Removing the device children created via cxgb_controller_attach(). * 5. Releasing PCI resources associated with the device. * 6. Turning off the offload support, iff it was turned on. * 7. Destroying the mutexes created in cxgb_controller_attach(). * */ static void cxgb_free(struct adapter *sc) { int i, nqsets = 0; ADAPTER_LOCK(sc); sc->flags |= CXGB_SHUTDOWN; ADAPTER_UNLOCK(sc); /* * Make sure all child devices are gone. */ bus_generic_detach(sc->dev); for (i = 0; i < (sc)->params.nports; i++) { if (sc->portdev[i] && device_delete_child(sc->dev, sc->portdev[i]) != 0) device_printf(sc->dev, "failed to delete child port\n"); nqsets += sc->port[i].nqsets; } /* * At this point, it is as if cxgb_port_detach has run on all ports, and * cxgb_down has run on the adapter. All interrupts have been silenced, * all open devices have been closed. */ KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)", __func__, sc->open_device_map)); for (i = 0; i < sc->params.nports; i++) { KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!", __func__, i)); } /* * Finish off the adapter's callouts. */ callout_drain(&sc->cxgb_tick_ch); callout_drain(&sc->sge_timer_ch); /* * Release resources grabbed under FULL_INIT_DONE by cxgb_up. The * sysctls are cleaned up by the kernel linker. */ if (sc->flags & FULL_INIT_DONE) { t3_free_sge_resources(sc, nqsets); sc->flags &= ~FULL_INIT_DONE; } /* * Release all interrupt resources. */ cxgb_teardown_interrupts(sc); if (sc->flags & (USING_MSI | USING_MSIX)) { device_printf(sc->dev, "releasing msi message(s)\n"); pci_release_msi(sc->dev); } else { device_printf(sc->dev, "no msi message to release\n"); } if (sc->msix_regs_res != NULL) { bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); } /* * Free the adapter's taskqueue. */ if (sc->tq != NULL) { taskqueue_free(sc->tq); sc->tq = NULL; } free(sc->filters, M_DEVBUF); t3_sge_free(sc); if (sc->udbs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->regs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); MTX_DESTROY(&sc->mdio_lock); MTX_DESTROY(&sc->sge.reg_lock); MTX_DESTROY(&sc->elmer_lock); mtx_lock(&t3_list_lock); SLIST_REMOVE(&t3_list, sc, adapter, link); mtx_unlock(&t3_list_lock); ADAPTER_LOCK_DEINIT(sc); } /** * setup_sge_qsets - configure SGE Tx/Rx/response queues * @sc: the controller softc * * Determines how many sets of SGE queues to use and initializes them. * We support multiple queue sets per port if we have MSI-X, otherwise * just one queue set per port. */ static int setup_sge_qsets(adapter_t *sc) { int i, j, err, irq_idx = 0, qset_idx = 0; u_int ntxq = SGE_TXQ_PER_SET; if ((err = t3_sge_alloc(sc)) != 0) { device_printf(sc->dev, "t3_sge_alloc returned %d\n", err); return (err); } if (sc->params.rev > 0 && !(sc->flags & USING_MSI)) irq_idx = -1; for (i = 0; i < (sc)->params.nports; i++) { struct port_info *pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++, qset_idx++) { err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports, (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx, &sc->params.sge.qset[qset_idx], ntxq, pi); if (err) { t3_free_sge_resources(sc, qset_idx); device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err); return (err); } } } return (0); } static void cxgb_teardown_interrupts(adapter_t *sc) { int i; for (i = 0; i < SGE_QSETS; i++) { if (sc->msix_intr_tag[i] == NULL) { /* Should have been setup fully or not at all */ KASSERT(sc->msix_irq_res[i] == NULL && sc->msix_irq_rid[i] == 0, ("%s: half-done interrupt (%d).", __func__, i)); continue; } bus_teardown_intr(sc->dev, sc->msix_irq_res[i], sc->msix_intr_tag[i]); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i], sc->msix_irq_res[i]); sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL; sc->msix_irq_rid[i] = 0; } if (sc->intr_tag) { KASSERT(sc->irq_res != NULL, ("%s: half-done interrupt.", __func__)); bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } static int cxgb_setup_interrupts(adapter_t *sc) { struct resource *res; void *tag; int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX); sc->irq_rid = intr_flag ? 1 : 0; sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n", intr_flag, sc->irq_rid); err = EINVAL; sc->irq_rid = 0; } else { err = bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE | INTR_TYPE_NET, NULL, sc->cxgb_intr, sc, &sc->intr_tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt (%x, %u, %d)\n", intr_flag, sc->irq_rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = sc->intr_tag = NULL; sc->irq_rid = 0; } } /* That's all for INTx or MSI */ if (!(intr_flag & USING_MSIX) || err) return (err); bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err"); for (i = 0; i < sc->msi_count - 1; i++) { rid = i + 2; res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL) { device_printf(sc->dev, "Cannot allocate interrupt " "for message %d\n", rid); err = EINVAL; break; } err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET, NULL, t3_intr_msix, &sc->sge.qs[i], &tag); if (err) { device_printf(sc->dev, "Cannot set up interrupt " "for message %d (%d)\n", rid, err); bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res); break; } sc->msix_irq_rid[i] = rid; sc->msix_irq_res[i] = res; sc->msix_intr_tag[i] = tag; bus_describe_intr(sc->dev, res, tag, "qs%d", i); } if (err) cxgb_teardown_interrupts(sc); return (err); } static int cxgb_port_probe(device_t dev) { struct port_info *p; char buf[80]; const char *desc; p = device_get_softc(dev); desc = p->phy.desc; snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc); device_set_desc_copy(dev, buf); return (0); } static int cxgb_makedev(struct port_info *pi) { pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit, UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp)); if (pi->port_cdev == NULL) return (ENOMEM); pi->port_cdev->si_drv1 = (void *)pi; return (0); } #define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6) #define CXGB_CAP_ENABLE CXGB_CAP static int cxgb_port_attach(device_t dev) { struct port_info *p; struct ifnet *ifp; int err; struct adapter *sc; p = device_get_softc(dev); sc = p->adapter; snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d", device_get_unit(device_get_parent(dev)), p->port_id); PORT_LOCK_INIT(p, p->lockbuf); callout_init(&p->link_check_ch, 1); TASK_INIT(&p->link_check_task, 0, check_link_status, p); /* Allocate an ifnet object and set it up */ ifp = p->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = cxgb_init; ifp->if_softc = p; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = cxgb_ioctl; ifp->if_transmit = cxgb_transmit; ifp->if_qflush = cxgb_qflush; ifp->if_get_counter = cxgb_get_counter; ifp->if_capabilities = CXGB_CAP; #ifdef TCP_OFFLOAD if (is_offload(sc)) ifp->if_capabilities |= IFCAP_TOE4; #endif ifp->if_capenable = CXGB_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; /* * Disable TSO on 4-port - it isn't supported by the firmware. */ if (sc->params.nports > 2) { ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO); ifp->if_hwassist &= ~CSUM_TSO; } ether_ifattach(ifp, p->hw_addr); #ifdef DEFAULT_JUMBO if (sc->params.nports <= 2) ifp->if_mtu = ETHERMTU_JUMBO; #endif if ((err = cxgb_makedev(p)) != 0) { printf("makedev failed %d\n", err); return (err); } /* Create a list of media supported by this port */ ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change, cxgb_media_status); cxgb_build_medialist(p); t3_sge_init_port(p); return (err); } /* * cxgb_port_detach() is called via the device_detach methods when * cxgb_free() calls the bus_generic_detach. It is responsible for * removing the device from the view of the kernel, i.e. from all * interfaces lists etc. This routine is only called when the driver is * being unloaded, not when the link goes down. */ static int cxgb_port_detach(device_t dev) { struct port_info *p; struct adapter *sc; int i; p = device_get_softc(dev); sc = p->adapter; /* Tell cxgb_ioctl and if_init that the port is going away */ ADAPTER_LOCK(sc); SET_DOOMED(p); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0); SET_BUSY(sc); ADAPTER_UNLOCK(sc); if (p->port_cdev != NULL) destroy_dev(p->port_cdev); cxgb_uninit_synchronized(p); ether_ifdetach(p->ifp); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_drain(&txq->txq_watchdog); callout_drain(&txq->txq_timer); } PORT_LOCK_DEINIT(p); if_free(p->ifp); p->ifp = NULL; ADAPTER_LOCK(sc); CLR_BUSY(sc); wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (0); } void t3_fatal_err(struct adapter *sc) { u_int fw_status[4]; if (sc->flags & FULL_INIT_DONE) { t3_sge_stop(sc); t3_write_reg(sc, A_XGM_TX_CTRL, 0); t3_write_reg(sc, A_XGM_RX_CTRL, 0); t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0); t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0); t3_intr_disable(sc); } device_printf(sc->dev,"encountered fatal error, operation suspended\n"); if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", fw_status[0], fw_status[1], fw_status[2], fw_status[3]); } int t3_os_find_pci_capability(adapter_t *sc, int cap) { device_t dev; struct pci_devinfo *dinfo; pcicfgregs *cfg; uint32_t status; uint8_t ptr; dev = sc->dev; dinfo = device_get_ivars(dev); cfg = &dinfo->cfg; status = pci_read_config(dev, PCIR_STATUS, 2); if (!(status & PCIM_STATUS_CAPPRESENT)) return (0); switch (cfg->hdrtype & PCIM_HDRTYPE) { case 0: case 1: ptr = PCIR_CAP_PTR; break; case 2: ptr = PCIR_CAP_PTR_2; break; default: return (0); break; } ptr = pci_read_config(dev, ptr, 1); while (ptr != 0) { if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap) return (ptr); ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1); } return (0); } int t3_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t3_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } /** * t3_os_link_changed - handle link status changes * @sc: the adapter associated with the link change * @port_id: the port index whose link status has changed * @link_status: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @fc: the new flow-control setting * * This is the OS-dependent handler for link status changes. The OS * neutral handler takes care of most of the processing for these events, * then calls this handler for any OS-specific processing. */ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc, int mac_was_reset) { struct port_info *pi = &adapter->port[port_id]; struct ifnet *ifp = pi->ifp; /* no race with detach, so ifp should always be good */ KASSERT(ifp, ("%s: if detached.", __func__)); /* Reapply mac settings if they were lost due to a reset */ if (mac_was_reset) { PORT_LOCK(pi); cxgb_update_mac_settings(pi); PORT_UNLOCK(pi); } if (link_status) { ifp->if_baudrate = IF_Mbps(speed); if_link_state_change(ifp, LINK_STATE_UP); } else if_link_state_change(ifp, LINK_STATE_DOWN); } /** * t3_os_phymod_changed - handle PHY module changes * @phy: the PHY reporting the module change * @mod_type: new module type * * This is the OS-dependent handler for PHY module changes. It is * invoked when a PHY module is removed or inserted for any OS-specific * processing. */ void t3_os_phymod_changed(struct adapter *adap, int port_id) { static const char *mod_str[] = { NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown" }; struct port_info *pi = &adap->port[port_id]; int mod = pi->phy.modtype; if (mod != pi->media.ifm_cur->ifm_data) cxgb_build_medialist(pi); if (mod == phy_modtype_none) if_printf(pi->ifp, "PHY module unplugged\n"); else { KASSERT(mod < ARRAY_SIZE(mod_str), ("invalid PHY module type %d", mod)); if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]); } } void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]) { /* * The ifnet might not be allocated before this gets called, * as this is called early on in attach by t3_prep_adapter * save the address off in the port structure */ if (cxgb_debug) printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":"); bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN); } /* * Programs the XGMAC based on the settings in the ifnet. These settings * include MTU, MAC address, mcast addresses, etc. */ static void cxgb_update_mac_settings(struct port_info *p) { struct ifnet *ifp = p->ifp; struct t3_rx_mode rm; struct cmac *mac = &p->mac; int mtu, hwtagging; PORT_LOCK_ASSERT_OWNED(p); bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN); mtu = ifp->if_mtu; if (ifp->if_capenable & IFCAP_VLAN_MTU) mtu += ETHER_VLAN_ENCAP_LEN; hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0; t3_mac_set_mtu(mac, mtu); t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging); t3_mac_set_address(mac, 0, p->hw_addr); t3_init_rx_mode(&rm, p); t3_mac_set_rx_mode(mac, &rm); } static int await_mgmt_replies(struct adapter *adap, unsigned long init_cnt, unsigned long n) { int attempts = 5; while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) { if (!--attempts) return (ETIMEDOUT); t3_os_sleep(10); } return 0; } static int init_tp_parity(struct adapter *adap) { int i; struct mbuf *m; struct cpl_set_tcb_field *greq; unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts; t3_tp_set_offload_mode(adap, 1); for (i = 0; i < 16; i++) { struct cpl_smt_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_smt_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i)); req->iff = i; t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_l2t_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_l2t_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i)); req->params = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } for (i = 0; i < 2048; i++) { struct cpl_rte_write_req *req; m = m_gethdr(M_WAITOK, MT_DATA); req = mtod(m, struct cpl_rte_write_req *); m->m_len = m->m_pkthdr.len = sizeof(*req); memset(req, 0, sizeof(*req)); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i)); req->l2t_idx = htonl(V_L2T_W_IDX(i)); t3_mgmt_tx(adap, m); } m = m_gethdr(M_WAITOK, MT_DATA); greq = mtod(m, struct cpl_set_tcb_field *); m->m_len = m->m_pkthdr.len = sizeof(*greq); memset(greq, 0, sizeof(*greq)); greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0)); greq->mask = htobe64(1); t3_mgmt_tx(adap, m); i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1); t3_tp_set_offload_mode(adap, 0); return (i); } /** * setup_rss - configure Receive Side Steering (per-queue connection demux) * @adap: the adapter * * Sets up RSS to distribute packets to multiple receive queues. We * configure the RSS CPU lookup table to distribute to the number of HW * receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each port. * We always configure the RSS mapping for two ports since the mapping * table has plenty of entries. */ static void setup_rss(adapter_t *adap) { int i; u_int nq[2]; uint8_t cpus[SGE_QSETS + 1]; uint16_t rspq_map[RSS_TABLE_SIZE]; for (i = 0; i < SGE_QSETS; ++i) cpus[i] = i; cpus[SGE_QSETS] = 0xff; nq[0] = nq[1] = 0; for_each_port(adap, i) { const struct port_info *pi = adap2pinfo(adap, i); nq[pi->tx_chan] += pi->nqsets; } for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) { rspq_map[i] = nq[0] ? i % nq[0] : 0; rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0; } /* Calculate the reverse RSS map table */ for (i = 0; i < SGE_QSETS; ++i) adap->rrss_map[i] = 0xff; for (i = 0; i < RSS_TABLE_SIZE; ++i) if (adap->rrss_map[rspq_map[i]] == 0xff) adap->rrss_map[rspq_map[i]] = i; t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN | F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map); } static void send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo, int hi, int port) { struct mbuf *m; struct mngt_pktsched_wr *req; m = m_gethdr(M_NOWAIT, MT_DATA); if (m) { req = mtod(m, struct mngt_pktsched_wr *); req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT)); req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET; req->sched = sched; req->idx = qidx; req->min = lo; req->max = hi; req->binding = port; m->m_len = m->m_pkthdr.len = sizeof(*req); t3_mgmt_tx(adap, m); } } static void bind_qsets(adapter_t *sc) { int i, j; for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); for (j = 0; j < pi->nqsets; ++j) { send_pktsched_cmd(sc, 1, pi->first_qset + j, -1, -1, pi->tx_chan); } } } static void update_tpeeprom(struct adapter *adap) { const struct firmware *tpeeprom; uint32_t version; unsigned int major, minor; int ret, len; char rev, name[32]; t3_seeprom_read(adap, TP_SRAM_OFFSET, &version); major = G_TP_VERSION_MAJOR(version); minor = G_TP_VERSION_MINOR(version); if (major == TP_VERSION_MAJOR && minor == TP_VERSION_MINOR) return; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPEEPROM_NAME, rev); tpeeprom = firmware_get(name); if (tpeeprom == NULL) { device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n", name); return; } len = tpeeprom->datasize - 4; ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize); if (ret) goto release_tpeeprom; if (len != TP_SRAM_LEN) { device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", name, len, TP_SRAM_LEN); return; } ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize, TP_SRAM_OFFSET); if (!ret) { device_printf(adap->dev, "Protocol SRAM image updated in EEPROM to %d.%d.%d\n", TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); } else device_printf(adap->dev, "Protocol SRAM image update in EEPROM failed\n"); release_tpeeprom: firmware_put(tpeeprom, FIRMWARE_UNLOAD); return; } static int update_tpsram(struct adapter *adap) { const struct firmware *tpsram; int ret; char rev, name[32]; rev = t3rev2char(adap); snprintf(name, sizeof(name), TPSRAM_NAME, rev); update_tpeeprom(adap); tpsram = firmware_get(name); if (tpsram == NULL){ device_printf(adap->dev, "could not load TP SRAM\n"); return (EINVAL); } else device_printf(adap->dev, "updating TP SRAM\n"); ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize); if (ret) goto release_tpsram; ret = t3_set_proto_sram(adap, tpsram->data); if (ret) device_printf(adap->dev, "loading protocol SRAM failed\n"); release_tpsram: firmware_put(tpsram, FIRMWARE_UNLOAD); return ret; } /** * cxgb_up - enable the adapter * @adap: adapter being enabled * * Called when the first port is enabled, this function performs the * actions necessary to make an adapter operational, such as completing * the initialization of HW modules, and enabling interrupts. */ static int cxgb_up(struct adapter *sc) { int err = 0; unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS; KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)", __func__, sc->open_device_map)); if ((sc->flags & FULL_INIT_DONE) == 0) { ADAPTER_LOCK_ASSERT_NOTOWNED(sc); if ((sc->flags & FW_UPTODATE) == 0) if ((err = upgrade_fw(sc))) goto out; if ((sc->flags & TPS_UPTODATE) == 0) if ((err = update_tpsram(sc))) goto out; if (is_offload(sc) && nfilters != 0) { sc->params.mc5.nservers = 0; if (nfilters < 0) sc->params.mc5.nfilters = mxf; else sc->params.mc5.nfilters = min(nfilters, mxf); } err = t3_init_hw(sc, 0); if (err) goto out; t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT); t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); err = setup_sge_qsets(sc); if (err) goto out; alloc_filters(sc); setup_rss(sc); t3_add_configured_sysctls(sc); sc->flags |= FULL_INIT_DONE; } t3_intr_clear(sc); t3_sge_start(sc); t3_intr_enable(sc); if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) && is_offload(sc) && init_tp_parity(sc) == 0) sc->flags |= TP_PARITY_INIT; if (sc->flags & TP_PARITY_INIT) { t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR); t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff); } if (!(sc->flags & QUEUES_BOUND)) { bind_qsets(sc); setup_hw_filters(sc); sc->flags |= QUEUES_BOUND; } t3_sge_reset_adapter(sc); out: return (err); } /* * Called when the last open device is closed. Does NOT undo all of cxgb_up's * work. Specifically, the resources grabbed under FULL_INIT_DONE are released * during controller_detach, not here. */ static void cxgb_down(struct adapter *sc) { t3_sge_stop(sc); t3_intr_disable(sc); } /* * if_init for cxgb ports. */ static void cxgb_init(void *arg) { struct port_info *p = arg; struct adapter *sc = p->adapter; ADAPTER_LOCK(sc); cxgb_init_locked(p); /* releases adapter lock */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); } static int cxgb_init_locked(struct port_info *p) { struct adapter *sc = p->adapter; struct ifnet *ifp = p->ifp; struct cmac *mac = &p->mac; int i, rc = 0, may_sleep = 0, gave_up_lock = 0; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { gave_up_lock = 1; if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); /* * The code that runs during one-time adapter initialization can sleep * so it's important not to hold any locks across it. */ may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1; if (may_sleep) { SET_BUSY(sc); gave_up_lock = 1; ADAPTER_UNLOCK(sc); } if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0)) goto done; PORT_LOCK(p); if (isset(&sc->open_device_map, p->port_id) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(p); goto done; } t3_port_intr_enable(sc, p->port_id); if (!mac->multiport) t3_mac_init(mac); cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; PORT_UNLOCK(p); for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) { struct sge_qset *qs = &sc->sge.qs[i]; struct sge_txq *txq = &qs->txq[TXQ_ETH]; callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } /* all ok */ setbit(&sc->open_device_map, p->port_id); callout_reset(&p->link_check_ch, p->phy.caps & SUPPORTED_LINK_IRQ ? hz * 3 : hz / 4, link_check_callout, p); done: if (may_sleep) { ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); } if (gave_up_lock) wakeup_one(&sc->flags); ADAPTER_UNLOCK(sc); return (rc); } static int cxgb_uninit_locked(struct port_info *p) { struct adapter *sc = p->adapter; int rc; ADAPTER_LOCK_ASSERT_OWNED(sc); while (!IS_DOOMED(p) && IS_BUSY(sc)) { if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) { rc = EINTR; goto done; } } if (IS_DOOMED(p)) { rc = ENXIO; goto done; } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); ADAPTER_UNLOCK(sc); rc = cxgb_uninit_synchronized(p); ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup_one(&sc->flags); done: ADAPTER_UNLOCK(sc); return (rc); } /* * Called on "ifconfig down", and from port_detach */ static int cxgb_uninit_synchronized(struct port_info *pi) { struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; /* * taskqueue_drain may cause a deadlock if the adapter lock is held. */ ADAPTER_LOCK_ASSERT_NOTOWNED(sc); /* * Clear this port's bit from the open device map, and then drain all * the tasks that can access/manipulate this port's port_info or ifp. * We disable this port's interrupts here and so the slow/ext * interrupt tasks won't be enqueued. The tick task will continue to * be enqueued every second but the runs after this drain will not see * this port in the open device map. * * A well behaved task must take open_device_map into account and ignore * ports that are not open. */ clrbit(&sc->open_device_map, pi->port_id); t3_port_intr_disable(sc, pi->port_id); taskqueue_drain(sc->tq, &sc->slow_intr_task); taskqueue_drain(sc->tq, &sc->tick_task); callout_drain(&pi->link_check_ch); taskqueue_drain(sc->tq, &pi->link_check_task); PORT_LOCK(pi); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* disable pause frames */ t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0); /* Reset RX FIFO HWM */ t3_set_reg_field(sc, A_XGM_RXFIFO_CFG + pi->mac.offset, V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0); DELAY(100 * 1000); /* Wait for TXFIFO empty */ t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset, F_TXFIFO_EMPTY, 1, 20, 5); DELAY(100 * 1000); t3_mac_disable(&pi->mac, MAC_DIRECTION_RX); pi->phy.ops->power_down(&pi->phy, 1); PORT_UNLOCK(pi); pi->link_config.link_ok = 0; t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0); if (sc->open_device_map == 0) cxgb_down(pi->adapter); return (0); } /* * Mark lro enabled or disabled in all qsets for this port */ static int cxgb_set_lro(struct port_info *p, int enabled) { int i; struct adapter *adp = p->adapter; struct sge_qset *q; for (i = 0; i < p->nqsets; i++) { q = &adp->sge.qs[p->first_qset + i]; q->lro.enabled = (enabled != 0); } return (0); } static int cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct port_info *p = ifp->if_softc; struct adapter *sc = p->adapter; struct ifreq *ifr = (struct ifreq *)data; int flags, error = 0, mtu; uint32_t mask; switch (command) { case SIOCSIFMTU: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) { fail: ADAPTER_UNLOCK(sc); return (error); } mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { error = EINVAL; } else { ifp->if_mtu = mtu; PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFFLAGS: ADAPTER_LOCK(sc); if (IS_DOOMED(p)) { error = ENXIO; goto fail; } if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = p->if_flags; if (((ifp->if_flags ^ flags) & IFF_PROMISC) || ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) { if (IS_BUSY(sc)) { error = EBUSY; goto fail; } PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); } else error = cxgb_init_locked(p); p->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) error = cxgb_uninit_locked(p); else ADAPTER_UNLOCK(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } ADAPTER_UNLOCK(sc); break; case SIOCSIFCAP: ADAPTER_LOCK(sc); error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); if (error) goto fail; mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; /* Safe to do this even if cxgb_up not called yet */ cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO); } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE4) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4; error = toe_capability(p, enable); if (error == 0) ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { PORT_LOCK(p); cxgb_update_mac_settings(p); PORT_UNLOCK(p); } } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif ADAPTER_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &p->media, command); break; default: error = ether_ioctl(ifp, command, data); } return (error); } static int cxgb_media_change(struct ifnet *ifp) { return (EOPNOTSUPP); } /* * Translates phy->modtype to the correct Ethernet media subtype. */ static int cxgb_ifm_type(int mod) { switch (mod) { case phy_modtype_sr: return (IFM_10G_SR); case phy_modtype_lr: return (IFM_10G_LR); case phy_modtype_lrm: return (IFM_10G_LRM); case phy_modtype_twinax: return (IFM_10G_TWINAX); case phy_modtype_twinax_long: return (IFM_10G_TWINAX_LONG); case phy_modtype_none: return (IFM_NONE); case phy_modtype_unknown: return (IFM_UNKNOWN); } KASSERT(0, ("%s: modtype %d unknown", __func__, mod)); return (IFM_UNKNOWN); } /* * Rebuilds the ifmedia list for this port, and sets the current media. */ static void cxgb_build_medialist(struct port_info *p) { struct cphy *phy = &p->phy; struct ifmedia *media = &p->media; int mod = phy->modtype; int m = IFM_ETHER | IFM_FDX; PORT_LOCK(p); ifmedia_removeall(media); if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) { /* Copper (RJ45) */ if (phy->caps & SUPPORTED_10000baseT_Full) ifmedia_add(media, m | IFM_10G_T, mod, NULL); if (phy->caps & SUPPORTED_1000baseT_Full) ifmedia_add(media, m | IFM_1000_T, mod, NULL); if (phy->caps & SUPPORTED_100baseT_Full) ifmedia_add(media, m | IFM_100_TX, mod, NULL); if (phy->caps & SUPPORTED_10baseT_Full) ifmedia_add(media, m | IFM_10_T, mod, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); } else if (phy->caps & SUPPORTED_TP) { /* Copper (CX4) */ KASSERT(phy->caps & SUPPORTED_10000baseT_Full, ("%s: unexpected cap 0x%x", __func__, phy->caps)); ifmedia_add(media, m | IFM_10G_CX4, mod, NULL); ifmedia_set(media, m | IFM_10G_CX4); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_10000baseT_Full) { /* 10G optical (but includes SFP+ twinax) */ m |= cxgb_ifm_type(mod); if (IFM_SUBTYPE(m) == IFM_NONE) m &= ~IFM_FDX; ifmedia_add(media, m, mod, NULL); ifmedia_set(media, m); } else if (phy->caps & SUPPORTED_FIBRE && phy->caps & SUPPORTED_1000baseT_Full) { /* 1G optical */ /* XXX: Lie and claim to be SX, could actually be any 1G-X */ ifmedia_add(media, m | IFM_1000_SX, mod, NULL); ifmedia_set(media, m | IFM_1000_SX); } else { KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__, phy->caps)); } PORT_UNLOCK(p); } static void cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct port_info *p = ifp->if_softc; struct ifmedia_entry *cur = p->media.ifm_cur; int speed = p->link_config.speed; if (cur->ifm_data != p->phy.modtype) { cxgb_build_medialist(p); cur = p->media.ifm_cur; } ifmr->ifm_status = IFM_AVALID; if (!p->link_config.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* * active and current will differ iff current media is autoselect. That * can happen only for copper RJ45. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg, ("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps)); ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == SPEED_10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == SPEED_1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == SPEED_100) ifmr->ifm_active |= IFM_100_TX; else if (speed == SPEED_10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static uint64_t cxgb_get_counter(struct ifnet *ifp, ift_counter c) { struct port_info *pi = ifp->if_softc; struct adapter *sc = pi->adapter; struct cmac *mac = &pi->mac; struct mac_stats *mstats = &mac->stats; cxgb_refresh_stats(pi); switch (c) { case IFCOUNTER_IPACKETS: return (mstats->rx_frames); case IFCOUNTER_IERRORS: return (mstats->rx_jabber + mstats->rx_data_errs + mstats->rx_sequence_errs + mstats->rx_runt + mstats->rx_too_long + mstats->rx_mac_internal_errs + mstats->rx_short + mstats->rx_fcs_errs); case IFCOUNTER_OPACKETS: return (mstats->tx_frames); case IFCOUNTER_OERRORS: return (mstats->tx_excess_collisions + mstats->tx_underrun + mstats->tx_len_errs + mstats->tx_mac_internal_errs + mstats->tx_excess_deferral + mstats->tx_fcs_errs); case IFCOUNTER_COLLISIONS: return (mstats->tx_total_collisions); case IFCOUNTER_IBYTES: return (mstats->rx_octets); case IFCOUNTER_OBYTES: return (mstats->tx_octets); case IFCOUNTER_IMCASTS: return (mstats->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (mstats->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (mstats->rx_cong_drops); case IFCOUNTER_OQDROPS: { int i; uint64_t drops; drops = 0; if (sc->flags & FULL_INIT_DONE) { for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++) drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops; } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static void cxgb_async_intr(void *data) { adapter_t *sc = data; t3_write_reg(sc, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); taskqueue_enqueue(sc->tq, &sc->slow_intr_task); } static void link_check_callout(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; taskqueue_enqueue(sc->tq, &pi->link_check_task); } static void check_link_status(void *arg, int pending) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; if (!isset(&sc->open_device_map, pi->port_id)) return; t3_link_changed(sc, pi->port_id); if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) || pi->link_config.link_ok == 0) callout_reset(&pi->link_check_ch, hz, link_check_callout, pi); } void t3_os_link_intr(struct port_info *pi) { /* * Schedule a link check in the near future. If the link is flapping * rapidly we'll keep resetting the callout and delaying the check until * things stabilize a bit. */ callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi); } static void check_t3b2_mac(struct adapter *sc) { int i; if (sc->flags & CXGB_SHUTDOWN) return; for_each_port(sc, i) { struct port_info *p = &sc->port[i]; int status; #ifdef INVARIANTS struct ifnet *ifp = p->ifp; #endif if (!isset(&sc->open_device_map, p->port_id) || p->link_fault || !p->link_config.link_ok) continue; KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, ("%s: state mismatch (drv_flags %x, device_map %x)", __func__, ifp->if_drv_flags, sc->open_device_map)); PORT_LOCK(p); status = t3b2_mac_watchdog_task(&p->mac); if (status == 1) p->mac.stats.num_toggled++; else if (status == 2) { struct cmac *mac = &p->mac; cxgb_update_mac_settings(p); t3_link_start(&p->phy, mac, &p->link_config); t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); t3_port_intr_enable(sc, p->port_id); p->mac.stats.num_resets++; } PORT_UNLOCK(p); } } static void cxgb_tick(void *arg) { adapter_t *sc = (adapter_t *)arg; if (sc->flags & CXGB_SHUTDOWN) return; taskqueue_enqueue(sc->tq, &sc->tick_task); callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); } void cxgb_refresh_stats(struct port_info *pi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); PORT_UNLOCK(pi); getmicrotime(&pi->last_refreshed); } static void cxgb_tick_handler(void *arg, int count) { adapter_t *sc = (adapter_t *)arg; const struct adapter_params *p = &sc->params; int i; uint32_t cause, reset; if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE)) return; if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) check_t3b2_mac(sc); cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY); if (cause) { struct sge_qset *qs = &sc->sge.qs[0]; uint32_t mask, v; v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00; mask = 1; for (i = 0; i < SGE_QSETS; i++) { if (v & mask) qs[i].rspq.starved++; mask <<= 1; } mask <<= SGE_QSETS; /* skip RSPQXDISABLED */ for (i = 0; i < SGE_QSETS * 2; i++) { if (v & mask) { qs[i / 2].fl[i % 2].empty++; } mask <<= 1; } /* clear */ t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v); t3_write_reg(sc, A_SG_INT_CAUSE, cause); } for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct cmac *mac = &pi->mac; if (!isset(&sc->open_device_map, pi->port_id)) continue; cxgb_refresh_stats(pi); if (mac->multiport) continue; /* Count rx fifo overflows, once per second */ cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset); reset = 0; if (cause & F_RXFIFO_OVERFLOW) { mac->stats.rx_fifo_ovfl++; reset |= F_RXFIFO_OVERFLOW; } t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset); } } static void touch_bars(device_t dev) { /* * Don't enable yet */ #if !defined(__LP64__) && 0 u32 v; pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v); pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v); pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v); #endif } static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset) { uint8_t *buf; int err = 0; u32 aligned_offset, aligned_len, *p; struct adapter *adapter = pi->adapter; aligned_offset = offset & ~3; aligned_len = (len + (offset & 3) + 3) & ~3; if (aligned_offset != offset || aligned_len != len) { buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO); if (!buf) return (ENOMEM); err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf); if (!err && aligned_len > 4) err = t3_seeprom_read(adapter, aligned_offset + aligned_len - 4, (u32 *)&buf[aligned_len - 4]); if (err) goto out; memcpy(buf + (offset & 3), data, len); } else buf = (uint8_t *)(uintptr_t)data; err = t3_seeprom_wp(adapter, 0); if (err) goto out; for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) { err = t3_seeprom_write(adapter, aligned_offset, *p); aligned_offset += 4; } if (!err) err = t3_seeprom_wp(adapter, 1); out: if (buf != data) free(buf, M_DEVBUF); return err; } static int in_range(int val, int lo, int hi) { return val < 0 || (val <= hi && val >= lo); } static int cxgb_extension_open(struct cdev *dev, int flags, int fmp, struct thread *td) { return (0); } static int cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td) { return (0); } static int cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int mmd, error = 0; struct port_info *pi = dev->si_drv1; adapter_t *sc = pi->adapter; #ifdef PRIV_SUPPORTED if (priv_check(td, PRIV_DRIVER)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #else if (suser(td)) { if (cxgb_debug) printf("user does not have access to privileged ioctls\n"); return (EPERM); } #endif switch (cmd) { case CHELSIO_GET_MIIREG: { uint32_t val; struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_read) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, &val); } else error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, &val); if (error == 0) mid->val_out = val; break; } case CHELSIO_SET_MIIREG: { struct cphy *phy = &pi->phy; struct ch_mii_data *mid = (struct ch_mii_data *)data; if (!phy->mdio_write) return (EOPNOTSUPP); if (is_10G(sc)) { mmd = mid->phy_id >> 8; if (!mmd) mmd = MDIO_DEV_PCS; else if (mmd > MDIO_DEV_VEND2) return (EINVAL); error = phy->mdio_write(sc, mid->phy_id & 0x1f, mmd, mid->reg_num, mid->val_in); } else error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0, mid->reg_num & 0x1f, mid->val_in); break; } case CHELSIO_SETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); t3_write_reg(sc, edata->addr, edata->val); break; } case CHELSIO_GETREG: { struct ch_reg *edata = (struct ch_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); edata->val = t3_read_reg(sc, edata->addr); break; } case CHELSIO_GET_SGE_CONTEXT: { struct ch_cntxt *ecntxt = (struct ch_cntxt *)data; mtx_lock_spin(&sc->sge.reg_lock); switch (ecntxt->cntxt_type) { case CNTXT_TYPE_EGRESS: error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_FL: error = -t3_sge_read_fl(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_RSP: error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id, ecntxt->data); break; case CNTXT_TYPE_CQ: error = -t3_sge_read_cq(sc, ecntxt->cntxt_id, ecntxt->data); break; default: error = EINVAL; break; } mtx_unlock_spin(&sc->sge.reg_lock); break; } case CHELSIO_GET_SGE_DESC: { struct ch_desc *edesc = (struct ch_desc *)data; int ret; if (edesc->queue_num >= SGE_QSETS * 6) return (EINVAL); ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6], edesc->queue_num % 6, edesc->idx, edesc->data); if (ret < 0) return (EINVAL); edesc->size = ret; break; } case CHELSIO_GET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; int q1 = pi->first_qset; int nqsets = pi->nqsets; int i; if (t->qset_idx >= nqsets) return EINVAL; i = q1 + t->qset_idx; q = &sc->params.sge.qset[i]; t->rspq_size = q->rspq_size; t->txq_size[0] = q->txq_size[0]; t->txq_size[1] = q->txq_size[1]; t->txq_size[2] = q->txq_size[2]; t->fl_size[0] = q->fl_size; t->fl_size[1] = q->jumbo_size; t->polling = q->polling; t->lro = q->lro; t->intr_lat = q->coalesce_usecs; t->cong_thres = q->cong_thres; t->qnum = i; if ((sc->flags & FULL_INIT_DONE) == 0) t->vector = 0; else if (sc->flags & USING_MSIX) t->vector = rman_get_start(sc->msix_irq_res[i]); else t->vector = rman_get_start(sc->irq_res); break; } case CHELSIO_GET_QSET_NUM: { struct ch_reg *edata = (struct ch_reg *)data; edata->val = pi->nqsets; break; } case CHELSIO_LOAD_FW: { uint8_t *fw_data; uint32_t vers; struct ch_mem_range *t = (struct ch_mem_range *)data; /* * You're allowed to load a firmware only before FULL_INIT_DONE * * FW_UPTODATE is also set so the rest of the initialization * will not overwrite what was loaded here. This gives you the * flexibility to load any firmware (and maybe shoot yourself in * the foot). */ ADAPTER_LOCK(sc); if (sc->open_device_map || sc->flags & FULL_INIT_DONE) { ADAPTER_UNLOCK(sc); return (EBUSY); } fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!fw_data) error = ENOMEM; else error = copyin(t->buf, fw_data, t->len); if (!error) error = -t3_load_fw(sc, fw_data, t->len); if (t3_get_fw_version(sc, &vers) == 0) { snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d", G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); } if (!error) sc->flags |= FW_UPTODATE; free(fw_data, M_DEVBUF); ADAPTER_UNLOCK(sc); break; } case CHELSIO_LOAD_BOOT: { uint8_t *boot_data; struct ch_mem_range *t = (struct ch_mem_range *)data; boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT); if (!boot_data) return ENOMEM; error = copyin(t->buf, boot_data, t->len); if (!error) error = -t3_load_boot(sc, boot_data, t->len); free(boot_data, M_DEVBUF); break; } case CHELSIO_GET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); m->tx_pg_sz = p->tx_pg_size; m->tx_num_pg = p->tx_num_pgs; m->rx_pg_sz = p->rx_pg_size; m->rx_num_pg = p->rx_num_pgs; m->pm_total = p->pmtx_size + p->chan_rx_size * p->nchan; break; } case CHELSIO_SET_PM: { struct ch_pm *m = (struct ch_pm *)data; struct tp_params *p = &sc->params.tp; if (!is_offload(sc)) return (EOPNOTSUPP); if (sc->flags & FULL_INIT_DONE) return (EBUSY); if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) || !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1))) return (EINVAL); /* not power of 2 */ if (!(m->rx_pg_sz & 0x14000)) return (EINVAL); /* not 16KB or 64KB */ if (!(m->tx_pg_sz & 0x1554000)) return (EINVAL); if (m->tx_num_pg == -1) m->tx_num_pg = p->tx_num_pgs; if (m->rx_num_pg == -1) m->rx_num_pg = p->rx_num_pgs; if (m->tx_num_pg % 24 || m->rx_num_pg % 24) return (EINVAL); if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size || m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size) return (EINVAL); p->rx_pg_size = m->rx_pg_sz; p->tx_pg_size = m->tx_pg_sz; p->rx_num_pgs = m->rx_num_pg; p->tx_num_pgs = m->tx_num_pg; break; } case CHELSIO_SETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; int i; if (!is_offload(sc)) return (EOPNOTSUPP); if (offload_running(sc)) return (EBUSY); if (m->nmtus != NMTUS) return (EINVAL); if (m->mtus[0] < 81) /* accommodate SACK */ return (EINVAL); /* * MTUs must be in ascending order */ for (i = 1; i < NMTUS; ++i) if (m->mtus[i] < m->mtus[i - 1]) return (EINVAL); memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus)); break; } case CHELSIO_GETMTUTAB: { struct ch_mtus *m = (struct ch_mtus *)data; if (!is_offload(sc)) return (EOPNOTSUPP); memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus)); m->nmtus = NMTUS; break; } case CHELSIO_GET_MEM: { struct ch_mem_range *t = (struct ch_mem_range *)data; struct mc7 *mem; uint8_t *useraddr; u64 buf[32]; /* * Use these to avoid modifying len/addr in the return * struct */ uint32_t len = t->len, addr = t->addr; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need the memory controllers */ if ((addr & 0x7) || (len & 0x7)) return (EINVAL); if (t->mem_id == MEM_CM) mem = &sc->cm; else if (t->mem_id == MEM_PMRX) mem = &sc->pmrx; else if (t->mem_id == MEM_PMTX) mem = &sc->pmtx; else return (EINVAL); /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision */ t->version = 3 | (sc->params.rev << 10); /* * Read 256 bytes at a time as len can be large and we don't * want to use huge intermediate buffers. */ useraddr = (uint8_t *)t->buf; while (len) { unsigned int chunk = min(len, sizeof(buf)); error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf); if (error) return (-error); if (copyout(buf, useraddr, chunk)) return (EFAULT); useraddr += chunk; addr += chunk; len -= chunk; } break; } case CHELSIO_READ_TCAM_WORD: { struct ch_tcam_word *t = (struct ch_tcam_word *)data; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EIO); /* need MC5 */ return -t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf); break; } case CHELSIO_SET_TRACE_FILTER: { struct ch_trace *t = (struct ch_trace *)data; const struct trace_params *tp; tp = (const struct trace_params *)&t->sip; if (t->config_tx) t3_config_trace_filter(sc, tp, 0, t->invert_match, t->trace_tx); if (t->config_rx) t3_config_trace_filter(sc, tp, 1, t->invert_match, t->trace_rx); break; } case CHELSIO_SET_PKTSCHED: { struct ch_pktsched_params *p = (struct ch_pktsched_params *)data; if (sc->open_device_map == 0) return (EAGAIN); send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max, p->binding); break; } case CHELSIO_IFCONF_GETREGS: { struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data; int reglen = cxgb_get_regs_len(); uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (regs->len > reglen) regs->len = reglen; else if (regs->len < reglen) error = ENOBUFS; if (!error) { cxgb_get_regs(sc, regs, buf); error = copyout(buf, regs->data, reglen); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_HW_SCHED: { struct ch_hw_sched *t = (struct ch_hw_sched *)data; unsigned int ticks_per_usec = core_ticks_per_usec(sc); if ((sc->flags & FULL_INIT_DONE) == 0) return (EAGAIN); /* need TP to be initialized */ if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) || !in_range(t->channel, 0, 1) || !in_range(t->kbps, 0, 10000000) || !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) || !in_range(t->flow_ipg, 0, dack_ticks_to_usec(sc, 0x7ff))) return (EINVAL); if (t->kbps >= 0) { error = t3_config_sched(sc, t->kbps, t->sched); if (error < 0) return (-error); } if (t->class_ipg >= 0) t3_set_sched_ipg(sc, t->sched, t->class_ipg); if (t->flow_ipg >= 0) { t->flow_ipg *= 1000; /* us -> ns */ t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1); } if (t->mode >= 0) { int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched); t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, bit, t->mode ? bit : 0); } if (t->channel >= 0) t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP, 1 << t->sched, t->channel << t->sched); break; } case CHELSIO_GET_EEPROM: { int i; struct ch_eeprom *e = (struct ch_eeprom *)data; uint8_t *buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } e->magic = EEPROM_MAGIC; for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4) error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]); if (!error) error = copyout(buf + e->offset, e->data, e->len); free(buf, M_DEVBUF); break; } case CHELSIO_CLEAR_STATS: { if (!(sc->flags & FULL_INIT_DONE)) return EAGAIN; PORT_LOCK(pi); t3_mac_update_stats(&pi->mac); memset(&pi->mac.stats, 0, sizeof(pi->mac.stats)); PORT_UNLOCK(pi); break; } case CHELSIO_GET_UP_LA: { struct ch_up_la *la = (struct ch_up_la *)data; uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT); if (buf == NULL) { return (ENOMEM); } if (la->bufsize < LA_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_la(sc, &la->stopped, &la->idx, &la->bufsize, buf); if (!error) error = copyout(buf, la->data, la->bufsize); free(buf, M_DEVBUF); break; } case CHELSIO_GET_UP_IOQS: { struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data; uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT); uint32_t *v; if (buf == NULL) { return (ENOMEM); } if (ioqs->bufsize < IOQS_BUFSIZE) error = ENOBUFS; if (!error) error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf); if (!error) { v = (uint32_t *)buf; ioqs->ioq_rx_enable = *v++; ioqs->ioq_tx_enable = *v++; ioqs->ioq_rx_status = *v++; ioqs->ioq_tx_status = *v++; error = copyout(v, ioqs->data, ioqs->bufsize); } free(buf, M_DEVBUF); break; } case CHELSIO_SET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); /* No TCAM */ if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); /* mc5 not setup yet */ if (nfilters == 0) return (EBUSY); /* TOE will use TCAM */ /* sanity checks */ if (f->filter_id >= nfilters || (f->val.dip && f->mask.dip != 0xffffffff) || (f->val.sport && f->mask.sport != 0xffff) || (f->val.dport && f->mask.dport != 0xffff) || (f->val.vlan && f->mask.vlan != 0xfff) || (f->val.vlan_prio && f->mask.vlan_prio != FILTER_NO_VLAN_PRI) || (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) || f->qset >= SGE_QSETS || sc->rrss_map[f->qset] >= RSS_TABLE_SIZE) return (EINVAL); /* Was allocated with M_WAITOK */ KASSERT(sc->filters, ("filter table NULL\n")); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); bzero(p, sizeof(*p)); p->sip = f->val.sip; p->sip_mask = f->mask.sip; p->dip = f->val.dip; p->sport = f->val.sport; p->dport = f->val.dport; p->vlan = f->mask.vlan ? f->val.vlan : 0xfff; p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) : FILTER_NO_VLAN_PRI; p->mac_hit = f->mac_hit; p->mac_vld = f->mac_addr_idx != 0xffff; p->mac_idx = f->mac_addr_idx; p->pkt_type = f->proto; p->report_filter_id = f->want_filter_id; p->pass = f->pass; p->rss = f->rss; p->qset = f->qset; error = set_filter(sc, f->filter_id, p); if (error == 0) p->valid = 1; break; } case CHELSIO_DEL_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); if (f->filter_id >= nfilters) return (EINVAL); p = &sc->filters[f->filter_id]; if (p->locked) return (EPERM); if (!p->valid) return (EFAULT); /* Read "Bad address" as "Bad index" */ bzero(p, sizeof(*p)); p->sip = p->sip_mask = 0xffffffff; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pkt_type = 1; error = set_filter(sc, f->filter_id, p); break; } case CHELSIO_GET_FILTER: { struct ch_filter *f = (struct ch_filter *)data; struct filter_info *p; unsigned int i, nfilters = sc->params.mc5.nfilters; if (!is_offload(sc)) return (EOPNOTSUPP); if (!(sc->flags & FULL_INIT_DONE)) return (EAGAIN); if (nfilters == 0 || sc->filters == NULL) return (EINVAL); i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1; for (; i < nfilters; i++) { p = &sc->filters[i]; if (!p->valid) continue; bzero(f, sizeof(*f)); f->filter_id = i; f->val.sip = p->sip; f->mask.sip = p->sip_mask; f->val.dip = p->dip; f->mask.dip = p->dip ? 0xffffffff : 0; f->val.sport = p->sport; f->mask.sport = p->sport ? 0xffff : 0; f->val.dport = p->dport; f->mask.dport = p->dport ? 0xffff : 0; f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan; f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff; f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : p->vlan_prio; f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ? 0 : FILTER_NO_VLAN_PRI; f->mac_hit = p->mac_hit; f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff; f->proto = p->pkt_type; f->want_filter_id = p->report_filter_id; f->pass = p->pass; f->rss = p->rss; f->qset = p->qset; break; } if (i == nfilters) f->filter_id = 0xffffffff; break; } default: return (EOPNOTSUPP); break; } return (error); } static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start, unsigned int end) { uint32_t *p = (uint32_t *)(buf + start); for ( ; start <= end; start += sizeof(uint32_t)) *p++ = t3_read_reg(ap, start); } #define T3_REGMAP_SIZE (3 * 1024) static int cxgb_get_regs_len(void) { return T3_REGMAP_SIZE; } static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf) { /* * Version scheme: * bits 0..9: chip version * bits 10..15: chip revision * bit 31: set for PCIe cards */ regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31); /* * We skip the MAC statistics registers because they are clear-on-read. * Also reading multi-register stats would need to synchronize with the * periodic mac stats accumulation. Hard to justify the complexity. */ memset(buf, 0, cxgb_get_regs_len()); reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN); reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT); reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE); reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA); reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3); reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0, XGM_REG(A_XGM_SERDES_STAT3, 1)); reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1), XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } static int alloc_filters(struct adapter *sc) { struct filter_info *p; unsigned int nfilters = sc->params.mc5.nfilters; if (nfilters == 0) return (0); p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO); sc->filters = p; p = &sc->filters[nfilters - 1]; p->vlan = 0xfff; p->vlan_prio = FILTER_NO_VLAN_PRI; p->pass = p->rss = p->valid = p->locked = 1; return (0); } static int setup_hw_filters(struct adapter *sc) { int i, rc; unsigned int nfilters = sc->params.mc5.nfilters; if (!sc->filters) return (0); t3_enable_filters(sc); for (i = rc = 0; i < nfilters && !rc; i++) { if (sc->filters[i].locked) rc = set_filter(sc, i, &sc->filters[i]); } return (rc); } static int set_filter(struct adapter *sc, int id, const struct filter_info *f) { int len; struct mbuf *m; struct ulp_txpkt *txpkt; struct work_request_hdr *wr; struct cpl_pass_open_req *oreq; struct cpl_set_tcb_field *sreq; len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq); KASSERT(len <= MHLEN, ("filter request too big for an mbuf")); id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes - sc->params.mc5.nfilters; m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); wr = mtod(m, struct work_request_hdr *); wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); oreq = (struct cpl_pass_open_req *)(wr + 1); txpkt = (struct ulp_txpkt *)oreq; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8)); OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id)); oreq->local_port = htons(f->dport); oreq->peer_port = htons(f->sport); oreq->local_ip = htonl(f->dip); oreq->peer_ip = htonl(f->sip); oreq->peer_netmask = htonl(f->sip_mask); oreq->opt0h = 0; oreq->opt0l = htonl(F_NO_OFFLOAD); oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) | V_CONN_POLICY(CPL_CONN_POLICY_FILTER) | V_VLAN_PRI(f->vlan_prio >> 1) | V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) | V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) | V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4))); sreq = (struct cpl_set_tcb_field *)(oreq + 1); set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL, (f->report_filter_id << 15) | (1 << 23) | ((u64)f->pass << 35) | ((u64)!f->rss << 36)); set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1); t3_mgmt_tx(sc, m); if (f->pass && !f->rss) { len = sizeof(*sreq); m = m_gethdr(M_WAITOK, MT_DATA); m->m_len = m->m_pkthdr.len = len; bzero(mtod(m, char *), len); sreq = mtod(m, struct cpl_set_tcb_field *); sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); mk_set_tcb_field(sreq, id, 25, 0x3f80000, (u64)sc->rrss_map[f->qset] << 19); t3_mgmt_tx(sc, m); } return 0; } static inline void mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); req->reply = V_NO_REPLY(1); req->cpu_idx = 0; req->word = htons(word); req->mask = htobe64(mask); req->val = htobe64(val); } static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, unsigned int word, u64 mask, u64 val) { struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); mk_set_tcb_field(req, tid, word, mask, val); } void t3_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; mtx_lock(&t3_list_lock); SLIST_FOREACH(sc, &t3_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } mtx_unlock(&t3_list_lock); } #ifdef TCP_OFFLOAD static int toe_capability(struct port_info *pi, int enable) { int rc; struct adapter *sc = pi->adapter; ADAPTER_LOCK_ASSERT_OWNED(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if (!(sc->flags & FULL_INIT_DONE)) { log(LOG_WARNING, "You must enable a cxgb interface first\n"); return (EAGAIN); } if (isset(&sc->offload_map, pi->port_id)) return (0); if (!(sc->flags & TOM_INIT_DONE)) { rc = t3_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t3_tom.ko before trying " "to enable TOE on a cxgb interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM activated but flag not set", __func__)); } setbit(&sc->offload_map, pi->port_id); /* * XXX: Temporary code to allow iWARP to be enabled when TOE is * enabled on any port. Need to figure out how to enable, * disable, load, and unload iWARP cleanly. */ if (!isset(&sc->offload_map, MAX_NPORTS) && t3_activate_uld(sc, ULD_IWARP) == 0) setbit(&sc->offload_map, MAX_NPORTS); } else { if (!isset(&sc->offload_map, pi->port_id)) return (0); KASSERT(sc->flags & TOM_INIT_DONE, ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t3_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t3_uld_list, ui, link); ui->refcount = 0; done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(u, &t3_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t3_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_activate_uld(struct adapter *sc, int id) { int rc = EAGAIN; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->activate(sc); if (rc == 0) ui->refcount++; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } int t3_deactivate_uld(struct adapter *sc, int id) { int rc = EINVAL; struct uld_info *ui; mtx_lock(&t3_uld_list_lock); SLIST_FOREACH(ui, &t3_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) ui->refcount--; goto done; } } done: mtx_unlock(&t3_uld_list_lock); return (rc); } static int cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused, struct mbuf *m) { m_freem(m); return (EDOOFUS); } int t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= NUM_CPL_HANDLERS) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &sc->cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } #endif static int cxgbc_mod_event(module_t mod, int cmd, void *arg) { int rc = 0; switch (cmd) { case MOD_LOAD: mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF); SLIST_INIT(&t3_list); #ifdef TCP_OFFLOAD mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF); SLIST_INIT(&t3_uld_list); #endif break; case MOD_UNLOAD: #ifdef TCP_OFFLOAD mtx_lock(&t3_uld_list_lock); if (!SLIST_EMPTY(&t3_uld_list)) { rc = EBUSY; mtx_unlock(&t3_uld_list_lock); break; } mtx_unlock(&t3_uld_list_lock); mtx_destroy(&t3_uld_list_lock); #endif mtx_lock(&t3_list_lock); if (!SLIST_EMPTY(&t3_list)) { rc = EBUSY; mtx_unlock(&t3_list_lock); break; } mtx_unlock(&t3_list_lock); mtx_destroy(&t3_list_lock); break; } return (rc); } Index: head/sys/dev/cxgb/cxgb_sge.c =================================================================== --- head/sys/dev/cxgb/cxgb_sge.c (revision 320527) +++ head/sys/dev/cxgb/cxgb_sge.c (revision 320528) @@ -1,3707 +1,3706 @@ /************************************************************************** Copyright (c) 2007-2009, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int txq_fills = 0; int multiq_tx_enable = 1; #ifdef TCP_OFFLOAD CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS); #endif extern struct sysctl_oid_list sysctl__hw_cxgb_children; int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, "size of per-queue mbuf ring"); static int cxgb_tx_coalesce_force = 0; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN, &cxgb_tx_coalesce_force, 0, "coalesce small packets into a single work request regardless of ring state"); #define COALESCE_START_DEFAULT TX_ETH_Q_SIZE>>1 #define COALESCE_START_MAX (TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3)) #define COALESCE_STOP_DEFAULT TX_ETH_Q_SIZE>>2 #define COALESCE_STOP_MIN TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_DEFAULT TX_ETH_Q_SIZE>>5 #define TX_RECLAIM_MAX TX_ETH_Q_SIZE>>2 #define TX_RECLAIM_MIN TX_ETH_Q_SIZE>>6 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_start, 0, "coalesce enable threshold"); static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN, &cxgb_tx_coalesce_enable_stop, 0, "coalesce disable threshold"); static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN, &cxgb_tx_reclaim_threshold, 0, "tx cleaning minimum threshold"); /* * XXX don't re-enable this until TOE stops assuming * we have an m_ext */ static int recycle_enable = 0; extern int cxgb_use_16k_clusters; extern int nmbjumbop; extern int nmbjumbo9; extern int nmbjumbo16; #define USE_GTS 0 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_RX_DROP_THRES 16 #define SGE_RX_COPY_THRES 128 /* * Period of the Tx buffer reclaim timer. This timer does not need to run * frequently as Tx buffers are usually reclaimed by new Tx packets. */ #define TX_RECLAIM_PERIOD (hz >> 1) /* * Values for sge_txq.flags */ enum { TXQ_RUNNING = 1 << 0, /* fetch engine is running */ TXQ_LAST_PKT_DB = 1 << 1, /* last packet rang the doorbell */ }; struct tx_desc { uint64_t flit[TX_DESC_FLITS]; } __packed; struct rx_desc { uint32_t addr_lo; uint32_t len_gen; uint32_t gen2; uint32_t addr_hi; } __packed; struct rsp_desc { /* response queue descriptor */ struct rss_header rss_hdr; uint32_t flags; uint32_t len_cq; uint8_t imm_data[47]; uint8_t intr_gen; } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define RSPQ_NSOP_NEOP G_RSPD_SOP_EOP(0) #define RSPQ_EOP G_RSPD_SOP_EOP(F_RSPD_EOP) #define RSPQ_SOP G_RSPD_SOP_EOP(F_RSPD_SOP) #define RSPQ_SOP_EOP G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP) struct tx_sw_desc { /* SW state per Tx descriptor */ struct mbuf *m; bus_dmamap_t map; int flags; }; struct rx_sw_desc { /* SW state per Rx descriptor */ caddr_t rxsd_cl; struct mbuf *m; bus_dmamap_t map; int flags; }; struct txq_state { unsigned int compl; unsigned int gen; unsigned int pidx; }; struct refill_fl_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; /* * Maps a number of flits to the number of Tx descriptors that can hold them. * The formula is * * desc = 1 + (flits - 2) / (WR_FLITS - 1). * * HW allows up to 4 descriptors to be combined into a WR. */ static uint8_t flit_desc_map[] = { 0, #if SGE_NUM_GENBITS == 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 #elif SGE_NUM_GENBITS == 2 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, #else # error "SGE_NUM_GENBITS must be 1 or 2" #endif }; #define TXQ_LOCK_ASSERT(qs) mtx_assert(&(qs)->lock, MA_OWNED) #define TXQ_TRYLOCK(qs) mtx_trylock(&(qs)->lock) #define TXQ_LOCK(qs) mtx_lock(&(qs)->lock) #define TXQ_UNLOCK(qs) mtx_unlock(&(qs)->lock) #define TXQ_RING_EMPTY(qs) drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_NEEDS_ENQUEUE(qs) \ drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_FLUSH(qs) drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) #define TXQ_RING_DEQUEUE_COND(qs, func, arg) \ drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg) #define TXQ_RING_DEQUEUE(qs) \ drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr) int cxgb_debug = 0; static void sge_timer_cb(void *arg); static void sge_timer_reclaim(void *arg, int ncount); static void sge_txq_reclaim_handler(void *arg, int ncount); static void cxgb_start_locked(struct sge_qset *qs); /* * XXX need to cope with bursty scheduling by looking at a wider * window than we are now for determining the need for coalescing * */ static __inline uint64_t check_pkt_coalesce(struct sge_qset *qs) { struct adapter *sc; struct sge_txq *txq; uint8_t *fill; if (__predict_false(cxgb_tx_coalesce_force)) return (1); txq = &qs->txq[TXQ_ETH]; sc = qs->port->adapter; fill = &sc->tunq_fill[qs->idx]; if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX) cxgb_tx_coalesce_enable_start = COALESCE_START_MAX; if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN) cxgb_tx_coalesce_enable_start = COALESCE_STOP_MIN; /* * if the hardware transmit queue is more than 1/8 full * we mark it as coalescing - we drop back from coalescing * when we go below 1/32 full and there are no packets enqueued, * this provides us with some degree of hysteresis */ if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs) && (qs->coalescing == 0)) *fill = 0; else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) *fill = 1; return (sc->tunq_coalesce); } #ifdef __LP64__ static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { uint64_t wr_hilo; #if _BYTE_ORDER == _LITTLE_ENDIAN wr_hilo = wr_hi; wr_hilo |= (((uint64_t)wr_lo)<<32); #else wr_hilo = wr_lo; wr_hilo |= (((uint64_t)wr_hi)<<32); #endif wrp->wrh_hilo = wr_hilo; } #else static void set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo) { wrp->wrh_hi = wr_hi; wmb(); wrp->wrh_lo = wr_lo; } #endif struct coalesce_info { int count; int nbytes; }; static int coalesce_check(struct mbuf *m, void *arg) { struct coalesce_info *ci = arg; int *count = &ci->count; int *nbytes = &ci->nbytes; if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) && (*count < 7) && (m->m_next == NULL))) { *count += 1; *nbytes += m->m_len; return (1); } return (0); } static struct mbuf * cxgb_dequeue(struct sge_qset *qs) { struct mbuf *m, *m_head, *m_tail; struct coalesce_info ci; if (check_pkt_coalesce(qs) == 0) return TXQ_RING_DEQUEUE(qs); m_head = m_tail = NULL; ci.count = ci.nbytes = 0; do { m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci); if (m_head == NULL) { m_tail = m_head = m; } else if (m != NULL) { m_tail->m_nextpkt = m; m_tail = m; } } while (m != NULL); if (ci.count > 7) panic("trying to coalesce %d packets in to one WR", ci.count); return (m_head); } /** * reclaim_completed_tx - reclaims completed Tx descriptors * @adapter: the adapter * @q: the Tx queue to reclaim completed descriptors from * * Reclaims Tx descriptors that the SGE has indicated it has processed, * and frees the associated buffers if possible. Called with the Tx * queue's lock held. */ static __inline int reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue) { struct sge_txq *q = &qs->txq[queue]; int reclaim = desc_reclaimable(q); if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) || (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN)) cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT; if (reclaim < reclaim_min) return (0); mtx_assert(&qs->lock, MA_OWNED); if (reclaim > 0) { t3_free_tx_desc(qs, reclaim, queue); q->cleaned += reclaim; q->in_use -= reclaim; } if (isset(&qs->txq_stopped, TXQ_ETH)) clrbit(&qs->txq_stopped, TXQ_ETH); return (reclaim); } /** * should_restart_tx - are there enough resources to restart a Tx queue? * @q: the Tx queue * * Checks if there are enough descriptors to restart a suspended Tx queue. */ static __inline int should_restart_tx(const struct sge_txq *q) { unsigned int r = q->processed - q->cleaned; return q->in_use - r < (q->size >> 1); } /** * t3_sge_init - initialize SGE * @adap: the adapter * @p: the SGE parameters * * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queue sets here, instead the driver * top-level must request those individually. We also do not enable DMA * here, that should be done after the queues have been set up. */ void t3_sge_init(adapter_t *adap, struct sge_params *p) { u_int ctrl, ups; ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */ ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL | F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN | V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS | V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING; #if SGE_NUM_GENBITS == 1 ctrl |= F_EGRGENCTRL; #endif if (adap->params.rev > 0) { if (!(adap->flags & (USING_MSIX | USING_MSI))) ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ; } t3_write_reg(adap, A_SG_CONTROL, ctrl); t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) | V_LORCQDRBTHRSH(512)); t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10); t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) | V_TIMEOUT(200 * core_ticks_per_usec(adap))); t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, adap->params.rev < T3_REV_C ? 1000 : 500); t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000); t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256); t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff)); t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024); } /** * sgl_len - calculates the size of an SGL of the given capacity * @n: the number of SGL entries * * Calculates the number of flits needed for a scatter/gather list that * can hold the given number of entries. */ static __inline unsigned int sgl_len(unsigned int n) { return ((3 * n) / 2 + (n & 1)); } /** * get_imm_packet - return the next ingress packet buffer from a response * @resp: the response descriptor containing the packet data * * Return a packet containing the immediate data of the given response. */ static int get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m) { if (resp->rss_hdr.opcode == CPL_RX_DATA) { const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else if (resp->rss_hdr.opcode == CPL_RX_PKT) { const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0]; m->m_len = sizeof(*cpl) + ntohs(cpl->len); } else m->m_len = IMMED_PKT_SIZE; m->m_ext.ext_buf = NULL; m->m_ext.ext_type = 0; memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len); return (0); } static __inline u_int flits_to_desc(u_int n) { return (flit_desc_map[n]); } #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \ F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \ V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \ F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \ F_HIRCQPARITYERROR) #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR) #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \ F_RSPQDISABLED) /** * t3_sge_err_intr_handler - SGE async event interrupt handler * @adapter: the adapter * * Interrupt handler for SGE asynchronous (non-data) events. */ void t3_sge_err_intr_handler(adapter_t *adapter) { unsigned int v, status; status = t3_read_reg(adapter, A_SG_INT_CAUSE); if (status & SGE_PARERR) CH_ALERT(adapter, "SGE parity error (0x%x)\n", status & SGE_PARERR); if (status & SGE_FRAMINGERR) CH_ALERT(adapter, "SGE framing error (0x%x)\n", status & SGE_FRAMINGERR); if (status & F_RSPQCREDITOVERFOW) CH_ALERT(adapter, "SGE response queue credit overflow\n"); if (status & F_RSPQDISABLED) { v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS); CH_ALERT(adapter, "packet delivered to disabled response queue (0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff); } t3_write_reg(adapter, A_SG_INT_CAUSE, status); if (status & SGE_FATALERR) t3_fatal_err(adapter); } void t3_sge_prep(adapter_t *adap, struct sge_params *p) { int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size; nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus); nqsets *= adap->params.nports; fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE); while (!powerof2(fl_q_size)) fl_q_size--; use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters : is_offload(adap); #if __FreeBSD_version >= 700111 if (use_16k) { jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM16BYTES; } else { jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUM9BYTES; } #else jumbo_q_size = min(nmbjumbop/(3*nqsets), JUMBO_Q_SIZE); jumbo_buf_size = MJUMPAGESIZE; #endif while (!powerof2(jumbo_q_size)) jumbo_q_size--; if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2)) device_printf(adap->dev, "Insufficient clusters and/or jumbo buffers.\n"); p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data); for (i = 0; i < SGE_QSETS; ++i) { struct qset_params *q = p->qset + i; if (adap->params.nports > 2) { q->coalesce_usecs = 50; } else { #ifdef INVARIANTS q->coalesce_usecs = 10; #else q->coalesce_usecs = 5; #endif } q->polling = 0; q->rspq_size = RSPQ_Q_SIZE; q->fl_size = fl_q_size; q->jumbo_size = jumbo_q_size; q->jumbo_buf_size = jumbo_buf_size; q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE; q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16; q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE; q->cong_thres = 0; } } int t3_sge_alloc(adapter_t *sc) { /* The parent tag. */ if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ BUS_SPACE_UNRESTRICTED, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lock, lockarg */ &sc->parent_dmat)) { device_printf(sc->dev, "Cannot allocate parent DMA tag\n"); return (ENOMEM); } /* * DMA tag for normal sized RX frames */ if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) { device_printf(sc->dev, "Cannot allocate RX DMA tag\n"); return (ENOMEM); } /* * DMA tag for jumbo sized RX frames. */ if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) { device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n"); return (ENOMEM); } /* * DMA tag for TX frames. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->tx_dmat)) { device_printf(sc->dev, "Cannot allocate TX DMA tag\n"); return (ENOMEM); } return (0); } int t3_sge_free(struct adapter * sc) { if (sc->tx_dmat != NULL) bus_dma_tag_destroy(sc->tx_dmat); if (sc->rx_jumbo_dmat != NULL) bus_dma_tag_destroy(sc->rx_jumbo_dmat); if (sc->rx_dmat != NULL) bus_dma_tag_destroy(sc->rx_dmat); if (sc->parent_dmat != NULL) bus_dma_tag_destroy(sc->parent_dmat); return (0); } void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U); qs->rspq.polling = 0 /* p->polling */; } #if !defined(__i386__) && !defined(__amd64__) static void refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct refill_fl_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #endif /** * refill_fl - refill an SGE free-buffer list * @sc: the controller softc * @q: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an SGE free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void refill_fl(adapter_t *sc, struct sge_fl *q, int n) { struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; struct refill_fl_cb_arg cb_arg; struct mbuf *m; caddr_t cl; int err; cb_arg.error = 0; while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. */ if (q->zone == zone_pack) { if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL) break; cl = m->m_ext.ext_buf; } else { if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL) break; if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { uma_zfree(q->zone, cl); break; } } if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) { if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(q->zone, cl); goto done; } sd->flags |= RX_SW_DESC_MAP_CREATED; } #if !defined(__i386__) && !defined(__amd64__) err = bus_dmamap_load(q->entry_tag, sd->map, cl, q->buf_size, refill_fl_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { if (q->zone != zone_pack) uma_zfree(q->zone, cl); m_free(m); goto done; } #else cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl); #endif sd->flags |= RX_SW_DESC_INUSE; sd->rxsd_cl = cl; sd->m = m; d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff); d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff); d->len_gen = htobe32(V_FLD_GEN1(q->gen)); d->gen2 = htobe32(V_FLD_GEN2(q->gen)); d++; sd++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; sd = q->sdesc; d = q->desc; } q->credits++; q->db_pending++; } done: if (q->db_pending >= 32) { q->db_pending = 0; t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } } /** * free_rx_bufs - free the Rx buffers on an SGE free list * @sc: the controle softc * @q: the SGE free list to clean up * * Release the buffers on an SGE free-buffer Rx queue. HW fetching from * this queue should be stopped before calling this function. */ static void free_rx_bufs(adapter_t *sc, struct sge_fl *q) { u_int cidx = q->cidx; while (q->credits--) { struct rx_sw_desc *d = &q->sdesc[cidx]; if (d->flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(q->entry_tag, d->map); bus_dmamap_destroy(q->entry_tag, d->map); if (q->zone == zone_pack) { m_init(d->m, M_NOWAIT, MT_DATA, M_EXT); uma_zfree(zone_pack, d->m); } else { m_init(d->m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, d->m); uma_zfree(q->zone, d->rxsd_cl); } } d->rxsd_cl = NULL; d->m = NULL; if (++cidx == q->size) cidx = 0; } } static __inline void __refill_fl(adapter_t *adap, struct sge_fl *fl) { refill_fl(adap, fl, min(16U, fl->size - fl->credits)); } static __inline void __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max) { uint32_t reclaimable = fl->size - fl->credits; if (reclaimable > 0) refill_fl(adap, fl, min(max, reclaimable)); } /** * recycle_rx_buf - recycle a receive buffer * @adapter: the adapter * @q: the SGE free list * @idx: index of buffer to recycle * * Recycles the specified buffer on the given free list by adding it at * the next available slot on the list. */ static void recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx) { struct rx_desc *from = &q->desc[idx]; struct rx_desc *to = &q->desc[q->pidx]; q->sdesc[q->pidx] = q->sdesc[idx]; to->addr_lo = from->addr_lo; // already big endian to->addr_hi = from->addr_hi; // likewise wmb(); /* necessary ? */ to->len_gen = htobe32(V_FLD_GEN1(q->gen)); to->gen2 = htobe32(V_FLD_GEN2(q->gen)); q->credits++; if (++q->pidx == q->size) { q->pidx = 0; q->gen ^= 1; } t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); } static void alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { uint32_t *addr; addr = arg; *addr = segs[0].ds_addr; } static int alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size, bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag, bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag) { size_t len = nelem * elem_size; void *s = NULL; void *p = NULL; int err; if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor tag\n"); return (ENOMEM); } if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT, map)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor memory\n"); return (ENOMEM); } bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0); bzero(p, len); *(void **)desc = p; if (sw_size) { len = nelem * sw_size; s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO); *(void **)sdesc = s; } if (parent_entry_tag == NULL) return (0); if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS, TX_MAX_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, entry_tag)) != 0) { device_printf(sc->dev, "Cannot allocate descriptor entry tag\n"); return (ENOMEM); } return (0); } static void sge_slow_intr_handler(void *arg, int ncount) { adapter_t *sc = arg; t3_slow_intr_handler(sc); t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask); (void) t3_read_reg(sc, A_PL_INT_ENABLE0); } /** * sge_timer_cb - perform periodic maintenance of an SGE qset * @data: the SGE queue set to maintain * * Runs periodically from a timer to perform maintenance of an SGE queue * set. It performs two tasks: * * a) Cleans up any completed Tx descriptors that may still be pending. * Normal descriptor cleanup happens when new packets are added to a Tx * queue so this timer is relatively infrequent and does any cleanup only * if the Tx queue has not seen any new packets in a while. We make a * best effort attempt to reclaim descriptors, in that we don't wait * around if we cannot get a queue's lock (which most likely is because * someone else is queueing new packets and so will also handle the clean * up). Since control queues use immediate data exclusively we don't * bother cleaning them up here. * * b) Replenishes Rx queues that have run out due to memory shortage. * Normally new Rx buffers are added when existing ones are consumed but * when out of memory a queue can become empty. We try to add only a few * buffers here, the queue will be replenished fully as these new buffers * are used up if memory shortage has subsided. * * c) Return coalesced response queue credits in case a response queue is * starved. * * d) Ring doorbells for T304 tunnel queues since we have seen doorbell * fifo overflows and the FW doesn't implement any recovery scheme yet. */ static void sge_timer_cb(void *arg) { adapter_t *sc = arg; if ((sc->flags & USING_MSIX) == 0) { struct port_info *pi; struct sge_qset *qs; struct sge_txq *txq; int i, j; int reclaim_ofl, refill_rx; if (sc->open_device_map == 0) return; for (i = 0; i < sc->params.nports; i++) { pi = &sc->port[i]; for (j = 0; j < pi->nqsets; j++) { qs = &sc->sge.qs[pi->first_qset + j]; txq = &qs->txq[0]; reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned; refill_rx = ((qs->fl[0].credits < qs->fl[0].size) || (qs->fl[1].credits < qs->fl[1].size)); if (reclaim_ofl || refill_rx) { taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task); break; } } } } if (sc->params.nports > 2) { int i; for_each_port(sc, i) { struct port_info *pi = &sc->port[i]; t3_write_reg(sc, A_SG_KDOORBELL, F_SELEGRCNTX | (FW_TUNNEL_SGEEC_START + pi->first_qset)); } } if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) && sc->open_device_map != 0) callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); } /* * This is meant to be a catch-all function to keep sge state private * to sge.c * */ int t3_sge_init_adapter(adapter_t *sc) { callout_init(&sc->sge_timer_ch, 1); callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc); return (0); } int t3_sge_reset_adapter(adapter_t *sc) { callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc); return (0); } int t3_sge_init_port(struct port_info *pi) { TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi); return (0); } /** * refill_rspq - replenish an SGE response queue * @adapter: the adapter * @q: the response queue to replenish * @credits: how many new responses to make available * * Replenishes a response queue by making the supplied number of responses * available to HW. */ static __inline void refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits) { /* mbufs are allocated on demand when a rspq entry is processed. */ t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN, V_RSPQ(q->cntxt_id) | V_CREDITS(credits)); } static void sge_txq_reclaim_handler(void *arg, int ncount) { struct sge_qset *qs = arg; int i; for (i = 0; i < 3; i++) reclaim_completed_tx(qs, 16, i); } static void sge_timer_reclaim(void *arg, int ncount) { struct port_info *pi = arg; int i, nqsets = pi->nqsets; adapter_t *sc = pi->adapter; struct sge_qset *qs; struct mtx *lock; KASSERT((sc->flags & USING_MSIX) == 0, ("can't call timer reclaim for msi-x")); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[pi->first_qset + i]; reclaim_completed_tx(qs, 16, TXQ_OFLD); lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; if (mtx_trylock(lock)) { /* XXX currently assume that we are *NOT* polling */ uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size - 16) __refill_fl(sc, &qs->fl[0]); if (qs->fl[1].credits < qs->fl[1].size - 16) __refill_fl(sc, &qs->fl[1]); if (status & (1 << qs->rspq.cntxt_id)) { if (qs->rspq.credits) { refill_rspq(sc, &qs->rspq, 1); qs->rspq.credits--; t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, 1 << qs->rspq.cntxt_id); } } mtx_unlock(lock); } } } /** * init_qset_cntxt - initialize an SGE queue set context info * @qs: the queue set * @id: the queue set id * * Initializes the TIDs and context ids for the queues of a queue set. */ static void init_qset_cntxt(struct sge_qset *qs, u_int id) { qs->rspq.cntxt_id = id; qs->fl[0].cntxt_id = 2 * id; qs->fl[1].cntxt_id = 2 * id + 1; qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id; qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id; qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id; qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id; qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id; /* XXX: a sane limit is needed instead of INT_MAX */ mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX); mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX); } static void txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs) { txq->in_use += ndesc; /* * XXX we don't handle stopping of queue * presumably start handles this when we bump against the end */ txqs->gen = txq->gen; txq->unacked += ndesc; txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5); txq->unacked &= 31; txqs->pidx = txq->pidx; txq->pidx += ndesc; #ifdef INVARIANTS if (((txqs->pidx > txq->cidx) && (txq->pidx < txqs->pidx) && (txq->pidx >= txq->cidx)) || ((txqs->pidx < txq->cidx) && (txq->pidx >= txq-> cidx)) || ((txqs->pidx < txq->cidx) && (txq->cidx < txqs->pidx))) panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d", txqs->pidx, txq->pidx, txq->cidx); #endif if (txq->pidx >= txq->size) { txq->pidx -= txq->size; txq->gen ^= 1; } } /** * calc_tx_descs - calculate the number of Tx descriptors for a packet * @m: the packet mbufs * @nsegs: the number of segments * * Returns the number of Tx descriptors needed for the given Ethernet * packet. Ethernet packets require addition of WR and CPL headers. */ static __inline unsigned int calc_tx_descs(const struct mbuf *m, int nsegs) { unsigned int flits; if (m->m_pkthdr.len <= PIO_LEN) return 1; flits = sgl_len(nsegs) + 2; if (m->m_pkthdr.csum_flags & CSUM_TSO) flits++; return flits_to_desc(flits); } /** * make_sgl - populate a scatter/gather list for a packet * @sgp: the SGL to populate * @segs: the packet dma segments * @nsegs: the number of segments * * Generates a scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. */ static __inline void make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs) { int i, idx; for (idx = 0, i = 0; i < nsegs; i++) { /* * firmware doesn't like empty segments */ if (segs[i].ds_len == 0) continue; if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ds_len); sgp->addr[idx] = htobe64(segs[i].ds_addr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } } /** * check_ring_tx_db - check and potentially ring a Tx queue's doorbell * @adap: the adapter * @q: the Tx queue * * Ring the doorbell if a Tx queue is asleep. There is a natural race, * where the HW is going to sleep just after we checked, however, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. * * When GTS is disabled we unconditionally ring the doorbell. */ static __inline void check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring) { #if USE_GTS clear_bit(TXQ_LAST_PKT_DB, &q->flags); if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) { set_bit(TXQ_LAST_PKT_DB, &q->flags); #ifdef T3_TRACE T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d", q->cntxt_id); #endif t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } #else if (mustring || ++q->db_pending >= 32) { wmb(); /* write descriptors before telling HW */ t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); q->db_pending = 0; } #endif } static __inline void wr_gen2(struct tx_desc *d, unsigned int gen) { #if SGE_NUM_GENBITS == 2 d->flit[TX_DESC_FLITS - 1] = htobe64(gen); #endif } /** * write_wr_hdr_sgl - write a WR header and, optionally, SGL * @ndesc: number of Tx descriptors spanned by the SGL * @txd: first Tx descriptor to be written * @txqs: txq state (generation and producer index) * @txq: the SGE Tx queue * @sgl: the SGL * @flits: number of flits to the start of the SGL in the first descriptor * @sgl_flits: the SGL size in flits * @wr_hi: top 32 bits of WR header based on WR type (big endian) * @wr_lo: low 32 bits of WR header based on WR type (big endian) * * Write a work request header and an associated SGL. If the SGL is * small enough to fit into one Tx descriptor it has already been written * and we just need to write the WR header. Otherwise we distribute the * SGL across the number of descriptors it spans. */ static void write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs, const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits, unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo) { struct work_request_hdr *wrp = (struct work_request_hdr *)txd; struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx]; if (__predict_true(ndesc == 1)) { set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi, htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) | wr_lo); wr_gen2(txd, txqs->gen); } else { unsigned int ogen = txqs->gen; const uint64_t *fp = (const uint64_t *)sgl; struct work_request_hdr *wp = wrp; wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | wr_hi; while (sgl_flits) { unsigned int avail = WR_FLITS - flits; if (avail > sgl_flits) avail = sgl_flits; memcpy(&txd->flit[flits], fp, avail * sizeof(*fp)); sgl_flits -= avail; ndesc--; if (!sgl_flits) break; fp += avail; txd++; txsd++; if (++txqs->pidx == txq->size) { txqs->pidx = 0; txqs->gen ^= 1; txd = txq->desc; txsd = txq->sdesc; } /* * when the head of the mbuf chain * is freed all clusters will be freed * with it */ wrp = (struct work_request_hdr *)txd; wrp->wrh_hi = htonl(V_WR_DATATYPE(1) | V_WR_SGLSFLT(1)) | wr_hi; wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS, sgl_flits + 1)) | V_WR_GEN(txqs->gen)) | wr_lo; wr_gen2(txd, txqs->gen); flits = 1; } wrp->wrh_hi |= htonl(F_WR_EOP); wmb(); wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo; wr_gen2((struct tx_desc *)wp, ogen); } } /* sizeof(*eh) + sizeof(*ip) + sizeof(*tcp) */ #define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20) #define GET_VTAG(cntrl, m) \ do { \ if ((m)->m_flags & M_VLANTAG) \ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \ } while (0) static int t3_encap(struct sge_qset *qs, struct mbuf **m) { adapter_t *sc; struct mbuf *m0; struct sge_txq *txq; struct txq_state txqs; struct port_info *pi; unsigned int ndesc, flits, cntrl, mlen; int err, nsegs, tso_info = 0; struct work_request_hdr *wrp; struct tx_sw_desc *txsd; struct sg_ent *sgp, *sgl; uint32_t wr_hi, wr_lo, sgl_flits; bus_dma_segment_t segs[TX_MAX_SEGS]; struct tx_desc *txd; pi = qs->port; sc = pi->adapter; txq = &qs->txq[TXQ_ETH]; txd = &txq->desc[txq->pidx]; txsd = &txq->sdesc[txq->pidx]; sgl = txq->txq_sgl; prefetch(txd); m0 = *m; mtx_assert(&qs->lock, MA_OWNED); cntrl = V_TXPKT_INTF(pi->txpkt_intf); KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n")); if (m0->m_nextpkt == NULL && m0->m_next != NULL && m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); if (m0->m_nextpkt != NULL) { busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs); ndesc = 1; mlen = 0; } else { if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map, &m0, segs, &nsegs))) { if (cxgb_debug) printf("failed ... err=%d\n", err); return (err); } mlen = m0->m_pkthdr.len; ndesc = calc_tx_descs(m0, nsegs); } txq_prod(txq, ndesc, &txqs); KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs)); txsd->m = m0; if (m0->m_nextpkt != NULL) { struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd; int i, fidx; if (nsegs > 7) panic("trying to coalesce %d packets in to one WR", nsegs); txq->txq_coalesced += nsegs; wrp = (struct work_request_hdr *)txd; flits = nsegs*2 + 1; for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) { struct cpl_tx_pkt_batch_entry *cbe; uint64_t flit; uint32_t *hflit = (uint32_t *)&flit; int cflags = m0->m_pkthdr.csum_flags; cntrl = V_TXPKT_INTF(pi->txpkt_intf); GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(cflags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; hflit[0] = htonl(cntrl); hflit[1] = htonl(segs[i].ds_len | 0x80000000); flit |= htobe64(1 << 24); cbe = &cpl_batch->pkt_entry[i]; cbe->cntrl = hflit[0]; cbe->len = hflit[1]; cbe->addr = htobe64(segs[i].ds_addr); } wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) | V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token)); set_wr_hdr(wrp, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); return (0); } else if (tso_info) { uint16_t eth_type; struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd; struct ether_header *eh; void *l3hdr; struct tcphdr *tcp; txd->flit[2] = 0; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); hdr->len = htonl(mlen | 0x80000000); if (__predict_false(mlen < TCPPKTHDRSIZE)) { printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x", m0, mlen, m0->m_pkthdr.tso_segsz, (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags); panic("tx tso packet too small"); } /* Make sure that ether, ip, tcp headers are all in m0 */ if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) { m0 = m_pullup(m0, TCPPKTHDRSIZE); if (__predict_false(m0 == NULL)) { /* XXX panic probably an overreaction */ panic("couldn't fit header into mbuf"); } } eh = mtod(m0, struct ether_header *); eth_type = eh->ether_type; if (eth_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = (void *)eh; tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN); l3hdr = evh + 1; eth_type = evh->evl_proto; } else { tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II); l3hdr = eh + 1; } if (eth_type == htons(ETHERTYPE_IP)) { struct ip *ip = l3hdr; tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl); tcp = (struct tcphdr *)(ip + 1); } else if (eth_type == htons(ETHERTYPE_IPV6)) { struct ip6_hdr *ip6 = l3hdr; KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO with ip6_nxt %d", __func__, ip6->ip6_nxt)); tso_info |= F_LSO_IPV6; tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2); tcp = (struct tcphdr *)(ip6 + 1); } else panic("%s: CSUM_TSO but neither ip nor ip6", __func__); tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); if (__predict_false(mlen <= PIO_LEN)) { /* * pkt not undersized but fits in PIO_LEN * Indicates a TSO bug at the higher levels. */ txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]); flits = (mlen + 7) / 8 + 3; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&hdr->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 3; } else { struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd; GET_VTAG(cntrl, m0); cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP))) cntrl |= F_TXPKT_IPCSUM_DIS; if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6)))) cntrl |= F_TXPKT_L4CSUM_DIS; cpl->cntrl = htonl(cntrl); cpl->len = htonl(mlen | 0x80000000); if (mlen <= PIO_LEN) { txsd->m = NULL; m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); flits = (mlen + 7) / 8 + 2; wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | F_WR_SOP | F_WR_EOP | txqs.compl); wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(txqs.gen) | V_WR_TID(txq->token)); set_wr_hdr(&cpl->wr, wr_hi, wr_lo); wmb(); ETHER_BPF_MTAP(pi->ifp, m0); wr_gen2(txd, txqs.gen); check_ring_tx_db(sc, txq, 0); m_freem(m0); return (0); } flits = 2; } wrp = (struct work_request_hdr *)txd; sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl; make_sgl(sgp, segs, nsegs); sgl_flits = sgl_len(nsegs); ETHER_BPF_MTAP(pi->ifp, m0); KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc)); wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl); wr_lo = htonl(V_WR_TID(txq->token)); write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo); check_ring_tx_db(sc, txq, 0); return (0); } void cxgb_tx_watchdog(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) && TXQ_RING_EMPTY(qs)) qs->coalescing = 0; else if (qs->coalescing == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start)) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_FLUSHING; cxgb_start_locked(qs); qs->qs_flags &= ~QS_FLUSHING; TXQ_UNLOCK(qs); } if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog, qs, txq->txq_watchdog.c_cpu); } static void cxgb_tx_timeout(void *arg) { struct sge_qset *qs = arg; struct sge_txq *txq = &qs->txq[TXQ_ETH]; if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3))) qs->coalescing = 1; if (TXQ_TRYLOCK(qs)) { qs->qs_flags |= QS_TIMEOUT; cxgb_start_locked(qs); qs->qs_flags &= ~QS_TIMEOUT; TXQ_UNLOCK(qs); } } static void cxgb_start_locked(struct sge_qset *qs) { struct mbuf *m_head = NULL; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct port_info *pi = qs->port; struct ifnet *ifp = pi->ifp; if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT)) reclaim_completed_tx(qs, 0, TXQ_ETH); if (!pi->link_config.link_ok) { TXQ_RING_FLUSH(qs); return; } TXQ_LOCK_ASSERT(qs); while (!TXQ_RING_EMPTY(qs) && (ifp->if_drv_flags & IFF_DRV_RUNNING) && pi->link_config.link_ok) { reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (txq->size - txq->in_use <= TX_MAX_DESC) break; if ((m_head = cxgb_dequeue(qs)) == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. */ if (t3_encap(qs, &m_head) || m_head == NULL) break; m_head = NULL; } if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 && pi->link_config.link_ok) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); if (m_head != NULL) m_freem(m_head); } static int cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m) { struct port_info *pi = qs->port; struct sge_txq *txq = &qs->txq[TXQ_ETH]; struct buf_ring *br = txq->txq_mr; int error, avail; avail = txq->size - txq->in_use; TXQ_LOCK_ASSERT(qs); /* * We can only do a direct transmit if the following are true: * - we aren't coalescing (ring < 3/4 full) * - the link is up -- checked in caller * - there are no packets enqueued already * - there is space in hardware transmit queue */ if (check_pkt_coalesce(qs) == 0 && !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) { if (t3_encap(qs, &m)) { if (m != NULL && (error = drbr_enqueue(ifp, br, m)) != 0) return (error); } else { if (txq->db_pending) check_ring_tx_db(pi->adapter, txq, 1); /* * We've bypassed the buf ring so we need to update * the stats directly */ txq->txq_direct_packets++; txq->txq_direct_bytes += m->m_pkthdr.len; } } else if ((error = drbr_enqueue(ifp, br, m)) != 0) return (error); reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH); if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok && (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7))) cxgb_start_locked(qs); else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer)) callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout, qs, txq->txq_timer.c_cpu); return (0); } int cxgb_transmit(struct ifnet *ifp, struct mbuf *m) { struct sge_qset *qs; struct port_info *pi = ifp->if_softc; int error, qidx = pi->first_qset; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||(!pi->link_config.link_ok)) { m_freem(m); return (0); } /* check if flowid is set */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset; qs = &pi->adapter->sge.qs[qidx]; if (TXQ_TRYLOCK(qs)) { /* XXX running */ error = cxgb_transmit_locked(ifp, qs, m); TXQ_UNLOCK(qs); } else error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m); return (error); } void cxgb_qflush(struct ifnet *ifp) { /* * flush any enqueued mbufs in the buf_rings * and in the transmit queues * no-op for now */ return; } /** * write_imm - write a packet into a Tx descriptor as immediate data * @d: the Tx descriptor to write * @m: the packet * @len: the length of packet data to write as immediate data * @gen: the generation bit value to write * * Writes a packet as immediate data into a Tx descriptor. The packet * contains a work request at its beginning. We must write the packet * carefully so the SGE doesn't read accidentally before it's written in * its entirety. */ static __inline void write_imm(struct tx_desc *d, caddr_t src, unsigned int len, unsigned int gen) { struct work_request_hdr *from = (struct work_request_hdr *)src; struct work_request_hdr *to = (struct work_request_hdr *)d; uint32_t wr_hi, wr_lo; KASSERT(len <= WR_LEN && len >= sizeof(*from), ("%s: invalid len %d", __func__, len)); memcpy(&to[1], &from[1], len - sizeof(*from)); wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP | V_WR_BCNTLFLT(len & 7)); wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8)); set_wr_hdr(to, wr_hi, wr_lo); wmb(); wr_gen2(d, gen); } /** * check_desc_avail - check descriptor availability on a send queue * @adap: the adapter * @q: the TX queue * @m: the packet needing the descriptors * @ndesc: the number of Tx descriptors needed * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL) * * Checks if the requested number of Tx descriptors is available on an * SGE send queue. If the queue is already suspended or not enough * descriptors are available the packet is queued for later transmission. * Must be called with the Tx queue locked. * * Returns 0 if enough descriptors are available, 1 if there aren't * enough descriptors and the packet has been queued, and 2 if the caller * needs to retry because there weren't enough descriptors at the * beginning of the call but some freed up in the mean time. */ static __inline int check_desc_avail(adapter_t *adap, struct sge_txq *q, struct mbuf *m, unsigned int ndesc, unsigned int qid) { /* * XXX We currently only use this for checking the control queue * the control queue is only used for binding qsets which happens * at init time so we are guaranteed enough descriptors */ if (__predict_false(mbufq_len(&q->sendq))) { addq_exit: (void )mbufq_enqueue(&q->sendq, m); return 1; } if (__predict_false(q->size - q->in_use < ndesc)) { struct sge_qset *qs = txq_to_qset(q, qid); setbit(&qs->txq_stopped, qid); if (should_restart_tx(q) && test_and_clear_bit(qid, &qs->txq_stopped)) return 2; q->stops++; goto addq_exit; } return 0; } /** * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs * @q: the SGE control Tx queue * * This is a variant of reclaim_completed_tx() that is used for Tx queues * that send only immediate data (presently just the control queues) and * thus do not have any mbufs */ static __inline void reclaim_completed_tx_imm(struct sge_txq *q) { unsigned int reclaim = q->processed - q->cleaned; q->in_use -= reclaim; q->cleaned += reclaim; } /** * ctrl_xmit - send a packet through an SGE control Tx queue * @adap: the adapter * @q: the control queue * @m: the packet * * Send a packet through an SGE control Tx queue. Packets sent through * a control queue must fit entirely as immediate data in a single Tx * descriptor and have no page fragments. */ static int ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *); struct sge_txq *q = &qs->txq[TXQ_CTRL]; KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__)); wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP); wrp->wrh_lo = htonl(V_WR_TID(q->token)); TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (ENOSPC); } goto again; } write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); q->in_use++; if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); m_free(m); return (0); } /** * restart_ctrlq - restart a suspended control queue * @qs: the queue set cotaining the control queue * * Resumes transmission on a suspended Tx control queue. */ static void restart_ctrlq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; adapter_t *adap = qs->port->adapter; TXQ_LOCK(qs); again: reclaim_completed_tx_imm(q); while (q->in_use < q->size && (m = mbufq_dequeue(&q->sendq)) != NULL) { write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen); m_free(m); if (++q->pidx >= q->size) { q->pidx = 0; q->gen ^= 1; } q->in_use++; } if (mbufq_len(&q->sendq)) { setbit(&qs->txq_stopped, TXQ_CTRL); if (should_restart_tx(q) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) goto again; q->stops++; } TXQ_UNLOCK(qs); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /* * Send a management message through control queue 0 */ int t3_mgmt_tx(struct adapter *adap, struct mbuf *m) { return ctrl_xmit(adap, &adap->sge.qs[0], m); } /** * free_qset - free the resources of an SGE queue set * @sc: the controller owning the queue set * @q: the queue set * * Release the HW and SW resources associated with an SGE queue set, such * as HW contexts, packet buffers, and descriptor rings. Traffic to the * queue set must be quiesced prior to calling this. */ static void t3_free_qset(adapter_t *sc, struct sge_qset *q) { int i; reclaim_completed_tx(q, 0, TXQ_ETH); if (q->txq[TXQ_ETH].txq_mr != NULL) buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF); if (q->txq[TXQ_ETH].txq_ifq != NULL) { ifq_delete(q->txq[TXQ_ETH].txq_ifq); free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF); } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { if (q->fl[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_fl(sc, q->fl[i].cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map); bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc, q->fl[i].desc_map); bus_dma_tag_destroy(q->fl[i].desc_tag); bus_dma_tag_destroy(q->fl[i].entry_tag); } if (q->fl[i].sdesc) { free_rx_bufs(sc, &q->fl[i]); free(q->fl[i].sdesc, M_DEVBUF); } } mtx_unlock(&q->lock); MTX_DESTROY(&q->lock); for (i = 0; i < SGE_TXQ_PER_SET; i++) { if (q->txq[i].desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->txq[i].desc_tag, q->txq[i].desc_map); bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc, q->txq[i].desc_map); bus_dma_tag_destroy(q->txq[i].desc_tag); bus_dma_tag_destroy(q->txq[i].entry_tag); } if (q->txq[i].sdesc) { free(q->txq[i].sdesc, M_DEVBUF); } } if (q->rspq.desc) { mtx_lock_spin(&sc->sge.reg_lock); t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id); mtx_unlock_spin(&sc->sge.reg_lock); bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map); bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc, q->rspq.desc_map); bus_dma_tag_destroy(q->rspq.desc_tag); MTX_DESTROY(&q->rspq.lock); } #if defined(INET6) || defined(INET) tcp_lro_free(&q->lro.ctrl); #endif bzero(q, sizeof(*q)); } /** * t3_free_sge_resources - free SGE resources * @sc: the adapter softc * * Frees resources used by the SGE queue sets. */ void t3_free_sge_resources(adapter_t *sc, int nqsets) { int i; for (i = 0; i < nqsets; ++i) { TXQ_LOCK(&sc->sge.qs[i]); t3_free_qset(sc, &sc->sge.qs[i]); } } /** * t3_sge_start - enable SGE * @sc: the controller softc * * Enables the SGE for DMAs. This is the last step in starting packet * transfers. */ void t3_sge_start(adapter_t *sc) { t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE); } /** * t3_sge_stop - disable SGE operation * @sc: the adapter * * Disables the DMA engine. This can be called in emeregencies (e.g., * from error interrupts) or from normal process context. In the latter * case it also disables any pending queue restart tasklets. Note that * if it is called in interrupt context it cannot disable the restart * tasklets as it cannot wait, however the tasklets will have no effect * since the doorbells are disabled and the driver will call this again * later from process context, at which time the tasklets will be stopped * if they are still running. */ void t3_sge_stop(adapter_t *sc) { int i, nqsets; t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0); if (sc->tq == NULL) return; for (nqsets = i = 0; i < (sc)->params.nports; i++) nqsets += sc->port[i].nqsets; #ifdef notyet /* * * XXX */ for (i = 0; i < nqsets; ++i) { struct sge_qset *qs = &sc->sge.qs[i]; taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } #endif } /** * t3_free_tx_desc - reclaims Tx descriptors and their buffers * @adapter: the adapter * @q: the Tx queue to reclaim descriptors from * @reclaimable: the number of descriptors to reclaim * @m_vec_size: maximum number of buffers to reclaim * @desc_reclaimed: returns the number of descriptors reclaimed * * Reclaims Tx descriptors from an SGE Tx queue and frees the associated * Tx buffers. Called with the Tx queue lock held. * * Returns number of buffers of reclaimed */ void t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue) { struct tx_sw_desc *txsd; unsigned int cidx, mask; struct sge_txq *q = &qs->txq[queue]; #ifdef T3_TRACE T3_TRACE2(sc->tb[q->cntxt_id & 7], "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx); #endif cidx = q->cidx; mask = q->size - 1; txsd = &q->sdesc[cidx]; mtx_assert(&qs->lock, MA_OWNED); while (reclaimable--) { prefetch(q->sdesc[(cidx + 1) & mask].m); prefetch(q->sdesc[(cidx + 2) & mask].m); if (txsd->m != NULL) { if (txsd->flags & TX_SW_DESC_MAPPED) { bus_dmamap_unload(q->entry_tag, txsd->map); txsd->flags &= ~TX_SW_DESC_MAPPED; } m_freem_list(txsd->m); txsd->m = NULL; } else q->txq_skipped++; ++txsd; if (++cidx == q->size) { cidx = 0; txsd = q->sdesc; } } q->cidx = cidx; } /** * is_new_response - check if a response is newly written * @r: the response descriptor * @q: the response queue * * Returns true if a response descriptor contains a yet unprocessed * response. */ static __inline int is_new_response(const struct rsp_desc *r, const struct sge_rspq *q) { return (r->intr_gen & F_RSPD_GEN2) == q->gen; } #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \ V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR)) /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */ #define NOMEM_INTR_DELAY 2500 #ifdef TCP_OFFLOAD /** * write_ofld_wr - write an offload work request * @adap: the adapter * @m: the packet to send * @q: the Tx queue * @pidx: index of the first Tx descriptor to write * @gen: the generation value to use * @ndesc: number of descriptors the packet will occupy * * Write an offload work request to send the supplied packet. The packet * data already carry the work request with most fields populated. */ static void write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q, unsigned int pidx, unsigned int gen, unsigned int ndesc) { unsigned int sgl_flits, flits; int i, idx, nsegs, wrlen; struct work_request_hdr *from; struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1]; struct tx_desc *d = &q->desc[pidx]; struct txq_state txqs; struct sglist_seg *segs; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sglist *sgl; from = (void *)(oh + 1); /* Start of WR within mbuf */ wrlen = m->m_len - sizeof(*oh); if (!(oh->flags & F_HDR_SGL)) { write_imm(d, (caddr_t)from, wrlen, gen); /* * mbuf with "real" immediate tx data will be enqueue_wr'd by * t3_push_frames and freed in wr_ack. Others, like those sent * down by close_conn, t3_send_reset, etc. should be freed here. */ if (!(oh->flags & F_HDR_DF)) m_free(m); return; } memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from)); sgl = oh->sgl; flits = wrlen / 8; sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl; nsegs = sgl->sg_nseg; segs = sgl->sg_segs; for (idx = 0, i = 0; i < nsegs; i++) { KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__)); if (i && idx == 0) ++sgp; sgp->len[idx] = htobe32(segs[i].ss_len); sgp->addr[idx] = htobe64(segs[i].ss_paddr); idx ^= 1; } if (idx) { sgp->len[idx] = 0; sgp->addr[idx] = 0; } sgl_flits = sgl_len(nsegs); txqs.gen = gen; txqs.pidx = pidx; txqs.compl = 0; write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits, from->wrh_hi, from->wrh_lo); } /** * ofld_xmit - send a packet through an offload queue * @adap: the adapter * @q: the Tx offload queue * @m: the packet * * Send an offload packet through an SGE offload queue. */ static int ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m) { int ret; unsigned int ndesc; unsigned int pidx, gen; struct sge_txq *q = &qs->txq[TXQ_OFLD]; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); ndesc = G_HDR_NDESC(oh->flags); TXQ_LOCK(qs); again: reclaim_completed_tx(qs, 16, TXQ_OFLD); ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD); if (__predict_false(ret)) { if (ret == 1) { TXQ_UNLOCK(qs); return (EINTR); } goto again; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } write_ofld_wr(adap, m, q, pidx, gen, ndesc); check_ring_tx_db(adap, q, 1); TXQ_UNLOCK(qs); return (0); } /** * restart_offloadq - restart a suspended offload queue * @qs: the queue set cotaining the offload queue * * Resumes transmission on a suspended Tx offload queue. */ static void restart_offloadq(void *data, int npending) { struct mbuf *m; struct sge_qset *qs = data; struct sge_txq *q = &qs->txq[TXQ_OFLD]; adapter_t *adap = qs->port->adapter; int cleaned; TXQ_LOCK(qs); again: cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD); while ((m = mbufq_first(&q->sendq)) != NULL) { unsigned int gen, pidx; struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); unsigned int ndesc = G_HDR_NDESC(oh->flags); if (__predict_false(q->size - q->in_use < ndesc)) { setbit(&qs->txq_stopped, TXQ_OFLD); if (should_restart_tx(q) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) goto again; q->stops++; break; } gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; } (void)mbufq_dequeue(&q->sendq); TXQ_UNLOCK(qs); write_ofld_wr(adap, m, q, pidx, gen, ndesc); TXQ_LOCK(qs); } #if USE_GTS set_bit(TXQ_RUNNING, &q->flags); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif TXQ_UNLOCK(qs); wmb(); t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** * t3_offload_tx - send an offload packet * @m: the packet * * Sends an offload packet. We use the packet priority to select the * appropriate Tx queue as follows: bit 0 indicates whether the packet * should be sent as regular or control, bits 1-3 select the queue set. */ int t3_offload_tx(struct adapter *sc, struct mbuf *m) { struct ofld_hdr *oh = mtod(m, struct ofld_hdr *); struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)]; if (oh->flags & F_HDR_CTRL) { m_adj(m, sizeof (*oh)); /* trim ofld_hdr off */ return (ctrl_xmit(sc, qs, m)); } else return (ofld_xmit(sc, qs, m)); } #endif static void restart_tx(struct sge_qset *qs) { struct adapter *sc = qs->port->adapter; if (isset(&qs->txq_stopped, TXQ_OFLD) && should_restart_tx(&qs->txq[TXQ_OFLD]) && test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) { qs->txq[TXQ_OFLD].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task); } if (isset(&qs->txq_stopped, TXQ_CTRL) && should_restart_tx(&qs->txq[TXQ_CTRL]) && test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) { qs->txq[TXQ_CTRL].restarts++; taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task); } } /** * t3_sge_alloc_qset - initialize an SGE queue set * @sc: the controller softc * @id: the queue set id * @nports: how many Ethernet ports will be using this queue set * @irq_vec_idx: the IRQ vector index for response queue interrupts * @p: configuration parameters for this queue set * @ntxq: number of Tx queues for the queue set * @pi: port info for queue set * * Allocate resources and initialize an SGE queue set. A queue set * comprises a response queue, two Rx free-buffer queues, and up to 3 * Tx queues. The Tx queues are assigned roles in the order Ethernet * queue, offload queue, and control queue. */ int t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct port_info *pi) { struct sge_qset *q = &sc->sge.qs[id]; int i, ret = 0; MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF); q->port = pi; q->adap = sc; if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size, M_DEVBUF, M_WAITOK, &q->lock)) == NULL) { device_printf(sc->dev, "failed to allocate mbuf ring\n"); goto err; } if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) { device_printf(sc->dev, "failed to allocate ifq\n"); goto err; } ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp); callout_init(&q->txq[TXQ_ETH].txq_timer, 1); callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1); q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus; q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus; init_qset_cntxt(q, id); q->idx = id; if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc, &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map, sc->rx_dmat, &q->fl[0].entry_tag)) != 0) { printf("error %d from alloc ring fl0\n", ret); goto err; } if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc), sizeof(struct rx_sw_desc), &q->fl[1].phys_addr, &q->fl[1].desc, &q->fl[1].sdesc, &q->fl[1].desc_tag, &q->fl[1].desc_map, sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) { printf("error %d from alloc ring fl1\n", ret); goto err; } if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0, &q->rspq.phys_addr, &q->rspq.desc, NULL, &q->rspq.desc_tag, &q->rspq.desc_map, NULL, NULL)) != 0) { printf("error %d from alloc ring rspq\n", ret); goto err; } snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d", device_get_unit(sc->dev), irq_vec_idx); MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF); for (i = 0; i < ntxq; ++i) { size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc); if ((ret = alloc_ring(sc, p->txq_size[i], sizeof(struct tx_desc), sz, &q->txq[i].phys_addr, &q->txq[i].desc, &q->txq[i].sdesc, &q->txq[i].desc_tag, &q->txq[i].desc_map, sc->tx_dmat, &q->txq[i].entry_tag)) != 0) { printf("error %d from alloc ring tx %i\n", ret, i); goto err; } mbufq_init(&q->txq[i].sendq, INT_MAX); q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; } #ifdef TCP_OFFLOAD TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q); #endif TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q); TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q); TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q); q->fl[0].gen = q->fl[1].gen = 1; q->fl[0].size = p->fl_size; q->fl[1].size = p->jumbo_size; q->rspq.gen = 1; q->rspq.cidx = 0; q->rspq.size = p->rspq_size; q->txq[TXQ_ETH].stop_thres = nports * flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3); q->fl[0].buf_size = MCLBYTES; q->fl[0].zone = zone_pack; q->fl[0].type = EXT_PACKET; if (p->jumbo_buf_size == MJUM16BYTES) { q->fl[1].zone = zone_jumbo16; q->fl[1].type = EXT_JUMBO16; } else if (p->jumbo_buf_size == MJUM9BYTES) { q->fl[1].zone = zone_jumbo9; q->fl[1].type = EXT_JUMBO9; } else if (p->jumbo_buf_size == MJUMPAGESIZE) { q->fl[1].zone = zone_jumbop; q->fl[1].type = EXT_JUMBOP; } else { KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size)); ret = EDOOFUS; goto err; } q->fl[1].buf_size = p->jumbo_buf_size; /* Allocate and setup the lro_ctrl structure */ q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO); #if defined(INET6) || defined(INET) ret = tcp_lro_init(&q->lro.ctrl); if (ret) { printf("error %d from tcp_lro_init\n", ret); goto err; } #endif q->lro.ctrl.ifp = pi->ifp; mtx_lock_spin(&sc->sge.reg_lock); ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx, q->rspq.phys_addr, q->rspq.size, q->fl[0].buf_size, 1, 0); if (ret) { printf("error %d from t3_sge_init_rspcntxt\n", ret); goto err_unlock; } for (i = 0; i < SGE_RXQ_PER_SET; ++i) { ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0, q->fl[i].phys_addr, q->fl[i].size, q->fl[i].buf_size, p->cong_thres, 1, 0); if (ret) { printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i); goto err_unlock; } } ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS, SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr, q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } if (ntxq > 1) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id, USE_GTS, SGE_CNTXT_OFLD, id, q->txq[TXQ_OFLD].phys_addr, q->txq[TXQ_OFLD].size, 0, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } if (ntxq > 2) { ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0, SGE_CNTXT_CTRL, id, q->txq[TXQ_CTRL].phys_addr, q->txq[TXQ_CTRL].size, q->txq[TXQ_CTRL].token, 1, 0); if (ret) { printf("error %d from t3_sge_init_ecntxt\n", ret); goto err_unlock; } } mtx_unlock_spin(&sc->sge.reg_lock); t3_update_qset_coalesce(q, p); refill_fl(sc, &q->fl[0], q->fl[0].size); refill_fl(sc, &q->fl[1], q->fl[1].size); refill_rspq(sc, &q->rspq, q->rspq.size - 1); t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | V_NEWTIMER(q->rspq.holdoff_tmr)); return (0); err_unlock: mtx_unlock_spin(&sc->sge.reg_lock); err: TXQ_LOCK(q); t3_free_qset(sc, q); return (ret); } /* * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with * ethernet data. Hardware assistance with various checksums and any vlan tag * will also be taken into account here. */ void t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad) { struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad); struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]]; struct ifnet *ifp = pi->ifp; if (cpl->vlan_valid) { m->m_pkthdr.ether_vtag = ntohs(cpl->vlan); m->m_flags |= M_VLANTAG; } m->m_pkthdr.rcvif = ifp; /* * adjust after conversion to mbuf chain */ m->m_pkthdr.len -= (sizeof(*cpl) + ethpad); m->m_len -= (sizeof(*cpl) + ethpad); m->m_data += (sizeof(*cpl) + ethpad); if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) { struct ether_header *eh = mtod(m, void *); uint16_t eh_type; if (eh->ether_type == htons(ETHERTYPE_VLAN)) { struct ether_vlan_header *evh = mtod(m, void *); eh_type = evh->evl_proto; } else eh_type = eh->ether_type; if (ifp->if_capenable & IFCAP_RXCSUM && eh_type == htons(ETHERTYPE_IP)) { m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 && eh_type == htons(ETHERTYPE_IPV6)) { m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } } } /** * get_packet - return the next ingress packet buffer from a free list * @adap: the adapter that received the packet * @drop_thres: # of remaining buffers before we start dropping packets * @qs: the qset that the SGE free list holding the packet belongs to * @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain * @r: response descriptor * * Get the next packet from a free list and complete setup of the * sk_buff. If the packet is small we make a copy and recycle the * original buffer, otherwise we use the original buffer itself. If a * positive drop threshold is supplied packets are dropped and their * buffers recycled if (a) the number of remaining buffers is under the * threshold and the packet is too big to copy, or (b) the packet should * be copied but there is no memory for the copy. */ static int get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs, struct t3_mbuf_hdr *mh, struct rsp_desc *r) { unsigned int len_cq = ntohl(r->len_cq); struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; int mask, cidx = fl->cidx; struct rx_sw_desc *sd = &fl->sdesc[cidx]; uint32_t len = G_RSPD_LEN(len_cq); uint32_t flags = M_EXT; uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags)); caddr_t cl; struct mbuf *m; int ret = 0; mask = fl->size - 1; prefetch(fl->sdesc[(cidx + 1) & mask].m); prefetch(fl->sdesc[(cidx + 2) & mask].m); prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl); prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl); fl->credits--; bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD); if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) { if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) goto skip_recycle; cl = mtod(m, void *); memcpy(cl, sd->rxsd_cl, len); recycle_rx_buf(adap, fl, fl->cidx); m->m_pkthdr.len = m->m_len = len; m->m_flags = 0; mh->mh_head = mh->mh_tail = m; ret = 1; goto done; } else { skip_recycle: bus_dmamap_unload(fl->entry_tag, sd->map); cl = sd->rxsd_cl; m = sd->m; if ((sopeop == RSPQ_SOP_EOP) || (sopeop == RSPQ_SOP)) flags |= M_PKTHDR; m_init(m, M_NOWAIT, MT_DATA, flags); if (fl->zone == zone_pack) { /* * restore clobbered data pointer */ m->m_data = m->m_ext.ext_buf; } else { m_cljset(m, cl, fl->type); } m->m_len = len; } switch(sopeop) { case RSPQ_SOP_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_SOP: mh->mh_head = mh->mh_tail = m; m->m_pkthdr.len = len; break; case RSPQ_EOP: ret = 1; /* FALLTHROUGH */ case RSPQ_NSOP_NEOP: if (mh->mh_tail == NULL) { log(LOG_ERR, "discarding intermediate descriptor entry\n"); m_freem(m); break; } mh->mh_tail->m_next = m; mh->mh_tail = m; mh->mh_head->m_pkthdr.len += len; break; } if (cxgb_debug) printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len); done: if (++fl->cidx == fl->size) fl->cidx = 0; return (ret); } /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response * @flags: the response control flags * * Handles the control information of an SGE response, such as GTS * indications and completion credits for the queue set's Tx queues. * HW coalesces credits, we don't do any extra SW coalescing. */ static __inline void handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags) { unsigned int credits; #if USE_GTS if (flags & F_RSPD_TXQ0_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags); #endif credits = G_RSPD_TXQ0_CR(flags); if (credits) qs->txq[TXQ_ETH].processed += credits; credits = G_RSPD_TXQ2_CR(flags); if (credits) qs->txq[TXQ_CTRL].processed += credits; # if USE_GTS if (flags & F_RSPD_TXQ1_GTS) clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags); # endif credits = G_RSPD_TXQ1_CR(flags); if (credits) qs->txq[TXQ_OFLD].processed += credits; } static void check_ring_db(adapter_t *adap, struct sge_qset *qs, unsigned int sleeping) { ; } /** * process_responses - process responses from an SGE response queue * @adap: the adapter * @qs: the queue set to which the response queue belongs * @budget: how many responses can be processed in this round * * Process responses from an SGE response queue up to the supplied budget. * Responses include received packets as well as credits and other events * for the queues that belong to the response queue's queue set. * A negative budget is effectively unlimited. * * Additionally choose the interrupt holdoff time for the next interrupt * on this queue. If the system is under memory shortage use a fairly * long delay to help recovery. */ static int process_responses(adapter_t *adap, struct sge_qset *qs, int budget) { struct sge_rspq *rspq = &qs->rspq; struct rsp_desc *r = &rspq->desc[rspq->cidx]; int budget_left = budget; unsigned int sleeping = 0; #if defined(INET6) || defined(INET) int lro_enabled = qs->lro.enabled; int skip_lro; struct lro_ctrl *lro_ctrl = &qs->lro.ctrl; #endif struct t3_mbuf_hdr *mh = &rspq->rspq_mh; #ifdef DEBUG static int last_holdoff = 0; if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) { printf("next_holdoff=%d\n", rspq->holdoff_tmr); last_holdoff = rspq->holdoff_tmr; } #endif rspq->next_holdoff = rspq->holdoff_tmr; while (__predict_true(budget_left && is_new_response(r, rspq))) { int eth, eop = 0, ethpad = 0; uint32_t flags = ntohl(r->flags); uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val); uint8_t opcode = r->rss_hdr.opcode; eth = (opcode == CPL_RX_PKT); if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) { struct mbuf *m; if (cxgb_debug) printf("async notification\n"); if (mh->mh_head == NULL) { mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA); m = mh->mh_head; } else { m = m_gethdr(M_NOWAIT, MT_DATA); } if (m == NULL) goto no_mem; memcpy(mtod(m, char *), r, AN_PKT_SIZE); m->m_len = m->m_pkthdr.len = AN_PKT_SIZE; *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF; opcode = CPL_ASYNC_NOTIF; eop = 1; rspq->async_notif++; goto skip; } else if (flags & F_RSPD_IMM_DATA_VALID) { struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { no_mem: rspq->next_holdoff = NOMEM_INTR_DELAY; budget_left--; break; } if (mh->mh_head == NULL) mh->mh_head = m; else mh->mh_tail->m_next = m; mh->mh_tail = m; get_imm_packet(adap, r, m); mh->mh_head->m_pkthdr.len += m->m_len; eop = 1; rspq->imm_data++; } else if (r->len_cq) { int drop_thresh = eth ? SGE_RX_DROP_THRES : 0; eop = get_packet(adap, drop_thresh, qs, mh, r); if (eop) { if (r->rss_hdr.hash_type && !adap->timestamp) { M_HASHTYPE_SET(mh->mh_head, M_HASHTYPE_OPAQUE_HASH); mh->mh_head->m_pkthdr.flowid = rss_hash; } } ethpad = 2; } else { rspq->pure_rsps++; } skip: if (flags & RSPD_CTRL_MASK) { sleeping |= flags & RSPD_GTS_MASK; handle_rsp_cntrl_info(qs, flags); } if (!eth && eop) { rspq->offload_pkts++; #ifdef TCP_OFFLOAD adap->cpl_handler[opcode](qs, r, mh->mh_head); #else m_freem(mh->mh_head); #endif mh->mh_head = NULL; } else if (eth && eop) { struct mbuf *m = mh->mh_head; t3_rx_eth(adap, m, ethpad); /* * The T304 sends incoming packets on any qset. If LRO * is also enabled, we could end up sending packet up * lro_ctrl->ifp's input. That is incorrect. * * The mbuf's rcvif was derived from the cpl header and * is accurate. Skip LRO and just use that. */ #if defined(INET6) || defined(INET) skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif); if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro && (tcp_lro_rx(lro_ctrl, m, 0) == 0) ) { /* successfully queue'd for LRO */ } else #endif { /* * LRO not enabled, packet unsuitable for LRO, * or unable to queue. Pass it up right now in * either case. */ struct ifnet *ifp = m->m_pkthdr.rcvif; (*ifp->if_input)(ifp, m); } mh->mh_head = NULL; } r++; if (__predict_false(++rspq->cidx == rspq->size)) { rspq->cidx = 0; rspq->gen ^= 1; r = rspq->desc; } if (++rspq->credits >= 64) { refill_rspq(adap, rspq, rspq->credits); rspq->credits = 0; } __refill_fl_lt(adap, &qs->fl[0], 32); __refill_fl_lt(adap, &qs->fl[1], 32); --budget_left; } #if defined(INET6) || defined(INET) /* Flush LRO */ tcp_lro_flush_all(lro_ctrl); #endif if (sleeping) check_ring_db(adap, qs, sleeping); mb(); /* commit Tx queue processed updates */ if (__predict_false(qs->txq_stopped > 1)) restart_tx(qs); __refill_fl_lt(adap, &qs->fl[0], 512); __refill_fl_lt(adap, &qs->fl[1], 512); budget -= budget_left; return (budget); } /* * A helper function that processes responses and issues GTS. */ static __inline int process_responses_gts(adapter_t *adap, struct sge_rspq *rq) { int work; static int last_holdoff = 0; work = process_responses(adap, rspq_to_qset(rq), -1); if (cxgb_debug && (rq->next_holdoff != last_holdoff)) { printf("next_holdoff=%d\n", rq->next_holdoff); last_holdoff = rq->next_holdoff; } t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) | V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx)); return (work); } /* * Interrupt handler for legacy INTx interrupts for T3B-based cards. * Handles data events from SGE response queues as well as error and other * async events as they all use the same interrupt pin. We use one SGE * response queue per port in this mode and protect all response queues with * queue 0's lock. */ void t3b_intr(void *data) { uint32_t i, map; adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); if (!map) return; if (__predict_false(map & F_ERRINTR)) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } mtx_lock(&q0->lock); for_each_port(adap, i) if (map & (1 << i)) process_responses_gts(adap, &adap->sge.qs[i].rspq); mtx_unlock(&q0->lock); } /* * The MSI interrupt handler. This needs to handle data events from SGE * response queues as well as error and other async events as they all use * the same MSI vector. We use one SGE response queue per port in this mode * and protect all response queues with queue 0's lock. */ void t3_intr_msi(void *data) { adapter_t *adap = data; struct sge_rspq *q0 = &adap->sge.qs[0].rspq; int i, new_packets = 0; mtx_lock(&q0->lock); for_each_port(adap, i) if (process_responses_gts(adap, &adap->sge.qs[i].rspq)) new_packets = 1; mtx_unlock(&q0->lock); if (new_packets == 0) { t3_write_reg(adap, A_PL_INT_ENABLE0, 0); (void) t3_read_reg(adap, A_PL_INT_ENABLE0); taskqueue_enqueue(adap->tq, &adap->slow_intr_task); } } void t3_intr_msix(void *data) { struct sge_qset *qs = data; adapter_t *adap = qs->port->adapter; struct sge_rspq *rspq = &qs->rspq; if (process_responses_gts(adap, rspq) == 0) rspq->unhandled_irqs++; } #define QDUMP_SBUF_SIZE 32 * 400 static int t3_dump_rspq(SYSCTL_HANDLER_ARGS) { struct sge_rspq *rspq; struct sge_qset *qs; int i, err, dump_end, idx; struct sbuf *sb; struct rsp_desc *rspd; uint32_t data[4]; rspq = arg1; qs = rspq_to_qset(rspq); if (rspq->rspq_dump_count == 0) return (0); if (rspq->rspq_dump_count > RSPQ_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", rspq->rspq_dump_count); rspq->rspq_dump_count = 0; return (EINVAL); } if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", rspq->rspq_dump_start); rspq->rspq_dump_start = 0; return (EINVAL); } err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n", (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f), ((data[2] >> 26) & 1), ((data[2] >> 27) & 1)); sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n", ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]); sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start, (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1)); dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count; for (i = rspq->rspq_dump_start; i < dump_end; i++) { idx = i & (RSPQ_Q_SIZE-1); rspd = &rspq->desc[idx]; sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n", idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx, rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx)); sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n", rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags), be32toh(rspd->len_cq), rspd->intr_gen); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_eth(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; uint32_t data[4]; txq = arg1; qs = txq_to_qset(txq, TXQ_ETH); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > TX_ETH_Q_SIZE) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data); if (err) return (err); err = sysctl_wire_old_buffer(req, 0); if (err) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n", (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16), (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1)); sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n", ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1), ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1)); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1)); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS) { struct sge_txq *txq; struct sge_qset *qs; int i, j, err, dump_end; struct sbuf *sb; struct tx_desc *txd; uint32_t *WR, wr_hi, wr_lo, gen; txq = arg1; qs = txq_to_qset(txq, TXQ_CTRL); if (txq->txq_dump_count == 0) { return (0); } if (txq->txq_dump_count > 256) { log(LOG_WARNING, "dump count is too large %d\n", txq->txq_dump_count); txq->txq_dump_count = 1; return (EINVAL); } if (txq->txq_dump_start > 255) { log(LOG_WARNING, "dump start of %d is greater than queue size\n", txq->txq_dump_start); txq->txq_dump_start = 0; return (EINVAL); } err = sysctl_wire_old_buffer(req, 0); if (err != 0) return (err); sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req); sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx, txq->txq_dump_start, (txq->txq_dump_start + txq->txq_dump_count) & 255); dump_end = txq->txq_dump_start + txq->txq_dump_count; for (i = txq->txq_dump_start; i < dump_end; i++) { txd = &txq->desc[i & (255)]; WR = (uint32_t *)txd->flit; wr_hi = ntohl(WR[0]); wr_lo = ntohl(WR[1]); gen = G_WR_GEN(wr_lo); sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n", wr_hi, wr_lo, gen); for (j = 2; j < 30; j += 4) sbuf_printf(sb, "\t%08x %08x %08x %08x \n", WR[j], WR[j + 1], WR[j + 2], WR[j + 3]); } err = sbuf_finish(sb); sbuf_delete(sb); return (err); } static int t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; struct qset_params *qsp = &sc->params.sge.qset[0]; int coalesce_usecs; struct sge_qset *qs; int i, j, err, nqsets = 0; struct mtx *lock; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); coalesce_usecs = qsp->coalesce_usecs; err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req); if (err != 0) { return (err); } if (coalesce_usecs == qsp->coalesce_usecs) return (0); for (i = 0; i < sc->params.nports; i++) for (j = 0; j < sc->port[i].nqsets; j++) nqsets++; coalesce_usecs = max(1, coalesce_usecs); for (i = 0; i < nqsets; i++) { qs = &sc->sge.qs[i]; qsp = &sc->params.sge.qset[i]; qsp->coalesce_usecs = coalesce_usecs; lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock : &sc->sge.qs[0].rspq.lock; mtx_lock(lock); t3_update_qset_coalesce(qs, qsp); t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | V_NEWTIMER(qs->rspq.holdoff_tmr)); mtx_unlock(lock); } return (0); } static int t3_pkt_timestamp(SYSCTL_HANDLER_ARGS) { adapter_t *sc = arg1; int rc, timestamp; if ((sc->flags & FULL_INIT_DONE) == 0) return (ENXIO); timestamp = sc->timestamp; rc = sysctl_handle_int(oidp, ×tamp, arg2, req); if (rc != 0) return (rc); if (timestamp != sc->timestamp) { t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS, timestamp ? F_ENABLERXPKTTMSTPRSS : 0); sc->timestamp = timestamp; } return (0); } void t3_add_attach_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); /* random information */ SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, &sc->params.rev, 0, "chip model"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "port_types", CTLFLAG_RD, sc->port_types, 0, "type of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "enable_debug", CTLFLAG_RW, &cxgb_debug, 0, "enable verbose debugging output"); SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce", CTLFLAG_RD, &sc->tunq_coalesce, "#tunneled packets freed"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "txq_overrun", CTLFLAG_RD, &txq_fills, 0, "#times txq overrun"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, &sc->params.vpd.cclk, 0, "core clock frequency (in KHz)"); } static const char *rspq_name = "rspq"; static const char *txq_names[] = { "txq_eth", "txq_ofld", "txq_ctrl" }; static int sysctl_handle_macstat(SYSCTL_HANDLER_ARGS) { struct port_info *p = arg1; uint64_t *parg; if (!p) return (EINVAL); cxgb_refresh_stats(p); parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2); return (sysctl_handle_64(oidp, parg, 0, req)); } void t3_add_configured_sysctls(adapter_t *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; int i, j; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "intr_coal", CTLTYPE_INT|CTLFLAG_RW, sc, 0, t3_set_coalesce_usecs, "I", "interrupt coalescing timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pkt_timestamp", CTLTYPE_INT | CTLFLAG_RW, sc, 0, t3_pkt_timestamp, "I", "provide packet timestamp instead of connection hash"); for (i = 0; i < sc->params.nports; i++) { struct port_info *pi = &sc->port[i]; struct sysctl_oid *poid; struct sysctl_oid_list *poidlist; struct mac_stats *mstats = &pi->mac.stats; snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i); poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, pi->namebuf, CTLFLAG_RD, NULL, "port statistics"); poidlist = SYSCTL_CHILDREN(poid); SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO, "nqsets", CTLFLAG_RD, &pi->nqsets, 0, "#queue sets"); for (j = 0; j < pi->nqsets; j++) { struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j]; struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid; struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist; struct sge_txq *txq = &qs->txq[TXQ_ETH]; snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j); qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, qs->namebuf, CTLFLAG_RD, NULL, "qset statistics"); qspoidlist = SYSCTL_CHILDREN(qspoid); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty", CTLFLAG_RD, &qs->fl[0].empty, 0, "freelist #0 empty"); SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty", CTLFLAG_RD, &qs->fl[1].empty, 0, "freelist #1 empty"); rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, rspq_name, CTLFLAG_RD, NULL, "rspq statistics"); rspqpoidlist = SYSCTL_CHILDREN(rspqpoid); txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[0], CTLFLAG_RD, NULL, "txq statistics"); txqpoidlist = SYSCTL_CHILDREN(txqpoid); ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics"); ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid); lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO, "lro_stats", CTLFLAG_RD, NULL, "LRO statistics"); lropoidlist = SYSCTL_CHILDREN(lropoid); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size", CTLFLAG_RD, &qs->rspq.size, 0, "#entries in response queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx", CTLFLAG_RD, &qs->rspq.cidx, 0, "consumer index"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits", CTLFLAG_RD, &qs->rspq.credits, 0, "#credits"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved", CTLFLAG_RD, &qs->rspq.starved, 0, "#times starved"); SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &qs->rspq.phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->rspq.rspq_dump_start, 0, "start rspq dump entry"); SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->rspq.rspq_dump_count, 0, "#rspq entries to dump"); SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq, 0, t3_dump_rspq, "A", "dump of the response queue"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped", CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops, "#tunneled packets dropped"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen", CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len, 0, "#tunneled packets waiting to be sent"); #if 0 SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod, 0, "#tunneled packets queue producer index"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx", CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons, 0, "#tunneled packets queue consumer index"); #endif SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed", CTLFLAG_RD, &qs->txq[TXQ_ETH].processed, 0, "#tunneled packets processed by the card"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned", CTLFLAG_RD, &txq->cleaned, 0, "#tunneled packets cleaned"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use", CTLFLAG_RD, &txq->in_use, 0, "#tunneled packet slots in use"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees", CTLFLAG_RD, &txq->txq_frees, "#tunneled packets freed"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped", CTLFLAG_RD, &txq->txq_skipped, 0, "#tunneled packet descriptors skipped"); SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced", CTLFLAG_RD, &txq->txq_coalesced, "#tunneled packets coalesced"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued", CTLFLAG_RD, &txq->txq_enqueued, 0, "#tunneled packets enqueued to hardware"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags", CTLFLAG_RD, &qs->txq_stopped, 0, "tx queues stopped"); SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr", CTLFLAG_RD, &txq->phys_addr, "physical_address_of the queue"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen", CTLFLAG_RW, &qs->txq[TXQ_ETH].gen, 0, "txq generation"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx", CTLFLAG_RD, &txq->cidx, 0, "hardware queue cidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx", CTLFLAG_RD, &txq->pidx, 0, "hardware queue pidx"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start, 0, "txq start idx for dump"); SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count, 0, "txq #entries to dump"); SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A", "dump of the transmit queue"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start, 0, "ctrlq start idx for dump"); SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count", CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count, 0, "ctrl #entries to dump"); SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump", CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A", "dump of the transmit queue"); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued", CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed", CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL); SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum", CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL); SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt", CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL); } /* Now add a node for mac stats. */ poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC statistics"); poidlist = SYSCTL_CHILDREN(poid); /* * We (ab)use the length argument (arg2) to pass on the offset * of the data that we are interested in. This is only required * for the quad counters that are updated from the hardware (we * make sure that we return the latest value). * sysctl_handle_macstat first updates *all* the counters from * the hardware, and then returns the latest value of the * requested counter. Best would be to update only the * requested counter from hardware, but t3_mac_update_stats() * hides all the register details and we don't want to dive into * all that here. */ #define CXGB_SYSCTL_ADD_QUAD(a) SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \ (CTLTYPE_U64 | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \ sysctl_handle_macstat, "QU", 0) CXGB_SYSCTL_ADD_QUAD(tx_octets); CXGB_SYSCTL_ADD_QUAD(tx_octets_bad); CXGB_SYSCTL_ADD_QUAD(tx_frames); CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(tx_pause); CXGB_SYSCTL_ADD_QUAD(tx_deferred); CXGB_SYSCTL_ADD_QUAD(tx_late_collisions); CXGB_SYSCTL_ADD_QUAD(tx_total_collisions); CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions); CXGB_SYSCTL_ADD_QUAD(tx_underrun); CXGB_SYSCTL_ADD_QUAD(tx_len_errs); CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral); CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(tx_frames_64); CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max); CXGB_SYSCTL_ADD_QUAD(rx_octets); CXGB_SYSCTL_ADD_QUAD(rx_octets_bad); CXGB_SYSCTL_ADD_QUAD(rx_frames); CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames); CXGB_SYSCTL_ADD_QUAD(rx_pause); CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs); CXGB_SYSCTL_ADD_QUAD(rx_align_errs); CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs); CXGB_SYSCTL_ADD_QUAD(rx_data_errs); CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs); CXGB_SYSCTL_ADD_QUAD(rx_runt); CXGB_SYSCTL_ADD_QUAD(rx_jabber); CXGB_SYSCTL_ADD_QUAD(rx_short); CXGB_SYSCTL_ADD_QUAD(rx_too_long); CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs); CXGB_SYSCTL_ADD_QUAD(rx_cong_drops); CXGB_SYSCTL_ADD_QUAD(rx_frames_64); CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127); CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255); CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511); CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023); CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518); CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max); #undef CXGB_SYSCTL_ADD_QUAD #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \ CTLFLAG_RD, &mstats->a, 0) CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err); CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun); CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl); CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err); CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change); CXGB_SYSCTL_ADD_ULONG(num_toggled); CXGB_SYSCTL_ADD_ULONG(num_resets); CXGB_SYSCTL_ADD_ULONG(link_faults); #undef CXGB_SYSCTL_ADD_ULONG } } /** * t3_get_desc - dump an SGE descriptor for debugging purposes * @qs: the queue set * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx) * @idx: the descriptor index in the queue * @data: where to dump the descriptor contents * * Dumps the contents of a HW descriptor of an SGE queue. Returns the * size of the descriptor. */ int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data) { if (qnum >= 6) return (EINVAL); if (qnum < 3) { if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size) return -EINVAL; memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc)); return sizeof(struct tx_desc); } if (qnum == 3) { if (!qs->rspq.desc || idx >= qs->rspq.size) return (EINVAL); memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc)); return sizeof(struct rsp_desc); } qnum -= 4; if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size) return (EINVAL); memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc)); return sizeof(struct rx_desc); } Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c (revision 320528) @@ -1,300 +1,299 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include #include #include #include #include #include static int iwch_mod_load(void); static int iwch_mod_unload(void); static int iwch_activate(struct adapter *); static int iwch_deactivate(struct adapter *); static struct uld_info iwch_uld_info = { .uld_id = ULD_IWARP, .activate = iwch_activate, .deactivate = iwch_deactivate, }; static void rnic_init(struct iwch_dev *rnicp) { idr_init(&rnicp->cqidr); idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); mtx_init(&rnicp->lock, "iwch rnic lock", NULL, MTX_DEF|MTX_DUPOK); rnicp->attr.vendor_id = 0x168; rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; rnicp->attr.max_rdma_read_resources = rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ rnicp->attr.max_rdma_read_depth = rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; rnicp->attr.rq_overflow_handled = 0; rnicp->attr.can_modify_ird = 0; rnicp->attr.can_modify_ord = 0; rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; rnicp->attr.stag0_value = 1; rnicp->attr.zbva_support = 1; rnicp->attr.local_invalidate_fence = 1; rnicp->attr.cq_overflow_detection = 1; return; } static void rnic_uninit(struct iwch_dev *rnicp) { idr_destroy(&rnicp->cqidr); idr_destroy(&rnicp->qpidr); idr_destroy(&rnicp->mmidr); mtx_destroy(&rnicp->lock); } static int iwch_activate(struct adapter *sc) { struct iwch_dev *rnicp; int rc; KASSERT(!isset(&sc->offload_map, MAX_NPORTS), ("%s: iWARP already activated on %s", __func__, device_get_nameunit(sc->dev))); rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); if (rnicp == NULL) return (ENOMEM); sc->iwarp_softc = rnicp; rnicp->rdev.adap = sc; cxio_hal_init(sc); iwch_cm_init_cpl(sc); rc = cxio_rdev_open(&rnicp->rdev); if (rc != 0) { printf("Unable to open CXIO rdev\n"); goto err1; } rnic_init(rnicp); rc = iwch_register_device(rnicp); if (rc != 0) { printf("Unable to register device\n"); goto err2; } return (0); err2: rnic_uninit(rnicp); cxio_rdev_close(&rnicp->rdev); err1: cxio_hal_uninit(sc); iwch_cm_term_cpl(sc); sc->iwarp_softc = NULL; return (rc); } static int iwch_deactivate(struct adapter *sc) { struct iwch_dev *rnicp; rnicp = sc->iwarp_softc; iwch_unregister_device(rnicp); rnic_uninit(rnicp); cxio_rdev_close(&rnicp->rdev); cxio_hal_uninit(sc); iwch_cm_term_cpl(sc); ib_dealloc_device(&rnicp->ibdev); sc->iwarp_softc = NULL; return (0); } static void iwch_activate_all(struct adapter *sc, void *arg __unused) { ADAPTER_LOCK(sc); if ((sc->open_device_map & sc->offload_map) != 0 && t3_activate_uld(sc, ULD_IWARP) == 0) setbit(&sc->offload_map, MAX_NPORTS); ADAPTER_UNLOCK(sc); } static void iwch_deactivate_all(struct adapter *sc, void *arg __unused) { ADAPTER_LOCK(sc); if (isset(&sc->offload_map, MAX_NPORTS) && t3_deactivate_uld(sc, ULD_IWARP) == 0) clrbit(&sc->offload_map, MAX_NPORTS); ADAPTER_UNLOCK(sc); } static int iwch_mod_load(void) { int rc; rc = iwch_cm_init(); if (rc != 0) return (rc); rc = t3_register_uld(&iwch_uld_info); if (rc != 0) { iwch_cm_term(); return (rc); } t3_iterate(iwch_activate_all, NULL); return (rc); } static int iwch_mod_unload(void) { t3_iterate(iwch_deactivate_all, NULL); iwch_cm_term(); if (t3_unregister_uld(&iwch_uld_info) == EBUSY) return (EBUSY); return (0); } #endif /* TCP_OFFLOAD */ static int iwch_modevent(module_t mod, int cmd, void *arg) { int rc = 0; #ifdef TCP_OFFLOAD switch (cmd) { case MOD_LOAD: rc = iwch_mod_load(); if(rc) printf("iw_cxgb: Chelsio T3 RDMA Driver failed to load\n"); else printf("iw_cxgb: Chelsio T3 RDMA Driver loaded\n"); break; case MOD_UNLOAD: rc = iwch_mod_unload(); if(rc) printf("iw_cxgb: Chelsio T3 RDMA Driver failed to unload\n"); else printf("iw_cxgb: Chelsio T3 RDMA Driver unloaded\n"); break; default: rc = EINVAL; } #else printf("iw_cxgb: compiled without TCP_OFFLOAD support.\n"); rc = EOPNOTSUPP; #endif return (rc); } static moduledata_t iwch_mod_data = { "iw_cxgb", iwch_modevent, 0 }; MODULE_VERSION(iw_cxgb, 1); DECLARE_MODULE(iw_cxgb, iwch_mod_data, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(t3_tom, cxgbc, 1, 1, 1); MODULE_DEPEND(iw_cxgb, toecore, 1, 1, 1); MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1); MODULE_DEPEND(iw_cxgb, ibcore, 1, 1, 1); MODULE_DEPEND(iw_cxgb, linuxkpi, 1, 1, 1); Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c (revision 320528) @@ -1,1686 +1,1685 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTR static char *states[] = { "idle", "listen", "connecting", "mpa_wait_req", "mpa_req_sent", "mpa_req_rcvd", "mpa_rep_sent", "fpdu_mode", "aborting", "closing", "moribund", "dead", NULL, }; #endif SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters"); static int ep_timeout_secs = 60; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, "CM Endpoint operation timeout in seconds (default=60)"); static int mpa_rev = 1; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)"); static int markers_enabled = 0; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, "Enable MPA MARKERS (default(0)=disabled)"); static int crc_enabled = 1; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0, "Enable MPA CRC (default(1)=enabled)"); static int rcv_win = 256 * 1024; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, "TCP receive window in bytes (default=256KB)"); static int snd_win = 32 * 1024; SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, "TCP send window in bytes (default=32KB)"); static unsigned int nocong = 0; SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0, "Turn off congestion control (default=0)"); static unsigned int cong_flavor = 1; SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0, "TCP Congestion control flavor (default=1)"); static void ep_timeout(void *arg); static void connect_reply_upcall(struct iwch_ep *ep, int status); static int iwch_so_upcall(struct socket *so, void *arg, int waitflag); /* * Cruft to offload socket upcalls onto thread. */ static struct mtx req_lock; static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list; static struct task iw_cxgb_task; static struct taskqueue *iw_cxgb_taskq; static void process_req(void *ctx, int pending); static void start_ep_timer(struct iwch_ep *ep) { CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); if (callout_pending(&ep->timer)) { CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep); callout_deactivate(&ep->timer); callout_drain(&ep->timer); } else { /* * XXX this looks racy */ get_ep(&ep->com); callout_init(&ep->timer, 1); } callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep); } static void stop_ep_timer(struct iwch_ep *ep) { CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); if (!callout_pending(&ep->timer)) { CTR3(KTR_IW_CXGB, "%s timer stopped when its not running! ep %p state %u\n", __func__, ep, ep->com.state); return; } callout_drain(&ep->timer); put_ep(&ep->com); } static int set_tcpinfo(struct iwch_ep *ep) { struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; struct toepcb *toep; int rc = 0; INP_WLOCK(inp); tp = intotcpcb(inp); if ((tp->t_flags & TF_TOE) == 0) { rc = EINVAL; printf("%s: connection NOT OFFLOADED!\n", __func__); goto done; } toep = tp->t_toe; ep->hwtid = toep->tp_tid; ep->snd_seq = tp->snd_nxt; ep->rcv_seq = tp->rcv_nxt; ep->emss = tp->t_maxseg; if (ep->emss < 128) ep->emss = 128; done: INP_WUNLOCK(inp); return (rc); } static enum iwch_ep_state state_read(struct iwch_ep_common *epc) { enum iwch_ep_state state; mtx_lock(&epc->lock); state = epc->state; mtx_unlock(&epc->lock); return state; } static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) { epc->state = new; } static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) { mtx_lock(&epc->lock); CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]); __state_set(epc, new); mtx_unlock(&epc->lock); return; } static void * alloc_ep(int size, int flags) { struct iwch_ep_common *epc; epc = malloc(size, M_DEVBUF, flags); if (epc) { memset(epc, 0, size); refcount_init(&epc->refcount, 1); mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK); cv_init(&epc->waitq, "iwch_epc cv"); } CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc); return epc; } void __free_ep(struct iwch_ep_common *epc) { CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); free(epc, M_DEVBUF); } static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) { struct in_addr addr; addr.s_addr = peer_ip; return (fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4)); } static void close_socket(struct iwch_ep_common *epc, int close) { CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); SOCK_LOCK(epc->so); soupcall_clear(epc->so, SO_RCV); SOCK_UNLOCK(epc->so); if (close) soclose(epc->so); else soshutdown(epc->so, SHUT_WR|SHUT_RD); epc->so = NULL; } static void shutdown_socket(struct iwch_ep_common *epc) { CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); soshutdown(epc->so, SHUT_WR); } static void abort_socket(struct iwch_ep *ep) { struct sockopt sopt; int err; struct linger l; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); l.l_onoff = 1; l.l_linger = 0; /* linger_time of 0 forces RST to be sent */ sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_LINGER; sopt.sopt_val = (caddr_t)&l; sopt.sopt_valsize = sizeof l; sopt.sopt_td = NULL; err = sosetopt(ep->com.so, &sopt); if (err) printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err); } static void send_mpa_req(struct iwch_ep *ep) { int mpalen; struct mpa_message *mpa; struct mbuf *m; int err; CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen); mpalen = sizeof(*mpa) + ep->plen; m = m_gethdr(mpalen, M_NOWAIT); if (m == NULL) { connect_reply_upcall(ep, -ENOMEM); return; } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); mpa->flags = (crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0); mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev; if (ep->plen) memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); if (err) { m_freem(m); connect_reply_upcall(ep, -ENOMEM); return; } start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); return; } static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) { int mpalen; struct mpa_message *mpa; struct mbuf *m; int err; CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen); mpalen = sizeof(*mpa) + plen; m = m_gethdr(mpalen, M_NOWAIT); if (m == NULL) { printf("%s - cannot alloc mbuf!\n", __FUNCTION__); return (-ENOMEM); } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; mpa->revision = mpa_rev; mpa->private_data_size = htons(plen); if (plen) memcpy(mpa->private_data, pdata, plen); err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); PANIC_IF(err); return 0; } static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) { int mpalen; struct mpa_message *mpa; struct mbuf *m; CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen); mpalen = sizeof(*mpa) + plen; m = m_gethdr(mpalen, M_NOWAIT); if (m == NULL) { printf("%s - cannot alloc mbuf!\n", __FUNCTION__); return (-ENOMEM); } mpa = mtod(m, struct mpa_message *); m->m_len = mpalen; m->m_pkthdr.len = mpalen; memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | (markers_enabled ? MPA_MARKERS : 0); mpa->revision = mpa_rev; mpa->private_data_size = htons(plen); if (plen) memcpy(mpa->private_data, pdata, plen); state_set(&ep->com, MPA_REP_SENT); return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); } static void close_complete_upcall(struct iwch_ep *ep) { struct iw_cm_event event; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; if (ep->com.cm_id) { CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; } } static void abort_connection(struct iwch_ep *ep) { CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); state_set(&ep->com, ABORTING); abort_socket(ep); close_socket(&ep->com, 0); close_complete_upcall(ep); state_set(&ep->com, DEAD); put_ep(&ep->com); } static void peer_close_upcall(struct iwch_ep *ep) { struct iw_cm_event event; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_DISCONNECT; if (ep->com.cm_id) { CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } } static void peer_abort_upcall(struct iwch_ep *ep) { struct iw_cm_event event; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; event.status = ECONNRESET; if (ep->com.cm_id) { CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; } } static void connect_reply_upcall(struct iwch_ep *ep, int status) { struct iw_cm_event event; CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = status; event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; if ((status == 0) || (status == ECONNREFUSED)) { event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } if (ep->com.cm_id) { CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, ep->hwtid, status); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; ep->com.qp = NULL; } } static void connect_request_upcall(struct iwch_ep *ep) { struct iw_cm_event event; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); event.provider_data = ep; event.so = ep->com.so; if (state_read(&ep->parent_ep->com) != DEAD) { get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler( ep->parent_ep->com.cm_id, &event); } put_ep(&ep->parent_ep->com); } static void established_upcall(struct iwch_ep *ep) { struct iw_cm_event event; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; if (ep->com.cm_id) { CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } } static void process_mpa_reply(struct iwch_ep *ep) { struct mpa_message *mpa; u16 plen; struct iwch_qp_attributes attrs; enum iwch_qp_attr_mask mask; int err; struct mbuf *top, *m; int flags = MSG_DONTWAIT; struct uio uio; int len; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); /* * Stop mpa timer. If it expired, then the state has * changed and we bail since ep_timeout already aborted * the connection. */ stop_ep_timer(ep); if (state_read(&ep->com) != MPA_REQ_SENT) return; uio.uio_resid = len = 1000000; uio.uio_td = ep->com.thread; err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); if (err) { if (err == EWOULDBLOCK) { start_ep_timer(ep); return; } err = -err; goto err; } if (ep->com.so->so_rcv.sb_mb) { printf("%s data after soreceive called! so %p sb_mb %p top %p\n", __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); } m = top; do { /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { err = (-EINVAL); goto err; } /* * copy the new data into our accumulation buffer. */ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); ep->mpa_pkt_len += m->m_len; if (!m->m_next) m = m->m_nextpkt; else m = m->m_next; } while (m); m_freem(top); /* * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) return; mpa = (struct mpa_message *)ep->mpa_pkt; /* Validate MPA header. */ if (mpa->revision != mpa_rev) { CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); err = EPROTO; goto err; } if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); err = EPROTO; goto err; } plen = ntohs(mpa->private_data_size); /* * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); err = EPROTO; goto err; } /* * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len); err = EPROTO; goto err; } ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) return; if (mpa->flags & MPA_REJECT) { err = ECONNREFUSED; goto err; } /* * If we get here we have accumulated the entire mpa * start reply message including private data. And * the MPA header is valid. */ CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__); state_set(&ep->com, FPDU_MODE); ep->mpa_attr.initiator = 1; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa_rev; if (set_tcpinfo(ep)) { printf("%s set_tcpinfo error\n", __FUNCTION__); goto err; } CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " "xmit_marker_enabled=%d, version=%d", __FUNCTION__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = IWCH_QP_STATE_RTS; mask = IWCH_QP_ATTR_NEXT_STATE | IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; /* bind QP and TID with INIT_WR */ err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (!err) goto out; err: abort_connection(ep); out: connect_reply_upcall(ep, err); return; } static void process_mpa_request(struct iwch_ep *ep) { struct mpa_message *mpa; u16 plen; int flags = MSG_DONTWAIT; struct mbuf *top, *m; int err; struct uio uio; int len; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); /* * Stop mpa timer. If it expired, then the state has * changed and we bail since ep_timeout already aborted * the connection. */ stop_ep_timer(ep); if (state_read(&ep->com) != MPA_REQ_WAIT) return; uio.uio_resid = len = 1000000; uio.uio_td = ep->com.thread; err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); if (err) { if (err == EWOULDBLOCK) { start_ep_timer(ep); return; } err = -err; goto err; } m = top; do { /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, ep->mpa_pkt_len + m->m_len); goto err; } /* * Copy the new data into our accumulation buffer. */ m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); ep->mpa_pkt_len += m->m_len; if (!m->m_next) m = m->m_nextpkt; else m = m->m_next; } while (m); m_freem(top); /* * If we don't even have the mpa message, then bail. * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < sizeof(*mpa)) { start_ep_timer(ep); CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, ep->mpa_pkt_len); return; } mpa = (struct mpa_message *) ep->mpa_pkt; /* * Validate MPA Header. */ if (mpa->revision != mpa_rev) { CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); goto err; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); goto err; } plen = ntohs(mpa->private_data_size); /* * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); goto err; } /* * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, ep->mpa_pkt_len); goto err; } ep->plen = (u8) plen; /* * If we don't have all the pdata yet, then bail. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { start_ep_timer(ep); CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, ep->mpa_pkt_len); return; } /* * If we get here we have accumulated the entire mpa * start reply message including private data. */ ep->mpa_attr.initiator = 0; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa_rev; if (set_tcpinfo(ep)) { printf("%s set_tcpinfo error\n", __FUNCTION__); goto err; } CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " "xmit_marker_enabled=%d, version=%d", __FUNCTION__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); state_set(&ep->com, MPA_REQ_RCVD); /* drive upcall */ connect_request_upcall(ep); return; err: abort_connection(ep); return; } static void process_peer_close(struct iwch_ep *ep) { struct iwch_qp_attributes attrs; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); mtx_lock(&ep->com.lock); switch (ep->com.state) { case MPA_REQ_WAIT: __state_set(&ep->com, CLOSING); break; case MPA_REQ_SENT: __state_set(&ep->com, CLOSING); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REQ_RCVD: /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. */ __state_set(&ep->com, CLOSING); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); break; case FPDU_MODE: start_ep_timer(ep); __state_set(&ep->com, CLOSING); attrs.next_state = IWCH_QP_STATE_CLOSING; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); peer_close_upcall(ep); break; case ABORTING: disconnect = 0; break; case CLOSING: __state_set(&ep->com, MORIBUND); disconnect = 0; break; case MORIBUND: stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = IWCH_QP_STATE_IDLE; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } close_socket(&ep->com, 0); close_complete_upcall(ep); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; break; case DEAD: disconnect = 0; break; default: PANIC_IF(1); } mtx_unlock(&ep->com.lock); if (disconnect) iwch_ep_disconnect(ep, 0, M_NOWAIT); if (release) put_ep(&ep->com); return; } static void process_conn_error(struct iwch_ep *ep) { struct iwch_qp_attributes attrs; int ret; mtx_lock(&ep->com.lock); CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state); switch (ep->com.state) { case MPA_REQ_WAIT: stop_ep_timer(ep); break; case MPA_REQ_SENT: stop_ep_timer(ep); connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REP_SENT: ep->com.rpl_err = ECONNRESET; CTR1(KTR_IW_CXGB, "waking up ep %p", ep); break; case MPA_REQ_RCVD: /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. */ break; case MORIBUND: case CLOSING: stop_ep_timer(ep); /*FALLTHROUGH*/ case FPDU_MODE: if (ep->com.cm_id && ep->com.qp) { attrs.next_state = IWCH_QP_STATE_ERROR; ret = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); if (ret) log(LOG_ERR, "%s - qp <- error failed!\n", __FUNCTION__); } peer_abort_upcall(ep); break; case ABORTING: break; case DEAD: mtx_unlock(&ep->com.lock); CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, ep->com.so->so_error); return; default: PANIC_IF(1); break; } if (ep->com.state != ABORTING) { close_socket(&ep->com, 0); __state_set(&ep->com, DEAD); put_ep(&ep->com); } mtx_unlock(&ep->com.lock); return; } static void process_close_complete(struct iwch_ep *ep) { struct iwch_qp_attributes attrs; int release = 0; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); PANIC_IF(!ep); /* The cm_id may be null if we failed to connect */ mtx_lock(&ep->com.lock); switch (ep->com.state) { case CLOSING: __state_set(&ep->com, MORIBUND); break; case MORIBUND: stop_ep_timer(ep); if ((ep->com.cm_id) && (ep->com.qp)) { attrs.next_state = IWCH_QP_STATE_IDLE; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } if (ep->parent_ep) close_socket(&ep->com, 1); else close_socket(&ep->com, 0); close_complete_upcall(ep); __state_set(&ep->com, DEAD); release = 1; break; case ABORTING: break; case DEAD: default: PANIC_IF(1); break; } mtx_unlock(&ep->com.lock); if (release) put_ep(&ep->com); return; } /* * T3A does 3 things when a TERM is received: * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet * 2) generate an async event on the QP with the TERMINATE opcode * 3) post a TERMINATE opcde cqe into the associated CQ. * * For (1), we save the message in the qp for later consumer consumption. * For (2), we move the QP into TERMINATE, post a QP event and disconnect. * For (3), we toss the CQE in cxio_poll_cq(). * * terminate() handles case (1)... */ static int terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) { struct adapter *sc = qs->adap; struct tom_data *td = sc->tom_softc; uint32_t hash = *((uint32_t *)r + 1); unsigned int tid = ntohl(hash) >> 8 & 0xfffff; struct toepcb *toep = lookup_tid(&td->tid_maps, tid); struct socket *so = toep->tp_inp->inp_socket; struct iwch_ep *ep = so->so_rcv.sb_upcallarg; if (state_read(&ep->com) != FPDU_MODE) goto done; m_adj(m, sizeof(struct cpl_rdma_terminate)); CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes", __func__, tid, ep, m->m_len); m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); ep->com.qp->attr.terminate_msg_len = m->m_len; ep->com.qp->attr.is_terminate_local = 0; done: m_freem(m); return (0); } static int ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) { struct adapter *sc = qs->adap; struct tom_data *td = sc->tom_softc; struct cpl_rdma_ec_status *rep = mtod(m, void *); unsigned int tid = GET_TID(rep); struct toepcb *toep = lookup_tid(&td->tid_maps, tid); struct socket *so = toep->tp_inp->inp_socket; struct iwch_ep *ep = so->so_rcv.sb_upcallarg; if (rep->status) { struct iwch_qp_attributes attrs; CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__); stop_ep_timer(ep); attrs.next_state = IWCH_QP_STATE_ERROR; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); abort_connection(ep); } m_freem(m); return (0); } static void ep_timeout(void *arg) { struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_qp_attributes attrs; int err = 0; int abort = 1; mtx_lock(&ep->com.lock); CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); switch (ep->com.state) { case MPA_REQ_SENT: __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: if (ep->com.cm_id && ep->com.qp) err = 1; __state_set(&ep->com, ABORTING); break; default: CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n", __func__, ep, ep->com.state); abort = 0; } mtx_unlock(&ep->com.lock); if (err){ attrs.next_state = IWCH_QP_STATE_ERROR; iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } if (abort) abort_connection(ep); put_ep(&ep->com); } int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { int err; struct iwch_ep *ep = to_ep(cm_id); CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); if (state_read(&ep->com) == DEAD) { put_ep(&ep->com); return (-ECONNRESET); } PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); if (mpa_rev == 0) { abort_connection(ep); } else { err = send_mpa_reject(ep, pdata, pdata_len); err = soshutdown(ep->com.so, 3); } put_ep(&ep->com); return 0; } int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err; struct iwch_qp_attributes attrs; enum iwch_qp_attr_mask mask; struct iwch_ep *ep = to_ep(cm_id); struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_qp *qp = get_qhp(h, conn_param->qpn); CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); if (state_read(&ep->com) == DEAD) { err = -ECONNRESET; goto err; } PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); PANIC_IF(!qp); if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { abort_connection(ep); err = -EINVAL; goto err; } cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; ep->com.rpl_err = 0; ep->com.rpl_done = 0; ep->ird = conn_param->ird; ep->ord = conn_param->ord; CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = IWCH_QP_STATE_RTS; /* bind QP and TID with INIT_WR */ mask = IWCH_QP_ATTR_NEXT_STATE | IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) goto err1; err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) goto err1; state_set(&ep->com, FPDU_MODE); established_upcall(ep); put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: put_ep(&ep->com); return err; } static int init_sock(struct iwch_ep_common *epc) { int err; struct sockopt sopt; int on=1; SOCK_LOCK(epc->so); soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc); epc->so->so_state |= SS_NBIO; SOCK_UNLOCK(epc->so); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; sopt.sopt_val = (caddr_t)&on; sopt.sopt_valsize = sizeof on; sopt.sopt_td = NULL; err = sosetopt(epc->so, &sopt); if (err) printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); return 0; } static int is_loopback_dst(struct iw_cm_id *cm_id) { uint16_t port = cm_id->remote_addr.sin_port; int ifa_present; cm_id->remote_addr.sin_port = 0; ifa_present = ifa_ifwithaddr_check( (struct sockaddr *)&cm_id->remote_addr); cm_id->remote_addr.sin_port = port; return (ifa_present); } int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_ep *ep; struct nhop4_extended nh4; struct toedev *tdev; if (is_loopback_dst(cm_id)) { err = -ENOSYS; goto out; } ep = alloc_ep(sizeof(*ep), M_NOWAIT); if (!ep) { printf("%s - cannot alloc ep.\n", __FUNCTION__); err = (-ENOMEM); goto out; } callout_init(&ep->timer, 1); ep->plen = conn_param->private_data_len; if (ep->plen) memcpy(ep->mpa_pkt + sizeof(struct mpa_message), conn_param->private_data, ep->plen); ep->ird = conn_param->ird; ep->ord = conn_param->ord; cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = get_qhp(h, conn_param->qpn); ep->com.thread = curthread; PANIC_IF(!ep->com.qp); CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, ep->com.qp, cm_id); ep->com.so = cm_id->so; err = init_sock(&ep->com); if (err) goto fail2; /* find a route */ err = find_route(cm_id->local_addr.sin_addr.s_addr, cm_id->remote_addr.sin_addr.s_addr, cm_id->local_addr.sin_port, cm_id->remote_addr.sin_port, IPTOS_LOWDELAY, &nh4); if (err) { printf("%s - cannot find route.\n", __FUNCTION__); err = EHOSTUNREACH; goto fail2; } if (!(nh4.nh_ifp->if_flags & IFCAP_TOE)) { printf("%s - interface not TOE capable.\n", __FUNCTION__); fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); goto fail2; } tdev = TOEDEV(nh4.nh_ifp); if (tdev == NULL) { printf("%s - No toedev for interface.\n", __FUNCTION__); fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); goto fail2; } fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); state_set(&ep->com, CONNECTING); ep->com.local_addr = cm_id->local_addr; ep->com.remote_addr = cm_id->remote_addr; err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, ep->com.thread); if (!err) goto out; fail2: put_ep(&ep->com); out: return err; } int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog) { int err = 0; struct iwch_listen_ep *ep; ep = alloc_ep(sizeof(*ep), M_NOWAIT); if (!ep) { printf("%s - cannot alloc ep.\n", __FUNCTION__); err = ENOMEM; goto out; } CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->backlog = backlog; ep->com.local_addr = cm_id->local_addr; ep->com.thread = curthread; state_set(&ep->com, LISTEN); ep->com.so = cm_id->so; cm_id->provider_data = ep; out: return err; } void iwch_destroy_listen_ep(struct iw_cm_id *cm_id) { struct iwch_listen_ep *ep = to_listen_ep(cm_id); CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); state_set(&ep->com, DEAD); cm_id->rem_ref(cm_id); put_ep(&ep->com); return; } int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) { int close = 0; mtx_lock(&ep->com.lock); PANIC_IF(!ep); PANIC_IF(!ep->com.so); CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], abrupt); switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: close = 1; if (abrupt) ep->com.state = ABORTING; else { ep->com.state = CLOSING; start_ep_timer(ep); } break; case CLOSING: close = 1; if (abrupt) { stop_ep_timer(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; break; case MORIBUND: case ABORTING: case DEAD: CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n", __func__, ep, ep->com.state); break; default: panic("unknown state: %d\n", ep->com.state); break; } mtx_unlock(&ep->com.lock); if (close) { if (abrupt) abort_connection(ep); else { if (!ep->parent_ep) __state_set(&ep->com, MORIBUND); shutdown_socket(&ep->com); } } return 0; } static void process_data(struct iwch_ep *ep) { struct sockaddr_in *local, *remote; CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); switch (state_read(&ep->com)) { case MPA_REQ_SENT: process_mpa_reply(ep); break; case MPA_REQ_WAIT: /* * XXX * Set local and remote addrs here because when we * dequeue the newly accepted socket, they aren't set * yet in the pcb! */ in_getsockaddr(ep->com.so, (struct sockaddr **)&local); in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); CTR3(KTR_IW_CXGB, "%s local 0x%08x remote 0x%08x", __FUNCTION__, ntohl(local->sin_addr.s_addr), ntohl(remote->sin_addr.s_addr)); ep->com.local_addr = *local; ep->com.remote_addr = *remote; free(local, M_SONAME); free(remote, M_SONAME); process_mpa_request(ep); break; default: if (sbavail(&ep->com.so->so_rcv)) printf("%s Unexpected streaming data." " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb); break; } return; } static void process_connected(struct iwch_ep *ep) { CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { send_mpa_req(ep); } else { connect_reply_upcall(ep, -ep->com.so->so_error); close_socket(&ep->com, 0); state_set(&ep->com, DEAD); put_ep(&ep->com); } } void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) { struct iwch_ep *child_ep; struct sockaddr_in *local; struct sockaddr_in *remote; struct iwch_ep *parent_ep = parent_cm_id->provider_data; CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); if (!child_so) { log(LOG_ERR, "%s - invalid child socket!\n", __func__); return; } child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { log(LOG_ERR, "%s - failed to allocate ep entry!\n", __FUNCTION__); return; } SOCKBUF_LOCK(&child_so->so_rcv); soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep); SOCKBUF_UNLOCK(&child_so->so_rcv); in_getsockaddr(child_so, (struct sockaddr **)&local); in_getpeeraddr(child_so, (struct sockaddr **)&remote); CTR3(KTR_IW_CXGB, "%s remote addr 0x%08x port %d", __FUNCTION__, ntohl(remote->sin_addr.s_addr), ntohs(remote->sin_port)); child_ep->com.tdev = parent_ep->com.tdev; child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family; child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port; child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr; child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len; child_ep->com.remote_addr.sin_family = remote->sin_family; child_ep->com.remote_addr.sin_port = remote->sin_port; child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr; child_ep->com.remote_addr.sin_len = remote->sin_len; child_ep->com.so = child_so; child_ep->com.cm_id = NULL; child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; free(local, M_SONAME); free(remote, M_SONAME); get_ep(&parent_ep->com); callout_init(&child_ep->timer, 1); state_set(&child_ep->com, MPA_REQ_WAIT); start_ep_timer(child_ep); /* maybe the request has already been queued up on the socket... */ process_mpa_request(child_ep); } static int iwch_so_upcall(struct socket *so, void *arg, int waitflag) { struct iwch_ep *ep = arg; CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); mtx_lock(&req_lock); if (ep && ep->com.so && !ep->com.entry.tqe_prev) { get_ep(&ep->com); TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); } mtx_unlock(&req_lock); return (SU_OK); } static void process_socket_event(struct iwch_ep *ep) { int state = state_read(&ep->com); struct socket *so = ep->com.so; CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); if (state == CONNECTING) { process_connected(ep); return; } if (state == LISTEN) { /* socket listening events are handled at IWCM */ CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__, ep->com.state, ep); BUG(); return; } /* connection error */ if (so->so_error) { process_conn_error(ep); return; } /* peer close */ if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { process_peer_close(ep); return; } /* close complete */ if (so->so_state & (SS_ISDISCONNECTED)) { process_close_complete(ep); return; } /* rx data */ process_data(ep); return; } static void process_req(void *ctx, int pending) { struct iwch_ep_common *epc; CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); mtx_lock(&req_lock); while (!TAILQ_EMPTY(&req_list)) { epc = TAILQ_FIRST(&req_list); TAILQ_REMOVE(&req_list, epc, entry); epc->entry.tqe_prev = NULL; mtx_unlock(&req_lock); if (epc->so) process_socket_event((struct iwch_ep *)epc); put_ep(epc); mtx_lock(&req_lock); } mtx_unlock(&req_lock); } int iwch_cm_init(void) { TAILQ_INIT(&req_list); mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, taskqueue_thread_enqueue, &iw_cxgb_taskq); if (iw_cxgb_taskq == NULL) { printf("failed to allocate iw_cxgb taskqueue\n"); return (ENOMEM); } taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); return (0); } void iwch_cm_term(void) { taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); taskqueue_free(iw_cxgb_taskq); } void iwch_cm_init_cpl(struct adapter *sc) { t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate); t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status); } void iwch_cm_term_cpl(struct adapter *sc) { t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL); t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL); } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c (revision 320528) @@ -1,268 +1,267 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Get one cq entry from cxio and map it to openib. * * Returns: * 0 cqe returned * -ENOBUFS EMPTY; * -EAGAIN caller must try again * any other neg errno fatal error */ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, struct ib_wc *wc) { struct iwch_qp *qhp = NULL; struct t3_cqe cqe, *rd_cqe; struct t3_wq *wq; u32 credit = 0; u8 cqe_flushed; u64 cookie; int ret = 1; rd_cqe = cxio_next_cqe(&chp->cq); if (!rd_cqe) return 0; qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); if (!qhp) wq = NULL; else { mtx_lock(&qhp->lock); wq = &(qhp->wq); } ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { CTR3(KTR_IW_CXGB, "%s updating %d cq credits on id %d", __FUNCTION__, credit, chp->cq.cqid); cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); } if (ret) { ret = -EAGAIN; goto out; } ret = 1; wc->wr_id = cookie; wc->qp = &qhp->ibqp; wc->vendor_err = CQE_STATUS(cqe); CTR4(KTR_IW_CXGB, "iwch_poll_cq_one qpid 0x%x type %d opcode %d status 0x%x", CQE_QPID(cqe), CQE_TYPE(cqe), CQE_OPCODE(cqe), CQE_STATUS(cqe)); CTR3(KTR_IW_CXGB, "wrid hi 0x%x lo 0x%x cookie 0x%llx", CQE_WRID_HI(cqe), CQE_WRID_LOW(cqe), (unsigned long long) cookie); if (CQE_TYPE(cqe) == 0) { if (!CQE_STATUS(cqe)) wc->byte_len = CQE_LEN(cqe); else wc->byte_len = 0; wc->opcode = IB_WC_RECV; } else { switch (CQE_OPCODE(cqe)) { case T3_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case T3_READ_REQ: wc->opcode = IB_WC_RDMA_READ; wc->byte_len = CQE_LEN(cqe); break; case T3_SEND: case T3_SEND_WITH_SE: wc->opcode = IB_WC_SEND; break; case T3_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; /* these aren't supported yet */ case T3_SEND_WITH_INV: case T3_SEND_WITH_SE_INV: case T3_LOCAL_INV: case T3_FAST_REGISTER: default: log(LOG_ERR, "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", CQE_OPCODE(cqe), CQE_QPID(cqe)); ret = -EINVAL; goto out; } } if (cqe_flushed) wc->status = IB_WC_WR_FLUSH_ERR; else { switch (CQE_STATUS(cqe)) { case TPT_ERR_SUCCESS: wc->status = IB_WC_SUCCESS; break; case TPT_ERR_STAG: wc->status = IB_WC_LOC_ACCESS_ERR; break; case TPT_ERR_PDID: wc->status = IB_WC_LOC_PROT_ERR; break; case TPT_ERR_QPID: case TPT_ERR_ACCESS: wc->status = IB_WC_LOC_ACCESS_ERR; break; case TPT_ERR_WRAP: wc->status = IB_WC_GENERAL_ERR; break; case TPT_ERR_BOUND: wc->status = IB_WC_LOC_LEN_ERR; break; case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: wc->status = IB_WC_MW_BIND_ERR; break; case TPT_ERR_CRC: case TPT_ERR_MARKER: case TPT_ERR_PDU_LEN_ERR: case TPT_ERR_OUT_OF_RQE: case TPT_ERR_DDP_VERSION: case TPT_ERR_RDMA_VERSION: case TPT_ERR_DDP_QUEUE_NUM: case TPT_ERR_MSN: case TPT_ERR_TBIT: case TPT_ERR_MO: case TPT_ERR_MSN_RANGE: case TPT_ERR_IRD_OVERFLOW: case TPT_ERR_OPCODE: wc->status = IB_WC_FATAL_ERR; break; case TPT_ERR_SWFLUSH: wc->status = IB_WC_WR_FLUSH_ERR; break; default: log(LOG_ERR, "Unexpected cqe_status 0x%x for " "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe)); ret = -EINVAL; } } out: if (wq) mtx_unlock(&qhp->lock); return ret; } int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { struct iwch_dev *rhp; struct iwch_cq *chp; int npolled; int err = 0; chp = to_iwch_cq(ibcq); rhp = chp->rhp; mtx_lock(&chp->lock); for (npolled = 0; npolled < num_entries; ++npolled) { #ifdef DEBUG int i=0; #endif /* * Because T3 can post CQEs that are _not_ associated * with a WR, we might have to poll again after removing * one of these. */ do { err = iwch_poll_cq_one(rhp, chp, wc + npolled); #ifdef DEBUG PANIC_IF(++i > 1000); #endif } while (err == -EAGAIN); if (err <= 0) break; } mtx_unlock(&chp->lock); if (err < 0) { return err; } else { return npolled; } } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c (revision 320528) @@ -1,278 +1,277 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INVARIANTS) && defined(TCP_OFFLOAD) #include #include #include #include #include #include #include #include static int cxio_rdma_get_mem(struct cxio_rdev *rdev, struct ch_mem_range *m) { struct adapter *sc = rdev->adap; struct mc7 *mem; if ((m->addr & 7) || (m->len & 7)) return (EINVAL); if (m->mem_id == MEM_CM) mem = &sc->cm; else if (m->mem_id == MEM_PMRX) mem = &sc->pmrx; else if (m->mem_id == MEM_PMTX) mem = &sc->pmtx; else return (EINVAL); return (t3_mc7_bd_read(mem, m->addr/8, m->len/8, (u64 *)m->buf)); } void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag) { struct ch_mem_range m; u64 *data; u32 addr; int rc; int size = 32; m.buf = malloc(size, M_DEVBUF, M_NOWAIT); if (m.buf == NULL) { CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); return; } m.mem_id = MEM_PMRX; m.addr = (stag >> 8) * 32 + rdev->rnic_info.tpt_base; m.len = size; CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m.addr, m.len); rc = cxio_rdma_get_mem(rdev, &m); if (rc) { CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); free(m.buf, M_DEVBUF); return; } data = (u64 *)m.buf; addr = m.addr; while (size > 0) { CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", addr, (unsigned long long) *data); size -= 8; data++; addr += 8; } free(m.buf, M_DEVBUF); } void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift) { struct ch_mem_range m; u64 *data; u32 addr; int rc; int size, npages; shift += 12; npages = (len + (1ULL << shift) - 1) >> shift; size = npages * sizeof(u64); m.buf = malloc(size, M_DEVBUF, M_NOWAIT); if (m.buf == NULL) { CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); return; } m.mem_id = MEM_PMRX; m.addr = pbl_addr; m.len = size; CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d", __FUNCTION__, m.addr, m.len, npages); rc = cxio_rdma_get_mem(rdev, &m); if (rc) { CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); free(m.buf, M_DEVBUF); return; } data = (u64 *)m.buf; addr = m.addr; while (size > 0) { CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", addr, (unsigned long long) *data); size -= 8; data++; addr += 8; } free(m.buf, M_DEVBUF); } void cxio_dump_wqe(union t3_wr *wqe) { uint64_t *data = (uint64_t *)wqe; uint32_t size = (uint32_t)(be64toh(*data) & 0xff); if (size == 0) size = 8; while (size > 0) { CTR2(KTR_IW_CXGB, "WQE %p: %016llx", data, (unsigned long long) be64toh(*data)); size--; data++; } } void cxio_dump_wce(struct t3_cqe *wce) { uint64_t *data = (uint64_t *)wce; int size = sizeof(*wce); while (size > 0) { CTR2(KTR_IW_CXGB, "WCE %p: %016llx", data, (unsigned long long) be64toh(*data)); size -= 8; data++; } } void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents) { struct ch_mem_range m; int size = nents * 64; u64 *data; u32 addr; int rc; m.buf = malloc(size, M_DEVBUF, M_NOWAIT); if (m.buf == NULL) { CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); return; } m.mem_id = MEM_PMRX; m.addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; m.len = size; CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m.addr, m.len); rc = cxio_rdma_get_mem(rdev, &m); if (rc) { CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); free(m.buf, M_DEVBUF); return; } data = (u64 *)m.buf; addr = m.addr; while (size > 0) { CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", addr, (unsigned long long) *data); size -= 8; data++; addr += 8; } free(m.buf, M_DEVBUF); } void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid) { struct ch_mem_range m; int size = TCB_SIZE; uint32_t *data; uint32_t addr; int rc; m.buf = malloc(size, M_DEVBUF, M_NOWAIT); if (m.buf == NULL) { CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); return; } m.mem_id = MEM_CM; m.addr = hwtid * size; m.len = size; CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m.addr, m.len); rc = cxio_rdma_get_mem(rdev, &m); if (rc) { CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); free(m.buf, M_DEVBUF); return; } data = (uint32_t *)m.buf; addr = m.addr; while (size > 0) { printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n", addr, *(data+2), *(data+3), *(data),*(data+1), *(data+6), *(data+7), *(data+4), *(data+5)); size -= 32; data += 8; addr += 32; } free(m.buf, M_DEVBUF); } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c (revision 320528) @@ -1,262 +1,261 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp, struct respQ_msg_t *rsp_msg, enum ib_event_type ib_event, int send_term) { struct ib_event event; struct iwch_qp_attributes attrs; mtx_lock(&rnicp->lock); if (!qhp) { CTR3(KTR_IW_CXGB, "%s unaffiliated error 0x%x qpid 0x%x\n", __func__, CQE_STATUS(rsp_msg->cqe), CQE_QPID(rsp_msg->cqe)); mtx_unlock(&rnicp->lock); return; } if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { CTR4(KTR_IW_CXGB, "%s AE received after RTS - " "qp state %d qpid 0x%x status 0x%x", __FUNCTION__, qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); mtx_unlock(&rnicp->lock); return; } log(LOG_ERR, "%s - AE qpid 0x%x opcode %d status 0x%x " "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); mtx_unlock(&rnicp->lock); if (qhp->attr.state == IWCH_QP_STATE_RTS) { attrs.next_state = IWCH_QP_STATE_TERMINATE; iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); if (send_term) iwch_post_terminate(qhp, rsp_msg); } event.event = ib_event; event.device = chp->ibcq.device; if (ib_event == IB_EVENT_CQ_ERR) event.element.cq = &chp->ibcq; else event.element.qp = &qhp->ibqp; if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); } void iwch_ev_dispatch(struct iwch_dev *rnicp, struct mbuf *m) { struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data; struct iwch_cq *chp; struct iwch_qp *qhp; u32 cqid = RSPQ_CQID(rsp_msg); mtx_lock(&rnicp->lock); chp = get_chp(rnicp, cqid); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); if (!chp || !qhp) { log(LOG_ERR,"BAD AE cqid 0x%x qpid 0x%x opcode %d " "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n", cqid, CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); mtx_unlock(&rnicp->lock); return; } iwch_qp_add_ref(&qhp->ibqp); mtx_lock(&chp->lock); ++chp->refcnt; mtx_unlock(&chp->lock); mtx_unlock(&rnicp->lock); /* * 1) completion of our sending a TERMINATE. * 2) incoming TERMINATE message. */ if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && (CQE_STATUS(rsp_msg->cqe) == 0)) { if (SQ_TYPE(rsp_msg->cqe)) { CTR3(KTR_IW_CXGB, "%s QPID 0x%x ep %p disconnecting", __FUNCTION__, qhp->wq.qpid, qhp->ep); iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT); } else { CTR2(KTR_IW_CXGB, "%s post REQ_ERR AE QPID 0x%x", __FUNCTION__, qhp->wq.qpid); post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 0); iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT); } goto done; } /* Bad incoming Read request */ if (SQ_TYPE(rsp_msg->cqe) && (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); goto done; } /* Bad incoming write */ if (RQ_TYPE(rsp_msg->cqe) && (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); goto done; } switch (CQE_STATUS(rsp_msg->cqe)) { /* Completion Events */ case TPT_ERR_SUCCESS: #if 0 /* * Confirm the destination entry if this is a RECV completion. */ if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) dst_confirm(qhp->ep->dst); #endif (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); break; case TPT_ERR_STAG: case TPT_ERR_PDID: case TPT_ERR_QPID: case TPT_ERR_ACCESS: case TPT_ERR_WRAP: case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; /* Device Fatal Errors */ case TPT_ERR_ECC: case TPT_ERR_ECC_PSTAG: case TPT_ERR_INTERNAL_ERR: post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); break; /* QP Fatal Errors */ case TPT_ERR_OUT_OF_RQE: case TPT_ERR_PBL_ADDR_BOUND: case TPT_ERR_CRC: case TPT_ERR_MARKER: case TPT_ERR_PDU_LEN_ERR: case TPT_ERR_DDP_VERSION: case TPT_ERR_RDMA_VERSION: case TPT_ERR_OPCODE: case TPT_ERR_DDP_QUEUE_NUM: case TPT_ERR_MSN: case TPT_ERR_TBIT: case TPT_ERR_MO: case TPT_ERR_MSN_GAP: case TPT_ERR_MSN_RANGE: case TPT_ERR_RQE_ADDR_BOUND: case TPT_ERR_IRD_OVERFLOW: post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); break; default: log(LOG_ERR,"Unknown T3 status 0x%x QPID 0x%x\n", CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); break; } done: mtx_lock(&chp->lock); if (--chp->refcnt == 0) wakeup(chp); mtx_unlock(&chp->lock); iwch_qp_rem_ref(&qhp->ibqp); } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c (revision 320528) @@ -1,1339 +1,1338 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Response queue used for RDMA events. */ #define ASYNC_NOTIF_RSPQ 0 static inline int cxio_rdma_cq_setup(struct cxio_rdev *rdev_p, unsigned id, uint64_t base_addr, unsigned size, unsigned ovfl_mode, unsigned credits, unsigned credit_thres) { struct adapter *sc = rdev_p->adap; int rc; mtx_lock_spin(&sc->sge.reg_lock); rc = -t3_sge_init_cqcntxt(sc, id, base_addr, size, ASYNC_NOTIF_RSPQ, ovfl_mode, credits, credit_thres); mtx_unlock_spin(&sc->sge.reg_lock); return (rc); } int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, enum t3_cq_opcode op, u32 credit) { int ret; struct t3_cqe *cqe; u32 rptr; struct adapter *sc = rdev_p->adap; if (op != CQ_CREDIT_UPDATE) credit = 0; mtx_lock_spin(&sc->sge.reg_lock); ret = t3_sge_cqcntxt_op(sc, cq->cqid, op, credit); mtx_unlock_spin(&sc->sge.reg_lock); if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) return (ret); /* * If the rearm returned an index other than our current index, * then there might be CQE's in flight (being DMA'd). We must wait * here for them to complete or the consumer can miss a notification. */ if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { int i=0; rptr = cq->rptr; /* * Keep the generation correct by bumping rptr until it * matches the index returned by the rearm - 1. */ while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) rptr++; /* * Now rptr is the index for the (last) cqe that was * in-flight at the time the HW rearmed the CQ. We * spin until that CQE is valid. */ cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { DELAY(1); if (i++ > 1000000) { struct adapter *sc = rdev_p->adap; log(LOG_ERR, "%s: stalled rnic\n", device_get_nameunit(sc->dev)); PANIC_IF(1); return (-EIO); } } return (1); } return (0); } static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) { return (cxio_rdma_cq_setup(rdev_p, cqid, 0, 0, 0, 0, 0)); } static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) { u64 sge_cmd; struct t3_modify_qp_wr *wqe; struct mbuf *m; m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe); if (m == NULL) { CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__); return (-ENOMEM); } wqe = mtod(m, struct t3_modify_qp_wr *); memset(wqe, 0, sizeof(*wqe)); build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); wqe->flags = htobe32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = htobe64(sge_cmd); return t3_offload_tx(rdev_p->adap, m); } int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) { int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); size += 1; /* one extra page for storing cq-in-err state */ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); if (!cq->cqid) return (-ENOMEM); if (kernel) { cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO); if (!cq->sw_queue) return (-ENOMEM); } cq->queue = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (cq->queue) cq->dma_addr = vtophys(cq->queue); else { free(cq->sw_queue, M_DEVBUF); return (-ENOMEM); } memset(cq->queue, 0, size); return (cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr, 1UL << cq->size_log2, 0, 65535, 1)); } static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) { struct cxio_qpid *entry; u32 qpid; int i; mtx_lock(&uctx->lock); if (!TAILQ_EMPTY(&uctx->qpids)) { entry = TAILQ_FIRST(&uctx->qpids); TAILQ_REMOVE(&uctx->qpids, entry, entry); qpid = entry->qpid; free(entry, M_DEVBUF); } else { qpid = cxio_hal_get_qpid(rdev_p->rscp); if (!qpid) goto out; for (i = qpid+1; i & rdev_p->qpmask; i++) { entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT); if (!entry) break; entry->qpid = i; TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry); } } out: mtx_unlock(&uctx->lock); CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); return qpid; } static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, struct cxio_ucontext *uctx) { struct cxio_qpid *entry; entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT); CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); entry->qpid = qpid; mtx_lock(&uctx->lock); TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry); mtx_unlock(&uctx->lock); } void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) { struct cxio_qpid *pos, *tmp; mtx_lock(&uctx->lock); TAILQ_FOREACH_SAFE(pos, &uctx->qpids, entry, tmp) { TAILQ_REMOVE(&uctx->qpids, pos, entry); if (!(pos->qpid & rdev_p->qpmask)) cxio_hal_put_qpid(rdev_p->rscp, pos->qpid); free(pos, M_DEVBUF); } mtx_unlock(&uctx->lock); } void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) { TAILQ_INIT(&uctx->qpids); mtx_init(&uctx->lock, "cxio uctx", NULL, MTX_DEF|MTX_DUPOK); } int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, struct t3_wq *wq, struct cxio_ucontext *uctx) { int depth = 1UL << wq->size_log2; int rqsize = 1UL << wq->rq_size_log2; wq->qpid = get_qpid(rdev_p, uctx); if (!wq->qpid) return (-ENOMEM); wq->rq = malloc(depth * sizeof(struct t3_swrq), M_DEVBUF, M_NOWAIT|M_ZERO); if (!wq->rq) goto err1; wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); if (!wq->rq_addr) goto err2; wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO); if (!wq->sq) goto err3; wq->queue = contigmalloc(depth *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (wq->queue) wq->dma_addr = vtophys(wq->queue); else goto err4; memset(wq->queue, 0, depth * sizeof(union t3_wr)); wq->doorbell = rdev_p->rnic_info.kdb_addr; if (!kernel_domain) wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + (wq->qpid << rdev_p->qpshift); wq->rdev = rdev_p; CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__, wq->qpid, wq->doorbell, (unsigned long long) wq->udb); return 0; err4: free(wq->sq, M_DEVBUF); err3: cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); err2: free(wq->rq, M_DEVBUF); err1: put_qpid(rdev_p, wq->qpid, uctx); return (-ENOMEM); } int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) { int err; err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); free(cq->sw_queue, M_DEVBUF); #if 0 dma_free_coherent(&(rdev_p->rnic_info.pdev), (1UL << (cq->size_log2)) * sizeof(struct t3_cqe), cq->queue, /* pci_unmap_addr(cq, mapping)*/ 0); #else contigfree(cq->queue,(1UL << (cq->size_log2)) * sizeof(struct t3_cqe), M_DEVBUF); #endif cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); return err; } int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, struct cxio_ucontext *uctx) { #if 0 dma_free_coherent(&(rdev_p->rnic_info.pdev), (1UL << (wq->size_log2)) * sizeof(union t3_wr), wq->queue, /* pci_unmap_addr(wq, mapping)*/ 0); #else contigfree(wq->queue, (1UL << (wq->size_log2)) * sizeof(union t3_wr), M_DEVBUF); #endif free(wq->sq, M_DEVBUF); cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); free(wq->rq, M_DEVBUF); put_qpid(rdev_p, wq->qpid, uctx); return 0; } static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) { struct t3_cqe cqe; CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__, wq, cq, cq->sw_rptr, cq->sw_wptr); memset(&cqe, 0, sizeof(cqe)); cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | V_CQE_OPCODE(T3_SEND) | V_CQE_TYPE(0) | V_CQE_SWCQE(1) | V_CQE_QPID(wq->qpid) | V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, cq->size_log2))); *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; cq->sw_wptr++; } int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) { u32 ptr; int flushed = 0; CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq); /* flush RQ */ CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__, wq->rq_rptr, wq->rq_wptr, count); ptr = wq->rq_rptr + count; while (ptr++ != wq->rq_wptr) { insert_recv_cqe(wq, cq); flushed++; } return flushed; } static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, struct t3_swsq *sqp) { struct t3_cqe cqe; CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__, wq, cq, cq->sw_rptr, cq->sw_wptr); memset(&cqe, 0, sizeof(cqe)); cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | V_CQE_OPCODE(sqp->opcode) | V_CQE_TYPE(1) | V_CQE_SWCQE(1) | V_CQE_QPID(wq->qpid) | V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, cq->size_log2))); cqe.u.scqe.wrid_hi = sqp->sq_wptr; *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; cq->sw_wptr++; } int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { __u32 ptr; int flushed = 0; struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { insert_sq_cqe(wq, cq, sqp); ptr++; sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); flushed++; } return flushed; } /* * Move all CQEs from the HWCQ into the SWCQ. */ void cxio_flush_hw_cq(struct t3_cq *cq) { struct t3_cqe *cqe, *swcqe; CTR3(KTR_IW_CXGB, "%s cq %p cqid 0x%x", __FUNCTION__, cq, cq->cqid); cqe = cxio_next_hw_cqe(cq); while (cqe) { CTR3(KTR_IW_CXGB, "%s flushing hwcq rptr 0x%x to swcq wptr 0x%x", __FUNCTION__, cq->rptr, cq->sw_wptr); swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); *swcqe = *cqe; swcqe->header |= htobe32(V_CQE_SWCQE(1)); cq->sw_wptr++; cq->rptr++; cqe = cxio_next_hw_cqe(cq); } } static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) { if (CQE_OPCODE(*cqe) == T3_TERMINATE) return 0; if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) return 0; if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) return 0; if (CQE_OPCODE(*cqe) && RQ_TYPE(*cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) return 0; return 1; } void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) { struct t3_cqe *cqe; u32 ptr; *count = 0; ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) && (CQE_QPID(*cqe) == wq->qpid)) (*count)++; ptr++; } CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count); } void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) { struct t3_cqe *cqe; u32 ptr; *count = 0; CTR2(KTR_IW_CXGB, "%s count zero %d", __FUNCTION__, *count); ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) (*count)++; ptr++; } CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count); } static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) { return (cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0)); } static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) { int err; u64 sge_cmd, ctx0, ctx1; u64 base_addr; struct t3_modify_qp_wr *wqe; struct mbuf *m; m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe); if (m == NULL) { CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__); return (ENOMEM); } err = cxio_hal_init_ctrl_cq(rdev_p); if (err) { CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err); goto err; } rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2) *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (rdev_p->ctrl_qp.workq) rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq); else { CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__); err = ENOMEM; goto err; } rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr; memset(rdev_p->ctrl_qp.workq, 0, (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr)); mtx_init(&rdev_p->ctrl_qp.lock, "ctl-qp lock", NULL, MTX_DEF|MTX_DUPOK); /* update HW Ctrl QP context */ base_addr = rdev_p->ctrl_qp.dma_addr; base_addr >>= 12; ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | V_EC_BASE_LO((u32) base_addr & 0xffff)); ctx0 <<= 32; ctx0 |= V_EC_CREDITS(FW_WR_NUM); base_addr >>= 16; ctx1 = (u32) base_addr; base_addr >>= 32; ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | V_EC_TYPE(0) | V_EC_GEN(1) | V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; memset(wqe, 0, sizeof(*wqe)); build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, T3_CTL_QP_TID, 7); wqe->flags = htobe32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; wqe->sge_cmd = htobe64(sge_cmd); wqe->ctx1 = htobe64(ctx1); wqe->ctx0 = htobe64(ctx0); CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d", (unsigned long long) rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); return t3_offload_tx(rdev_p->adap, m); err: m_freem(m); return err; } static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) { #if 0 dma_free_coherent(&(rdev_p->rnic_info.pdev), (1UL << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, /* pci_unmap_addr(&rdev_p->ctrl_qp, mapping)*/ 0); #else contigfree(rdev_p->ctrl_qp.workq,(1UL << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr), M_DEVBUF); #endif return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); } /* write len bytes of data into addr (32B aligned address) * If data is NULL, clear len byte of memory to zero. * caller aquires the ctrl_qp lock before the call */ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, u32 len, void *data) { u32 i, nr_wqe, copy_len; u8 *copy_data; u8 wr_len, utx_len; /* length in 8 byte flit */ enum t3_wr_flags flag; __be64 *wqe; u64 utx_cmd; addr &= 0x7FFFFFF; nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ CTR6(KTR_IW_CXGB, "cxio_hal_ctrl_qp_write_mem wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x", rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, nr_wqe, data, addr); utx_len = 3; /* in 32B unit */ for (i = 0; i < nr_wqe; i++) { if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2)) { CTR4(KTR_IW_CXGB, "%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, " "wait for more space i %d", __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); if (cxio_wait(&rdev_p->ctrl_qp, &rdev_p->ctrl_qp.lock, !Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2))) { CTR1(KTR_IW_CXGB, "%s ctrl_qp workq interrupted", __FUNCTION__); return (-ERESTART); } CTR2(KTR_IW_CXGB, "%s ctrl_qp wakeup, continue posting work request " "i %d", __FUNCTION__, i); } wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % (1 << T3_CTRL_QP_SIZE_LOG2))); flag = 0; if (i == (nr_wqe - 1)) { /* last WQE */ flag = T3_COMPLETION_FLAG; if (len % 32) utx_len = len / 32 + 1; else utx_len = len / 32; } /* * Force a CQE to return the credit to the workq in case * we posted more than half the max QP size of WRs */ if ((i != 0) && (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { flag = T3_COMPLETION_FLAG; CTR2(KTR_IW_CXGB, "%s force completion at i %d", __FUNCTION__, i); } /* build the utx mem command */ wqe += (sizeof(struct t3_bypass_wr) >> 3); utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); utx_cmd <<= 32; utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); *wqe = htobe64(utx_cmd); wqe++; copy_data = (u8 *) data + i * 96; copy_len = len > 96 ? 96 : len; /* clear memory content if data is NULL */ if (data) memcpy(wqe, copy_data, copy_len); else memset(wqe, 0, copy_len); if (copy_len % 32) memset(((u8 *) wqe) + copy_len, 0, 32 - (copy_len % 32)); wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + (utx_len << 2); wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % (1 << T3_CTRL_QP_SIZE_LOG2))); /* wptr in the WRID[31:0] */ ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; /* * This must be the last write with a memory barrier * for the genbit */ build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, Q_GENBIT(rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, wr_len); if (flag == T3_COMPLETION_FLAG) ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); len -= 96; rdev_p->ctrl_qp.wptr++; } return 0; } /* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size * OUT: stag index, actual pbl_size, pbl_addr allocated. * TBD: shared memory region support */ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum tpt_mem_type type, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, u8 page_size, u32 pbl_size, u32 pbl_addr) { int err; struct tpt_entry tpt; u32 stag_idx; u32 wptr; stag_state = stag_state > 0; stag_idx = (*stag) >> 8; if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { stag_idx = cxio_hal_get_stag(rdev_p->rscp); if (!stag_idx) return (-ENOMEM); *stag = (stag_idx << 8) | ((*stag) & 0xFF); } CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x", __FUNCTION__, stag_state, type, pdid, stag_idx); mtx_lock(&rdev_p->ctrl_qp.lock); /* write TPT entry */ if (reset_tpt_entry) memset(&tpt, 0, sizeof(tpt)); else { tpt.valid_stag_pdid = htobe32(F_TPT_VALID | V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | V_TPT_STAG_STATE(stag_state) | V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); PANIC_IF(page_size >= 28); tpt.flags_pagesize_qpid = htobe32(V_TPT_PERM(perm) | F_TPT_MW_BIND_ENABLE | V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | V_TPT_PAGE_SIZE(page_size)); tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = htobe32(len); tpt.va_hi = htobe32((u32) (to >> 32)); tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : htobe32(V_TPT_PBL_SIZE((pbl_size) >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + (rdev_p->rnic_info.tpt_base >> 5), sizeof(tpt), &tpt); /* release the stag index to free pool */ if (reset_tpt_entry) cxio_hal_put_stag(rdev_p->rscp, stag_idx); wptr = rdev_p->ctrl_qp.wptr; mtx_unlock(&rdev_p->ctrl_qp.lock); if (!err) if (cxio_wait(&rdev_p->ctrl_qp, &rdev_p->ctrl_qp.lock, SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr))) return (-ERESTART); return err; } int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, u32 pbl_addr, u32 pbl_size) { u32 wptr; int err; CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d", __func__, pbl_addr, rdev_p->rnic_info.pbl_base, pbl_size); mtx_lock(&rdev_p->ctrl_qp.lock); err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, pbl); wptr = rdev_p->ctrl_qp.wptr; mtx_unlock(&rdev_p->ctrl_qp.lock); if (err) return err; if (cxio_wait(&rdev_p->ctrl_qp, &rdev_p->ctrl_qp.lock, SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr))) return ERESTART; return 0; } int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, u8 page_size, u32 pbl_size, u32 pbl_addr) { *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, u8 page_size, u32 pbl_size, u32 pbl_addr) { return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, u32 pbl_addr) { return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); } int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) { *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, 0, 0); } int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) { return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, 0, 0); } int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr, struct socket *so) { struct t3_rdma_init_wr *wqe; struct mbuf *m; struct ofld_hdr *oh; int rc; struct tcpcb *tp; struct inpcb *inp; struct toepcb *toep; m = M_GETHDR_OFLD(0, CPL_PRIORITY_DATA, wqe); if (m == NULL) return (-ENOMEM); CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p); wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT)); wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) | V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); wqe->wrid.id1 = 0; wqe->qpid = htobe32(attr->qpid); wqe->pdid = htobe32(attr->pdid); wqe->scqid = htobe32(attr->scqid); wqe->rcqid = htobe32(attr->rcqid); wqe->rq_addr = htobe32(attr->rq_addr - rdev_p->rnic_info.rqt_base); wqe->rq_size = htobe32(attr->rq_size); wqe->mpaattrs = attr->mpaattrs; wqe->qpcaps = attr->qpcaps; wqe->ulpdu_size = htobe16(attr->tcp_emss); wqe->rqe_count = htobe16(attr->rqe_count); wqe->flags_rtr_type = htobe16(attr->flags | V_RTR_TYPE(attr->rtr_type) | V_CHAN(attr->chan)); wqe->ord = htobe32(attr->ord); wqe->ird = htobe32(attr->ird); wqe->qp_dma_addr = htobe64(attr->qp_dma_addr); wqe->qp_dma_size = htobe32(attr->qp_dma_size); wqe->irs = htobe32(attr->irs); /* XXX: bad form, fix later */ inp = sotoinpcb(so); INP_WLOCK(inp); tp = intotcpcb(inp); toep = tp->t_toe; oh = mtod(m, struct ofld_hdr *); oh->plen = 0; oh->flags |= F_HDR_DF; enqueue_wr(toep, m); toep->tp_wr_avail--; toep->tp_wr_unacked++; rc = t3_offload_tx(rdev_p->adap, m); INP_WUNLOCK(inp); return (rc); } static int cxio_hal_ev_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) { struct adapter *sc = qs->adap; struct iwch_dev *rnicp = sc->iwarp_softc; struct cxio_rdev *rdev_p = &rnicp->rdev; struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data; int qpid = CQE_QPID(rsp_msg->cqe); CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x", __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg)); CTR4(KTR_IW_CXGB, "se %0x notify %0x cqbranch %0x creditth %0x", RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), RSPQ_CREDIT_THRESH(rsp_msg)); CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d", qpid, CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe)); CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x", CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); switch(qpid) { case T3_CTRL_QP_ID: mtx_lock(&rdev_p->ctrl_qp.lock); rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; wakeup(&rdev_p->ctrl_qp); mtx_unlock(&rdev_p->ctrl_qp.lock); break; case 0xfff8: break; default: iwch_ev_dispatch(rnicp, m); } m_freem(m); return (0); } /* Caller takes care of locking if needed */ int cxio_rdev_open(struct cxio_rdev *rdev_p) { int err = 0; struct rdma_info *ri = &rdev_p->rnic_info; struct adapter *sc = rdev_p->adap; KASSERT(rdev_p->adap, ("%s: adap is NULL", __func__)); memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); ri->udbell_physbase = rman_get_start(sc->udbs_res); ri->udbell_len = rman_get_size(sc->udbs_res); ri->tpt_base = t3_read_reg(sc, A_ULPTX_TPT_LLIMIT); ri->tpt_top = t3_read_reg(sc, A_ULPTX_TPT_ULIMIT); ri->pbl_base = t3_read_reg(sc, A_ULPTX_PBL_LLIMIT); ri->pbl_top = t3_read_reg(sc, A_ULPTX_PBL_ULIMIT); ri->rqt_base = t3_read_reg(sc, A_ULPRX_RQ_LLIMIT); ri->rqt_top = t3_read_reg(sc, A_ULPRX_RQ_ULIMIT); ri->kdb_addr = (void *)((unsigned long) rman_get_virtual(sc->regs_res) + A_SG_KDOORBELL); /* * qpshift is the number of bits to shift the qpid left in order * to get the correct address of the doorbell for that qp. */ cxio_init_ucontext(rdev_p, &rdev_p->uctx); rdev_p->qpshift = PAGE_SHIFT - ilog2(65536 >> ilog2(rdev_p->rnic_info.udbell_len >> PAGE_SHIFT)); rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %p info: tpt_base 0x%0x tpt_top 0x%0x num stags %d", rdev_p->adap, rdev_p->rnic_info.tpt_base, rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p)); CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x", rdev_p->rnic_info.pbl_base, rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, rdev_p->rnic_info.rqt_top); CTR6(KTR_IW_CXGB, "udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu " "qpnr %d qpmask 0x%x", rdev_p->rnic_info.udbell_len, rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); err = cxio_hal_init_ctrl_qp(rdev_p); if (err) { log(LOG_ERR, "%s error %d initializing ctrl_qp.\n", __FUNCTION__, err); goto err1; } err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, T3_MAX_NUM_PD); if (err) { log(LOG_ERR, "%s error %d initializing hal resources.\n", __FUNCTION__, err); goto err2; } err = cxio_hal_pblpool_create(rdev_p); if (err) { log(LOG_ERR, "%s error %d initializing pbl mem pool.\n", __FUNCTION__, err); goto err3; } err = cxio_hal_rqtpool_create(rdev_p); if (err) { log(LOG_ERR, "%s error %d initializing rqt mem pool.\n", __FUNCTION__, err); goto err4; } return 0; err4: cxio_hal_pblpool_destroy(rdev_p); err3: cxio_hal_destroy_resource(rdev_p->rscp); err2: cxio_hal_destroy_ctrl_qp(rdev_p); err1: return err; } void cxio_rdev_close(struct cxio_rdev *rdev_p) { cxio_hal_pblpool_destroy(rdev_p); cxio_hal_rqtpool_destroy(rdev_p); cxio_hal_destroy_ctrl_qp(rdev_p); cxio_hal_destroy_resource(rdev_p->rscp); } int cxio_hal_init(struct adapter *sc) { #ifdef needed if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) return (ENOMEM); #endif t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, cxio_hal_ev_handler); return (0); } void cxio_hal_uninit(struct adapter *sc) { t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, NULL); #ifdef needed cxio_hal_destroy_rhdl_resource(); #endif } static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) { struct t3_swsq *sqp; __u32 ptr = wq->sq_rptr; int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (count--) if (!sqp->signaled) { ptr++; sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); } else if (sqp->complete) { /* * Insert this completed cqe into the swcq. */ CTR3(KTR_IW_CXGB, "%s moving cqe into swcq sq idx %ld cq idx %ld", __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2), Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = sqp->cqe; cq->sw_wptr++; sqp->signaled = 0; break; } else break; } static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, struct t3_cqe *read_cqe) { read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; read_cqe->len = wq->oldest_read->read_len; read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | V_CQE_SWCQE(SW_CQE(*hw_cqe)) | V_CQE_OPCODE(T3_READ_REQ) | V_CQE_TYPE(1)); } /* * Return a ptr to the next read wr in the SWSQ or NULL. */ static void advance_oldest_read(struct t3_wq *wq) { u32 rptr = wq->oldest_read - wq->sq + 1; u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); if (wq->oldest_read->opcode == T3_READ_REQ) return; rptr++; } wq->oldest_read = NULL; } /* * cxio_poll_cq * * Caller must: * check the validity of the first CQE, * supply the wq assicated with the qpid. * * credit: cq credit to return to sge. * cqe_flushed: 1 iff the CQE is flushed. * cqe: copy of the polled CQE. * * return value: * 0 CQE returned, * -1 CQE skipped, try again. */ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, u8 *cqe_flushed, u64 *cookie, u32 *credit) { int ret = 0; struct t3_cqe *hw_cqe, read_cqe; *cqe_flushed = 0; *credit = 0; hw_cqe = cxio_next_cqe(cq); CTR5(KTR_IW_CXGB, "cxio_poll_cq CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x", CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe)); CTR4(KTR_IW_CXGB, "opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x", CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), CQE_WRID_LOW(*hw_cqe)); /* * skip cqe's not affiliated with a QP. */ if (wq == NULL) { ret = -1; goto skip_cqe; } /* * Gotta tweak READ completions: * 1) the cqe doesn't contain the sq_wptr from the wr. * 2) opcode not reflected from the wr. * 3) read_len not reflected from the wr. * 4) cq_type is RQ_TYPE not SQ_TYPE. */ if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. */ create_read_req_cqe(wq, hw_cqe, &read_cqe); hw_cqe = &read_cqe; advance_oldest_read(wq); } /* * T3A: Discard TERMINATE CQEs. */ if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { ret = -1; wq->error = 1; goto skip_cqe; } if (CQE_STATUS(*hw_cqe) || wq->error) { *cqe_flushed = wq->error; wq->error = 1; /* * T3A inserts errors into the CQE. We cannot return * these as work completions. */ /* incoming write failures */ if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) && RQ_TYPE(*hw_cqe)) { ret = -1; goto skip_cqe; } /* incoming read request failures */ if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { ret = -1; goto skip_cqe; } /* incoming SEND with no receive posted failures */ if (CQE_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { ret = -1; goto skip_cqe; } PANIC_IF((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); goto proc_cqe; } /* * RECV completion. */ if (RQ_TYPE(*hw_cqe)) { /* * HW only validates 4 bits of MSN. So we must validate that * the MSN in the SEND is the next expected MSN. If its not, * then we complete this with TPT_ERR_MSN and mark the wq in * error. */ if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { wq->error = 1; ret = -1; goto skip_cqe; } if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { wq->error = 1; hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); goto proc_cqe; } goto proc_cqe; } /* * If we get here its a send completion. * * Handle out of order completion. These get stuffed * in the SW SQ. Then the SW SQ is walked to move any * now in-order completions into the SW CQ. This handles * 2 cases: * 1) reaping unsignaled WRs when the first subsequent * signaled WR is completed. * 2) out of order read completions. */ if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { struct t3_swsq *sqp; CTR2(KTR_IW_CXGB, "%s out of order completion going in swsq at idx %ld", __FUNCTION__, Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2)); sqp = wq->sq + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); sqp->cqe = *hw_cqe; sqp->complete = 1; ret = -1; goto flush_wq; } proc_cqe: *cqe = *hw_cqe; /* * Reap the associated WR(s) that are freed up with this * completion. */ if (SQ_TYPE(*hw_cqe)) { wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__, Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; wq->sq_rptr++; } else { CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__, Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) cxio_hal_pblpool_free(wq->rdev, wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); PANIC_IF(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); wq->rq_rptr++; } flush_wq: /* * Flush any completed cqes that are now in-order. */ flush_completed_wrs(wq, cq); skip_cqe: if (SW_CQE(*hw_cqe)) { CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x", __FUNCTION__, cq, cq->cqid, cq->sw_rptr); ++cq->sw_rptr; } else { CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip hw cqe rptr 0x%x", __FUNCTION__, cq, cq->cqid, cq->rptr); ++cq->rptr; /* * T3A: compute credits. */ if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) || ((cq->rptr - cq->wptr) >= 128)) { *credit = cq->rptr - cq->wptr; cq->wptr = cq->rptr; } } return ret; } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c (revision 320528) @@ -1,240 +1,239 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) { u32 mmid; mhp->attr.state = 1; mhp->attr.stag = stag; mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __func__, mmid, mhp); return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); } int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift) { u32 stag; int ret; if (cxio_register_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, mhp->attr.perms, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, shift - 12, mhp->attr.pbl_size, mhp->attr.pbl_addr)) return (-ENOMEM); ret = iwch_finish_mem_reg(mhp, stag); if (ret) cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); return ret; } int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, int npages) { u32 stag; int ret; /* We could support this... */ if (npages > mhp->attr.pbl_size) return (-ENOMEM); stag = mhp->attr.stag; if (cxio_reregister_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, mhp->attr.perms, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, shift - 12, mhp->attr.pbl_size, mhp->attr.pbl_addr)) return (-ENOMEM); ret = iwch_finish_mem_reg(mhp, stag); if (ret) cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); return ret; } int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) { mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, npages << 3); if (!mhp->attr.pbl_addr) return -ENOMEM; mhp->attr.pbl_size = npages; return 0; } void iwch_free_pbl(struct iwch_mr *mhp) { cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, mhp->attr.pbl_size << 3); } int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) { return cxio_write_pbl(&mhp->rhp->rdev, pages, mhp->attr.pbl_addr + (offset << 3), npages); } int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, u64 *total_size, int *npages, int *shift, __be64 **page_list) { u64 mask; int i, j, n; mask = 0; *total_size = 0; for (i = 0; i < num_phys_buf; ++i) { if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) return (-EINVAL); if (i != 0 && i != num_phys_buf - 1 && (buffer_list[i].size & ~PAGE_MASK)) return (-EINVAL); *total_size += buffer_list[i].size; if (i > 0) mask |= buffer_list[i].addr; else mask |= buffer_list[i].addr & PAGE_MASK; if (i != num_phys_buf - 1) mask |= buffer_list[i].addr + buffer_list[i].size; else mask |= (buffer_list[i].addr + buffer_list[i].size + PAGE_SIZE - 1) & PAGE_MASK; } if (*total_size > 0xFFFFFFFFULL) return (-ENOMEM); /* Find largest page shift we can use to cover buffers */ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) if ((1ULL << *shift) & mask) break; buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); buffer_list[0].addr &= ~0ull << *shift; *npages = 0; for (i = 0; i < num_phys_buf; ++i) *npages += (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; if (!*npages) return (-EINVAL); *page_list = kmalloc(sizeof(u64) * *npages, M_NOWAIT); if (!*page_list) return (-ENOMEM); n = 0; for (i = 0; i < num_phys_buf; ++i) for (j = 0; j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; ++j) (*page_list)[n++] = htobe64(buffer_list[i].addr + ((u64) j << *shift)); CTR6(KTR_IW_CXGB, "%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d", __FUNCTION__, (unsigned long long) *iova_start, (unsigned long long) mask, *shift, (unsigned long long) *total_size, *npages); return 0; } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c (revision 320528) @@ -1,1168 +1,1167 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int iwch_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, struct ib_port_modify *props) { return (-ENOSYS); } static struct ib_ah * iwch_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { return ERR_PTR(-ENOSYS); } static int iwch_ah_destroy(struct ib_ah *ah) { return (-ENOSYS); } static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { return (-ENOSYS); } static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { return (-ENOSYS); } static int iwch_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { return (-ENOSYS); } static int iwch_dealloc_ucontext(struct ib_ucontext *context) { struct iwch_dev *rhp = to_iwch_dev(context->device); struct iwch_ucontext *ucontext = to_iwch_ucontext(context); struct iwch_mm_entry *mm, *tmp; CTR2(KTR_IW_CXGB, "%s context %p", __FUNCTION__, context); TAILQ_FOREACH_SAFE(mm, &ucontext->mmaps, entry, tmp) { TAILQ_REMOVE(&ucontext->mmaps, mm, entry); cxfree(mm); } cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); cxfree(ucontext); return 0; } static struct ib_ucontext * iwch_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct iwch_ucontext *context; struct iwch_dev *rhp = to_iwch_dev(ibdev); CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); context = malloc(sizeof(*context), M_DEVBUF, M_ZERO|M_NOWAIT); if (!context) return ERR_PTR(-ENOMEM); cxio_init_ucontext(&rhp->rdev, &context->uctx); TAILQ_INIT(&context->mmaps); mtx_init(&context->mmap_lock, "ucontext mmap", NULL, MTX_DEF); return &context->ibucontext; } static int iwch_destroy_cq(struct ib_cq *ib_cq) { struct iwch_cq *chp; CTR2(KTR_IW_CXGB, "%s ib_cq %p", __FUNCTION__, ib_cq); chp = to_iwch_cq(ib_cq); remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); mtx_lock(&chp->lock); if (--chp->refcnt) msleep(chp, &chp->lock, 0, "iwch_destroy_cq", 0); mtx_unlock(&chp->lock); cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); cxfree(chp); return 0; } static struct ib_cq * iwch_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_cq *chp; struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; struct iwch_ucontext *ucontext = NULL; static int warned; size_t resplen; int entries = attr->cqe; CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries); rhp = to_iwch_dev(ibdev); chp = malloc(sizeof(*chp), M_DEVBUF, M_NOWAIT|M_ZERO); if (!chp) { return ERR_PTR(-ENOMEM); } if (ib_context) { ucontext = to_iwch_ucontext(ib_context); if (!t3a_device(rhp)) { if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { cxfree(chp); return ERR_PTR(-EFAULT); } chp->user_rptr_addr = (u32 /*__user */*)(unsigned long)ureq.user_rptr_addr; } } if (t3a_device(rhp)) { /* * T3A: Add some fluff to handle extra CQEs inserted * for various errors. * Additional CQE possibilities: * TERMINATE, * incoming RDMA WRITE Failures * incoming RDMA READ REQUEST FAILUREs * NOTE: We cannot ensure the CQ won't overflow. */ entries += 16; } entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { cxfree(chp); return ERR_PTR(-ENOMEM); } chp->rhp = rhp; chp->ibcq.cqe = 1 << chp->cq.size_log2; mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK); chp->refcnt = 1; if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); cxfree(chp); return ERR_PTR(-ENOMEM); } if (ucontext) { struct iwch_mm_entry *mm; mm = kmalloc(sizeof *mm, M_NOWAIT); if (!mm) { iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-ENOMEM); } uresp.cqid = chp->cq.cqid; uresp.size_log2 = chp->cq.size_log2; mtx_lock(&ucontext->mmap_lock); uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; mtx_unlock(&ucontext->mmap_lock); mm->key = uresp.key; mm->addr = vtophys(chp->cq.queue); if (udata->outlen < sizeof uresp) { if (!warned++) CTR1(KTR_IW_CXGB, "%s Warning - " "downlevel libcxgb3 (non-fatal).\n", __func__); mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * sizeof(struct t3_cqe)); resplen = sizeof(struct iwch_create_cq_resp_v0); } else { mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * sizeof(struct t3_cqe)); uresp.memsize = mm->len; resplen = sizeof uresp; } if (ib_copy_to_udata(udata, &uresp, resplen)) { cxfree(mm); iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-EFAULT); } insert_mmap(ucontext, mm); } CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx", chp->cq.cqid, chp, (1 << chp->cq.size_log2), (unsigned long long) chp->cq.dma_addr); return &chp->ibcq; } static int iwch_resize_cq(struct ib_cq *cq __unused, int cqe __unused, struct ib_udata *udata __unused) { return (-ENOSYS); } static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct iwch_dev *rhp; struct iwch_cq *chp; enum t3_cq_opcode cq_op; int err; u32 rptr; chp = to_iwch_cq(ibcq); rhp = chp->rhp; if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) cq_op = CQ_ARM_SE; else cq_op = CQ_ARM_AN; if (chp->user_rptr_addr) { if (copyin(chp->user_rptr_addr, &rptr, sizeof(rptr))) return (-EFAULT); mtx_lock(&chp->lock); chp->cq.rptr = rptr; } else mtx_lock(&chp->lock); CTR2(KTR_IW_CXGB, "%s rptr 0x%x", __FUNCTION__, chp->cq.rptr); err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); mtx_unlock(&chp->lock); if (err < 0) log(LOG_ERR, "Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) err = 0; return err; } static int iwch_mmap(struct ib_ucontext *context __unused, struct vm_area_struct *vma __unused) { return (-ENOSYS); } static int iwch_deallocate_pd(struct ib_pd *pd) { struct iwch_dev *rhp; struct iwch_pd *php; php = to_iwch_pd(pd); rhp = php->rhp; CTR3(KTR_IW_CXGB, "%s ibpd %p pdid 0x%x", __FUNCTION__, pd, php->pdid); cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); cxfree(php); return 0; } static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) { struct iwch_pd *php; u32 pdid; struct iwch_dev *rhp; CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); rhp = (struct iwch_dev *) ibdev; pdid = cxio_hal_get_pdid(rhp->rdev.rscp); if (!pdid) return ERR_PTR(-EINVAL); php = malloc(sizeof(*php), M_DEVBUF, M_ZERO|M_NOWAIT); if (!php) { cxio_hal_put_pdid(rhp->rdev.rscp, pdid); return ERR_PTR(-ENOMEM); } php->pdid = pdid; php->rhp = rhp; if (context) { if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) { iwch_deallocate_pd(&php->ibpd); return ERR_PTR(-EFAULT); } } CTR3(KTR_IW_CXGB, "%s pdid 0x%0x ptr 0x%p", __FUNCTION__, pdid, php); return &php->ibpd; } static int iwch_dereg_mr(struct ib_mr *ib_mr) { struct iwch_dev *rhp; struct iwch_mr *mhp; u32 mmid; CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr); /* There can be no memory windows */ if (atomic_load_acq_int(&ib_mr->usecnt.counter)) return (-EINVAL); mhp = to_iwch_mr(ib_mr); rhp = mhp->rhp; mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); iwch_free_pbl(mhp); remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) cxfree((void *) (unsigned long) mhp->kva); if (mhp->umem) ib_umem_release(mhp->umem); CTR3(KTR_IW_CXGB, "%s mmid 0x%x ptr %p", __FUNCTION__, mmid, mhp); cxfree(mhp); return 0; } static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, u64 *iova_start) { __be64 *page_list; int shift; u64 total_size; int npages; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; int ret; CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); php = to_iwch_pd(pd); rhp = php->rhp; mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT); if (!mhp) return ERR_PTR(-ENOMEM); mhp->rhp = rhp; /* First check that we have enough alignment */ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { ret = -EINVAL; goto err; } if (num_phys_buf > 1 && ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { ret = -EINVAL; goto err; } ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, &total_size, &npages, &shift, &page_list); if (ret) goto err; ret = iwch_alloc_pbl(mhp, npages); if (ret) { cxfree(page_list); goto err_pbl; } ret = iwch_write_pbl(mhp, page_list, npages, 0); cxfree(page_list); if (ret) goto err; mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = *iova_start; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) total_size; mhp->attr.pbl_size = npages; ret = iwch_register_mem(rhp, php, mhp, shift); if (ret) goto err_pbl; return &mhp->ibmr; err_pbl: iwch_free_pbl(mhp); err: cxfree(mhp); return ERR_PTR(ret); } static int iwch_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int acc, u64 * iova_start) { struct iwch_mr mh, *mhp; struct iwch_pd *php; struct iwch_dev *rhp; __be64 *page_list = NULL; int shift = 0; u64 total_size; int npages = 0; int ret; CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd); /* There can be no memory windows */ if (atomic_load_acq_int(&mr->usecnt.counter)) return (-EINVAL); mhp = to_iwch_mr(mr); rhp = mhp->rhp; php = to_iwch_pd(mr->pd); /* make sure we are on the same adapter */ if (rhp != php->rhp) return (-EINVAL); memcpy(&mh, mhp, sizeof *mhp); if (mr_rereg_mask & IB_MR_REREG_PD) php = to_iwch_pd(pd); if (mr_rereg_mask & IB_MR_REREG_ACCESS) mh.attr.perms = iwch_ib_to_tpt_access(acc); if (mr_rereg_mask & IB_MR_REREG_TRANS) { ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, &total_size, &npages, &shift, &page_list); if (ret) return ret; } ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); cxfree(page_list); if (ret) { return ret; } if (mr_rereg_mask & IB_MR_REREG_PD) mhp->attr.pdid = php->pdid; if (mr_rereg_mask & IB_MR_REREG_ACCESS) mhp->attr.perms = iwch_ib_to_tpt_access(acc); if (mr_rereg_mask & IB_MR_REREG_TRANS) { mhp->attr.zbva = 0; mhp->attr.va_fbo = *iova_start; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) total_size; mhp->attr.pbl_size = npages; } return 0; } static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata, int mr_id) { __be64 *pages; int shift, n, len; int i, k, entry; int err = 0; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; struct scatterlist *sg; CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); php = to_iwch_pd(pd); rhp = php->rhp; mhp = malloc(sizeof(*mhp), M_DEVBUF, M_NOWAIT|M_ZERO); if (!mhp) return ERR_PTR(-ENOMEM); mhp->rhp = rhp; mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); cxfree(mhp); return ERR_PTR(-err); } shift = ffs(mhp->umem->page_size) - 1; n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) goto err; pages = (__be64 *) kmalloc(n * sizeof(u64), M_NOWAIT); if (!pages) { err = -ENOMEM; goto err_pbl; } i = n = 0; for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); if (err) goto pbl_done; n += i; i = 0; } } } #if 0 TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry) for (j = 0; j < chunk->nmap; ++j) { len = sg_dma_len(&chunk->page_list[j]) >> shift; for (k = 0; k < len; ++k) { pages[i++] = htobe64(sg_dma_address( &chunk->page_list[j]) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); if (err) goto pbl_done; n += i; i = 0; } } } #endif if (i) err = iwch_write_pbl(mhp, pages, i, n); pbl_done: cxfree(pages); if (err) goto err_pbl; mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = virt; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) length; err = iwch_register_mem(rhp, php, mhp, shift); if (err) goto err_pbl; if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3; CTR2(KTR_IW_CXGB, "%s user resp pbl_addr 0x%x", __FUNCTION__, uresp.pbl_addr); if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { iwch_dereg_mr(&mhp->ibmr); err = EFAULT; goto err; } } return &mhp->ibmr; err_pbl: iwch_free_pbl(mhp); err: ib_umem_release(mhp->umem); cxfree(mhp); return ERR_PTR(-err); } static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) { struct ib_phys_buf bl; u64 kva; struct ib_mr *ibmr; CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); /* * T3 only supports 32 bits of size. */ bl.size = 0xffffffff; bl.addr = 0; kva = 0; ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva); return ibmr; } static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mw *mhp; u32 mmid; u32 stag = 0; int ret; php = to_iwch_pd(pd); rhp = php->rhp; mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT); if (!mhp) return ERR_PTR(-ENOMEM); ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); if (ret) { cxfree(mhp); return ERR_PTR(-ret); } mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.type = TPT_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); cxfree(mhp); return ERR_PTR(-ENOMEM); } CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag); return &(mhp->ibmw); } static int iwch_dealloc_mw(struct ib_mw *mw) { struct iwch_dev *rhp; struct iwch_mw *mhp; u32 mmid; mhp = to_iwch_mw(mw); rhp = mhp->rhp; mmid = (mw->rkey) >> 8; cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); remove_handle(rhp, &rhp->mmidr, mmid); cxfree(mhp); CTR4(KTR_IW_CXGB, "%s ib_mw %p mmid 0x%x ptr %p", __FUNCTION__, mw, mmid, mhp); return 0; } static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; struct iwch_qp *qhp; struct iwch_qp_attributes attrs; struct iwch_ucontext *ucontext; qhp = to_iwch_qp(ib_qp); rhp = qhp->rhp; attrs.next_state = IWCH_QP_STATE_ERROR; iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); mtx_lock(&qhp->lock); if (qhp->ep) msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp1", 0); mtx_unlock(&qhp->lock); remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); mtx_lock(&qhp->lock); if (--qhp->refcnt) msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp2", 0); mtx_unlock(&qhp->lock); ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context) : NULL; cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); CTR4(KTR_IW_CXGB, "%s ib_qp %p qpid 0x%0x qhp %p", __FUNCTION__, ib_qp, qhp->wq.qpid, qhp); cxfree(qhp); return 0; } static struct ib_qp *iwch_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_qp *qhp; struct iwch_pd *php; struct iwch_cq *schp; struct iwch_cq *rchp; struct iwch_create_qp_resp uresp; int wqsize, sqsize, rqsize; struct iwch_ucontext *ucontext; CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); if (attrs->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); php = to_iwch_pd(pd); rhp = php->rhp; schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); if (!schp || !rchp) return ERR_PTR(-EINVAL); /* The RQT size must be # of entries + 1 rounded up to a power of two */ rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); if (rqsize == attrs->cap.max_recv_wr) rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); /* T3 doesn't support RQT depth < 16 */ if (rqsize < 16) rqsize = 16; if (rqsize > T3_MAX_RQ_SIZE) return ERR_PTR(-EINVAL); if (attrs->cap.max_inline_data > T3_MAX_INLINE) return ERR_PTR(-EINVAL); /* * NOTE: The SQ and total WQ sizes don't need to be * a power of two. However, all the code assumes * they are. EG: Q_FREECNT() and friends. */ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); wqsize = roundup_pow_of_two(rqsize + sqsize); CTR4(KTR_IW_CXGB, "%s wqsize %d sqsize %d rqsize %d", __FUNCTION__, wqsize, sqsize, rqsize); qhp = malloc(sizeof(*qhp), M_DEVBUF, M_ZERO|M_NOWAIT); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.size_log2 = ilog2(wqsize); qhp->wq.rq_size_log2 = ilog2(rqsize); qhp->wq.sq_size_log2 = ilog2(sqsize); ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { cxfree(qhp); return ERR_PTR(-ENOMEM); } attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize; attrs->cap.max_inline_data = T3_MAX_INLINE; qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.next_state = IWCH_QP_STATE_IDLE; /* * XXX - These don't get passed in from the openib user * at create time. The CM sets them via a QP modify. * Need to fix... I think the CM should */ qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK); qhp->refcnt = 1; if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); cxfree(qhp); return ERR_PTR(-ENOMEM); } if (udata) { struct iwch_mm_entry *mm1, *mm2; mm1 = kmalloc(sizeof *mm1, M_NOWAIT); if (!mm1) { iwch_destroy_qp(&qhp->ibqp); return ERR_PTR(-ENOMEM); } mm2 = kmalloc(sizeof *mm2, M_NOWAIT); if (!mm2) { cxfree(mm1); iwch_destroy_qp(&qhp->ibqp); return ERR_PTR(-ENOMEM); } uresp.qpid = qhp->wq.qpid; uresp.size_log2 = qhp->wq.size_log2; uresp.sq_size_log2 = qhp->wq.sq_size_log2; uresp.rq_size_log2 = qhp->wq.rq_size_log2; mtx_lock(&ucontext->mmap_lock); uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.db_key = ucontext->key; ucontext->key += PAGE_SIZE; mtx_unlock(&ucontext->mmap_lock); if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { cxfree(mm1); cxfree(mm2); iwch_destroy_qp(&qhp->ibqp); return ERR_PTR(-EFAULT); } mm1->key = uresp.key; mm1->addr = vtophys(qhp->wq.queue); mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr)); insert_mmap(ucontext, mm1); mm2->key = uresp.db_key; mm2->addr = qhp->wq.udb & PAGE_MASK; mm2->len = PAGE_SIZE; insert_mmap(ucontext, mm2); } qhp->ibqp.qp_num = qhp->wq.qpid; callout_init(&(qhp->timer), 1); CTR6(KTR_IW_CXGB, "sq_num_entries %d, rq_num_entries %d " "qpid 0x%0x qhp %p dma_addr 0x%llx size %d", qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, 1 << qhp->wq.size_log2); return &qhp->ibqp; } static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct iwch_dev *rhp; struct iwch_qp *qhp; enum iwch_qp_attr_mask mask = 0; struct iwch_qp_attributes attrs; CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, ibqp); /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; /* Make sure we still have something left to do */ if (!attr_mask) return 0; memset(&attrs, 0, sizeof attrs); qhp = to_iwch_qp(ibqp); rhp = qhp->rhp; attrs.next_state = iwch_convert_state(attr->qp_state); attrs.enable_rdma_read = (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) ? 1 : 0; attrs.enable_rdma_write = (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? (IWCH_QP_ATTR_ENABLE_RDMA_READ | IWCH_QP_ATTR_ENABLE_RDMA_WRITE | IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); } void iwch_qp_add_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp); mtx_lock(&to_iwch_qp(qp)->lock); to_iwch_qp(qp)->refcnt++; mtx_unlock(&to_iwch_qp(qp)->lock); } void iwch_qp_rem_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp); mtx_lock(&to_iwch_qp(qp)->lock); if (--to_iwch_qp(qp)->refcnt == 0) wakeup(to_iwch_qp(qp)); mtx_unlock(&to_iwch_qp(qp)->lock); } static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) { CTR3(KTR_IW_CXGB, "%s ib_dev %p qpn 0x%x", __FUNCTION__, dev, qpn); return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); } static int iwch_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey) { CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); *pkey = 0; return 0; } static int iwch_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { struct iwch_dev *dev; struct port_info *pi; struct adapter *sc; CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p", __FUNCTION__, ibdev, port, index, gid); dev = to_iwch_dev(ibdev); sc = dev->rdev.adap; PANIC_IF(port == 0 || port > 2); pi = &sc->port[port - 1]; memset(&(gid->raw[0]), 0, sizeof(gid->raw)); memcpy(&(gid->raw[0]), pi->hw_addr, 6); return 0; } static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct iwch_dev *dev; struct adapter *sc; CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); dev = to_iwch_dev(ibdev); sc = dev->rdev.adap; memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, sc->port[0].hw_addr, 6); props->device_cap_flags = dev->device_cap_flags; props->page_size_cap = dev->attr.mem_pgsizes_bitmask; props->vendor_id = pci_get_vendor(sc->dev); props->vendor_part_id = pci_get_device(sc->dev); props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; props->max_pd = dev->attr.max_pds; props->local_ca_ack_delay = 0; return 0; } static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; props->active_mtu = IB_MTU_2048; props->state = IB_PORT_ACTIVE; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; props->active_width = 2; props->active_speed = 2; props->max_msg_sz = -1; return 0; } int iwch_register_device(struct iwch_dev *dev) { int ret; struct adapter *sc = dev->rdev.adap; CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev); strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, sc->port[0].hw_addr, 6); dev->device_cap_flags = (IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW); dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = sc->params.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dma_device = NULL; dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; dev->ibdev.modify_port = iwch_modify_port; dev->ibdev.query_pkey = iwch_query_pkey; dev->ibdev.query_gid = iwch_query_gid; dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; dev->ibdev.mmap = iwch_mmap; dev->ibdev.alloc_pd = iwch_allocate_pd; dev->ibdev.dealloc_pd = iwch_deallocate_pd; dev->ibdev.create_ah = iwch_ah_create; dev->ibdev.destroy_ah = iwch_ah_destroy; dev->ibdev.create_qp = iwch_create_qp; dev->ibdev.modify_qp = iwch_ib_modify_qp; dev->ibdev.destroy_qp = iwch_destroy_qp; dev->ibdev.create_cq = iwch_create_cq; dev->ibdev.destroy_cq = iwch_destroy_cq; dev->ibdev.resize_cq = iwch_resize_cq; dev->ibdev.poll_cq = iwch_poll_cq; dev->ibdev.get_dma_mr = iwch_get_dma_mr; dev->ibdev.reg_phys_mr = iwch_register_phys_mem; dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem; dev->ibdev.reg_user_mr = iwch_reg_user_mr; dev->ibdev.dereg_mr = iwch_dereg_mr; dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), M_NOWAIT); if (!dev->ibdev.iwcm) return (ENOMEM); dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; dev->ibdev.iwcm->create_listen_ep = iwch_create_listen_ep; dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep; dev->ibdev.iwcm->newconn = process_newconn; dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; ret = ib_register_device(&dev->ibdev, NULL); if (ret) goto bail1; return (0); bail1: cxfree(dev->ibdev.iwcm); return (ret); } void iwch_unregister_device(struct iwch_dev *dev) { ib_unregister_device(&dev->ibdev); cxfree(dev->ibdev.iwcm); return; } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c (revision 320528) @@ -1,1106 +1,1105 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NO_SUPPORT -1 static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, u8 * flit_cnt) { int i; u32 plen; switch (wr->opcode) { case IB_WR_SEND: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE; else wqe->send.rdmaop = T3_SEND; wqe->send.rem_stag = 0; break; case IB_WR_SEND_WITH_IMM: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE_INV; else wqe->send.rdmaop = T3_SEND_WITH_INV; wqe->send.rem_stag = 0; break; default: return -EINVAL; } if (wr->num_sge > T3_MAX_SGE) return (-EINVAL); wqe->send.reserved[0] = 0; wqe->send.reserved[1] = 0; wqe->send.reserved[2] = 0; plen = 0; for (i = 0; i < wr->num_sge; i++) { if ((plen + wr->sg_list[i].length) < plen) { return (-EMSGSIZE); } plen += wr->sg_list[i].length; wqe->send.sgl[i].stag = htobe32(wr->sg_list[i].lkey); wqe->send.sgl[i].len = htobe32(wr->sg_list[i].length); wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr); } wqe->send.num_sgle = htobe32(wr->num_sge); *flit_cnt = 4 + ((wr->num_sge) << 1); wqe->send.plen = htobe32(plen); return 0; } static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { int i; u32 plen; if (wr->num_sge > T3_MAX_SGE) return (-EINVAL); wqe->write.rdmaop = T3_RDMA_WRITE; wqe->write.reserved[0] = 0; wqe->write.reserved[1] = 0; wqe->write.reserved[2] = 0; wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey); wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr); if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; wqe->write.sgl[0].stag = wr->ex.imm_data; wqe->write.sgl[0].len = 0; wqe->write.num_sgle = 0; *flit_cnt = 6; } else { plen = 0; for (i = 0; i < wr->num_sge; i++) { if ((plen + wr->sg_list[i].length) < plen) { return (-EMSGSIZE); } plen += wr->sg_list[i].length; wqe->write.sgl[i].stag = htobe32(wr->sg_list[i].lkey); wqe->write.sgl[i].len = htobe32(wr->sg_list[i].length); wqe->write.sgl[i].to = htobe64(wr->sg_list[i].addr); } wqe->write.num_sgle = htobe32(wr->num_sge); *flit_cnt = 5 + ((wr->num_sge) << 1); } wqe->write.plen = htobe32(plen); return 0; } static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { if (wr->num_sge > 1) return (-EINVAL); wqe->read.rdmaop = T3_READ_REQ; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; wqe->read.reserved[2] = 0; wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey); wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr); wqe->read.local_stag = htobe32(wr->sg_list[0].lkey); wqe->read.local_len = htobe32(wr->sg_list[0].length); wqe->read.local_to = htobe64(wr->sg_list[0].addr); *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; return 0; } static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, u32 num_sgle, u32 * pbl_addr, u8 * page_size) { int i; struct iwch_mr *mhp; u64 offset; for (i = 0; i < num_sgle; i++) { mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); if (!mhp) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EIO); } if (!mhp->attr.state) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EIO); } if (mhp->attr.zbva) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EIO); } if (sg_list[i].addr < mhp->attr.va_fbo) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EINVAL); } if (sg_list[i].addr + ((u64) sg_list[i].length) < sg_list[i].addr) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EINVAL); } if (sg_list[i].addr + ((u64) sg_list[i].length) > mhp->attr.va_fbo + ((u64) mhp->attr.len)) { CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); return (-EINVAL); } offset = sg_list[i].addr - mhp->attr.va_fbo; offset += mhp->attr.va_fbo & ((1UL << (12 + mhp->attr.page_size)) - 1); pbl_addr[i] = ((mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3) + (offset >> (12 + mhp->attr.page_size)); page_size[i] = mhp->attr.page_size; } return 0; } static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { int i, err = 0; u32 pbl_addr[T3_MAX_SGE]; u8 page_size[T3_MAX_SGE]; if (wr->num_sge > T3_MAX_SGE) return (-EINVAL); err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, page_size); if (err) return err; wqe->recv.pagesz[0] = page_size[0]; wqe->recv.pagesz[1] = page_size[1]; wqe->recv.pagesz[2] = page_size[2]; wqe->recv.pagesz[3] = page_size[3]; wqe->recv.num_sgle = htobe32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey); wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length); wqe->recv.sgl[i].to = htobe64(((u32)wr->sg_list[i].addr) & ((1UL << (12 + page_size[i])) - 1)); /* pbl_addr is the adapters address in the PBL */ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); } for (; i < T3_MAX_SGE; i++) { wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; wqe->recv.pbl_addr[i] = 0; } qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)].wr_id = wr->wr_id; qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)].pbl_addr = 0; return 0; } static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { int i; u32 pbl_addr; u32 pbl_offset; /* * The T3 HW requires the PBL in the HW recv descriptor to reference * a PBL entry. So we allocate the max needed PBL memory here and pass * it to the uP in the recv WR. The uP will build the PBL and setup * the HW recv descriptor. */ pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); if (!pbl_addr) return -ENOMEM; /* * Compute the 8B aligned offset. */ pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); for (i = 0; i < wr->num_sge; i++) { /* * Use a 128MB page size. This and an imposed 128MB * sge length limit allows us to require only a 2-entry HW * PBL for each SGE. This restriction is acceptable since * since it is not possible to allocate 128MB of contiguous * DMA coherent memory! */ if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) return -EINVAL; wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; /* * T3 restricts a recv to all zero-stag or all non-zero-stag. */ if (wr->sg_list[i].lkey != 0) return -EINVAL; wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length); wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr); wqe->recv.pbl_addr[i] = htobe32(pbl_offset); pbl_offset += 2; } for (; i < T3_MAX_SGE; i++) { wqe->recv.pagesz[i] = 0; wqe->recv.sgl[i].stag = 0; wqe->recv.sgl[i].len = 0; wqe->recv.sgl[i].to = 0; wqe->recv.pbl_addr[i] = 0; } qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)].wr_id = wr->wr_id; qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; return 0; } int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; u8 t3_wr_flit_cnt = 0; enum t3_wr_opcode t3_wr_opcode = 0; enum t3_wr_flags t3_wr_flags; struct iwch_qp *qhp; u32 idx; union t3_wr *wqe; u32 num_wrs; struct t3_swsq *sqp; qhp = to_iwch_qp(ibqp); mtx_lock(&qhp->lock); if (qhp->attr.state > IWCH_QP_STATE_RTS) { mtx_unlock(&qhp->lock); err = -EINVAL; goto out; } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); if (num_wrs == 0) { mtx_unlock(&qhp->lock); err = -EINVAL; goto out; } while (wr) { if (num_wrs == 0) { err = -ENOMEM; break; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); t3_wr_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; if (wr->send_flags & IB_SEND_FENCE) t3_wr_flags |= T3_READ_FENCE_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) t3_wr_flags |= T3_COMPLETION_FLAG; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); switch (wr->opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: t3_wr_opcode = T3_WR_SEND; err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: t3_wr_opcode = T3_WR_WRITE; err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); if (err) break; sqp->read_len = wqe->read.local_len; if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; default: CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__, wr->opcode); err = -EINVAL; } if (err) break; wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp->wr_id = wr->wr_id; sqp->opcode = wr2opcode(t3_wr_opcode); sqp->sq_wptr = qhp->wq.sq_wptr; sqp->complete = 0; sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, t3_wr_flit_cnt); CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d", __FUNCTION__, (unsigned long long) wr->wr_id, idx, Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), sqp->opcode); wr = wr->next; num_wrs--; ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); } mtx_unlock(&qhp->lock); ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) *bad_wr = wr; return err; } int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { int err = 0; struct iwch_qp *qhp; u32 idx; union t3_wr *wqe; u32 num_wrs; qhp = to_iwch_qp(ibqp); mtx_lock(&qhp->lock); if (qhp->attr.state > IWCH_QP_STATE_RTS) { mtx_unlock(&qhp->lock); err = -EINVAL; goto out; } num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, qhp->wq.rq_size_log2) - 1; if (!wr) { mtx_unlock(&qhp->lock); err = -EINVAL; goto out; } while (wr) { if (wr->num_sge > T3_MAX_SGE) { err = -EINVAL; break; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); if (num_wrs) { if (wr->sg_list[0].lkey) err = build_rdma_recv(qhp, wqe, wr); else err = build_zero_stag_recv(qhp, wqe, wr); } else err = -ENOMEM; if (err) break; build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, sizeof(struct t3_receive_wr) >> 3); CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id, idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); ++(qhp->wq.rq_wptr); ++(qhp->wq.wptr); wr = wr->next; num_wrs--; } mtx_unlock(&qhp->lock); ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); out: if (err) *bad_wr = wr; return err; } int iwch_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { struct iwch_dev *rhp; struct iwch_mw *mhp; struct iwch_qp *qhp; union t3_wr *wqe; u32 pbl_addr; u8 page_size; u32 num_wrs; struct ib_sge sgl; int err=0; enum t3_wr_flags t3_wr_flags; u32 idx; struct t3_swsq *sqp; qhp = to_iwch_qp(qp); mhp = to_iwch_mw(mw); rhp = qhp->rhp; mtx_lock(&qhp->lock); if (qhp->attr.state > IWCH_QP_STATE_RTS) { mtx_unlock(&qhp->lock); return (-EINVAL); } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); if ((num_wrs) == 0) { mtx_unlock(&qhp->lock); return (-ENOMEM); } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx, mw, mw_bind); wqe = (union t3_wr *) (qhp->wq.queue + idx); t3_wr_flags = 0; if (mw_bind->send_flags & IB_SEND_SIGNALED) t3_wr_flags = T3_COMPLETION_FLAG; sgl.addr = mw_bind->bind_info.addr; sgl.lkey = mw_bind->bind_info.mr->lkey; sgl.length = mw_bind->bind_info.length; wqe->bind.reserved = 0; wqe->bind.type = T3_VA_BASED_TO; /* TBD: check perms */ wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags); wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey); wqe->bind.mw_stag = htobe32(mw->rkey); wqe->bind.mw_len = htobe32(mw_bind->bind_info.length); wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr); err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { mtx_unlock(&qhp->lock); return (err); } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); sqp->wr_id = mw_bind->wr_id; sqp->opcode = T3_BIND_MW; sqp->sq_wptr = qhp->wq.sq_wptr; sqp->complete = 0; sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); wqe->bind.mr_pbl_addr = htobe32(pbl_addr); wqe->bind.mr_pagesz = page_size; wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, sizeof(struct t3_bind_mw_wr) >> 3); ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); mtx_unlock(&qhp->lock); ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); return err; } static void build_term_codes(struct respQ_msg_t *rsp_msg, u8 *layer_type, u8 *ecode) { int status = TPT_ERR_INTERNAL_ERR; int tagged = 0; int opcode = -1; int rqtype = 0; int send_inv = 0; if (rsp_msg) { status = CQE_STATUS(rsp_msg->cqe); opcode = CQE_OPCODE(rsp_msg->cqe); rqtype = RQ_TYPE(rsp_msg->cqe); send_inv = (opcode == T3_SEND_WITH_INV) || (opcode == T3_SEND_WITH_SE_INV); tagged = (opcode == T3_RDMA_WRITE) || (rqtype && (opcode == T3_READ_RESP)); } switch (status) { case TPT_ERR_STAG: if (send_inv) { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_INV_STAG; } break; case TPT_ERR_PDID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; if ((opcode == T3_SEND_WITH_INV) || (opcode == T3_SEND_WITH_SE_INV)) *ecode = RDMAP_CANT_INV_STAG; else *ecode = RDMAP_STAG_NOT_ASSOC; break; case TPT_ERR_QPID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_STAG_NOT_ASSOC; break; case TPT_ERR_ACCESS: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_ACC_VIOL; break; case TPT_ERR_WRAP: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_TO_WRAP; break; case TPT_ERR_BOUND: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_BASE_BOUNDS; } break; case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; break; case TPT_ERR_ECC: case TPT_ERR_ECC_PSTAG: case TPT_ERR_INTERNAL_ERR: *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; *ecode = 0; break; case TPT_ERR_OUT_OF_RQE: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_NOBUF; break; case TPT_ERR_PBL_ADDR_BOUND: *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; break; case TPT_ERR_CRC: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_CRC_ERR; break; case TPT_ERR_MARKER: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_MARKER_ERR; break; case TPT_ERR_PDU_LEN_ERR: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_MSG_TOOBIG; break; case TPT_ERR_DDP_VERSION: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_INV_VERS; } else { *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_VERS; } break; case TPT_ERR_RDMA_VERSION: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_VERS; break; case TPT_ERR_OPCODE: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_OPCODE; break; case TPT_ERR_DDP_QUEUE_NUM: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_QN; break; case TPT_ERR_MSN: case TPT_ERR_MSN_GAP: case TPT_ERR_MSN_RANGE: case TPT_ERR_IRD_OVERFLOW: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_RANGE; break; case TPT_ERR_TBIT: *layer_type = LAYER_DDP|DDP_LOCAL_CATA; *ecode = 0; break; case TPT_ERR_MO: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MO; break; default: *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; *ecode = 0; break; } } /* * This posts a TERMINATE with layer=RDMA, type=catastrophic. */ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) { union t3_wr *wqe; struct terminate_message *term; struct mbuf *m; struct ofld_hdr *oh; CTR3(KTR_IW_CXGB, "%s: tid %u, %p", __func__, qhp->ep->hwtid, rsp_msg); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__); return (-ENOMEM); } oh = mtod(m, struct ofld_hdr *); m->m_pkthdr.len = m->m_len = sizeof(*oh) + 40; oh->flags = V_HDR_NDESC(1) | V_HDR_CTRL(CPL_PRIORITY_DATA) | V_HDR_QSET(0); wqe = (void *)(oh + 1); memset(wqe, 0, 40); wqe->send.rdmaop = T3_TERMINATE; /* immediate data length */ wqe->send.plen = htonl(4); /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) | V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid)); return t3_offload_tx(qhp->rhp->rdev.adap, m); } /* * Assumes qhp lock is held. */ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, struct iwch_cq *schp) { int count; int flushed; CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ qhp->refcnt++; mtx_unlock(&qhp->lock); /* locking hierarchy: cq lock first, then qp lock. */ mtx_lock(&rchp->lock); mtx_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); mtx_unlock(&qhp->lock); mtx_unlock(&rchp->lock); if (flushed) (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); /* locking hierarchy: cq lock first, then qp lock. */ mtx_lock(&schp->lock); mtx_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); mtx_unlock(&qhp->lock); mtx_unlock(&schp->lock); if (flushed) (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); /* deref */ mtx_lock(&qhp->lock); if (--qhp->refcnt == 0) wakeup(qhp); } static void flush_qp(struct iwch_qp *qhp) { struct iwch_cq *rchp, *schp; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); cxio_set_cq_in_error(&rchp->cq); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); if (schp != rchp) { cxio_set_cq_in_error(&schp->cq); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); } return; } __flush_qp(qhp, rchp, schp); } /* * Return non zero if at least one RECV was pre-posted. */ static int rqes_posted(struct iwch_qp *qhp) { union t3_wr *wqe = qhp->wq.queue; u16 count = 0; while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { count++; wqe++; } return count; } static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, enum iwch_qp_attr_mask mask, struct iwch_qp_attributes *attrs) { struct t3_rdma_init_attr init_attr; int ret; struct socket *so = qhp->ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp; struct toepcb *toep; init_attr.tid = qhp->ep->hwtid; init_attr.qpid = qhp->wq.qpid; init_attr.pdid = qhp->attr.pd; init_attr.scqid = qhp->attr.scq; init_attr.rcqid = qhp->attr.rcq; init_attr.rq_addr = qhp->wq.rq_addr; init_attr.rq_size = 1 << qhp->wq.rq_size_log2; init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | qhp->attr.mpa_attr.recv_marker_enabled | (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | (qhp->attr.mpa_attr.crc_enabled << 2); init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | uP_RI_QP_RDMA_WRITE_ENABLE | uP_RI_QP_BIND_ENABLE; if (!qhp->ibqp.uobject) init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE; init_attr.tcp_emss = qhp->ep->emss; init_attr.ord = qhp->attr.max_ord; init_attr.ird = qhp->attr.max_ird; init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; init_attr.rtr_type = 0; tp = intotcpcb(inp); toep = tp->t_toe; init_attr.chan = toep->tp_l2t->smt_idx; init_attr.irs = qhp->ep->rcv_seq; CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " "flags 0x%x qpcaps 0x%x", __FUNCTION__, init_attr.rq_addr, init_attr.rq_size, init_attr.flags, init_attr.qpcaps); ret = cxio_rdma_init(&rhp->rdev, &init_attr, qhp->ep->com.so); CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret); return ret; } int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, enum iwch_qp_attr_mask mask, struct iwch_qp_attributes *attrs, int internal) { int ret = 0; struct iwch_qp_attributes newattr = qhp->attr; int disconnect = 0; int terminate = 0; int abort = 0; int free = 0; struct iwch_ep *ep = NULL; CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__, qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); mtx_lock(&qhp->lock); /* Process attr changes if in IDLE */ if (mask & IWCH_QP_ATTR_VALID_MODIFY) { if (qhp->attr.state != IWCH_QP_STATE_IDLE) { ret = -EIO; goto out; } if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) newattr.enable_rdma_read = attrs->enable_rdma_read; if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) newattr.enable_rdma_write = attrs->enable_rdma_write; if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) newattr.enable_bind = attrs->enable_bind; if (mask & IWCH_QP_ATTR_MAX_ORD) { if (attrs->max_ord > rhp->attr.max_rdma_read_qp_depth) { ret = -EINVAL; goto out; } newattr.max_ord = attrs->max_ord; } if (mask & IWCH_QP_ATTR_MAX_IRD) { if (attrs->max_ird > rhp->attr.max_rdma_reads_per_qp) { ret = -EINVAL; goto out; } newattr.max_ird = attrs->max_ird; } qhp->attr = newattr; } if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) goto out; if (qhp->attr.state == attrs->next_state) goto out; switch (qhp->attr.state) { case IWCH_QP_STATE_IDLE: switch (attrs->next_state) { case IWCH_QP_STATE_RTS: if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { ret = -EINVAL; goto out; } if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { ret = -EINVAL; goto out; } qhp->attr.mpa_attr = attrs->mpa_attr; qhp->attr.llp_stream_handle = attrs->llp_stream_handle; qhp->ep = qhp->attr.llp_stream_handle; qhp->attr.state = IWCH_QP_STATE_RTS; /* * Ref the endpoint here and deref when we * disassociate the endpoint from the QP. This * happens in CLOSING->IDLE transition or *->ERROR * transition. */ get_ep(&qhp->ep->com); mtx_unlock(&qhp->lock); ret = rdma_init(rhp, qhp, mask, attrs); mtx_lock(&qhp->lock); if (ret) goto err; break; case IWCH_QP_STATE_ERROR: qhp->attr.state = IWCH_QP_STATE_ERROR; flush_qp(qhp); break; default: ret = -EINVAL; goto out; } break; case IWCH_QP_STATE_RTS: switch (attrs->next_state) { case IWCH_QP_STATE_CLOSING: PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2); qhp->attr.state = IWCH_QP_STATE_CLOSING; if (!internal) { abort=0; disconnect = 1; ep = qhp->ep; get_ep(&ep->com); } break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); if (!internal) terminate = 1; break; case IWCH_QP_STATE_ERROR: qhp->attr.state = IWCH_QP_STATE_ERROR; if (!internal) { abort=1; disconnect = 1; ep = qhp->ep; get_ep(&ep->com); } goto err; break; default: ret = -EINVAL; goto out; } break; case IWCH_QP_STATE_CLOSING: if (!internal) { ret = -EINVAL; goto out; } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: flush_qp(qhp); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); qhp->ep = NULL; wakeup(qhp); break; case IWCH_QP_STATE_ERROR: goto err; default: ret = -EINVAL; goto err; } break; case IWCH_QP_STATE_ERROR: if (attrs->next_state != IWCH_QP_STATE_IDLE) { ret = -EINVAL; goto out; } if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { ret = -EINVAL; goto out; } qhp->attr.state = IWCH_QP_STATE_IDLE; memset(&qhp->attr, 0, sizeof(qhp->attr)); break; case IWCH_QP_STATE_TERMINATE: if (!internal) { ret = -EINVAL; goto out; } goto err; break; default: log(LOG_ERR, "%s in a bad state %d\n", __FUNCTION__, qhp->attr.state); ret = -EINVAL; goto err; break; } goto out; err: CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep, qhp->wq.qpid); /* disassociate the LLP connection */ qhp->attr.llp_stream_handle = NULL; ep = qhp->ep; qhp->ep = NULL; qhp->attr.state = IWCH_QP_STATE_ERROR; free=1; wakeup(qhp); PANIC_IF(!ep); flush_qp(qhp); out: mtx_unlock(&qhp->lock); if (terminate) iwch_post_terminate(qhp, NULL); /* * If disconnect is 1, then we need to initiate a disconnect * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ if (disconnect) { iwch_ep_disconnect(ep, abort, M_NOWAIT); put_ep(&ep->com); } /* * If free is 1, then we've disassociated the EP from the QP * and we need to dereference the EP. */ if (free) put_ep(&ep->com); CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state); return ret; } #endif Index: head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c =================================================================== --- head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c (revision 320527) +++ head/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c (revision 320528) @@ -1,376 +1,375 @@ /************************************************************************** Copyright (c) 2007, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***************************************************************************/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef needed static struct buf_ring *rhdl_fifo; static struct mtx rhdl_fifo_lock; #endif #define RANDOM_SIZE 16 static int __cxio_init_resource_fifo(struct buf_ring **fifo, struct mtx *fifo_lock, u32 nr, u32 skip_low, u32 skip_high, int randomize) { u32 i, j, idx; u32 random_bytes; u32 rarray[16]; mtx_init(fifo_lock, "cxio fifo", NULL, MTX_DEF|MTX_DUPOK); *fifo = buf_ring_alloc(nr, M_DEVBUF, M_NOWAIT, fifo_lock); if (*fifo == NULL) return (-ENOMEM); #if 0 for (i = 0; i < skip_low + skip_high; i++) { u32 entry = 0; buf_ring_enqueue(*fifo, (uintptr_t) entry); } #endif if (randomize) { j = 0; random_bytes = random(); for (i = 0; i < RANDOM_SIZE; i++) rarray[i] = i + skip_low; for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { if (j >= RANDOM_SIZE) { j = 0; random_bytes = random(); } idx = (random_bytes >> (j * 2)) & 0xF; buf_ring_enqueue(*fifo, (void *)(uintptr_t)rarray[idx]); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) buf_ring_enqueue(*fifo, (void *) (uintptr_t)rarray[i]); } else for (i = skip_low; i < nr - skip_high; i++) buf_ring_enqueue(*fifo, (void *) (uintptr_t)i); #if 0 for (i = 0; i < skip_low + skip_high; i++) buf_ring_dequeue_sc(*fifo); #endif return 0; } static int cxio_init_resource_fifo(struct buf_ring **fifo, struct mtx * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, skip_high, 0)); } static int cxio_init_resource_fifo_random(struct buf_ring **fifo, struct mtx * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, skip_high, 1)); } static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) { u32 i; mtx_init(&rdev_p->rscp->qpid_fifo_lock, "qpid fifo", NULL, MTX_DEF); rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_DEVBUF, M_NOWAIT, &rdev_p->rscp->qpid_fifo_lock); if (rdev_p->rscp->qpid_fifo == NULL) return (-ENOMEM); for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) buf_ring_enqueue(rdev_p->rscp->qpid_fifo, (void *) (uintptr_t)i); return 0; } #ifdef needed int cxio_hal_init_rhdl_resource(u32 nr_rhdl) { return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, 0); } void cxio_hal_destroy_rhdl_resource(void) { buf_ring_free(rhdl_fifo, M_DEVBUF); } #endif /* nr_* must be power of 2 */ int cxio_hal_init_resource(struct cxio_rdev *rdev_p, u32 nr_tpt, u32 nr_pbl, u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) { int err = 0; struct cxio_hal_resource *rscp; rscp = malloc(sizeof(*rscp), M_DEVBUF, M_NOWAIT|M_ZERO); if (!rscp) return (-ENOMEM); rdev_p->rscp = rscp; err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, nr_tpt, 1, 0); if (err) goto tpt_err; err = cxio_init_qpid_fifo(rdev_p); if (err) goto qpid_err; err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, nr_cqid, 1, 0); if (err) goto cqid_err; err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, nr_pdid, 1, 0); if (err) goto pdid_err; return 0; pdid_err: buf_ring_free(rscp->cqid_fifo, M_DEVBUF); cqid_err: buf_ring_free(rscp->qpid_fifo, M_DEVBUF); qpid_err: buf_ring_free(rscp->tpt_fifo, M_DEVBUF); tpt_err: return (-ENOMEM); } /* * returns 0 if no resource available */ static u32 cxio_hal_get_resource(struct buf_ring *fifo, struct mtx *lock) { u32 entry; mtx_lock(lock); entry = (u32)(uintptr_t)buf_ring_dequeue_sc(fifo); mtx_unlock(lock); return entry; } static void cxio_hal_put_resource(struct buf_ring *fifo, u32 entry, struct mtx *lock) { mtx_lock(lock); buf_ring_enqueue(fifo, (void *) (uintptr_t)entry); mtx_unlock(lock); } u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { return cxio_hal_get_resource(rscp->tpt_fifo, &rscp->tpt_fifo_lock); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { cxio_hal_put_resource(rscp->tpt_fifo, stag, &rscp->tpt_fifo_lock); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo, &rscp->qpid_fifo_lock); CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); return qpid; } void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); cxio_hal_put_resource(rscp->qpid_fifo, qpid, &rscp->qpid_fifo_lock); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { return cxio_hal_get_resource(rscp->cqid_fifo, &rscp->cqid_fifo_lock); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { cxio_hal_put_resource(rscp->cqid_fifo, cqid, &rscp->cqid_fifo_lock); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { return cxio_hal_get_resource(rscp->pdid_fifo, &rscp->pdid_fifo_lock); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { cxio_hal_put_resource(rscp->pdid_fifo, pdid, &rscp->pdid_fifo_lock); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) { buf_ring_free(rscp->tpt_fifo, M_DEVBUF); buf_ring_free(rscp->cqid_fifo, M_DEVBUF); buf_ring_free(rscp->qpid_fifo, M_DEVBUF); buf_ring_free(rscp->pdid_fifo, M_DEVBUF); free(rscp, M_DEVBUF); } /* * PBL Memory Manager. Uses Linux generic allocator. */ #define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ #define PBL_CHUNK 2*1024*1024 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) { unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size); return (u32)addr; } void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) { CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size); gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); } int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) { rdev_p->pbl_pool = gen_pool_create(rdev_p->rnic_info.pbl_base, MIN_PBL_SHIFT, rdev_p->rnic_info.pbl_top - rdev_p->rnic_info.pbl_base); #if 0 if (rdev_p->pbl_pool) { unsigned long i; for (i = rdev_p->rnic_info.pbl_base; i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; i += PBL_CHUNK) gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); } #endif return rdev_p->pbl_pool ? 0 : (-ENOMEM); } void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) { gen_pool_destroy(rdev_p->pbl_pool); } /* * RQT Memory Manager. Uses Linux generic allocator. */ #define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ #define RQT_CHUNK 2*1024*1024 u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) { unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size << 6); return (u32)addr; } void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) { CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size << 6); gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); } int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) { rdev_p->rqt_pool = gen_pool_create(rdev_p->rnic_info.rqt_base, MIN_RQT_SHIFT, rdev_p->rnic_info.rqt_top - rdev_p->rnic_info.rqt_base); #if 0 if (rdev_p->rqt_pool) { unsigned long i; for (i = rdev_p->rnic_info.rqt_base; i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; i += RQT_CHUNK) gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); } #endif return rdev_p->rqt_pool ? 0 : (-ENOMEM); } void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) { gen_pool_destroy(rdev_p->rqt_pool); } #endif Index: head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 320527) +++ head/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 320528) @@ -1,2385 +1,2384 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #include "vmbus_if.h" #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) /* * 33 segments are needed to allow 128KB maxio, in case the data * in the first page is _not_ PAGE_SIZE aligned, e.g. * * |<----------- 128KB ----------->| * | | * 0 2K 4K 8K 16K 124K 128K 130K * | | | | | | | | * +--+--+-----+-----+.......+-----+--+--+ * | | | | | | | | | DATA * | | | | | | | | | * +--+--+-----+-----+.......------+--+--+ * | | | | * | 1| 31 | 1| ...... # of segments */ #define STORVSC_DATA_SEGCNT_MAX 33 #define STORVSC_DATA_SEGSZ_MAX PAGE_SIZE #define STORVSC_DATA_SIZE_MAX \ ((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V storage interface"); static u_int hv_storvsc_use_win8ext_flags = 1; SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_win8ext_flags, CTLFLAG_RW, &hv_storvsc_use_win8ext_flags, 0, "Use win8 extension flags or not"); static u_int hv_storvsc_use_pim_unmapped = 1; SYSCTL_UINT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN, &hv_storvsc_use_pim_unmapped, 0, "Optimize storvsc by using unmapped I/O"); static u_int hv_storvsc_ringbuffer_size = (64 * PAGE_SIZE); SYSCTL_UINT(_hw_storvsc, OID_AUTO, ringbuffer_size, CTLFLAG_RDTUN, &hv_storvsc_ringbuffer_size, 0, "Hyper-V storage ringbuffer size"); static u_int hv_storvsc_max_io = 512; SYSCTL_UINT(_hw_storvsc, OID_AUTO, max_io, CTLFLAG_RDTUN, &hv_storvsc_max_io, 0, "Hyper-V storage max io limit"); static int hv_storvsc_chan_cnt = 0; SYSCTL_INT(_hw_storvsc, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hv_storvsc_chan_cnt, 0, "# of channels to use"); #define STORVSC_MAX_IO \ vmbus_chan_prplist_nelem(hv_storvsc_ringbuffer_size, \ STORVSC_DATA_SEGCNT_MAX, VSTOR_PKT_SIZE) struct hv_storvsc_sysctl { u_long data_bio_cnt; u_long data_vaddr_cnt; u_long data_sg_cnt; u_long chan_send_cnt[MAXCPU]; }; struct storvsc_gpa_range { struct vmbus_gpa_range gpa_range; uint64_t gpa_page[STORVSC_DATA_SEGCNT_MAX]; } __packed; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; int prp_cnt; struct storvsc_gpa_range prp_list; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; bus_dmamap_t data_dmap; }; struct storvsc_softc { struct vmbus_channel *hs_chan; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; device_t hs_dev; bus_dma_tag_t storvsc_req_dtag; struct hv_storvsc_sysctl sysctl_data; uint32_t hs_nchan; struct vmbus_channel *hs_sel_chan[MAXCPU]; }; static eventhandler_tag storvsc_handler_tag; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta = sizeof(struct vmscsi_win8_extension); /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint32_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const struct hyperv_guid gStorVscDeviceType={ .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const struct hyperv_guid gBlkVscDeviceType={ .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, 20*PAGE_SIZE}, {"storvsc", "Hyper-V SCSI", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, 20*PAGE_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; /* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. */ static int vmstor_proto_version; struct vmstor_proto { int proto_version; int sense_buffer_size; int vmscsi_size_delta; }; static const struct vmstor_proto vmstor_proto_list[] = { { VMSTOR_PROTOCOL_VERSION_WIN10, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8_1, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN7, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), }, { VMSTOR_PROTOCOL_VERSION_WIN6, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), } }; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct storvsc_softc *); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); static void storvsc_subchan_attach(struct storvsc_softc *sc, struct vmbus_channel *new_channel) { struct vmstor_chan_props props; int ret = 0; memset(&props, 0, sizeof(props)); vmbus_chan_cpu_rr(new_channel); ret = vmbus_chan_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_subch) { struct vmbus_channel **subchan; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_subch; int ret, i; /* get sub-channel count that need to create */ request_subch = MIN(max_subch, mp_ncpus - 1); request = &sc->hs_init_req; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_subch; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); sema_wait(&request->synch_sema); if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or statue (%u)\n", vstor_packet->operation, vstor_packet->status); return; } /* Update channel count */ sc->hs_nchan = request_subch + 1; /* Wait for sub-channels setup to complete. */ subchan = vmbus_subchan_get(sc->hs_chan, request_subch); /* Attach the sub-channels. */ for (i = 0; i < request_subch; ++i) storvsc_subchan_attach(sc, subchan[i]); /* Release the sub-channels. */ vmbus_subchan_rel(subchan, request_subch); if (bootverbose) printf("Storvsc create multi-channel success!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct storvsc_softc *sc) { int ret = 0, i; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; uint16_t max_subch; boolean_t support_multichannel; uint32_t version; max_subch = 0; support_multichannel = FALSE; request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; sema_wait(&request->synch_sema); if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } for (i = 0; i < nitems(vmstor_proto_list); i++) { /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = vmstor_proto_list[i].proto_version; /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; sema_wait(&request->synch_sema); if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) { ret = EINVAL; goto cleanup; } if (vstor_packet->status == 0) { vmstor_proto_version = vmstor_proto_list[i].proto_version; sense_buffer_size = vmstor_proto_list[i].sense_buffer_size; vmscsi_size_delta = vmstor_proto_list[i].vmscsi_size_delta; break; } } if (vstor_packet->status != 0) { ret = EINVAL; goto cleanup; } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if ( ret != 0) goto cleanup; sema_wait(&request->synch_sema); /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } max_subch = vstor_packet->u.chan_props.max_channel_cnt; if (hv_storvsc_chan_cnt > 0 && hv_storvsc_chan_cnt < (max_subch + 1)) max_subch = hv_storvsc_chan_cnt - 1; /* multi-channels feature is supported by WIN8 and above version */ version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev); if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } if (bootverbose) { device_printf(sc->hs_dev, "max chans %d%s\n", max_subch + 1, support_multichannel ? ", multi-chan capable" : ""); } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) { goto cleanup; } sema_wait(&request->synch_sema); if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel && max_subch > 0) storvsc_send_multichannel_request(sc, max_subch); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct storvsc_softc *sc) { int ret = 0; struct vmstor_chan_props props; memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ vmbus_chan_cpu_rr(sc->hs_chan); ret = vmbus_chan_open( sc->hs_chan, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(sc); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct storvsc_softc *sc) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(dev->channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req); if (ret != 0) { goto cleanup; } sema_wait(&request->synch_sema); /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct storvsc_softc *sc, struct hv_storvsc_request *request) { struct vstor_packet *vstor_packet = &request->vstor_packet; struct vmbus_channel* outgoing_channel = NULL; int ret = 0, ch_sel; vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = sizeof(struct vmscsi_req) - vmscsi_size_delta; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->prp_list.gpa_range.gpa_len; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; ch_sel = (vstor_packet->u.vm_srb.lun + curcpu) % sc->hs_nchan; outgoing_channel = sc->hs_sel_chan[ch_sel]; mtx_unlock(&request->softc->hs_lock); if (request->prp_list.gpa_range.gpa_len) { ret = vmbus_chan_send_prplist(outgoing_channel, &request->prp_list.gpa_range, request->prp_cnt, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = vmbus_chan_send(outgoing_channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } /* statistic for successful request sending on each channel */ if (!ret) { sc->sysctl_data.chan_send_cnt[ch_sel]++; } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; /* * Copy some fields of the host's response into the request structure, * because the fields will be used later in storvsc_io_done(). */ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; request->vstor_packet.u.vm_srb.srb_status = vm_srb->srb_status; request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. */ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc) { int ret = 0; struct storvsc_softc *sc = xsc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8); ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* XXX check bytes_recvd to make sure that it contains enough data */ while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8), ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* * XXX check bytes_recvd to make sure that it contains * enough data */ } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. * * @param a device * @returns 0 on success, ENXIO if not a matcing StorVSC device */ static int storvsc_probe(device_t dev) { int ret = ENXIO; switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc); ret = BUS_PROBE_DEFAULT; break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } static void storvsc_create_chan_sel(struct storvsc_softc *sc) { struct vmbus_channel **subch; int i, nsubch; sc->hs_sel_chan[0] = sc->hs_chan; nsubch = sc->hs_nchan - 1; if (nsubch == 0) return; subch = vmbus_subchan_get(sc->hs_chan, nsubch); for (i = 0; i < nsubch; i++) sc->hs_sel_chan[i + 1] = subch[i]; vmbus_subchan_rel(subch, nsubch); } static int storvsc_init_requests(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp; int error, i; LIST_INIT(&sc->hs_free_list); error = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1, /* alignment */ PAGE_SIZE, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ STORVSC_DATA_SIZE_MAX, /* maxsize */ STORVSC_DATA_SEGCNT_MAX, /* nsegments */ STORVSC_DATA_SEGSZ_MAX, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &sc->storvsc_req_dtag); if (error) { device_printf(dev, "failed to create storvsc dma tag\n"); return (error); } for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; error = bus_dmamap_create(sc->storvsc_req_dtag, 0, &reqp->data_dmap); if (error) { device_printf(dev, "failed to allocate storvsc " "data dmamap\n"); goto cleanup; } LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } return (0); cleanup: while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) { LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } return (error); } static void storvsc_sysctl(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; struct sysctl_oid *ch_tree, *chid_tree; struct storvsc_softc *sc; char name[16]; int i; sc = device_get_softc(dev); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW, &sc->sysctl_data.data_bio_cnt, "# of bio data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW, &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW, &sc->sysctl_data.data_sg_cnt, "# of sg data block"); /* dev.storvsc.UNIT.channel */ ch_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; for (i = 0; i < sc->hs_nchan; i++) { uint32_t ch_id; ch_id = vmbus_chan_id(sc->hs_sel_chan[i]); snprintf(name, sizeof(name), "%d", ch_id); /* dev.storvsc.UNIT.channel.CHID */ chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; /* dev.storvsc.UNIT.channel.CHID.send_req */ SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "send_req", CTLFLAG_RD, &sc->sysctl_data.chan_send_cnt[i], "# of request sending from this channel"); } } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. * * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); sc->hs_nchan = 1; sc->hs_chan = vmbus_get_channel(dev); stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; sc->hs_drv_props->drv_ringbuffer_size = hv_storvsc_ringbuffer_size; sc->hs_drv_props->drv_max_ios_per_target = MIN(STORVSC_MAX_IO, hv_storvsc_max_io); if (bootverbose) { printf("storvsc ringbuffer size: %d, max_io: %d\n", sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_max_ios_per_target); } /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = dev; mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); ret = storvsc_init_requests(dev); if (ret != 0) goto cleanup; /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * STORVSC_DATA_SEGCNT_MAX segments, each * segment has one page buffer */ for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(STORVSC_DATA_SEGCNT_MAX, M_WAITOK|M_ZERO); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(sc); if (ret != 0) { goto cleanup; } /* Construct cpu to channel mapping */ storvsc_create_chan_sel(sc); /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? */ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); storvsc_sysctl(dev); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ vmbus_chan_close(sc->hs_chan); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. * * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_chan, sc); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. * Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; if (hv_storvsc_use_pim_unmapped) cpi->hba_misc |= PIM_UNMAPPED; cpi->maxio = STORVSC_DATA_SIZE_MAX; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? "bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; bus_dmamap_t dmap_saved; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); /* Save the data_dmap before reset request */ dmap_saved = reqp->data_dmap; /* XXX this is ugly */ bzero(reqp, sizeof(struct hv_storvsc_request)); /* Restore necessary bits */ reqp->data_dmap = dmap_saved; reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. * * @param bounce_sgl - the destination SG list * @param orig_sgl - the segment of the source SG list. * @param orig_sgl_count - the count of segments. * @param orig_sgl_count - indicate which segment need bounce buffer, * set 1 means need. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from SG list which used as bounce to another SG list * * This function is responsible for copy data from one SG list with bounce * buffer to another SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list's segment. * @param src_sgl - the source SG list. * @param seg_bits - indicate which segment used bounce buffer of src SG-list. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check SG list with bounce buffer or not * * This function is responsible for check if need bounce buffer for SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list's segment. * @param bits - segmengs number that need bounce buffer * * return -1 if SG list needless bounce buffer */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned, if the previous * one is not aligned, find a hole */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1ULL << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether connect to previous * segment,if not, find the hole */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * Copy bus_dma segments to multiple page buffer, which requires * the pages are compact composed except for the 1st and last pages. */ static void storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct hv_storvsc_request *reqp = arg; union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_gpa_range *prplist; int i; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK; for (i = 0; i < nsegs; i++) { #ifdef INVARIANTS if (nsegs > 1) { if (i == 0) { KASSERT((segs[i].ds_addr & PAGE_MASK) + segs[i].ds_len == PAGE_SIZE, ("invalid 1st page, ofs 0x%jx, len %zu", (uintmax_t)segs[i].ds_addr, segs[i].ds_len)); } else if (i == nsegs - 1) { KASSERT((segs[i].ds_addr & PAGE_MASK) == 0, ("invalid last page, ofs 0x%jx", (uintmax_t)segs[i].ds_addr)); } else { KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 && segs[i].ds_len == PAGE_SIZE, ("not a full page, ofs 0x%jx, len %zu", (uintmax_t)segs[i].ds_addr, segs[i].ds_len)); } } #endif prplist->gpa_page[i] = atop(segs[i].ds_addr); } reqp->prp_cnt = nsegs; } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for * VSCSI protocol request. * * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; int error; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } if (hv_storvsc_use_win8ext_flags) { reqp->vstor_packet.u.vm_srb.win8_extension.time_out_value = 60; reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |= SRB_FLAGS_DISABLE_SYNCH_TRANSFER; } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; if (hv_storvsc_use_win8ext_flags) { reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |= SRB_FLAGS_DATA_OUT; } break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; if (hv_storvsc_use_win8ext_flags) { reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |= SRB_FLAGS_DATA_IN; } break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; if (hv_storvsc_use_win8ext_flags) { reqp->vstor_packet.u.vm_srb.win8_extension.srb_flags |= SRB_FLAGS_NO_DATA_TRANSFER; } break; default: printf("Error: unexpected data direction: 0x%x\n", ccb->ccb_h.flags & CAM_DIR_MASK); return (EINVAL); } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_BIO: case CAM_DATA_VADDR: error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag, reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp, BUS_DMA_NOWAIT); if (error) { xpt_print(ccb->ccb_h.path, "bus_dmamap_load_ccb failed: %d\n", error); return (error); } if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO) reqp->softc->sysctl_data.data_bio_cnt++; else reqp->softc->sysctl_data.data_vaddr_cnt++; break; case CAM_DATA_SG: { struct storvsc_gpa_range *prplist; int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. */ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { /* Add one more PRP entry */ phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; reqp->prp_cnt++; } reqp->bounce_sgl_count = 0; } reqp->softc->sysctl_data.data_sg_cnt++; break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } static uint32_t is_scsi_valid(const struct scsi_inquiry_data *inq_data) { u_int8_t type; type = SID_TYPE(inq_data); if (type == T_NODEVICE) return (0); if (SID_QUAL(inq_data) == SID_QUAL_BAD_LU) return (0); return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; const struct scsi_generic *cmd; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. */ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; int srb_status = SRB_STATUS(vm_srb->srb_status); if (vm_srb->scsi_status == SCSI_STATUS_OK) { if (srb_status != SRB_STATUS_SUCCESS) { /* * If there are errors, for example, invalid LUN, * host will inform VM through SRB status. */ if (bootverbose) { if (srb_status == SRB_STATUS_INVALID_LUN) { xpt_print(ccb->ccb_h.path, "invalid LUN %d for op: %s\n", vm_srb->lun, scsi_op_desc(cmd->opcode, NULL)); } else { xpt_print(ccb->ccb_h.path, "Unknown SRB flag: %d for op: %s\n", srb_status, scsi_op_desc(cmd->opcode, NULL)); } } /* * XXX For a selection timeout, all of the LUNs * on the target will be gone. It works for SCSI * disks, but does not work for IDE disks. * * For CAM_DEV_NOT_THERE, CAM will only get * rid of the device(s) specified by the path. */ if (storvsc_get_storage_type(sc->hs_dev) == DRIVER_STORVSC) ccb->ccb_h.status |= CAM_SEL_TIMEOUT; else ccb->ccb_h.status |= CAM_DEV_NOT_THERE; } else { ccb->ccb_h.status |= CAM_REQ_CMP; } if (cmd->opcode == INQUIRY && srb_status == SRB_STATUS_SUCCESS) { int resp_xfer_len, resp_buf_len, data_len; uint8_t *resp_buf = (uint8_t *)csio->data_ptr; struct scsi_inquiry_data *inq_data = (struct scsi_inquiry_data *)csio->data_ptr; /* Get the buffer length reported by host */ resp_xfer_len = vm_srb->transfer_len; /* Get the available buffer length */ resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len; if (bootverbose && data_len >= 5) { xpt_print(ccb->ccb_h.path, "storvsc inquiry " "(%d) [%x %x %x %x %x ... ]\n", data_len, resp_buf[0], resp_buf[1], resp_buf[2], resp_buf[3], resp_buf[4]); } /* * XXX: Manually fix the wrong response returned from WS2012 */ if (!is_scsi_valid(inq_data) && (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN7)) { if (data_len >= 4 && (resp_buf[2] == 0 || resp_buf[3] == 0)) { resp_buf[2] = 5; // verion=5 means SPC-3 resp_buf[3] = 2; // resp fmt must be 2 if (bootverbose) xpt_print(ccb->ccb_h.path, "fix version and resp fmt for 0x%x\n", vmstor_proto_version); } } else if (data_len >= SHORT_INQUIRY_LENGTH) { char vendor[16]; cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), sizeof(vendor)); /* * XXX: Upgrade SPC2 to SPC3 if host is WIN8 or * WIN2012 R2 in order to support UNMAP feature. */ if (!strncmp(vendor, "Msft", 4) && SID_ANSI_REV(inq_data) == SCSI_REV_SPC2 && (vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8_1 || vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN8)) { inq_data->version = SCSI_REV_SPC3; if (bootverbose) { xpt_print(ccb->ccb_h.path, "storvsc upgrades " "SPC2 to SPC3\n"); } } } } } else { /** * On Some Windows hosts TEST_UNIT_READY command can return * SRB_STATUS_ERROR and sense data, for example, asc=0x3a,1 * "(Medium not present - tray closed)". This error can be * ignored since it will be sent to host periodically. */ boolean_t unit_not_ready = \ vm_srb->scsi_status == SCSI_STATUS_CHECK_COND && cmd->opcode == TEST_UNIT_READY && srb_status == SRB_STATUS_ERROR; if (!unit_not_ready && bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d, srb_status = %d\n", vm_srb->scsi_status, srb_status); mtx_unlock(&sc->hs_lock); } ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { device_t parent = device_get_parent(dev); if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0) return DRIVER_BLKVSC; if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0) return DRIVER_STORVSC; return DRIVER_UNKNOWN; } #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_PIIX4 0x7111 static void storvsc_ada_probe_veto(void *arg __unused, struct cam_path *path, struct ata_params *ident_buf __unused, int *veto) { /* * The ATA disks are shared with the controllers managed * by this driver, so veto the ATA disks' attachment; the * ATA disks will be attached as SCSI disks once this driver * attached. */ if (path->device->protocol == PROTO_ATA) { struct ccb_pathinq cpi; bzero(&cpi, sizeof(cpi)); xpt_setup_ccb(&cpi.ccb_h, path, CAM_PRIORITY_NONE); cpi.ccb_h.func_code = XPT_PATH_INQ; xpt_action((union ccb *)&cpi); if (cpi.ccb_h.status == CAM_REQ_CMP && cpi.hba_vendor == PCI_VENDOR_INTEL && cpi.hba_device == PCI_PRODUCT_PIIX4) { (*veto)++; if (bootverbose) { xpt_print(path, "Disable ATA disks on " "simulated ATA controller (0x%04x%04x)\n", cpi.hba_device, cpi.hba_vendor); } } } } static void storvsc_sysinit(void *arg __unused) { if (vm_guest == VM_GUEST_HV) { storvsc_handler_tag = EVENTHANDLER_REGISTER(ada_probe_veto, storvsc_ada_probe_veto, NULL, EVENTHANDLER_PRI_ANY); } } SYSINIT(storvsc_sys_init, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysinit, NULL); static void storvsc_sysuninit(void *arg __unused) { if (storvsc_handler_tag != NULL) EVENTHANDLER_DEREGISTER(ada_probe_veto, storvsc_handler_tag); } SYSUNINIT(storvsc_sys_uninit, SI_SUB_DRIVERS, SI_ORDER_SECOND, storvsc_sysuninit, NULL); Index: head/sys/dev/mfi/mfi.c =================================================================== --- head/sys/dev/mfi/mfi.c (revision 320527) +++ head/sys/dev/mfi/mfi.c (revision 320528) @@ -1,3797 +1,3796 @@ /*- * Copyright (c) 2006 IronPort Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 2007 LSI Corp. * Copyright (c) 2007 Rajesh Prabhakaran. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mfi.h" #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mfi_alloc_commands(struct mfi_softc *); static int mfi_comms_init(struct mfi_softc *); static int mfi_get_controller_info(struct mfi_softc *); static int mfi_get_log_state(struct mfi_softc *, struct mfi_evt_log_state **); static int mfi_parse_entries(struct mfi_softc *, int, int); static void mfi_data_cb(void *, bus_dma_segment_t *, int, int); static void mfi_startup(void *arg); static void mfi_intr(void *arg); static void mfi_ldprobe(struct mfi_softc *sc); static void mfi_syspdprobe(struct mfi_softc *sc); static void mfi_handle_evt(void *context, int pending); static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale); static void mfi_aen_complete(struct mfi_command *); static int mfi_add_ld(struct mfi_softc *sc, int); static void mfi_add_ld_complete(struct mfi_command *); static int mfi_add_sys_pd(struct mfi_softc *sc, int); static void mfi_add_sys_pd_complete(struct mfi_command *); static struct mfi_command * mfi_bio_command(struct mfi_softc *); static void mfi_bio_complete(struct mfi_command *); static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*); static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*); static int mfi_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_abort(struct mfi_softc *, struct mfi_command **); static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *); static void mfi_timeout(void *); static int mfi_user_command(struct mfi_softc *, struct mfi_ioc_passthru *); static void mfi_enable_intr_xscale(struct mfi_softc *sc); static void mfi_enable_intr_ppc(struct mfi_softc *sc); static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc); static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc); static int mfi_check_clear_intr_xscale(struct mfi_softc *sc); static int mfi_check_clear_intr_ppc(struct mfi_softc *sc); static void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode); static void mfi_config_unlock(struct mfi_softc *sc, int locked); static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm); static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm); static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm); SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters"); static int mfi_event_locale = MFI_EVT_LOCALE_ALL; SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale, 0, "event message locale"); static int mfi_event_class = MFI_EVT_CLASS_INFO; SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class, 0, "event message class"); static int mfi_max_cmds = 128; SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds, 0, "Max commands limit (-1 = controller limit)"); static int mfi_detect_jbod_change = 1; SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN, &mfi_detect_jbod_change, 0, "Detect a change to a JBOD"); int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS; SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN, &mfi_polled_cmd_timeout, 0, "Polled command timeout - used for firmware flash etc (in seconds)"); static int mfi_cmd_timeout = MFI_CMD_TIMEOUT; SYSCTL_INT(_hw_mfi, OID_AUTO, cmd_timeout, CTLFLAG_RWTUN, &mfi_cmd_timeout, 0, "Command timeout (in seconds)"); /* Management interface */ static d_open_t mfi_open; static d_close_t mfi_close; static d_ioctl_t mfi_ioctl; static d_poll_t mfi_poll; static struct cdevsw mfi_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = mfi_open, .d_close = mfi_close, .d_ioctl = mfi_ioctl, .d_poll = mfi_poll, .d_name = "mfi", }; MALLOC_DEFINE(M_MFIBUF, "mfibuf", "Buffers for the MFI driver"); #define MFI_INQ_LENGTH SHORT_INQUIRY_LENGTH struct mfi_skinny_dma_info mfi_skinny; static void mfi_enable_intr_xscale(struct mfi_softc *sc) { MFI_WRITE4(sc, MFI_OMSK, 0x01); } static void mfi_enable_intr_ppc(struct mfi_softc *sc) { if (sc->mfi_flags & MFI_FLAGS_1078) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_1078_EIM); } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_GEN2_EIM); } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_OMSK, ~0x00000001); } } static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OMSG0); } static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OSP0); } static int mfi_check_clear_intr_xscale(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if ((status & MFI_OSTS_INTR_VALID) == 0) return 1; MFI_WRITE4(sc, MFI_OSTS, status); return 0; } static int mfi_check_clear_intr_ppc(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if (sc->mfi_flags & MFI_FLAGS_1078) { if (!(status & MFI_1078_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { if (!(status & MFI_GEN2_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { if (!(status & MFI_SKINNY_RM)) { return 1; } } if (sc->mfi_flags & MFI_FLAGS_SKINNY) MFI_WRITE4(sc, MFI_OSTS, status); else MFI_WRITE4(sc, MFI_ODCR0, status); return 0; } static void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt) { MFI_WRITE4(sc, MFI_IQP,(bus_add >>3)|frame_cnt); } static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt) { if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_IQPL, (bus_add | frame_cnt <<1)|1 ); MFI_WRITE4(sc, MFI_IQPH, 0x00000000); } else { MFI_WRITE4(sc, MFI_IQP, (bus_add | frame_cnt <<1)|1 ); } } int mfi_transition_firmware(struct mfi_softc *sc) { uint32_t fw_state, cur_state; int max_wait, i; uint32_t cur_abs_reg_val = 0; uint32_t prev_abs_reg_val = 0; cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; while (fw_state != MFI_FWSTATE_READY) { if (bootverbose) device_printf(sc->mfi_dev, "Waiting for firmware to " "become ready\n"); cur_state = fw_state; switch (fw_state) { case MFI_FWSTATE_FAULT: device_printf(sc->mfi_dev, "Firmware fault\n"); return (ENXIO); case MFI_FWSTATE_WAIT_HANDSHAKE: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_OPERATIONAL: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, 7); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_READY); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_UNDEFINED: case MFI_FWSTATE_BB_INIT: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT_2: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT: case MFI_FWSTATE_FLUSH_CACHE: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_DEVICE_SCAN: max_wait = MFI_RESET_WAIT_TIME; /* wait for 180 seconds */ prev_abs_reg_val = cur_abs_reg_val; break; case MFI_FWSTATE_BOOT_MESSAGE_PENDING: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_HOTPLUG); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_HOTPLUG); max_wait = MFI_RESET_WAIT_TIME; break; default: device_printf(sc->mfi_dev, "Unknown firmware state %#x\n", fw_state); return (ENXIO); } for (i = 0; i < (max_wait * 10); i++) { cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; if (fw_state == cur_state) DELAY(100000); else break; } if (fw_state == MFI_FWSTATE_DEVICE_SCAN) { /* Check the device scanning progress */ if (prev_abs_reg_val != cur_abs_reg_val) { continue; } } if (fw_state == cur_state) { device_printf(sc->mfi_dev, "Firmware stuck in state " "%#x\n", fw_state); return (ENXIO); } } return (0); } static void mfi_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } int mfi_attach(struct mfi_softc *sc) { uint32_t status; int error, commsz, framessz, sensesz; int frames, unit, max_fw_sge, max_fw_cmds; uint32_t tb_mem_size = 0; struct cdev *dev_t; if (sc == NULL) return EINVAL; device_printf(sc->mfi_dev, "Megaraid SAS driver Ver %s \n", MEGASAS_VERSION); mtx_init(&sc->mfi_io_lock, "MFI I/O lock", NULL, MTX_DEF); sx_init(&sc->mfi_config_lock, "MFI config"); TAILQ_INIT(&sc->mfi_ld_tqh); TAILQ_INIT(&sc->mfi_syspd_tqh); TAILQ_INIT(&sc->mfi_ld_pend_tqh); TAILQ_INIT(&sc->mfi_syspd_pend_tqh); TAILQ_INIT(&sc->mfi_evt_queue); TASK_INIT(&sc->mfi_evt_task, 0, mfi_handle_evt, sc); TASK_INIT(&sc->mfi_map_sync_task, 0, mfi_handle_map_sync, sc); TAILQ_INIT(&sc->mfi_aen_pids); TAILQ_INIT(&sc->mfi_cam_ccbq); mfi_initq_free(sc); mfi_initq_ready(sc); mfi_initq_busy(sc); mfi_initq_bio(sc); sc->adpreset = 0; sc->last_seq_num = 0; sc->disableOnlineCtrlReset = 1; sc->issuepend_done = 1; sc->hw_crit_error = 0; if (sc->mfi_flags & MFI_FLAGS_1064R) { sc->mfi_enable_intr = mfi_enable_intr_xscale; sc->mfi_read_fw_status = mfi_read_fw_status_xscale; sc->mfi_check_clear_intr = mfi_check_clear_intr_xscale; sc->mfi_issue_cmd = mfi_issue_cmd_xscale; } else if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_enable_intr = mfi_tbolt_enable_intr_ppc; sc->mfi_disable_intr = mfi_tbolt_disable_intr_ppc; sc->mfi_read_fw_status = mfi_tbolt_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_tbolt_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_tbolt_issue_cmd_ppc; sc->mfi_adp_reset = mfi_tbolt_adp_reset; sc->mfi_tbolt = 1; TAILQ_INIT(&sc->mfi_cmd_tbolt_tqh); } else { sc->mfi_enable_intr = mfi_enable_intr_ppc; sc->mfi_read_fw_status = mfi_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_issue_cmd_ppc; } /* Before we get too far, see if the firmware is working */ if ((error = mfi_transition_firmware(sc)) != 0) { device_printf(sc->mfi_dev, "Firmware not in READY state, " "error %d\n", error); return (ENXIO); } /* Start: LSIP200113393 */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsize */ 1, /* msegments */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->verbuf_h_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmat DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->verbuf_h_dmat, (void **)&sc->verbuf, BUS_DMA_NOWAIT, &sc->verbuf_h_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmamap memory\n"); return (ENOMEM); } bzero(sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t)); bus_dmamap_load(sc->verbuf_h_dmat, sc->verbuf_h_dmamap, sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t), mfi_addr_cb, &sc->verbuf_h_busaddr, 0); /* End: LSIP200113393 */ /* * Get information needed for sizing the contiguous memory for the * frame pool. Size down the sgl parameter since we know that * we will never need more than what's required for MAXPHYS. * It would be nice if these constants were available at runtime * instead of compile time. */ status = sc->mfi_read_fw_status(sc); max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK; if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) { device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n", max_fw_cmds, mfi_max_cmds); sc->mfi_max_fw_cmds = mfi_max_cmds; } else { sc->mfi_max_fw_cmds = max_fw_cmds; } max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16; sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1)); /* ThunderBolt Support get the contiguous memory */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mfi_tbolt_init_globals(sc); device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, " "MaxSgl = %d, state = %#x\n", max_fw_cmds, sc->mfi_max_fw_cmds, sc->mfi_max_sge, status); tb_mem_size = mfi_tbolt_get_memory_requirement(sc); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_dmat, (void **)&sc->request_message_pool, BUS_DMA_NOWAIT, &sc->mfi_tb_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->request_message_pool, tb_mem_size); bus_dmamap_load(sc->mfi_tb_dmat, sc->mfi_tb_dmamap, sc->request_message_pool, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_busaddr, 0); /* For ThunderBolt memory init */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 0x100, 0, /* alignmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MFI_FRAME_SIZE, /* maxsize */ 1, /* msegments */ MFI_FRAME_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init, BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate init memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_init, MFI_FRAME_SIZE); bus_dmamap_load(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap, sc->mfi_tb_init, MFI_FRAME_SIZE, mfi_addr_cb, &sc->mfi_tb_init_busaddr, 0); if (mfi_tbolt_init_desc_pool(sc, sc->request_message_pool, tb_mem_size)) { device_printf(sc->mfi_dev, "Thunderbolt pool preparation error\n"); return 0; } /* Allocate DMA memory mapping for MPI2 IOC Init descriptor, we are taking it different from what we have allocated for Request and reply descriptors to avoid confusion later */ tb_mem_size = sizeof(struct MPI2_IOC_INIT_REQUEST); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_ioc_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_ioc_init_dmat, (void **)&sc->mfi_tb_ioc_init_desc, BUS_DMA_NOWAIT, &sc->mfi_tb_ioc_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_ioc_init_desc, tb_mem_size); bus_dmamap_load(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap, sc->mfi_tb_ioc_init_desc, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_ioc_init_busaddr, 0); } /* * Create the dma tag for data buffers. Used both for block I/O * and for various internal data queries. */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ sc->mfi_max_sge, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mfi_io_lock, /* lockfuncarg */ &sc->mfi_buffer_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * Allocate DMA memory for the comms queues. Keep it under 4GB for * efficiency. The mfi_hwcomms struct includes space for 1 reply queue * entry, so the calculated size here will be will be 1 more than * mfi_max_fw_cmds. This is apparently a requirement of the hardware. */ commsz = (sizeof(uint32_t) * sc->mfi_max_fw_cmds) + sizeof(struct mfi_hwcomms); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ commsz, /* maxsize */ 1, /* msegments */ commsz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_comms_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_comms_dmat, (void **)&sc->mfi_comms, BUS_DMA_NOWAIT, &sc->mfi_comms_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_comms, commsz); bus_dmamap_load(sc->mfi_comms_dmat, sc->mfi_comms_dmamap, sc->mfi_comms, commsz, mfi_addr_cb, &sc->mfi_comms_busaddr, 0); /* * Allocate DMA memory for the command frames. Keep them in the * lower 4GB for efficiency. Calculate the size of the commands at * the same time; each command is one 64 byte frame plus a set of * additional frames for holding sg lists or other data. * The assumption here is that the SG list will start at the second * frame and not use the unused bytes in the first frame. While this * isn't technically correct, it simplifies the calculation and allows * for command frames that might be larger than an mfi_io_frame. */ if (sizeof(bus_addr_t) == 8) { sc->mfi_sge_size = sizeof(struct mfi_sg64); sc->mfi_flags |= MFI_FLAGS_SG64; } else { sc->mfi_sge_size = sizeof(struct mfi_sg32); } if (sc->mfi_flags & MFI_FLAGS_SKINNY) sc->mfi_sge_size = sizeof(struct mfi_sg_skinny); frames = (sc->mfi_sge_size * sc->mfi_max_sge - 1) / MFI_FRAME_SIZE + 2; sc->mfi_cmd_size = frames * MFI_FRAME_SIZE; framessz = sc->mfi_cmd_size * sc->mfi_max_fw_cmds; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 64, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ framessz, /* maxsize */ 1, /* nsegments */ framessz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_frames_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate frame DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_frames_dmat, (void **)&sc->mfi_frames, BUS_DMA_NOWAIT, &sc->mfi_frames_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate frames memory\n"); return (ENOMEM); } bzero(sc->mfi_frames, framessz); bus_dmamap_load(sc->mfi_frames_dmat, sc->mfi_frames_dmamap, sc->mfi_frames, framessz, mfi_addr_cb, &sc->mfi_frames_busaddr,0); /* * Allocate DMA memory for the frame sense data. Keep them in the * lower 4GB for efficiency */ sensesz = sc->mfi_max_fw_cmds * MFI_SENSE_LEN; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sensesz, /* maxsize */ 1, /* nsegments */ sensesz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_sense_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_sense_dmat, (void **)&sc->mfi_sense, BUS_DMA_NOWAIT, &sc->mfi_sense_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate sense memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_sense_dmat, sc->mfi_sense_dmamap, sc->mfi_sense, sensesz, mfi_addr_cb, &sc->mfi_sense_busaddr, 0); if ((error = mfi_alloc_commands(sc)) != 0) return (error); /* Before moving the FW to operational state, check whether * hostmemory is required by the FW or not */ /* ThunderBolt MFI_IOC2 INIT */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_disable_intr(sc); mtx_lock(&sc->mfi_io_lock); if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { device_printf(sc->mfi_dev, "TB Init has failed with error %d\n",error); mtx_unlock(&sc->mfi_io_lock); return error; } mtx_unlock(&sc->mfi_io_lock); if ((error = mfi_tbolt_alloc_cmd(sc)) != 0) return error; if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr_tbolt, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr_tbolt; sc->mfi_enable_intr(sc); } else { if ((error = mfi_comms_init(sc)) != 0) return (error); if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr; sc->mfi_enable_intr(sc); } if ((error = mfi_get_controller_info(sc)) != 0) return (error); sc->disableOnlineCtrlReset = 0; /* Register a config hook to probe the bus for arrays */ sc->mfi_ich.ich_func = mfi_startup; sc->mfi_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mfi_ich) != 0) { device_printf(sc->mfi_dev, "Cannot establish configuration " "hook\n"); return (EINVAL); } mtx_lock(&sc->mfi_io_lock); if ((error = mfi_aen_setup(sc, 0), 0) != 0) { mtx_unlock(&sc->mfi_io_lock); return (error); } mtx_unlock(&sc->mfi_io_lock); /* * Register a shutdown handler. */ if ((sc->mfi_eh = EVENTHANDLER_REGISTER(shutdown_final, mfi_shutdown, sc, SHUTDOWN_PRI_DEFAULT)) == NULL) { device_printf(sc->mfi_dev, "Warning: shutdown event " "registration failed\n"); } /* * Create the control device for doing management */ unit = device_get_unit(sc->mfi_dev); sc->mfi_cdev = make_dev(&mfi_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "mfi%d", unit); if (unit == 0) make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_t, sc->mfi_cdev, "%s", "megaraid_sas_ioctl_node"); if (sc->mfi_cdev != NULL) sc->mfi_cdev->si_drv1 = sc; SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "delete_busy_volumes", CTLFLAG_RW, &sc->mfi_delete_busy_volumes, 0, "Allow removal of busy volumes"); SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "keep_deleted_volumes", CTLFLAG_RW, &sc->mfi_keep_deleted_volumes, 0, "Don't detach the mfid device for a busy volume that is deleted"); device_add_child(sc->mfi_dev, "mfip", -1); bus_generic_attach(sc->mfi_dev); /* Start the timeout watchdog */ callout_init(&sc->mfi_watchdog_callout, 1); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); mtx_unlock(&sc->mfi_io_lock); } return (0); } static int mfi_alloc_commands(struct mfi_softc *sc) { struct mfi_command *cm; int i, j; /* * XXX Should we allocate all the commands up front, or allocate on * demand later like 'aac' does? */ sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) * sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO); for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames + sc->mfi_cmd_size * i); cm->cm_frame_busaddr = sc->mfi_frames_busaddr + sc->mfi_cmd_size * i; cm->cm_frame->header.context = i; cm->cm_sense = &sc->mfi_sense[i]; cm->cm_sense_busaddr= sc->mfi_sense_busaddr + MFI_SENSE_LEN * i; cm->cm_sc = sc; cm->cm_index = i; if (bus_dmamap_create(sc->mfi_buffer_dmat, 0, &cm->cm_dmamap) == 0) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } else { device_printf(sc->mfi_dev, "Failed to allocate %d " "command blocks, only allocated %d\n", sc->mfi_max_fw_cmds, i - 1); for (j = 0; j < i; j++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; return (ENOMEM); } } return (0); } void mfi_release_command(struct mfi_command *cm) { struct mfi_frame_header *hdr; uint32_t *hdr_data; mtx_assert(&cm->cm_sc->mfi_io_lock, MA_OWNED); /* * Zero out the important fields of the frame, but make sure the * context field is preserved. For efficiency, handle the fields * as 32 bit words. Clear out the first S/G entry too for safety. */ hdr = &cm->cm_frame->header; if (cm->cm_data != NULL && hdr->sg_count) { cm->cm_sg->sg32[0].len = 0; cm->cm_sg->sg32[0].addr = 0; } /* * Command may be on other queues e.g. busy queue depending on the * flow of a previous call to mfi_mapcmd, so ensure its dequeued * properly */ if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0) mfi_remove_ready(cm); /* We're not expecting it to be on any other queue but check */ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { panic("Command %p is still on another queue, flags = %#x", cm, cm->cm_flags); } /* tbolt cleanup */ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { mfi_tbolt_return_cmd(cm->cm_sc, cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1], cm); } hdr_data = (uint32_t *)cm->cm_frame; hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */ hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */ hdr_data[4] = 0; /* flags, timeout */ hdr_data[5] = 0; /* data_len */ cm->cm_extra_frames = 0; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_private = NULL; cm->cm_data = NULL; cm->cm_sg = 0; cm->cm_total_frame_size = 0; cm->retry_for_fw_reset = 0; mfi_enqueue_free(cm); } int mfi_dcmd_command(struct mfi_softc *sc, struct mfi_command **cmp, uint32_t opcode, void **bufp, size_t bufsize) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *buf = NULL; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm = mfi_dequeue_free(sc); if (cm == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; if ((bufsize > 0) && (bufp != NULL)) { if (*bufp == NULL) { buf = malloc(bufsize, M_MFIBUF, M_NOWAIT|M_ZERO); if (buf == NULL) { mfi_release_command(cm); return (ENOMEM); } *bufp = buf; } else { buf = *bufp; } } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.flags = 0; dcmd->header.data_len = bufsize; dcmd->header.scsi_status = 0; dcmd->opcode = opcode; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = 0; cm->cm_data = buf; cm->cm_private = buf; cm->cm_len = bufsize; *cmp = cm; if ((bufp != NULL) && (*bufp == NULL) && (buf != NULL)) *bufp = buf; return (0); } static int mfi_comms_init(struct mfi_softc *sc) { struct mfi_command *cm; struct mfi_init_frame *init; struct mfi_init_qinfo *qinfo; int error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; /* * Abuse the SG list area of the frame to hold the init_qinfo * object; */ init = &cm->cm_frame->init; qinfo = (struct mfi_init_qinfo *)((uintptr_t)init + MFI_FRAME_SIZE); bzero(qinfo, sizeof(struct mfi_init_qinfo)); qinfo->rq_entries = sc->mfi_max_fw_cmds + 1; qinfo->rq_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_reply_q); qinfo->pi_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_pi); qinfo->ci_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_ci); init->header.cmd = MFI_CMD_INIT; init->header.data_len = sizeof(struct mfi_init_qinfo); init->qinfo_new_addr_lo = cm->cm_frame_busaddr + MFI_FRAME_SIZE; cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to send init command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_controller_info(struct mfi_softc *sc) { struct mfi_command *cm = NULL; struct mfi_ctrl_info *ci = NULL; uint32_t max_sectors_1, max_sectors_2; int error; mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_GETINFO, (void **)&ci, sizeof(*ci)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller info\n"); sc->mfi_max_io = (sc->mfi_max_sge - 1) * PAGE_SIZE / MFI_SECTOR_LEN; error = 0; goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); max_sectors_1 = (1 << ci->stripe_sz_ops.max) * ci->max_strips_per_io; max_sectors_2 = ci->max_request_size; sc->mfi_max_io = min(max_sectors_1, max_sectors_2); sc->disableOnlineCtrlReset = ci->properties.OnOffProperties.disableOnlineCtrlReset; out: if (ci) free(ci, M_MFIBUF); if (cm) mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state) { struct mfi_command *cm = NULL; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO, (void **)log_state, sizeof(**log_state)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get log state\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); out: if (cm) mfi_release_command(cm); return (error); } int mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start) { struct mfi_evt_log_state *log_state = NULL; union mfi_evt class_locale; int error = 0; uint32_t seq; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; if (seq_start == 0) { if ((error = mfi_get_log_state(sc, &log_state)) != 0) goto out; sc->mfi_boot_seq_num = log_state->boot_seq_num; /* * Walk through any events that fired since the last * shutdown. */ if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num, log_state->newest_seq_num)) != 0) goto out; seq = log_state->newest_seq_num; } else seq = seq_start; error = mfi_aen_register(sc, seq, class_locale.word); out: free(log_state, M_MFIBUF); return (error); } int mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm->cm_complete = NULL; /* * MegaCli can issue a DCMD of 0. In this case do nothing * and return 0 to it as status */ if (cm->cm_frame->dcmd.opcode == 0) { cm->cm_frame->header.cmd_status = MFI_STAT_OK; cm->cm_error = 0; return (cm->cm_error); } mfi_enqueue_ready(cm); mfi_startio(sc); if ((cm->cm_flags & MFI_CMD_COMPLETED) == 0) msleep(cm, &sc->mfi_io_lock, PRIBIO, "mfiwait", 0); return (cm->cm_error); } void mfi_free(struct mfi_softc *sc) { struct mfi_command *cm; int i; callout_drain(&sc->mfi_watchdog_callout); if (sc->mfi_cdev != NULL) destroy_dev(sc->mfi_cdev); if (sc->mfi_commands != NULL) { for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; } if (sc->mfi_intr) bus_teardown_intr(sc->mfi_dev, sc->mfi_irq, sc->mfi_intr); if (sc->mfi_irq != NULL) bus_release_resource(sc->mfi_dev, SYS_RES_IRQ, sc->mfi_irq_rid, sc->mfi_irq); if (sc->mfi_sense_busaddr != 0) bus_dmamap_unload(sc->mfi_sense_dmat, sc->mfi_sense_dmamap); if (sc->mfi_sense != NULL) bus_dmamem_free(sc->mfi_sense_dmat, sc->mfi_sense, sc->mfi_sense_dmamap); if (sc->mfi_sense_dmat != NULL) bus_dma_tag_destroy(sc->mfi_sense_dmat); if (sc->mfi_frames_busaddr != 0) bus_dmamap_unload(sc->mfi_frames_dmat, sc->mfi_frames_dmamap); if (sc->mfi_frames != NULL) bus_dmamem_free(sc->mfi_frames_dmat, sc->mfi_frames, sc->mfi_frames_dmamap); if (sc->mfi_frames_dmat != NULL) bus_dma_tag_destroy(sc->mfi_frames_dmat); if (sc->mfi_comms_busaddr != 0) bus_dmamap_unload(sc->mfi_comms_dmat, sc->mfi_comms_dmamap); if (sc->mfi_comms != NULL) bus_dmamem_free(sc->mfi_comms_dmat, sc->mfi_comms, sc->mfi_comms_dmamap); if (sc->mfi_comms_dmat != NULL) bus_dma_tag_destroy(sc->mfi_comms_dmat); /* ThunderBolt contiguous memory free here */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { if (sc->mfi_tb_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_dmat, sc->mfi_tb_dmamap); if (sc->request_message_pool != NULL) bus_dmamem_free(sc->mfi_tb_dmat, sc->request_message_pool, sc->mfi_tb_dmamap); if (sc->mfi_tb_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_dmat); /* Version buffer memory free */ /* Start LSIP200113393 */ if (sc->verbuf_h_busaddr != 0) bus_dmamap_unload(sc->verbuf_h_dmat, sc->verbuf_h_dmamap); if (sc->verbuf != NULL) bus_dmamem_free(sc->verbuf_h_dmat, sc->verbuf, sc->verbuf_h_dmamap); if (sc->verbuf_h_dmat != NULL) bus_dma_tag_destroy(sc->verbuf_h_dmat); /* End LSIP200113393 */ /* ThunderBolt INIT packet memory Free */ if (sc->mfi_tb_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init != NULL) bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_init_dmat); /* ThunderBolt IOC Init Desc memory free here */ if (sc->mfi_tb_ioc_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_desc != NULL) bus_dmamem_free(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_desc, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat); if (sc->mfi_cmd_pool_tbolt != NULL) { for (int i = 0; i < sc->mfi_max_fw_cmds; i++) { if (sc->mfi_cmd_pool_tbolt[i] != NULL) { free(sc->mfi_cmd_pool_tbolt[i], M_MFIBUF); sc->mfi_cmd_pool_tbolt[i] = NULL; } } free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; } if (sc->request_desc_pool != NULL) { free(sc->request_desc_pool, M_MFIBUF); sc->request_desc_pool = NULL; } } if (sc->mfi_buffer_dmat != NULL) bus_dma_tag_destroy(sc->mfi_buffer_dmat); if (sc->mfi_parent_dmat != NULL) bus_dma_tag_destroy(sc->mfi_parent_dmat); if (mtx_initialized(&sc->mfi_io_lock)) { mtx_destroy(&sc->mfi_io_lock); sx_destroy(&sc->mfi_config_lock); } return; } static void mfi_startup(void *arg) { struct mfi_softc *sc; sc = (struct mfi_softc *)arg; config_intrhook_disestablish(&sc->mfi_ich); sc->mfi_enable_intr(sc); sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_ldprobe(sc); if (sc->mfi_flags & MFI_FLAGS_SKINNY) mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } static void mfi_intr(void *arg) { struct mfi_softc *sc; struct mfi_command *cm; uint32_t pi, ci, context; sc = (struct mfi_softc *)arg; if (sc->mfi_check_clear_intr(sc)) return; restart: pi = sc->mfi_comms->hw_pi; ci = sc->mfi_comms->hw_ci; mtx_lock(&sc->mfi_io_lock); while (ci != pi) { context = sc->mfi_comms->hw_reply_q[ci]; if (context < sc->mfi_max_fw_cmds) { cm = &sc->mfi_commands[context]; mfi_remove_busy(cm); cm->cm_error = 0; mfi_complete(sc, cm); } if (++ci == (sc->mfi_max_fw_cmds + 1)) ci = 0; } sc->mfi_comms->hw_ci = ci; /* Give defered I/O a chance to run */ sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); /* * Dummy read to flush the bus; this ensures that the indexes are up * to date. Restart processing if more commands have come it. */ (void)sc->mfi_read_fw_status(sc); if (pi != sc->mfi_comms->hw_pi) goto restart; return; } int mfi_shutdown(struct mfi_softc *sc) { struct mfi_dcmd_frame *dcmd; struct mfi_command *cm; int error; if (sc->mfi_aen_cm != NULL) { sc->cm_aen_abort = 1; mfi_abort(sc, &sc->mfi_aen_cm); } if (sc->mfi_map_sync_cm != NULL) { sc->cm_map_abort = 1; mfi_abort(sc, &sc->mfi_map_sync_cm); } mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0); if (error) { mtx_unlock(&sc->mfi_io_lock); return (error); } dcmd = &cm->cm_frame->dcmd; dcmd->header.flags = MFI_FRAME_DIR_NONE; cm->cm_flags = MFI_CMD_POLLED; cm->cm_data = NULL; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "Failed to shutdown controller\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static void mfi_syspdprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_pd_list *pdlist = NULL; struct mfi_system_pd *syspd, *tmp; struct mfi_system_pending *syspd_pend; int error, i, found; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); /* Add SYSTEM PD's */ error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_LIST_QUERY, (void **)&pdlist, sizeof(*pdlist)); if (error) { device_printf(sc->mfi_dev, "Error while forming SYSTEM PD list\n"); goto out; } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; cm->cm_frame->dcmd.mbox[0] = MR_PD_QUERY_TYPE_EXPOSED_TO_HOST; cm->cm_frame->dcmd.mbox[1] = 0; if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get syspd device listing\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat,cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_PD_LIST_QUERY failed %x\n", hdr->cmd_status); goto out; } /* Get each PD and add it to the system */ for (i = 0; i < pdlist->count; i++) { if (pdlist->addr[i].device_id == pdlist->addr[i].encl_device_id) continue; found = 0; TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == pdlist->addr[i].device_id) found = 1; } TAILQ_FOREACH(syspd_pend, &sc->mfi_syspd_pend_tqh, pd_link) { if (syspd_pend->pd_id == pdlist->addr[i].device_id) found = 1; } if (found == 0) mfi_add_sys_pd(sc, pdlist->addr[i].device_id); } /* Delete SYSPD's whose state has been changed */ TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) { found = 0; for (i = 0; i < pdlist->count; i++) { if (syspd->pd_id == pdlist->addr[i].device_id) { found = 1; break; } } if (found == 0) { printf("DELETE\n"); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } } out: if (pdlist) free(pdlist, M_MFIBUF); if (cm) mfi_release_command(cm); return; } static void mfi_ldprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_ld_list *list = NULL; struct mfi_disk *ld; struct mfi_disk_pending *ld_pend; int error, i; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST, (void **)&list, sizeof(*list)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get device listing\n"); goto out; } hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_LD_GET_LIST failed %x\n", hdr->cmd_status); goto out; } for (i = 0; i < list->ld_count; i++) { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } TAILQ_FOREACH(ld_pend, &sc->mfi_ld_pend_tqh, ld_link) { if (ld_pend->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } mfi_add_ld(sc, list->ld_list[i].ld.v.target_id); skip_add:; } out: if (list) free(list, M_MFIBUF); if (cm) mfi_release_command(cm); return; } /* * The timestamp is the number of seconds since 00:00 Jan 1, 2000. If * the bits in 24-31 are all set, then it is the number of seconds since * boot. */ static const char * format_timestamp(uint32_t timestamp) { static char buffer[32]; if ((timestamp & 0xff000000) == 0xff000000) snprintf(buffer, sizeof(buffer), "boot + %us", timestamp & 0x00ffffff); else snprintf(buffer, sizeof(buffer), "%us", timestamp); return (buffer); } static const char * format_class(int8_t class) { static char buffer[6]; switch (class) { case MFI_EVT_CLASS_DEBUG: return ("debug"); case MFI_EVT_CLASS_PROGRESS: return ("progress"); case MFI_EVT_CLASS_INFO: return ("info"); case MFI_EVT_CLASS_WARNING: return ("WARN"); case MFI_EVT_CLASS_CRITICAL: return ("CRIT"); case MFI_EVT_CLASS_FATAL: return ("FATAL"); case MFI_EVT_CLASS_DEAD: return ("DEAD"); default: snprintf(buffer, sizeof(buffer), "%d", class); return (buffer); } } static void mfi_decode_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_system_pd *syspd = NULL; device_printf(sc->mfi_dev, "%d (%s/0x%04x/%s) - %s\n", detail->seq, format_timestamp(detail->time), detail->evt_class.members.locale, format_class(detail->evt_class.members.evt_class), detail->description); /* Don't act on old AEN's or while shutting down */ if (detail->seq < sc->mfi_boot_seq_num || sc->mfi_detaching) return; switch (detail->arg_type) { case MR_EVT_ARGS_NONE: if (detail->code == MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED) { device_printf(sc->mfi_dev, "HostBus scan raised\n"); if (mfi_detect_jbod_change) { /* * Probe for new SYSPD's and Delete * invalid SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } break; case MR_EVT_ARGS_LD_STATE: /* During load time driver reads all the events starting * from the one that has been logged after shutdown. Avoid * these old events. */ if (detail->args.ld_state.new_state == MFI_LD_STATE_OFFLINE ) { /* Remove the LD */ struct mfi_disk *ld; TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == detail->args.ld_state.ld.target_id) break; } /* Fix: for kernel panics when SSCD is removed KASSERT(ld != NULL, ("volume dissappeared")); */ if (ld != NULL) { mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); } } break; case MR_EVT_ARGS_PD: if (detail->code == MR_EVT_PD_REMOVED) { if (mfi_detect_jbod_change) { /* * If the removed device is a SYSPD then * delete it */ TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == detail->args.pd.device_id) { mtx_lock(&Giant); device_delete_child( sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); break; } } } } if (detail->code == MR_EVT_PD_INSERTED) { if (mfi_detect_jbod_change) { /* Probe for new SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } if (sc->mfi_cam_rescan_cb != NULL && (detail->code == MR_EVT_PD_INSERTED || detail->code == MR_EVT_PD_REMOVED)) { sc->mfi_cam_rescan_cb(sc, detail->args.pd.device_id); } break; } } static void mfi_queue_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_evt_queue_elm *elm; mtx_assert(&sc->mfi_io_lock, MA_OWNED); elm = malloc(sizeof(*elm), M_MFIBUF, M_NOWAIT|M_ZERO); if (elm == NULL) return; memcpy(&elm->detail, detail, sizeof(*detail)); TAILQ_INSERT_TAIL(&sc->mfi_evt_queue, elm, link); taskqueue_enqueue(taskqueue_swi, &sc->mfi_evt_task); } static void mfi_handle_evt(void *context, int pending) { TAILQ_HEAD(,mfi_evt_queue_elm) queue; struct mfi_softc *sc; struct mfi_evt_queue_elm *elm; sc = context; TAILQ_INIT(&queue); mtx_lock(&sc->mfi_io_lock); TAILQ_CONCAT(&queue, &sc->mfi_evt_queue, link); mtx_unlock(&sc->mfi_io_lock); while ((elm = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, elm, link); mfi_decode_evt(sc, &elm->detail); free(elm, M_MFIBUF); } } static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; union mfi_evt current_aen, prior_aen; struct mfi_evt_detail *ed = NULL; int error = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); current_aen.word = locale; if (sc->mfi_aen_cm != NULL) { prior_aen.word = ((uint32_t *)&sc->mfi_aen_cm->cm_frame->dcmd.mbox)[1]; if (prior_aen.members.evt_class <= current_aen.members.evt_class && !((prior_aen.members.locale & current_aen.members.locale) ^current_aen.members.locale)) { return (0); } else { prior_aen.members.locale |= current_aen.members.locale; if (prior_aen.members.evt_class < current_aen.members.evt_class) current_aen.members.evt_class = prior_aen.members.evt_class; mfi_abort(sc, &sc->mfi_aen_cm); } } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT, (void **)&ed, sizeof(*ed)); if (error) goto out; dcmd = &cm->cm_frame->dcmd; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = locale; cm->cm_flags = MFI_CMD_DATAIN; cm->cm_complete = mfi_aen_complete; sc->last_seq_num = seq; sc->mfi_aen_cm = cm; mfi_enqueue_ready(cm); mfi_startio(sc); out: return (error); } static void mfi_aen_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_softc *sc; struct mfi_evt_detail *detail; struct mfi_aen *mfi_aen_entry, *tmp; int seq = 0, aborted = 0; sc = cm->cm_sc; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->mfi_aen_cm == NULL) return; hdr = &cm->cm_frame->header; if (sc->cm_aen_abort || hdr->cmd_status == MFI_STAT_INVALID_STATUS) { sc->cm_aen_abort = 0; aborted = 1; } else { sc->mfi_aen_triggered = 1; if (sc->mfi_poll_waiting) { sc->mfi_poll_waiting = 0; selwakeup(&sc->mfi_select); } detail = cm->cm_data; mfi_queue_evt(sc, detail); seq = detail->seq + 1; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); PROC_LOCK(mfi_aen_entry->p); kern_psignal(mfi_aen_entry->p, SIGIO); PROC_UNLOCK(mfi_aen_entry->p); free(mfi_aen_entry, M_MFIBUF); } } free(cm->cm_data, M_MFIBUF); wakeup(&sc->mfi_aen_cm); sc->mfi_aen_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ if (!aborted) mfi_aen_setup(sc, seq); } #define MAX_EVENTS 15 static int mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; struct mfi_evt_list *el; union mfi_evt class_locale; int error, i, seq, size; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * (MAX_EVENTS - 1); el = malloc(size, M_MFIBUF, M_NOWAIT | M_ZERO); if (el == NULL) return (ENOMEM); for (seq = start_seq;;) { if ((cm = mfi_dequeue_free(sc)) == NULL) { free(el, M_MFIBUF); return (EBUSY); } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.data_len = size; dcmd->opcode = MFI_DCMD_CTRL_EVENT_GET; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = class_locale.word; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; cm->cm_data = el; cm->cm_len = size; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller entries\n"); mfi_release_command(cm); break; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) { mfi_release_command(cm); break; } if (dcmd->header.cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "Error %d fetching controller entries\n", dcmd->header.cmd_status); mfi_release_command(cm); error = EIO; break; } mfi_release_command(cm); for (i = 0; i < el->count; i++) { /* * If this event is newer than 'stop_seq' then * break out of the loop. Note that the log * is a circular buffer so we have to handle * the case that our stop point is earlier in * the buffer than our start point. */ if (el->event[i].seq >= stop_seq) { if (start_seq <= stop_seq) break; else if (el->event[i].seq < start_seq) break; } mfi_queue_evt(sc, &el->event[i]); } seq = el->event[el->count - 1].seq + 1; } free(el, M_MFIBUF); return (error); } static int mfi_add_ld(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_disk_pending *ld_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); ld_pend = malloc(sizeof(*ld_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (ld_pend != NULL) { ld_pend->ld_id = id; TAILQ_INSERT_TAIL(&sc->mfi_ld_pend_tqh, ld_pend, ld_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate for MFI_DCMD_LD_GET_INFO %d\n", error); if (ld_info) free(ld_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0] = id; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get logical drive: %d\n", id); free(ld_info, M_MFIBUF); return (0); } if (ld_info->ld_config.params.isSSCD != 1) mfi_add_ld_complete(cm); else { mfi_release_command(cm); if (ld_info) /* SSCD drives ld_info free here */ free(ld_info, M_MFIBUF); } return (0); } static void mfi_add_ld_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_ld_info *ld_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; ld_info = cm->cm_private; if (sc->cm_map_abort || hdr->cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); return; } wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfid", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add logical disk\n"); free(ld_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, ld_info); device_set_desc(child, "MFI Logical Disk"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static int mfi_add_sys_pd(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_pd_info *pd_info = NULL; struct mfi_system_pending *syspd_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); syspd_pend = malloc(sizeof(*syspd_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (syspd_pend != NULL) { syspd_pend->pd_id = id; TAILQ_INSERT_TAIL(&sc->mfi_syspd_pend_tqh, syspd_pend, pd_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_GET_INFO, (void **)&pd_info, sizeof(*pd_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocated for MFI_DCMD_PD_GET_INFO %d\n", error); if (pd_info) free(pd_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0]=id; dcmd->header.scsi_status = 0; dcmd->header.pad0 = 0; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get physical drive info %d\n", id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return (error); } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_add_sys_pd_complete(cm); return (0); } static void mfi_add_sys_pd_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_pd_info *pd_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; pd_info = cm->cm_private; if (hdr->cmd_status != MFI_STAT_OK) { free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } if (pd_info->fw_state != MFI_PD_STATE_SYSTEM) { device_printf(sc->mfi_dev, "PD=%x is not SYSTEM PD\n", pd_info->ref.v.device_id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfisyspd", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add system pd\n"); free(pd_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, pd_info); device_set_desc(child, "MFI System PD"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static struct mfi_command * mfi_bio_command(struct mfi_softc *sc) { struct bio *bio; struct mfi_command *cm = NULL; /*reserving two commands to avoid starvation for IOCTL*/ if (sc->mfi_qstat[MFIQ_FREE].q_length < 2) { return (NULL); } if ((bio = mfi_dequeue_bio(sc)) == NULL) { return (NULL); } if ((uintptr_t)bio->bio_driver2 == MFI_LD_IO) { cm = mfi_build_ldio(sc, bio); } else if ((uintptr_t) bio->bio_driver2 == MFI_SYS_PD_IO) { cm = mfi_build_syspdio(sc, bio); } if (!cm) mfi_enqueue_bio(sc, bio); return cm; } /* * mostly copied from cam/scsi/scsi_all.c:scsi_read_write */ int mfi_build_cdb(int readop, uint8_t byte2, u_int64_t lba, u_int32_t block_count, uint8_t *cdb) { int cdb_len; if (((lba & 0x1fffff) == lba) && ((block_count & 0xff) == block_count) && (byte2 == 0)) { /* We can fit in a 6 byte cdb */ struct scsi_rw_6 *scsi_cmd; scsi_cmd = (struct scsi_rw_6 *)cdb; scsi_cmd->opcode = readop ? READ_6 : WRITE_6; scsi_ulto3b(lba, scsi_cmd->addr); scsi_cmd->length = block_count & 0xff; scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Need a 10 byte CDB */ struct scsi_rw_10 *scsi_cmd; scsi_cmd = (struct scsi_rw_10 *)cdb; scsi_cmd->opcode = readop ? READ_10 : WRITE_10; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto2b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffffffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Block count is too big for 10 byte CDB use a 12 byte CDB */ struct scsi_rw_12 *scsi_cmd; scsi_cmd = (struct scsi_rw_12 *)cdb; scsi_cmd->opcode = readop ? READ_12 : WRITE_12; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else { /* * 16 byte CDB. We'll only get here if the LBA is larger * than 2^32 */ struct scsi_rw_16 *scsi_cmd; scsi_cmd = (struct scsi_rw_16 *)cdb; scsi_cmd->opcode = readop ? READ_16 : WRITE_16; scsi_cmd->byte2 = byte2; scsi_u64to8b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } return cdb_len; } extern char *unmapped_buf; static struct mfi_command * mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio) { struct mfi_command *cm; struct mfi_pass_frame *pass; uint32_t context = 0; int flags = 0, blkcount = 0, readop; uint8_t cdb_len; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; switch (bio->bio_cmd) { case BIO_READ: flags = MFI_CMD_DATAIN | MFI_CMD_BIO; readop = 1; break; case BIO_WRITE: flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; readop = 0; break; default: /* TODO: what about BIO_DELETE??? */ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); /* Fill the LBA and Transfer length in CDB */ cdb_len = mfi_build_cdb(readop, 0, bio->bio_pblkno, blkcount, pass->cdb); pass->header.target_id = (uintptr_t)bio->bio_driver1; pass->header.lun_id = 0; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = bio->bio_bcount; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static struct mfi_command * mfi_build_ldio(struct mfi_softc *sc, struct bio *bio) { struct mfi_io_frame *io; struct mfi_command *cm; int flags; uint32_t blkcount; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; switch (bio->bio_cmd) { case BIO_READ: io->header.cmd = MFI_CMD_LD_READ; flags = MFI_CMD_DATAIN | MFI_CMD_BIO; break; case BIO_WRITE: io->header.cmd = MFI_CMD_LD_WRITE; flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; break; default: /* TODO: what about BIO_DELETE??? */ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); io->header.target_id = (uintptr_t)bio->bio_driver1; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = blkcount; io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (bio->bio_pblkno & 0xffffffff00000000) >> 32; io->lba_lo = bio->bio_pblkno & 0xffffffff; cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &io->sgl; cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static void mfi_bio_complete(struct mfi_command *cm) { struct bio *bio; struct mfi_frame_header *hdr; struct mfi_softc *sc; bio = cm->cm_private; hdr = &cm->cm_frame->header; sc = cm->cm_sc; if ((hdr->cmd_status != MFI_STAT_OK) || (hdr->scsi_status != 0)) { bio->bio_flags |= BIO_ERROR; bio->bio_error = EIO; device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, " "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status); mfi_print_sense(cm->cm_sc, cm->cm_sense); } else if (cm->cm_error != 0) { bio->bio_flags |= BIO_ERROR; bio->bio_error = cm->cm_error; device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n", cm, cm->cm_error); } mfi_release_command(cm); mfi_disk_complete(bio); } void mfi_startio(struct mfi_softc *sc) { struct mfi_command *cm; struct ccb_hdr *ccbh; for (;;) { /* Don't bother if we're short on resources */ if (sc->mfi_flags & MFI_FLAGS_QFRZN) break; /* Try a command that has already been prepared */ cm = mfi_dequeue_ready(sc); if (cm == NULL) { if ((ccbh = TAILQ_FIRST(&sc->mfi_cam_ccbq)) != NULL) cm = sc->mfi_cam_start(ccbh); } /* Nope, so look for work on the bioq */ if (cm == NULL) cm = mfi_bio_command(sc); /* No work available, so exit */ if (cm == NULL) break; /* Send the command to the controller */ if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to startio\n"); mfi_requeue_ready(cm); break; } } } int mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm) { int error, polled; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_data != NULL) && (cm->cm_frame->header.cmd != MFI_CMD_STP )) { polled = (cm->cm_flags & MFI_CMD_POLLED) ? BUS_DMA_NOWAIT : 0; if (cm->cm_flags & MFI_CMD_CCB) error = bus_dmamap_load_ccb(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, mfi_data_cb, cm, polled); else if (cm->cm_flags & MFI_CMD_BIO) error = bus_dmamap_load_bio(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_private, mfi_data_cb, cm, polled); else error = bus_dmamap_load(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_len, mfi_data_cb, cm, polled); if (error == EINPROGRESS) { sc->mfi_flags |= MFI_FLAGS_QFRZN; return (0); } } else { error = mfi_send_frame(sc, cm); } return (error); } static void mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mfi_frame_header *hdr; struct mfi_command *cm; union mfi_sgl *sgl; struct mfi_softc *sc; int i, j, first, dir; int sge_size, locked; cm = (struct mfi_command *)arg; sc = cm->cm_sc; hdr = &cm->cm_frame->header; sgl = cm->cm_sg; /* * We need to check if we have the lock as this is async * callback so even though our caller mfi_mapcmd asserts * it has the lock, there is no guarantee that hasn't been * dropped if bus_dmamap_load returned prior to our * completion. */ if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0) mtx_lock(&sc->mfi_io_lock); if (error) { printf("error %d in callback\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } /* Use IEEE sgl only for IO's on a SKINNY controller * For other commands on a SKINNY controller use either * sg32 or sg64 based on the sizeof(bus_addr_t). * Also calculate the total frame size based on the type * of SGL used. */ if (((cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) || (cm->cm_frame->header.cmd == MFI_CMD_LD_READ) || (cm->cm_frame->header.cmd == MFI_CMD_LD_WRITE)) && (sc->mfi_flags & MFI_FLAGS_SKINNY)) { for (i = 0; i < nsegs; i++) { sgl->sg_skinny[i].addr = segs[i].ds_addr; sgl->sg_skinny[i].len = segs[i].ds_len; sgl->sg_skinny[i].flag = 0; } hdr->flags |= MFI_FRAME_IEEE_SGL | MFI_FRAME_SGL64; sge_size = sizeof(struct mfi_sg_skinny); hdr->sg_count = nsegs; } else { j = 0; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { first = cm->cm_stp_len; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { sgl->sg32[j].addr = segs[0].ds_addr; sgl->sg32[j++].len = first; } else { sgl->sg64[j].addr = segs[0].ds_addr; sgl->sg64[j++].len = first; } } else first = 0; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { for (i = 0; i < nsegs; i++) { sgl->sg32[j].addr = segs[i].ds_addr + first; sgl->sg32[j++].len = segs[i].ds_len - first; first = 0; } } else { for (i = 0; i < nsegs; i++) { sgl->sg64[j].addr = segs[i].ds_addr + first; sgl->sg64[j++].len = segs[i].ds_len - first; first = 0; } hdr->flags |= MFI_FRAME_SGL64; } hdr->sg_count = j; sge_size = sc->mfi_sge_size; } dir = 0; if (cm->cm_flags & MFI_CMD_DATAIN) { dir |= BUS_DMASYNC_PREREAD; hdr->flags |= MFI_FRAME_DIR_READ; } if (cm->cm_flags & MFI_CMD_DATAOUT) { dir |= BUS_DMASYNC_PREWRITE; hdr->flags |= MFI_FRAME_DIR_WRITE; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); cm->cm_flags |= MFI_CMD_MAPPED; /* * Instead of calculating the total number of frames in the * compound frame, it's already assumed that there will be at * least 1 frame, so don't compensate for the modulo of the * following division. */ cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs); cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; if ((error = mfi_send_frame(sc, cm)) != 0) { printf("error %d in callback from mfi_send_frame\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } out: /* leave the lock in the state we found it */ if (locked == 0) mtx_unlock(&sc->mfi_io_lock); return; } static int mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->MFA_enabled) error = mfi_tbolt_send_frame(sc, cm); else error = mfi_std_send_frame(sc, cm); if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); return (error); } static int mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_frame_header *hdr; int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; if ((cm->cm_flags & MFI_CMD_POLLED) == 0) { cm->cm_timestamp = time_uptime; mfi_enqueue_busy(cm); } else { hdr->cmd_status = MFI_STAT_INVALID_STATUS; hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; } /* * The bus address of the command is aligned on a 64 byte boundary, * leaving the least 6 bits as zero. For whatever reason, the * hardware wants the address shifted right by three, leaving just * 3 zero bits. These three bits are then used as a prefetching * hint for the hardware to predict how many frames need to be * fetched across the bus. If a command has more than 8 frames * then the 3 bits are set to 0x7 and the firmware uses other * information in the command to determine the total amount to fetch. * However, FreeBSD doesn't support I/O larger than 128K, so 8 frames * is enough for both 32bit and 64bit systems. */ if (cm->cm_extra_frames > 7) cm->cm_extra_frames = 7; sc->mfi_issue_cmd(sc, cm->cm_frame_busaddr, cm->cm_extra_frames); if ((cm->cm_flags & MFI_CMD_POLLED) == 0) return (0); /* This is a polled command, so busy-wait for it to complete. */ while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { DELAY(1000); tm -= 1; if (tm <= 0) break; } if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { device_printf(sc->mfi_dev, "Frame %p timed out " "command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode); return (ETIMEDOUT); } return (0); } void mfi_complete(struct mfi_softc *sc, struct mfi_command *cm) { int dir; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) { dir = 0; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) dir |= BUS_DMASYNC_POSTREAD; if (cm->cm_flags & MFI_CMD_DATAOUT) dir |= BUS_DMASYNC_POSTWRITE; bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); cm->cm_flags &= ~MFI_CMD_MAPPED; } cm->cm_flags |= MFI_CMD_COMPLETED; if (cm->cm_complete != NULL) cm->cm_complete(cm); else wakeup(cm); } static int mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) { struct mfi_command *cm; struct mfi_abort_frame *abort; int i = 0, error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; abort = &cm->cm_frame->abort; abort->header.cmd = MFI_CMD_ABORT; abort->header.flags = 0; abort->header.scsi_status = 0; abort->abort_context = (*cm_abort)->cm_frame->header.context; abort->abort_mfi_addr_lo = (uint32_t)(*cm_abort)->cm_frame_busaddr; abort->abort_mfi_addr_hi = (uint32_t)((uint64_t)(*cm_abort)->cm_frame_busaddr >> 32); cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to abort command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); while (i < 5 && *cm_abort != NULL) { tsleep(cm_abort, 0, "mfiabort", 5 * hz); i++; } if (*cm_abort != NULL) { /* Force a complete if command didn't abort */ mtx_lock(&sc->mfi_io_lock); (*cm_abort)->cm_complete(*cm_abort); mtx_unlock(&sc->mfi_io_lock); } return (error); } int mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_io_frame *io; int error; uint32_t context = 0; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; io->header.cmd = MFI_CMD_LD_WRITE; io->header.target_id = id; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = howmany(len, MFI_SECTOR_LEN); io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (lba & 0xffffffff00000000) >> 32; io->lba_lo = lba & 0xffffffff; cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &io->sgl; cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } int mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_pass_frame *pass; int error, readop, cdb_len; uint32_t blkcount; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; readop = 0; blkcount = howmany(len, MFI_SECTOR_LEN); cdb_len = mfi_build_cdb(readop, 0, lba, blkcount, pass->cdb); pass->header.target_id = id; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = len; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } static int mfi_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; int error; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); if (sc->mfi_detaching) error = ENXIO; else { sc->mfi_flags |= MFI_FLAGS_OPEN; error = 0; } mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; struct mfi_aen *mfi_aen_entry, *tmp; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); sc->mfi_flags &= ~MFI_FLAGS_OPEN; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { if (mfi_aen_entry->p == curproc) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } } mtx_unlock(&sc->mfi_io_lock); return (0); } static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode) { switch (opcode) { case MFI_DCMD_LD_DELETE: case MFI_DCMD_CFG_ADD: case MFI_DCMD_CFG_CLEAR: case MFI_DCMD_CFG_FOREIGN_IMPORT: sx_xlock(&sc->mfi_config_lock); return (1); default: return (0); } } static void mfi_config_unlock(struct mfi_softc *sc, int locked) { if (locked) sx_xunlock(&sc->mfi_config_lock); } /* * Perform pre-issue checks on commands from userland and possibly veto * them. */ static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ld2; int error; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = 0; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } if (ld == NULL) error = ENOENT; else error = mfi_disk_disable(ld); break; case MFI_DCMD_CFG_CLEAR: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { error = mfi_disk_disable(ld); if (error) break; } if (error) { TAILQ_FOREACH(ld2, &sc->mfi_ld_tqh, ld_link) { if (ld2 == ld) break; mfi_disk_enable(ld2); } } break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; if (syspd) error = mfi_syspd_disable(syspd); break; default: break; } return (error); } /* Perform post-issue checks on commands from userland. */ static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ldn; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } KASSERT(ld != NULL, ("volume dissappeared")); if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else mfi_disk_enable(ld); break; case MFI_DCMD_CFG_CLEAR: if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); TAILQ_FOREACH_SAFE(ld, &sc->mfi_ld_tqh, ld_link, ldn) { device_delete_child(sc->mfi_dev, ld->ld_dev); } mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) mfi_disk_enable(ld); } break; case MFI_DCMD_CFG_ADD: mfi_ldprobe(sc); break; case MFI_DCMD_CFG_FOREIGN_IMPORT: mfi_ldprobe(sc); break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh,pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; /* If the transition fails then enable the syspd again */ if (syspd && cm->cm_frame->header.cmd_status != MFI_STAT_OK) mfi_syspd_enable(syspd); break; } } static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_config_data *conf_data; struct mfi_command *ld_cm = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_ld_config *ld; char *p; int error = 0; conf_data = (struct mfi_config_data *)cm->cm_data; if (cm->cm_frame->dcmd.opcode == MFI_DCMD_CFG_ADD) { p = (char *)conf_data->array; p += conf_data->array_size * conf_data->array_count; ld = (struct mfi_ld_config *)p; if (ld->params.isSSCD == 1) error = 1; } else if (cm->cm_frame->dcmd.opcode == MFI_DCMD_LD_DELETE) { error = mfi_dcmd_command (sc, &ld_cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate" "MFI_DCMD_LD_GET_INFO %d", error); if (ld_info) free(ld_info, M_MFIBUF); return 0; } ld_cm->cm_flags = MFI_CMD_DATAIN; ld_cm->cm_frame->dcmd.mbox[0]= cm->cm_frame->dcmd.mbox[0]; ld_cm->cm_frame->header.target_id = cm->cm_frame->dcmd.mbox[0]; if (mfi_wait_command(sc, ld_cm) != 0) { device_printf(sc->mfi_dev, "failed to get log drv\n"); mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); return 0; } if (ld_cm->cm_frame->header.cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); mfi_release_command(ld_cm); return 0; } else ld_info = (struct mfi_ld_info *)ld_cm->cm_private; if (ld_info->ld_config.params.isSSCD == 1) error = 1; mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); } return error; } static int mfi_stp_cmd(struct mfi_softc *sc, struct mfi_command *cm,caddr_t arg) { uint8_t i; struct mfi_ioc_packet *ioc; ioc = (struct mfi_ioc_packet *)arg; int sge_size, error; struct megasas_sge *kern_sge; memset(sc->kbuff_arr, 0, sizeof(sc->kbuff_arr)); kern_sge =(struct megasas_sge *) ((uintptr_t)cm->cm_frame + ioc->mfi_sgl_off); cm->cm_frame->header.sg_count = ioc->mfi_sge_count; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->header.flags |= MFI_FRAME_SGL64; cm->cm_extra_frames = 2; sge_size = sizeof(struct mfi_sg64); } else { cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; sge_size = sizeof(struct mfi_sg32); } cm->cm_total_frame_size += (sge_size * ioc->mfi_sge_count); for (i = 0; i < ioc->mfi_sge_count; i++) { if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ioc->mfi_sgl[i].iov_len,/* maxsize */ 2, /* nsegments */ ioc->mfi_sgl[i].iov_len,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_kbuff_arr_dmat[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmat tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_kbuff_arr_dmat[i], (void **)&sc->kbuff_arr[i], BUS_DMA_NOWAIT, &sc->mfi_kbuff_arr_dmamap[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmamap memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i], sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len, mfi_addr_cb, &sc->mfi_kbuff_arr_busaddr[i], 0); if (!sc->kbuff_arr[i]) { device_printf(sc->mfi_dev, "Could not allocate memory for kbuff_arr info\n"); return -1; } kern_sge[i].phys_addr = sc->mfi_kbuff_arr_busaddr[i]; kern_sge[i].length = ioc->mfi_sgl[i].iov_len; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->stp.sgl.sg64[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg64[i].len = ioc->mfi_sgl[i].iov_len; } else { cm->cm_frame->stp.sgl.sg32[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg32[i].len = ioc->mfi_sgl[i].iov_len; } error = copyin(ioc->mfi_sgl[i].iov_base, sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); return error; } } cm->cm_flags |=MFI_CMD_MAPPED; return 0; } static int mfi_user_command(struct mfi_softc *sc, struct mfi_ioc_passthru *ioc) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *ioc_buf = NULL; uint32_t context; int error = 0, locked; if (ioc->buf_size > 0) { if (ioc->buf_size > 1024 * 1024) return (ENOMEM); ioc_buf = malloc(ioc->buf_size, M_MFIBUF, M_WAITOK); error = copyin(ioc->buf, ioc_buf, ioc->buf_size); if (error) { device_printf(sc->mfi_dev, "failed to copyin\n"); free(ioc_buf, M_MFIBUF); return (error); } } locked = mfi_config_lock(sc, ioc->ioc_frame.opcode); mtx_lock(&sc->mfi_io_lock); while ((cm = mfi_dequeue_free(sc)) == NULL) msleep(mfi_user_command, &sc->mfi_io_lock, 0, "mfiioc", hz); /* Save context for later */ context = cm->cm_frame->header.context; dcmd = &cm->cm_frame->dcmd; bcopy(&ioc->ioc_frame, dcmd, sizeof(struct mfi_dcmd_frame)); cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_data = ioc_buf; cm->cm_len = ioc->buf_size; /* restore context */ cm->cm_frame->header.context = context; /* Cheat since we don't know if we're writing or reading */ cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_DATAOUT; error = mfi_check_command_pre(sc, cm); if (error) goto out; error = mfi_wait_command(sc, cm); if (error) { device_printf(sc->mfi_dev, "ioctl failed %d\n", error); goto out; } bcopy(dcmd, &ioc->ioc_frame, sizeof(struct mfi_dcmd_frame)); mfi_check_command_post(sc, cm); out: mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mfi_config_unlock(sc, locked); if (ioc->buf_size > 0) error = copyout(ioc_buf, ioc->buf, ioc->buf_size); if (ioc_buf) free(ioc_buf, M_MFIBUF); return (error); } #define PTRIN(p) ((void *)(uintptr_t)(p)) static int mfi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; union mfi_statrequest *ms; struct mfi_ioc_packet *ioc; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_packet32 *ioc32; #endif struct mfi_ioc_aen *aen; struct mfi_command *cm = NULL; uint32_t context = 0; union mfi_sense_ptr sense_ptr; uint8_t *data = NULL, *temp, *addr, skip_pre_post = 0; size_t len; int i, res; struct mfi_ioc_passthru *iop = (struct mfi_ioc_passthru *)arg; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_passthru32 *iop32 = (struct mfi_ioc_passthru32 *)arg; struct mfi_ioc_passthru iop_swab; #endif int error, locked; union mfi_sgl *sgl; sc = dev->si_drv1; error = 0; if (sc->adpreset) return EBUSY; if (sc->hw_crit_error) return EBUSY; if (sc->issuepend_done == 0) return EBUSY; switch (cmd) { case MFIIO_STATS: ms = (union mfi_statrequest *)arg; switch (ms->ms_item) { case MFIQ_FREE: case MFIQ_BIO: case MFIQ_READY: case MFIQ_BUSY: bcopy(&sc->mfi_qstat[ms->ms_item], &ms->ms_qstat, sizeof(struct mfi_qstat)); break; default: error = ENOIOCTL; break; } break; case MFIIO_QUERY_DISK: { struct mfi_query_disk *qd; struct mfi_disk *ld; qd = (struct mfi_query_disk *)arg; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == qd->array_id) break; } if (ld == NULL) { qd->present = 0; mtx_unlock(&sc->mfi_io_lock); return (0); } qd->present = 1; if (ld->ld_flags & MFI_DISK_FLAGS_OPEN) qd->open = 1; bzero(qd->devname, SPECNAMELEN + 1); snprintf(qd->devname, SPECNAMELEN, "mfid%d", ld->ld_unit); mtx_unlock(&sc->mfi_io_lock); break; } case MFI_CMD: #ifdef COMPAT_FREEBSD32 case MFI_CMD32: #endif { devclass_t devclass; ioc = (struct mfi_ioc_packet *)arg; int adapter; adapter = ioc->mfi_adapter_no; if (device_get_unit(sc->mfi_dev) == 0 && adapter != 0) { devclass = devclass_find("mfi"); sc = devclass_get_softc(devclass, adapter); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; cm->cm_frame->header.context = cm->cm_index; bcopy(ioc->mfi_frame.raw, cm->cm_frame, 2 * MEGAMFI_FRAME_SIZE); cm->cm_total_frame_size = (sizeof(union mfi_sgl) * ioc->mfi_sge_count) + ioc->mfi_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (ioc->mfi_sge_count) { cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[ioc->mfi_sgl_off]; } sgl = cm->cm_sg; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if (cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; /* Legacy app shim */ if (cm->cm_flags == 0) cm->cm_flags |= MFI_CMD_DATAIN | MFI_CMD_DATAOUT; cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ cm->cm_stp_len = ioc->mfi_sgl[0].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; cm->cm_stp_len = ioc32->mfi_sgl[0].iov_len; } #endif cm->cm_len += cm->cm_stp_len; } if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { res = mfi_stp_cmd(sc, cm, arg); if (res != 0) goto out; } else { temp = data; if ((cm->cm_flags & MFI_CMD_DATAOUT) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyin(addr, temp, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[len]; } } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); skip_pre_post = mfi_check_for_sscd (sc, cm); if (!skip_pre_post) { error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } if (!skip_pre_post) { mfi_check_command_post(sc, cm); } mtx_unlock(&sc->mfi_io_lock); if (cm->cm_frame->header.cmd != MFI_CMD_STP) { temp = data; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyout(temp, addr, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[len]; } } } if (ioc->mfi_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&ioc->mfi_frame.raw[ioc->mfi_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef COMPAT_FREEBSD32 if (cmd != MFI_CMD) { /* * not 64bit native so zero out any address * over 32bit */ sense_ptr.addr.high = 0; } #endif error = copyout(cm->cm_sense, sense_ptr.user_space, ioc->mfi_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } ioc->mfi_frame.hdr.cmd_status = cm->cm_frame->header.cmd_status; out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm->cm_frame->header.cmd == MFI_CMD_STP) { for (i = 0; i < 2; i++) { if (sc->kbuff_arr[i]) { if (sc->mfi_kbuff_arr_busaddr[i] != 0) bus_dmamap_unload( sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->kbuff_arr[i] != NULL) bus_dmamem_free( sc->mfi_kbuff_arr_dmat[i], sc->kbuff_arr[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->mfi_kbuff_arr_dmat[i] != NULL) bus_dma_tag_destroy( sc->mfi_kbuff_arr_dmat[i]); } } } if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } break; } case MFI_SET_AEN: aen = (struct mfi_ioc_aen *)arg; mtx_lock(&sc->mfi_io_lock); error = mfi_aen_register(sc, aen->aen_seq_num, aen->aen_class_locale); mtx_unlock(&sc->mfi_io_lock); break; case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_packet l_ioc; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error) return (error); adapter = l_ioc.lioc_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_aen l_aen; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_aen, sizeof(l_aen)); if (error) return (error); adapter = l_aen.laen_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } #ifdef COMPAT_FREEBSD32 case MFIIO_PASSTHRU32: if (!SV_CURPROC_FLAG(SV_ILP32)) { error = ENOTTY; break; } iop_swab.ioc_frame = iop32->ioc_frame; iop_swab.buf_size = iop32->buf_size; iop_swab.buf = PTRIN(iop32->buf); iop = &iop_swab; /* FALLTHROUGH */ #endif case MFIIO_PASSTHRU: error = mfi_user_command(sc, iop); #ifdef COMPAT_FREEBSD32 if (cmd == MFIIO_PASSTHRU32) iop32->ioc_frame = iop_swab.ioc_frame; #endif break; default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOTTY; break; } return (error); } static int mfi_linux_ioctl_int(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; struct mfi_linux_ioc_packet l_ioc; struct mfi_linux_ioc_aen l_aen; struct mfi_command *cm = NULL; struct mfi_aen *mfi_aen_entry; union mfi_sense_ptr sense_ptr; uint32_t context = 0; uint8_t *data = NULL, *temp; int i; int error, locked; sc = dev->si_drv1; error = 0; switch (cmd) { case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error != 0) return (error); if (l_ioc.lioc_sge_count > MAX_LINUX_IOCTL_SGE) { return (EINVAL); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; bcopy(l_ioc.lioc_frame.raw, cm->cm_frame, 2 * MFI_DCMD_FRAME_SIZE); /* this isn't quite right */ cm->cm_total_frame_size = (sizeof(union mfi_sgl) * l_ioc.lioc_sge_count) + l_ioc.lioc_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (l_ioc.lioc_sge_count) cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[l_ioc.lioc_sgl_off]; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if (cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; temp = data; if (cm->cm_flags & MFI_CMD_DATAOUT) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyin(PTRIN(l_ioc.lioc_sgl[i].iov_base), temp, l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } mfi_check_command_post(sc, cm); mtx_unlock(&sc->mfi_io_lock); temp = data; if (cm->cm_flags & MFI_CMD_DATAIN) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyout(temp, PTRIN(l_ioc.lioc_sgl[i].iov_base), l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (l_ioc.lioc_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.raw[l_ioc.lioc_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef __amd64__ /* * only 32bit Linux support so zero out any * address over 32bit */ sense_ptr.addr.high = 0; #endif error = copyout(cm->cm_sense, sense_ptr.user_space, l_ioc.lioc_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } error = copyout(&cm->cm_frame->header.cmd_status, &((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.hdr.cmd_status, 1); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } return (error); case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ error = copyin(arg, &l_aen, sizeof(l_aen)); if (error != 0) return (error); printf("AEN IMPLEMENTED for pid %d\n", curproc->p_pid); mfi_aen_entry = malloc(sizeof(struct mfi_aen), M_MFIBUF, M_WAITOK); mtx_lock(&sc->mfi_io_lock); if (mfi_aen_entry != NULL) { mfi_aen_entry->p = curproc; TAILQ_INSERT_TAIL(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); } error = mfi_aen_register(sc, l_aen.laen_seq_num, l_aen.laen_class_locale); if (error != 0) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } mtx_unlock(&sc->mfi_io_lock); return (error); default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOENT; break; } return (error); } static int mfi_poll(struct cdev *dev, int poll_events, struct thread *td) { struct mfi_softc *sc; int revents = 0; sc = dev->si_drv1; if (poll_events & (POLLIN | POLLRDNORM)) { if (sc->mfi_aen_triggered != 0) { revents |= poll_events & (POLLIN | POLLRDNORM); sc->mfi_aen_triggered = 0; } if (sc->mfi_aen_triggered == 0 && sc->mfi_aen_cm == NULL) { revents |= POLLERR; } } if (revents == 0) { if (poll_events & (POLLIN | POLLRDNORM)) { sc->mfi_poll_waiting = 1; selrecord(td, &sc->mfi_select); } } return revents; } static void mfi_dump_all(void) { struct mfi_softc *sc; struct mfi_command *cm; devclass_t dc; time_t deadline; int timedout; int i; dc = devclass_find("mfi"); if (dc == NULL) { printf("No mfi dev class\n"); return; } for (i = 0; ; i++) { sc = devclass_get_softc(dc, i); if (sc == NULL) break; device_printf(sc->mfi_dev, "Dumping\n\n"); timedout = 0; deadline = time_uptime - mfi_cmd_timeout; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) { if (cm->cm_timestamp <= deadline) { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp)); MFI_PRINT_CMD(cm); timedout++; } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); } return; } static void mfi_timeout(void *data) { struct mfi_softc *sc = (struct mfi_softc *)data; struct mfi_command *cm, *tmp; time_t deadline; int timedout = 0; deadline = time_uptime - mfi_cmd_timeout; if (sc->adpreset == 0) { if (!mfi_tbolt_reset(sc)) { callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); return; } } mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) { if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm) continue; if (cm->cm_timestamp <= deadline) { if (sc->adpreset != 0 && sc->issuepend_done == 0) { cm->cm_timestamp = time_uptime; } else { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp) ); MFI_PRINT_CMD(cm); MFI_VALIDATE_CMD(sc, cm); /* * While commands can get stuck forever we do * not fail them as there is no way to tell if * the controller has actually processed them * or not. * * In addition its very likely that force * failing a command here would cause a panic * e.g. in UFS. */ timedout++; } } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (0) mfi_dump_all(); return; } Index: head/sys/dev/mfi/mfi_cam.c =================================================================== --- head/sys/dev/mfi/mfi_cam.c (revision 320527) +++ head/sys/dev/mfi/mfi_cam.c (revision 320528) @@ -1,476 +1,475 @@ /*- * Copyright 2007 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mfi.h" #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum mfip_state { MFIP_STATE_NONE, MFIP_STATE_DETACH, MFIP_STATE_RESCAN }; struct mfip_softc { device_t dev; struct mfi_softc *mfi_sc; struct cam_devq *devq; struct cam_sim *sim; struct cam_path *path; enum mfip_state state; }; static int mfip_probe(device_t); static int mfip_attach(device_t); static int mfip_detach(device_t); static void mfip_cam_action(struct cam_sim *, union ccb *); static void mfip_cam_poll(struct cam_sim *); static void mfip_cam_rescan(struct mfi_softc *, uint32_t tid); static struct mfi_command * mfip_start(void *); static void mfip_done(struct mfi_command *cm); static int mfi_allow_disks = 0; SYSCTL_INT(_hw_mfi, OID_AUTO, allow_cam_disk_passthrough, CTLFLAG_RDTUN, &mfi_allow_disks, 0, "event message locale"); static devclass_t mfip_devclass; static device_method_t mfip_methods[] = { DEVMETHOD(device_probe, mfip_probe), DEVMETHOD(device_attach, mfip_attach), DEVMETHOD(device_detach, mfip_detach), DEVMETHOD_END }; static driver_t mfip_driver = { "mfip", mfip_methods, sizeof(struct mfip_softc) }; DRIVER_MODULE(mfip, mfi, mfip_driver, mfip_devclass, 0, 0); MODULE_DEPEND(mfip, cam, 1, 1, 1); MODULE_DEPEND(mfip, mfi, 1, 1, 1); #define ccb_mfip_ptr sim_priv.entries[0].ptr static int mfip_probe(device_t dev) { device_set_desc(dev, "SCSI Passthrough Bus"); return (0); } static int mfip_attach(device_t dev) { struct mfip_softc *sc; struct mfi_softc *mfisc; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); mfisc = device_get_softc(device_get_parent(dev)); sc->dev = dev; sc->state = MFIP_STATE_NONE; sc->mfi_sc = mfisc; mfisc->mfi_cam_start = mfip_start; if ((sc->devq = cam_simq_alloc(MFI_SCSI_MAX_CMDS)) == NULL) return (ENOMEM); sc->sim = cam_sim_alloc(mfip_cam_action, mfip_cam_poll, "mfi", sc, device_get_unit(dev), &mfisc->mfi_io_lock, 1, MFI_SCSI_MAX_CMDS, sc->devq); if (sc->sim == NULL) { cam_simq_free(sc->devq); sc->devq = NULL; device_printf(dev, "CAM SIM attach failed\n"); return (EINVAL); } mfisc->mfi_cam_rescan_cb = mfip_cam_rescan; mtx_lock(&mfisc->mfi_io_lock); if (xpt_bus_register(sc->sim, dev, 0) != 0) { device_printf(dev, "XPT bus registration failed\n"); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; cam_simq_free(sc->devq); sc->devq = NULL; mtx_unlock(&mfisc->mfi_io_lock); return (EINVAL); } mtx_unlock(&mfisc->mfi_io_lock); return (0); } static int mfip_detach(device_t dev) { struct mfip_softc *sc; sc = device_get_softc(dev); if (sc == NULL) return (EINVAL); mtx_lock(&sc->mfi_sc->mfi_io_lock); if (sc->state == MFIP_STATE_RESCAN) { mtx_unlock(&sc->mfi_sc->mfi_io_lock); return (EBUSY); } sc->state = MFIP_STATE_DETACH; mtx_unlock(&sc->mfi_sc->mfi_io_lock); sc->mfi_sc->mfi_cam_rescan_cb = NULL; if (sc->sim != NULL) { mtx_lock(&sc->mfi_sc->mfi_io_lock); xpt_bus_deregister(cam_sim_path(sc->sim)); cam_sim_free(sc->sim, FALSE); sc->sim = NULL; mtx_unlock(&sc->mfi_sc->mfi_io_lock); } if (sc->devq != NULL) { cam_simq_free(sc->devq); sc->devq = NULL; } return (0); } static void mfip_cam_action(struct cam_sim *sim, union ccb *ccb) { struct mfip_softc *sc = cam_sim_softc(sim); struct mfi_softc *mfisc = sc->mfi_sc; mtx_assert(&mfisc->mfi_io_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET | PIM_SEQSCAN | PIM_UNMAPPED; cpi->hba_eng_cnt = 0; cpi->max_target = MFI_SCSI_MAX_TARGETS; cpi->max_lun = MFI_SCSI_MAX_LUNS; cpi->initiator_id = MFI_SCSI_INITIATOR_ID; strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(cpi->hba_vid, "LSI", HBA_IDLEN); strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 150000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_2; cpi->ccb_h.status = CAM_REQ_CMP; break; } case XPT_RESET_BUS: ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_RESET_DEV: ccb->ccb_h.status = CAM_REQ_CMP; break; case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings_scsi *scsi = &ccb->cts.proto_specific.scsi; struct ccb_trans_settings_sas *sas = &ccb->cts.xport_specific.sas; ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_2; ccb->cts.transport = XPORT_SAS; ccb->cts.transport_version = 0; scsi->valid = CTS_SCSI_VALID_TQ; scsi->flags = CTS_SCSI_FLAGS_TAG_ENB; sas->valid &= ~CTS_SAS_VALID_SPEED; sas->bitrate = 150000; ccb->ccb_h.status = CAM_REQ_CMP; break; } case XPT_SET_TRAN_SETTINGS: ccb->ccb_h.status = CAM_FUNC_NOTAVAIL; break; case XPT_SCSI_IO: { struct ccb_hdr *ccbh = &ccb->ccb_h; struct ccb_scsiio *csio = &ccb->csio; ccbh->status = CAM_REQ_INPROG; if (csio->cdb_len > MFI_SCSI_MAX_CDB_LEN) { ccbh->status = CAM_REQ_INVALID; break; } ccbh->ccb_mfip_ptr = sc; TAILQ_INSERT_TAIL(&mfisc->mfi_cam_ccbq, ccbh, sim_links.tqe); mfi_startio(mfisc); return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; break; } xpt_done(ccb); return; } static void mfip_cam_rescan(struct mfi_softc *sc, uint32_t tid) { union ccb *ccb; struct mfip_softc *camsc; struct cam_sim *sim; device_t mfip_dev; mtx_lock(&Giant); mfip_dev = device_find_child(sc->mfi_dev, "mfip", -1); mtx_unlock(&Giant); if (mfip_dev == NULL) { device_printf(sc->mfi_dev, "Couldn't find mfip child device!\n"); return; } mtx_lock(&sc->mfi_io_lock); camsc = device_get_softc(mfip_dev); if (camsc->state == MFIP_STATE_DETACH) { mtx_unlock(&sc->mfi_io_lock); return; } camsc->state = MFIP_STATE_RESCAN; ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { mtx_unlock(&sc->mfi_io_lock); device_printf(sc->mfi_dev, "Cannot allocate ccb for bus rescan.\n"); return; } sim = camsc->sim; if (xpt_create_path(&ccb->ccb_h.path, NULL, cam_sim_path(sim), tid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_free_ccb(ccb); mtx_unlock(&sc->mfi_io_lock); device_printf(sc->mfi_dev, "Cannot create path for bus rescan.\n"); return; } xpt_rescan(ccb); camsc->state = MFIP_STATE_NONE; mtx_unlock(&sc->mfi_io_lock); } static struct mfi_command * mfip_start(void *data) { union ccb *ccb = data; struct ccb_hdr *ccbh = &ccb->ccb_h; struct ccb_scsiio *csio = &ccb->csio; struct mfip_softc *sc; struct mfi_pass_frame *pt; struct mfi_command *cm; uint32_t context = 0; sc = ccbh->ccb_mfip_ptr; if ((cm = mfi_dequeue_free(sc->mfi_sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; pt = &cm->cm_frame->pass; pt->header.cmd = MFI_CMD_PD_SCSI_IO; pt->header.cmd_status = 0; pt->header.scsi_status = 0; pt->header.target_id = ccbh->target_id; pt->header.lun_id = ccbh->target_lun; pt->header.flags = 0; pt->header.timeout = 0; pt->header.data_len = csio->dxfer_len; pt->header.sense_len = MFI_SENSE_LEN; pt->header.cdb_len = csio->cdb_len; pt->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pt->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); if (ccbh->flags & CAM_CDB_POINTER) bcopy(csio->cdb_io.cdb_ptr, &pt->cdb[0], csio->cdb_len); else bcopy(csio->cdb_io.cdb_bytes, &pt->cdb[0], csio->cdb_len); cm->cm_complete = mfip_done; cm->cm_private = ccb; cm->cm_sg = &pt->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_data = ccb; cm->cm_len = csio->dxfer_len; switch (ccbh->flags & CAM_DIR_MASK) { case CAM_DIR_IN: cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_CCB; break; case CAM_DIR_OUT: cm->cm_flags = MFI_CMD_DATAOUT | MFI_CMD_CCB; break; case CAM_DIR_NONE: default: cm->cm_data = NULL; cm->cm_len = 0; cm->cm_flags = 0; break; } TAILQ_REMOVE(&sc->mfi_sc->mfi_cam_ccbq, ccbh, sim_links.tqe); return (cm); } static void mfip_done(struct mfi_command *cm) { union ccb *ccb = cm->cm_private; struct ccb_hdr *ccbh = &ccb->ccb_h; struct ccb_scsiio *csio = &ccb->csio; struct mfip_softc *sc; struct mfi_pass_frame *pt; sc = ccbh->ccb_mfip_ptr; pt = &cm->cm_frame->pass; switch (pt->header.cmd_status) { case MFI_STAT_OK: { uint8_t command, device; ccbh->status = CAM_REQ_CMP; csio->scsi_status = pt->header.scsi_status; if (ccbh->flags & CAM_CDB_POINTER) command = csio->cdb_io.cdb_ptr[0]; else command = csio->cdb_io.cdb_bytes[0]; if (command == INQUIRY) { device = csio->data_ptr[0] & 0x1f; if ((!mfi_allow_disks && device == T_DIRECT) || (device == T_PROCESSOR)) csio->data_ptr[0] = (csio->data_ptr[0] & 0xe0) | T_NODEVICE; } break; } case MFI_STAT_SCSI_DONE_WITH_ERROR: { int sense_len; ccbh->status = CAM_SCSI_STATUS_ERROR | CAM_AUTOSNS_VALID; csio->scsi_status = pt->header.scsi_status; if (pt->header.sense_len < csio->sense_len) csio->sense_resid = csio->sense_len - pt->header.sense_len; else csio->sense_resid = 0; sense_len = min(pt->header.sense_len, sizeof(struct scsi_sense_data)); bzero(&csio->sense_data, sizeof(struct scsi_sense_data)); bcopy(&cm->cm_sense->data[0], &csio->sense_data, sense_len); break; } case MFI_STAT_DEVICE_NOT_FOUND: ccbh->status = CAM_SEL_TIMEOUT; break; case MFI_STAT_SCSI_IO_FAILED: ccbh->status = CAM_REQ_CMP_ERR; csio->scsi_status = pt->header.scsi_status; break; default: ccbh->status = CAM_REQ_CMP_ERR; csio->scsi_status = pt->header.scsi_status; break; } mfi_release_command(cm); xpt_done(ccb); } static void mfip_cam_poll(struct cam_sim *sim) { struct mfip_softc *sc = cam_sim_softc(sim); struct mfi_softc *mfisc = sc->mfi_sc; mfisc->mfi_intr_ptr(mfisc); } Index: head/sys/dev/tsec/if_tsec.c =================================================================== --- head/sys/dev/tsec/if_tsec.c (revision 320527) +++ head/sys/dev/tsec/if_tsec.c (revision 320528) @@ -1,1937 +1,1936 @@ /*- * Copyright (C) 2007-2008 Semihalf, Rafal Jaworowski * Copyright (C) 2006-2007 Semihalf, Piotr Kruszynski * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Freescale integrated Three-Speed Ethernet Controller (TSEC) driver. */ #include __FBSDID("$FreeBSD$"); #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int tsec_alloc_dma_desc(device_t dev, bus_dma_tag_t *dtag, bus_dmamap_t *dmap, bus_size_t dsize, void **vaddr, void *raddr, const char *dname); static void tsec_dma_ctl(struct tsec_softc *sc, int state); static void tsec_encap(struct ifnet *ifp, struct tsec_softc *sc, struct mbuf *m0, uint16_t fcb_flags, int *start_tx); static void tsec_free_dma(struct tsec_softc *sc); static void tsec_free_dma_desc(bus_dma_tag_t dtag, bus_dmamap_t dmap, void *vaddr); static int tsec_ifmedia_upd(struct ifnet *ifp); static void tsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static int tsec_new_rxbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mbufp, uint32_t *paddr); static void tsec_map_dma_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void tsec_intrs_ctl(struct tsec_softc *sc, int state); static void tsec_init(void *xsc); static void tsec_init_locked(struct tsec_softc *sc); static int tsec_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void tsec_reset_mac(struct tsec_softc *sc); static void tsec_setfilter(struct tsec_softc *sc); static void tsec_set_mac_address(struct tsec_softc *sc); static void tsec_start(struct ifnet *ifp); static void tsec_start_locked(struct ifnet *ifp); static void tsec_stop(struct tsec_softc *sc); static void tsec_tick(void *arg); static void tsec_watchdog(struct tsec_softc *sc); static void tsec_add_sysctls(struct tsec_softc *sc); static int tsec_sysctl_ic_time(SYSCTL_HANDLER_ARGS); static int tsec_sysctl_ic_count(SYSCTL_HANDLER_ARGS); static void tsec_set_rxic(struct tsec_softc *sc); static void tsec_set_txic(struct tsec_softc *sc); static int tsec_receive_intr_locked(struct tsec_softc *sc, int count); static void tsec_transmit_intr_locked(struct tsec_softc *sc); static void tsec_error_intr_locked(struct tsec_softc *sc, int count); static void tsec_offload_setup(struct tsec_softc *sc); static void tsec_offload_process_frame(struct tsec_softc *sc, struct mbuf *m); static void tsec_setup_multicast(struct tsec_softc *sc); static int tsec_set_mtu(struct tsec_softc *sc, unsigned int mtu); devclass_t tsec_devclass; DRIVER_MODULE(miibus, tsec, miibus_driver, miibus_devclass, 0, 0); MODULE_DEPEND(tsec, ether, 1, 1, 1); MODULE_DEPEND(tsec, miibus, 1, 1, 1); struct mtx tsec_phy_mtx; int tsec_attach(struct tsec_softc *sc) { uint8_t hwaddr[ETHER_ADDR_LEN]; struct ifnet *ifp; int error = 0; int i; /* Initialize global (because potentially shared) MII lock */ if (!mtx_initialized(&tsec_phy_mtx)) mtx_init(&tsec_phy_mtx, "tsec mii", NULL, MTX_DEF); /* Reset all TSEC counters */ TSEC_TX_RX_COUNTERS_INIT(sc); /* Stop DMA engine if enabled by firmware */ tsec_dma_ctl(sc, 0); /* Reset MAC */ tsec_reset_mac(sc); /* Disable interrupts for now */ tsec_intrs_ctl(sc, 0); /* Configure defaults for interrupts coalescing */ sc->rx_ic_time = 768; sc->rx_ic_count = 16; sc->tx_ic_time = 768; sc->tx_ic_count = 16; tsec_set_rxic(sc); tsec_set_txic(sc); tsec_add_sysctls(sc); /* Allocate a busdma tag and DMA safe memory for TX descriptors. */ error = tsec_alloc_dma_desc(sc->dev, &sc->tsec_tx_dtag, &sc->tsec_tx_dmap, sizeof(*sc->tsec_tx_vaddr) * TSEC_TX_NUM_DESC, (void **)&sc->tsec_tx_vaddr, &sc->tsec_tx_raddr, "TX"); if (error) { tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag and DMA safe memory for RX descriptors. */ error = tsec_alloc_dma_desc(sc->dev, &sc->tsec_rx_dtag, &sc->tsec_rx_dmap, sizeof(*sc->tsec_rx_vaddr) * TSEC_RX_NUM_DESC, (void **)&sc->tsec_rx_vaddr, &sc->tsec_rx_raddr, "RX"); if (error) { tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag for TX mbufs. */ error = bus_dma_tag_create(NULL, /* parent */ TSEC_TXBUFFER_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES * (TSEC_TX_NUM_DESC - 1), /* maxsize */ TSEC_TX_MAX_DMA_SEGS, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->tsec_tx_mtag); /* dmat */ if (error) { device_printf(sc->dev, "failed to allocate busdma tag " "(tx mbufs)\n"); tsec_detach(sc); return (ENXIO); } /* Allocate a busdma tag for RX mbufs. */ error = bus_dma_tag_create(NULL, /* parent */ TSEC_RXBUFFER_ALIGNMENT, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ MCLBYTES, /* maxsize */ 1, /* nsegments */ MCLBYTES, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sc->tsec_rx_mtag); /* dmat */ if (error) { device_printf(sc->dev, "failed to allocate busdma tag " "(rx mbufs)\n"); tsec_detach(sc); return (ENXIO); } /* Create TX busdma maps */ for (i = 0; i < TSEC_TX_NUM_DESC; i++) { error = bus_dmamap_create(sc->tsec_tx_mtag, 0, &sc->tx_bufmap[i].map); if (error) { device_printf(sc->dev, "failed to init TX ring\n"); tsec_detach(sc); return (ENXIO); } sc->tx_bufmap[i].map_initialized = 1; } /* Create RX busdma maps and zero mbuf handlers */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { error = bus_dmamap_create(sc->tsec_rx_mtag, 0, &sc->rx_data[i].map); if (error) { device_printf(sc->dev, "failed to init RX ring\n"); tsec_detach(sc); return (ENXIO); } sc->rx_data[i].mbuf = NULL; } /* Create mbufs for RX buffers */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { error = tsec_new_rxbuf(sc->tsec_rx_mtag, sc->rx_data[i].map, &sc->rx_data[i].mbuf, &sc->rx_data[i].paddr); if (error) { device_printf(sc->dev, "can't load rx DMA map %d, " "error = %d\n", i, error); tsec_detach(sc); return (error); } } /* Create network interface for upper layers */ ifp = sc->tsec_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->dev, "if_alloc() failed\n"); tsec_detach(sc); return (ENOMEM); } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); ifp->if_flags = IFF_SIMPLEX | IFF_MULTICAST | IFF_BROADCAST; ifp->if_init = tsec_init; ifp->if_start = tsec_start; ifp->if_ioctl = tsec_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, TSEC_TX_NUM_DESC - 1); ifp->if_snd.ifq_drv_maxlen = TSEC_TX_NUM_DESC - 1; IFQ_SET_READY(&ifp->if_snd); ifp->if_capabilities = IFCAP_VLAN_MTU; if (sc->is_etsec) ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capenable = ifp->if_capabilities; #ifdef DEVICE_POLLING /* Advertise that polling is supported */ ifp->if_capabilities |= IFCAP_POLLING; #endif /* Attach PHY(s) */ error = mii_attach(sc->dev, &sc->tsec_miibus, ifp, tsec_ifmedia_upd, tsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->phyaddr, MII_OFFSET_ANY, 0); if (error) { device_printf(sc->dev, "attaching PHYs failed\n"); if_free(ifp); sc->tsec_ifp = NULL; tsec_detach(sc); return (error); } sc->tsec_mii = device_get_softc(sc->tsec_miibus); /* Set MAC address */ tsec_get_hwaddr(sc, hwaddr); ether_ifattach(ifp, hwaddr); return (0); } int tsec_detach(struct tsec_softc *sc) { if (sc->tsec_ifp != NULL) { #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) ether_poll_deregister(sc->tsec_ifp); #endif /* Stop TSEC controller and free TX queue */ if (sc->sc_rres) tsec_shutdown(sc->dev); /* Detach network interface */ ether_ifdetach(sc->tsec_ifp); if_free(sc->tsec_ifp); sc->tsec_ifp = NULL; } /* Free DMA resources */ tsec_free_dma(sc); return (0); } int tsec_shutdown(device_t dev) { struct tsec_softc *sc; sc = device_get_softc(dev); TSEC_GLOBAL_LOCK(sc); tsec_stop(sc); TSEC_GLOBAL_UNLOCK(sc); return (0); } int tsec_suspend(device_t dev) { /* TODO not implemented! */ return (0); } int tsec_resume(device_t dev) { /* TODO not implemented! */ return (0); } static void tsec_init(void *xsc) { struct tsec_softc *sc = xsc; TSEC_GLOBAL_LOCK(sc); tsec_init_locked(sc); TSEC_GLOBAL_UNLOCK(sc); } static int tsec_mii_wait(struct tsec_softc *sc, uint32_t flags) { int timeout; /* * The status indicators are not set immediatly after a command. * Discard the first value. */ TSEC_PHY_READ(sc, TSEC_REG_MIIMIND); timeout = TSEC_READ_RETRY; while ((TSEC_PHY_READ(sc, TSEC_REG_MIIMIND) & flags) && --timeout) DELAY(TSEC_READ_DELAY); return (timeout == 0); } static void tsec_init_locked(struct tsec_softc *sc) { struct tsec_desc *tx_desc = sc->tsec_tx_vaddr; struct tsec_desc *rx_desc = sc->tsec_rx_vaddr; struct ifnet *ifp = sc->tsec_ifp; uint32_t val, i; int timeout; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; TSEC_GLOBAL_LOCK_ASSERT(sc); tsec_stop(sc); /* * These steps are according to the MPC8555E PowerQUICCIII RM: * 14.7 Initialization/Application Information */ /* Step 1: soft reset MAC */ tsec_reset_mac(sc); /* Step 2: Initialize MACCFG2 */ TSEC_WRITE(sc, TSEC_REG_MACCFG2, TSEC_MACCFG2_FULLDUPLEX | /* Full Duplex = 1 */ TSEC_MACCFG2_PADCRC | /* PAD/CRC append */ TSEC_MACCFG2_GMII | /* I/F Mode bit */ TSEC_MACCFG2_PRECNT /* Preamble count = 7 */ ); /* Step 3: Initialize ECNTRL * While the documentation states that R100M is ignored if RPM is * not set, it does seem to be needed to get the orange boxes to * work (which have a Marvell 88E1111 PHY). Go figure. */ /* * XXX kludge - use circumstancial evidence to program ECNTRL * correctly. Ideally we need some board information to guide * us here. */ i = TSEC_READ(sc, TSEC_REG_ID2); val = (i & 0xffff) ? (TSEC_ECNTRL_TBIM | TSEC_ECNTRL_SGMIIM) /* Sumatra */ : TSEC_ECNTRL_R100M; /* Orange + CDS */ TSEC_WRITE(sc, TSEC_REG_ECNTRL, TSEC_ECNTRL_STEN | val); /* Step 4: Initialize MAC station address */ tsec_set_mac_address(sc); /* * Step 5: Assign a Physical address to the TBI so as to not conflict * with the external PHY physical address */ TSEC_WRITE(sc, TSEC_REG_TBIPA, 5); TSEC_PHY_LOCK(sc); /* Step 6: Reset the management interface */ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_RESETMGMT); /* Step 7: Setup the MII Mgmt clock speed */ TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCFG, TSEC_MIIMCFG_CLKDIV28); /* Step 8: Read MII Mgmt indicator register and check for Busy = 0 */ timeout = tsec_mii_wait(sc, TSEC_MIIMIND_BUSY); TSEC_PHY_UNLOCK(sc); if (timeout) { if_printf(ifp, "tsec_init_locked(): Mgmt busy timeout\n"); return; } /* Step 9: Setup the MII Mgmt */ mii_mediachg(sc->tsec_mii); /* Step 10: Clear IEVENT register */ TSEC_WRITE(sc, TSEC_REG_IEVENT, 0xffffffff); /* Step 11: Enable interrupts */ #ifdef DEVICE_POLLING /* * ...only if polling is not turned on. Disable interrupts explicitly * if polling is enabled. */ if (ifp->if_capenable & IFCAP_POLLING ) tsec_intrs_ctl(sc, 0); else #endif /* DEVICE_POLLING */ tsec_intrs_ctl(sc, 1); /* Step 12: Initialize IADDRn */ TSEC_WRITE(sc, TSEC_REG_IADDR0, 0); TSEC_WRITE(sc, TSEC_REG_IADDR1, 0); TSEC_WRITE(sc, TSEC_REG_IADDR2, 0); TSEC_WRITE(sc, TSEC_REG_IADDR3, 0); TSEC_WRITE(sc, TSEC_REG_IADDR4, 0); TSEC_WRITE(sc, TSEC_REG_IADDR5, 0); TSEC_WRITE(sc, TSEC_REG_IADDR6, 0); TSEC_WRITE(sc, TSEC_REG_IADDR7, 0); /* Step 13: Initialize GADDRn */ TSEC_WRITE(sc, TSEC_REG_GADDR0, 0); TSEC_WRITE(sc, TSEC_REG_GADDR1, 0); TSEC_WRITE(sc, TSEC_REG_GADDR2, 0); TSEC_WRITE(sc, TSEC_REG_GADDR3, 0); TSEC_WRITE(sc, TSEC_REG_GADDR4, 0); TSEC_WRITE(sc, TSEC_REG_GADDR5, 0); TSEC_WRITE(sc, TSEC_REG_GADDR6, 0); TSEC_WRITE(sc, TSEC_REG_GADDR7, 0); /* Step 14: Initialize RCTRL */ TSEC_WRITE(sc, TSEC_REG_RCTRL, 0); /* Step 15: Initialize DMACTRL */ tsec_dma_ctl(sc, 1); /* Step 16: Initialize FIFO_PAUSE_CTRL */ TSEC_WRITE(sc, TSEC_REG_FIFO_PAUSE_CTRL, TSEC_FIFO_PAUSE_CTRL_EN); /* * Step 17: Initialize transmit/receive descriptor rings. * Initialize TBASE and RBASE. */ TSEC_WRITE(sc, TSEC_REG_TBASE, sc->tsec_tx_raddr); TSEC_WRITE(sc, TSEC_REG_RBASE, sc->tsec_rx_raddr); for (i = 0; i < TSEC_TX_NUM_DESC; i++) { tx_desc[i].bufptr = 0; tx_desc[i].length = 0; tx_desc[i].flags = ((i == TSEC_TX_NUM_DESC - 1) ? TSEC_TXBD_W : 0); } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); for (i = 0; i < TSEC_RX_NUM_DESC; i++) { rx_desc[i].bufptr = sc->rx_data[i].paddr; rx_desc[i].length = 0; rx_desc[i].flags = TSEC_RXBD_E | TSEC_RXBD_I | ((i == TSEC_RX_NUM_DESC - 1) ? TSEC_RXBD_W : 0); } bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Step 18: Initialize the maximum receive buffer length */ TSEC_WRITE(sc, TSEC_REG_MRBLR, MCLBYTES); /* Step 19: Configure ethernet frame sizes */ TSEC_WRITE(sc, TSEC_REG_MINFLR, TSEC_MIN_FRAME_SIZE); tsec_set_mtu(sc, ifp->if_mtu); /* Step 20: Enable Rx and RxBD sdata snooping */ TSEC_WRITE(sc, TSEC_REG_ATTR, TSEC_ATTR_RDSEN | TSEC_ATTR_RBDSEN); TSEC_WRITE(sc, TSEC_REG_ATTRELI, 0); /* Step 21: Reset collision counters in hardware */ TSEC_WRITE(sc, TSEC_REG_MON_TSCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TMCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TLCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TXCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TNCL, 0); /* Step 22: Mask all CAM interrupts */ TSEC_WRITE(sc, TSEC_REG_MON_CAM1, 0xffffffff); TSEC_WRITE(sc, TSEC_REG_MON_CAM2, 0xffffffff); /* Step 23: Enable Rx and Tx */ val = TSEC_READ(sc, TSEC_REG_MACCFG1); val |= (TSEC_MACCFG1_RX_EN | TSEC_MACCFG1_TX_EN); TSEC_WRITE(sc, TSEC_REG_MACCFG1, val); /* Step 24: Reset TSEC counters for Tx and Rx rings */ TSEC_TX_RX_COUNTERS_INIT(sc); /* Step 25: Setup TCP/IP Off-Load engine */ if (sc->is_etsec) tsec_offload_setup(sc); /* Step 26: Setup multicast filters */ tsec_setup_multicast(sc); /* Step 27: Activate network interface */ ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; sc->tsec_if_flags = ifp->if_flags; sc->tsec_watchdog = 0; /* Schedule watchdog timeout */ callout_reset(&sc->tsec_callout, hz, tsec_tick, sc); } static void tsec_set_mac_address(struct tsec_softc *sc) { uint32_t macbuf[2] = { 0, 0 }; char *macbufp, *curmac; int i; TSEC_GLOBAL_LOCK_ASSERT(sc); KASSERT((ETHER_ADDR_LEN <= sizeof(macbuf)), ("tsec_set_mac_address: (%d <= %zd", ETHER_ADDR_LEN, sizeof(macbuf))); macbufp = (char *)macbuf; curmac = (char *)IF_LLADDR(sc->tsec_ifp); /* Correct order of MAC address bytes */ for (i = 1; i <= ETHER_ADDR_LEN; i++) macbufp[ETHER_ADDR_LEN-i] = curmac[i-1]; /* Initialize MAC station address MACSTNADDR2 and MACSTNADDR1 */ TSEC_WRITE(sc, TSEC_REG_MACSTNADDR2, macbuf[1]); TSEC_WRITE(sc, TSEC_REG_MACSTNADDR1, macbuf[0]); } /* * DMA control function, if argument state is: * 0 - DMA engine will be disabled * 1 - DMA engine will be enabled */ static void tsec_dma_ctl(struct tsec_softc *sc, int state) { device_t dev; uint32_t dma_flags, timeout; dev = sc->dev; dma_flags = TSEC_READ(sc, TSEC_REG_DMACTRL); switch (state) { case 0: /* Temporarily clear stop graceful stop bits. */ tsec_dma_ctl(sc, 1000); /* Set it again */ dma_flags |= (TSEC_DMACTRL_GRS | TSEC_DMACTRL_GTS); break; case 1000: case 1: /* Set write with response (WWR), wait (WOP) and snoop bits */ dma_flags |= (TSEC_DMACTRL_TDSEN | TSEC_DMACTRL_TBDSEN | DMACTRL_WWR | DMACTRL_WOP); /* Clear graceful stop bits */ dma_flags &= ~(TSEC_DMACTRL_GRS | TSEC_DMACTRL_GTS); break; default: device_printf(dev, "tsec_dma_ctl(): unknown state value: %d\n", state); } TSEC_WRITE(sc, TSEC_REG_DMACTRL, dma_flags); switch (state) { case 0: /* Wait for DMA stop */ timeout = TSEC_READ_RETRY; while (--timeout && (!(TSEC_READ(sc, TSEC_REG_IEVENT) & (TSEC_IEVENT_GRSC | TSEC_IEVENT_GTSC)))) DELAY(TSEC_READ_DELAY); if (timeout == 0) device_printf(dev, "tsec_dma_ctl(): timeout!\n"); break; case 1: /* Restart transmission function */ TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); } } /* * Interrupts control function, if argument state is: * 0 - all TSEC interrupts will be masked * 1 - all TSEC interrupts will be unmasked */ static void tsec_intrs_ctl(struct tsec_softc *sc, int state) { device_t dev; dev = sc->dev; switch (state) { case 0: TSEC_WRITE(sc, TSEC_REG_IMASK, 0); break; case 1: TSEC_WRITE(sc, TSEC_REG_IMASK, TSEC_IMASK_BREN | TSEC_IMASK_RXCEN | TSEC_IMASK_BSYEN | TSEC_IMASK_EBERREN | TSEC_IMASK_BTEN | TSEC_IMASK_TXEEN | TSEC_IMASK_TXBEN | TSEC_IMASK_TXFEN | TSEC_IMASK_XFUNEN | TSEC_IMASK_RXFEN); break; default: device_printf(dev, "tsec_intrs_ctl(): unknown state value: %d\n", state); } } static void tsec_reset_mac(struct tsec_softc *sc) { uint32_t maccfg1_flags; /* Set soft reset bit */ maccfg1_flags = TSEC_READ(sc, TSEC_REG_MACCFG1); maccfg1_flags |= TSEC_MACCFG1_SOFT_RESET; TSEC_WRITE(sc, TSEC_REG_MACCFG1, maccfg1_flags); /* Clear soft reset bit */ maccfg1_flags = TSEC_READ(sc, TSEC_REG_MACCFG1); maccfg1_flags &= ~TSEC_MACCFG1_SOFT_RESET; TSEC_WRITE(sc, TSEC_REG_MACCFG1, maccfg1_flags); } static void tsec_watchdog(struct tsec_softc *sc) { struct ifnet *ifp; TSEC_GLOBAL_LOCK_ASSERT(sc); if (sc->tsec_watchdog == 0 || --sc->tsec_watchdog > 0) return; ifp = sc->tsec_ifp; if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if_printf(ifp, "watchdog timeout\n"); tsec_stop(sc); tsec_init_locked(sc); } static void tsec_start(struct ifnet *ifp) { struct tsec_softc *sc = ifp->if_softc; TSEC_TRANSMIT_LOCK(sc); tsec_start_locked(ifp); TSEC_TRANSMIT_UNLOCK(sc); } static void tsec_start_locked(struct ifnet *ifp) { struct tsec_softc *sc; struct mbuf *m0; struct tsec_tx_fcb *tx_fcb; int csum_flags; int start_tx; uint16_t fcb_flags; sc = ifp->if_softc; start_tx = 0; TSEC_TRANSMIT_LOCK_ASSERT(sc); if (sc->tsec_link == 0) return; bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (;;) { if (TSEC_FREE_TX_DESC(sc) < TSEC_TX_MAX_DMA_SEGS) { /* No free descriptors */ ifp->if_drv_flags |= IFF_DRV_OACTIVE; break; } /* Get packet from the queue */ IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; /* Insert TCP/IP Off-load frame control block */ fcb_flags = 0; csum_flags = m0->m_pkthdr.csum_flags; if (csum_flags) { M_PREPEND(m0, sizeof(struct tsec_tx_fcb), M_NOWAIT); if (m0 == NULL) break; if (csum_flags & CSUM_IP) fcb_flags |= TSEC_TX_FCB_IP4 | TSEC_TX_FCB_CSUM_IP; if (csum_flags & CSUM_TCP) fcb_flags |= TSEC_TX_FCB_TCP | TSEC_TX_FCB_CSUM_TCP_UDP; if (csum_flags & CSUM_UDP) fcb_flags |= TSEC_TX_FCB_UDP | TSEC_TX_FCB_CSUM_TCP_UDP; tx_fcb = mtod(m0, struct tsec_tx_fcb *); tx_fcb->flags = fcb_flags; tx_fcb->l3_offset = ETHER_HDR_LEN; tx_fcb->l4_offset = sizeof(struct ip); } tsec_encap(ifp, sc, m0, fcb_flags, &start_tx); } bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (start_tx) { /* Enable transmitter and watchdog timer */ TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); sc->tsec_watchdog = 5; } } static void tsec_encap(struct ifnet *ifp, struct tsec_softc *sc, struct mbuf *m0, uint16_t fcb_flags, int *start_tx) { bus_dma_segment_t segs[TSEC_TX_MAX_DMA_SEGS]; int error, i, nsegs; struct tsec_bufmap *tx_bufmap; uint32_t tx_idx; uint16_t flags; TSEC_TRANSMIT_LOCK_ASSERT(sc); tx_idx = sc->tx_idx_head; tx_bufmap = &sc->tx_bufmap[tx_idx]; /* Create mapping in DMA memory */ error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, tx_bufmap->map, m0, segs, &nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { /* Too many segments! Defrag and try again. */ struct mbuf *m = m_defrag(m0, M_NOWAIT); if (m == NULL) { m_freem(m0); return; } m0 = m; error = bus_dmamap_load_mbuf_sg(sc->tsec_tx_mtag, tx_bufmap->map, m0, segs, &nsegs, BUS_DMA_NOWAIT); } if (error != 0) { /* Give up. */ m_freem(m0); return; } bus_dmamap_sync(sc->tsec_tx_mtag, tx_bufmap->map, BUS_DMASYNC_PREWRITE); tx_bufmap->mbuf = m0; /* * Fill in the TX descriptors back to front so that READY bit in first * descriptor is set last. */ tx_idx = (tx_idx + (uint32_t)nsegs) & (TSEC_TX_NUM_DESC - 1); sc->tx_idx_head = tx_idx; flags = TSEC_TXBD_L | TSEC_TXBD_I | TSEC_TXBD_R | TSEC_TXBD_TC; for (i = nsegs - 1; i >= 0; i--) { struct tsec_desc *tx_desc; tx_idx = (tx_idx - 1) & (TSEC_TX_NUM_DESC - 1); tx_desc = &sc->tsec_tx_vaddr[tx_idx]; tx_desc->length = segs[i].ds_len; tx_desc->bufptr = segs[i].ds_addr; if (i == 0) { wmb(); if (fcb_flags != 0) flags |= TSEC_TXBD_TOE; } /* * Set flags: * - wrap * - checksum * - ready to send * - transmit the CRC sequence after the last data byte * - interrupt after the last buffer */ tx_desc->flags = (tx_idx == (TSEC_TX_NUM_DESC - 1) ? TSEC_TXBD_W : 0) | flags; flags &= ~(TSEC_TXBD_L | TSEC_TXBD_I); } BPF_MTAP(ifp, m0); *start_tx = 1; } static void tsec_setfilter(struct tsec_softc *sc) { struct ifnet *ifp; uint32_t flags; ifp = sc->tsec_ifp; flags = TSEC_READ(sc, TSEC_REG_RCTRL); /* Promiscuous mode */ if (ifp->if_flags & IFF_PROMISC) flags |= TSEC_RCTRL_PROM; else flags &= ~TSEC_RCTRL_PROM; TSEC_WRITE(sc, TSEC_REG_RCTRL, flags); } #ifdef DEVICE_POLLING static poll_handler_t tsec_poll; static int tsec_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) { uint32_t ie; struct tsec_softc *sc = ifp->if_softc; int rx_npkts; rx_npkts = 0; TSEC_GLOBAL_LOCK(sc); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { TSEC_GLOBAL_UNLOCK(sc); return (rx_npkts); } if (cmd == POLL_AND_CHECK_STATUS) { tsec_error_intr_locked(sc, count); /* Clear all events reported */ ie = TSEC_READ(sc, TSEC_REG_IEVENT); TSEC_WRITE(sc, TSEC_REG_IEVENT, ie); } tsec_transmit_intr_locked(sc); TSEC_GLOBAL_TO_RECEIVE_LOCK(sc); rx_npkts = tsec_receive_intr_locked(sc, count); TSEC_RECEIVE_UNLOCK(sc); return (rx_npkts); } #endif /* DEVICE_POLLING */ static int tsec_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct tsec_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int mask, error = 0; switch (command) { case SIOCSIFMTU: TSEC_GLOBAL_LOCK(sc); if (tsec_set_mtu(sc, ifr->ifr_mtu)) ifp->if_mtu = ifr->ifr_mtu; else error = EINVAL; TSEC_GLOBAL_UNLOCK(sc); break; case SIOCSIFFLAGS: TSEC_GLOBAL_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((sc->tsec_if_flags ^ ifp->if_flags) & IFF_PROMISC) tsec_setfilter(sc); if ((sc->tsec_if_flags ^ ifp->if_flags) & IFF_ALLMULTI) tsec_setup_multicast(sc); } else tsec_init_locked(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) tsec_stop(sc); sc->tsec_if_flags = ifp->if_flags; TSEC_GLOBAL_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) { TSEC_GLOBAL_LOCK(sc); tsec_setup_multicast(sc); TSEC_GLOBAL_UNLOCK(sc); } case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->tsec_mii->mii_media, command); break; case SIOCSIFCAP: mask = ifp->if_capenable ^ ifr->ifr_reqcap; if ((mask & IFCAP_HWCSUM) && sc->is_etsec) { TSEC_GLOBAL_LOCK(sc); ifp->if_capenable &= ~IFCAP_HWCSUM; ifp->if_capenable |= IFCAP_HWCSUM & ifr->ifr_reqcap; tsec_offload_setup(sc); TSEC_GLOBAL_UNLOCK(sc); } #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(tsec_poll, ifp); if (error) return (error); TSEC_GLOBAL_LOCK(sc); /* Disable interrupts */ tsec_intrs_ctl(sc, 0); ifp->if_capenable |= IFCAP_POLLING; TSEC_GLOBAL_UNLOCK(sc); } else { error = ether_poll_deregister(ifp); TSEC_GLOBAL_LOCK(sc); /* Enable interrupts */ tsec_intrs_ctl(sc, 1); ifp->if_capenable &= ~IFCAP_POLLING; TSEC_GLOBAL_UNLOCK(sc); } } #endif break; default: error = ether_ioctl(ifp, command, data); } /* Flush buffers if not empty */ if (ifp->if_flags & IFF_UP) tsec_start(ifp); return (error); } static int tsec_ifmedia_upd(struct ifnet *ifp) { struct tsec_softc *sc = ifp->if_softc; struct mii_data *mii; TSEC_TRANSMIT_LOCK(sc); mii = sc->tsec_mii; mii_mediachg(mii); TSEC_TRANSMIT_UNLOCK(sc); return (0); } static void tsec_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct tsec_softc *sc = ifp->if_softc; struct mii_data *mii; TSEC_TRANSMIT_LOCK(sc); mii = sc->tsec_mii; mii_pollstat(mii); ifmr->ifm_active = mii->mii_media_active; ifmr->ifm_status = mii->mii_media_status; TSEC_TRANSMIT_UNLOCK(sc); } static int tsec_new_rxbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mbufp, uint32_t *paddr) { struct mbuf *new_mbuf; bus_dma_segment_t seg[1]; int error, nsegs; KASSERT(mbufp != NULL, ("NULL mbuf pointer!")); new_mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MCLBYTES); if (new_mbuf == NULL) return (ENOBUFS); new_mbuf->m_len = new_mbuf->m_pkthdr.len = new_mbuf->m_ext.ext_size; if (*mbufp) { bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(tag, map); } error = bus_dmamap_load_mbuf_sg(tag, map, new_mbuf, seg, &nsegs, BUS_DMA_NOWAIT); KASSERT(nsegs == 1, ("Too many segments returned!")); if (nsegs != 1 || error) panic("tsec_new_rxbuf(): nsegs(%d), error(%d)", nsegs, error); #if 0 if (error) { printf("tsec: bus_dmamap_load_mbuf_sg() returned: %d!\n", error); m_freem(new_mbuf); return (ENOBUFS); } #endif #if 0 KASSERT(((seg->ds_addr) & (TSEC_RXBUFFER_ALIGNMENT-1)) == 0, ("Wrong alignment of RX buffer!")); #endif bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD); (*mbufp) = new_mbuf; (*paddr) = seg->ds_addr; return (0); } static void tsec_map_dma_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { u_int32_t *paddr; KASSERT(nseg == 1, ("wrong number of segments, should be 1")); paddr = arg; *paddr = segs->ds_addr; } static int tsec_alloc_dma_desc(device_t dev, bus_dma_tag_t *dtag, bus_dmamap_t *dmap, bus_size_t dsize, void **vaddr, void *raddr, const char *dname) { int error; /* Allocate a busdma tag and DMA safe memory for TX/RX descriptors. */ error = bus_dma_tag_create(NULL, /* parent */ PAGE_SIZE, 0, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filtfunc, filtfuncarg */ dsize, 1, /* maxsize, nsegments */ dsize, 0, /* maxsegsz, flags */ NULL, NULL, /* lockfunc, lockfuncarg */ dtag); /* dmat */ if (error) { device_printf(dev, "failed to allocate busdma %s tag\n", dname); (*vaddr) = NULL; return (ENXIO); } error = bus_dmamem_alloc(*dtag, vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO, dmap); if (error) { device_printf(dev, "failed to allocate %s DMA safe memory\n", dname); bus_dma_tag_destroy(*dtag); (*vaddr) = NULL; return (ENXIO); } error = bus_dmamap_load(*dtag, *dmap, *vaddr, dsize, tsec_map_dma_addr, raddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "cannot get address of the %s " "descriptors\n", dname); bus_dmamem_free(*dtag, *vaddr, *dmap); bus_dma_tag_destroy(*dtag); (*vaddr) = NULL; return (ENXIO); } return (0); } static void tsec_free_dma_desc(bus_dma_tag_t dtag, bus_dmamap_t dmap, void *vaddr) { if (vaddr == NULL) return; /* Unmap descriptors from DMA memory */ bus_dmamap_sync(dtag, dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dtag, dmap); /* Free descriptors memory */ bus_dmamem_free(dtag, vaddr, dmap); /* Destroy descriptors tag */ bus_dma_tag_destroy(dtag); } static void tsec_free_dma(struct tsec_softc *sc) { int i; /* Free TX maps */ for (i = 0; i < TSEC_TX_NUM_DESC; i++) if (sc->tx_bufmap[i].map_initialized) bus_dmamap_destroy(sc->tsec_tx_mtag, sc->tx_bufmap[i].map); /* Destroy tag for TX mbufs */ bus_dma_tag_destroy(sc->tsec_tx_mtag); /* Free RX mbufs and maps */ for (i = 0; i < TSEC_RX_NUM_DESC; i++) { if (sc->rx_data[i].mbuf) { /* Unload buffer from DMA */ bus_dmamap_sync(sc->tsec_rx_mtag, sc->rx_data[i].map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->tsec_rx_mtag, sc->rx_data[i].map); /* Free buffer */ m_freem(sc->rx_data[i].mbuf); } /* Destroy map for this buffer */ if (sc->rx_data[i].map != NULL) bus_dmamap_destroy(sc->tsec_rx_mtag, sc->rx_data[i].map); } /* Destroy tag for RX mbufs */ bus_dma_tag_destroy(sc->tsec_rx_mtag); /* Unload TX/RX descriptors */ tsec_free_dma_desc(sc->tsec_tx_dtag, sc->tsec_tx_dmap, sc->tsec_tx_vaddr); tsec_free_dma_desc(sc->tsec_rx_dtag, sc->tsec_rx_dmap, sc->tsec_rx_vaddr); } static void tsec_stop(struct tsec_softc *sc) { struct ifnet *ifp; uint32_t tmpval; TSEC_GLOBAL_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; /* Disable interface and watchdog timer */ callout_stop(&sc->tsec_callout); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->tsec_watchdog = 0; /* Disable all interrupts and stop DMA */ tsec_intrs_ctl(sc, 0); tsec_dma_ctl(sc, 0); /* Remove pending data from TX queue */ while (sc->tx_idx_tail != sc->tx_idx_head) { bus_dmamap_sync(sc->tsec_tx_mtag, sc->tx_bufmap[sc->tx_idx_tail].map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->tsec_tx_mtag, sc->tx_bufmap[sc->tx_idx_tail].map); m_freem(sc->tx_bufmap[sc->tx_idx_tail].mbuf); sc->tx_idx_tail = (sc->tx_idx_tail + 1) & (TSEC_TX_NUM_DESC - 1); } /* Disable RX and TX */ tmpval = TSEC_READ(sc, TSEC_REG_MACCFG1); tmpval &= ~(TSEC_MACCFG1_RX_EN | TSEC_MACCFG1_TX_EN); TSEC_WRITE(sc, TSEC_REG_MACCFG1, tmpval); DELAY(10); } static void tsec_tick(void *arg) { struct tsec_softc *sc = arg; struct ifnet *ifp; int link; TSEC_GLOBAL_LOCK(sc); tsec_watchdog(sc); ifp = sc->tsec_ifp; link = sc->tsec_link; mii_tick(sc->tsec_mii); if (link == 0 && sc->tsec_link == 1 && (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))) tsec_start_locked(ifp); /* Schedule another timeout one second from now. */ callout_reset(&sc->tsec_callout, hz, tsec_tick, sc); TSEC_GLOBAL_UNLOCK(sc); } /* * This is the core RX routine. It replenishes mbufs in the descriptor and * sends data which have been dma'ed into host memory to upper layer. * * Loops at most count times if count is > 0, or until done if count < 0. */ static int tsec_receive_intr_locked(struct tsec_softc *sc, int count) { struct tsec_desc *rx_desc; struct ifnet *ifp; struct rx_data_type *rx_data; struct mbuf *m; uint32_t i; int c, rx_npkts; uint16_t flags; TSEC_RECEIVE_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; rx_data = sc->rx_data; rx_npkts = 0; bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); for (c = 0; ; c++) { if (count >= 0 && count-- == 0) break; rx_desc = TSEC_GET_CUR_RX_DESC(sc); flags = rx_desc->flags; /* Check if there is anything to receive */ if ((flags & TSEC_RXBD_E) || (c >= TSEC_RX_NUM_DESC)) { /* * Avoid generating another interrupt */ if (flags & TSEC_RXBD_E) TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXB | TSEC_IEVENT_RXF); /* * We didn't consume current descriptor and have to * return it to the queue */ TSEC_BACK_CUR_RX_DESC(sc); break; } if (flags & (TSEC_RXBD_LG | TSEC_RXBD_SH | TSEC_RXBD_NO | TSEC_RXBD_CR | TSEC_RXBD_OV | TSEC_RXBD_TR)) { rx_desc->length = 0; rx_desc->flags = (rx_desc->flags & ~TSEC_RXBD_ZEROONINIT) | TSEC_RXBD_E | TSEC_RXBD_I; if (sc->frame != NULL) { m_free(sc->frame); sc->frame = NULL; } continue; } /* Ok... process frame */ i = TSEC_GET_CUR_RX_DESC_CNT(sc); m = rx_data[i].mbuf; m->m_len = rx_desc->length; if (sc->frame != NULL) { if ((flags & TSEC_RXBD_L) != 0) m->m_len -= m_length(sc->frame, NULL); m->m_flags &= ~M_PKTHDR; m_cat(sc->frame, m); } else { sc->frame = m; } m = NULL; if ((flags & TSEC_RXBD_L) != 0) { m = sc->frame; sc->frame = NULL; } if (tsec_new_rxbuf(sc->tsec_rx_mtag, rx_data[i].map, &rx_data[i].mbuf, &rx_data[i].paddr)) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); /* * We ran out of mbufs; didn't consume current * descriptor and have to return it to the queue. */ TSEC_BACK_CUR_RX_DESC(sc); break; } /* Attach new buffer to descriptor and clear flags */ rx_desc->bufptr = rx_data[i].paddr; rx_desc->length = 0; rx_desc->flags = (rx_desc->flags & ~TSEC_RXBD_ZEROONINIT) | TSEC_RXBD_E | TSEC_RXBD_I; if (m != NULL) { m->m_pkthdr.rcvif = ifp; m_fixhdr(m); m_adj(m, -ETHER_CRC_LEN); if (sc->is_etsec) tsec_offload_process_frame(sc, m); TSEC_RECEIVE_UNLOCK(sc); (*ifp->if_input)(ifp, m); TSEC_RECEIVE_LOCK(sc); rx_npkts++; } } bus_dmamap_sync(sc->tsec_rx_dtag, sc->tsec_rx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * Make sure TSEC receiver is not halted. * * Various conditions can stop the TSEC receiver, but not all are * signaled and handled by error interrupt, so make sure the receiver * is running. Writing to TSEC_REG_RSTAT restarts the receiver when * halted, and is harmless if already running. */ TSEC_WRITE(sc, TSEC_REG_RSTAT, TSEC_RSTAT_QHLT); return (rx_npkts); } void tsec_receive_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_RECEIVE_LOCK(sc); #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) { TSEC_RECEIVE_UNLOCK(sc); return; } #endif /* Confirm the interrupt was received by driver */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXB | TSEC_IEVENT_RXF); tsec_receive_intr_locked(sc, -1); TSEC_RECEIVE_UNLOCK(sc); } static void tsec_transmit_intr_locked(struct tsec_softc *sc) { struct ifnet *ifp; uint32_t tx_idx; TSEC_TRANSMIT_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; /* Update collision statistics */ if_inc_counter(ifp, IFCOUNTER_COLLISIONS, TSEC_READ(sc, TSEC_REG_MON_TNCL)); /* Reset collision counters in hardware */ TSEC_WRITE(sc, TSEC_REG_MON_TSCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TMCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TLCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TXCL, 0); TSEC_WRITE(sc, TSEC_REG_MON_TNCL, 0); bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); tx_idx = sc->tx_idx_tail; while (tx_idx != sc->tx_idx_head) { struct tsec_desc *tx_desc; struct tsec_bufmap *tx_bufmap; tx_desc = &sc->tsec_tx_vaddr[tx_idx]; if (tx_desc->flags & TSEC_TXBD_R) { break; } tx_bufmap = &sc->tx_bufmap[tx_idx]; tx_idx = (tx_idx + 1) & (TSEC_TX_NUM_DESC - 1); if (tx_bufmap->mbuf == NULL) continue; /* * This is the last buf in this packet, so unmap and free it. */ bus_dmamap_sync(sc->tsec_tx_mtag, tx_bufmap->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->tsec_tx_mtag, tx_bufmap->map); m_freem(tx_bufmap->mbuf); tx_bufmap->mbuf = NULL; if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); } sc->tx_idx_tail = tx_idx; bus_dmamap_sync(sc->tsec_tx_dtag, sc->tsec_tx_dmap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; tsec_start_locked(ifp); if (sc->tx_idx_tail == sc->tx_idx_head) sc->tsec_watchdog = 0; } void tsec_transmit_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_TRANSMIT_LOCK(sc); #ifdef DEVICE_POLLING if (sc->tsec_ifp->if_capenable & IFCAP_POLLING) { TSEC_TRANSMIT_UNLOCK(sc); return; } #endif /* Confirm the interrupt was received by driver */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_TXB | TSEC_IEVENT_TXF); tsec_transmit_intr_locked(sc); TSEC_TRANSMIT_UNLOCK(sc); } static void tsec_error_intr_locked(struct tsec_softc *sc, int count) { struct ifnet *ifp; uint32_t eflags; TSEC_GLOBAL_LOCK_ASSERT(sc); ifp = sc->tsec_ifp; eflags = TSEC_READ(sc, TSEC_REG_IEVENT); /* Clear events bits in hardware */ TSEC_WRITE(sc, TSEC_REG_IEVENT, TSEC_IEVENT_RXC | TSEC_IEVENT_BSY | TSEC_IEVENT_EBERR | TSEC_IEVENT_MSRO | TSEC_IEVENT_BABT | TSEC_IEVENT_TXC | TSEC_IEVENT_TXE | TSEC_IEVENT_LC | TSEC_IEVENT_CRL | TSEC_IEVENT_XFUN); /* Check transmitter errors */ if (eflags & TSEC_IEVENT_TXE) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (eflags & TSEC_IEVENT_LC) if_inc_counter(ifp, IFCOUNTER_COLLISIONS, 1); TSEC_WRITE(sc, TSEC_REG_TSTAT, TSEC_TSTAT_THLT); } /* Check for discarded frame due to a lack of buffers */ if (eflags & TSEC_IEVENT_BSY) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } if (ifp->if_flags & IFF_DEBUG) if_printf(ifp, "tsec_error_intr(): event flags: 0x%x\n", eflags); if (eflags & TSEC_IEVENT_EBERR) { if_printf(ifp, "System bus error occurred during" "DMA transaction (flags: 0x%x)\n", eflags); tsec_init_locked(sc); } if (eflags & TSEC_IEVENT_BABT) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (eflags & TSEC_IEVENT_BABR) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } void tsec_error_intr(void *arg) { struct tsec_softc *sc = arg; TSEC_GLOBAL_LOCK(sc); tsec_error_intr_locked(sc, -1); TSEC_GLOBAL_UNLOCK(sc); } int tsec_miibus_readreg(device_t dev, int phy, int reg) { struct tsec_softc *sc; int timeout; int rv; sc = device_get_softc(dev); TSEC_PHY_LOCK(); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, 0); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCOM, TSEC_MIIMCOM_READCYCLE); timeout = tsec_mii_wait(sc, TSEC_MIIMIND_NOTVALID | TSEC_MIIMIND_BUSY); rv = TSEC_PHY_READ(sc, TSEC_REG_MIIMSTAT); TSEC_PHY_UNLOCK(); if (timeout) device_printf(dev, "Timeout while reading from PHY!\n"); return (rv); } int tsec_miibus_writereg(device_t dev, int phy, int reg, int value) { struct tsec_softc *sc; int timeout; sc = device_get_softc(dev); TSEC_PHY_LOCK(); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMADD, (phy << 8) | reg); TSEC_PHY_WRITE(sc, TSEC_REG_MIIMCON, value); timeout = tsec_mii_wait(sc, TSEC_MIIMIND_BUSY); TSEC_PHY_UNLOCK(); if (timeout) device_printf(dev, "Timeout while writing to PHY!\n"); return (0); } void tsec_miibus_statchg(device_t dev) { struct tsec_softc *sc; struct mii_data *mii; uint32_t ecntrl, id, tmp; int link; sc = device_get_softc(dev); mii = sc->tsec_mii; link = ((mii->mii_media_status & IFM_ACTIVE) ? 1 : 0); tmp = TSEC_READ(sc, TSEC_REG_MACCFG2) & ~TSEC_MACCFG2_IF; if ((mii->mii_media_active & IFM_GMASK) == IFM_FDX) tmp |= TSEC_MACCFG2_FULLDUPLEX; else tmp &= ~TSEC_MACCFG2_FULLDUPLEX; switch (IFM_SUBTYPE(mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: tmp |= TSEC_MACCFG2_GMII; sc->tsec_link = link; break; case IFM_100_TX: case IFM_10_T: tmp |= TSEC_MACCFG2_MII; sc->tsec_link = link; break; case IFM_NONE: if (link) device_printf(dev, "No speed selected but link " "active!\n"); sc->tsec_link = 0; return; default: sc->tsec_link = 0; device_printf(dev, "Unknown speed (%d), link %s!\n", IFM_SUBTYPE(mii->mii_media_active), ((link) ? "up" : "down")); return; } TSEC_WRITE(sc, TSEC_REG_MACCFG2, tmp); /* XXX kludge - use circumstantial evidence for reduced mode. */ id = TSEC_READ(sc, TSEC_REG_ID2); if (id & 0xffff) { ecntrl = TSEC_READ(sc, TSEC_REG_ECNTRL) & ~TSEC_ECNTRL_R100M; ecntrl |= (tmp & TSEC_MACCFG2_MII) ? TSEC_ECNTRL_R100M : 0; TSEC_WRITE(sc, TSEC_REG_ECNTRL, ecntrl); } } static void tsec_add_sysctls(struct tsec_softc *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(sc->dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); tree = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "int_coal", CTLFLAG_RD, 0, "TSEC Interrupts coalescing"); children = SYSCTL_CHILDREN(tree); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rx_time", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_RX, tsec_sysctl_ic_time, "I", "IC RX time threshold (0-65535)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rx_count", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_RX, tsec_sysctl_ic_count, "I", "IC RX frame count threshold (0-255)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_time", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_TX, tsec_sysctl_ic_time, "I", "IC TX time threshold (0-65535)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_count", CTLTYPE_UINT | CTLFLAG_RW, sc, TSEC_IC_TX, tsec_sysctl_ic_count, "I", "IC TX frame count threshold (0-255)"); } /* * With Interrupt Coalescing (IC) active, a transmit/receive frame * interrupt is raised either upon: * * - threshold-defined period of time elapsed, or * - threshold-defined number of frames is received/transmitted, * whichever occurs first. * * The following sysctls regulate IC behaviour (for TX/RX separately): * * dev.tsec..int_coal.rx_time * dev.tsec..int_coal.rx_count * dev.tsec..int_coal.tx_time * dev.tsec..int_coal.tx_count * * Values: * * - 0 for either time or count disables IC on the given TX/RX path * * - count: 1-255 (expresses frame count number; note that value of 1 is * effectively IC off) * * - time: 1-65535 (value corresponds to a real time period and is * expressed in units equivalent to 64 TSEC interface clocks, i.e. one timer * threshold unit is 26.5 us, 2.56 us, or 512 ns, corresponding to 10 Mbps, * 100 Mbps, or 1Gbps, respectively. For detailed discussion consult the * TSEC reference manual. */ static int tsec_sysctl_ic_time(SYSCTL_HANDLER_ARGS) { int error; uint32_t time; struct tsec_softc *sc = (struct tsec_softc *)arg1; time = (arg2 == TSEC_IC_RX) ? sc->rx_ic_time : sc->tx_ic_time; error = sysctl_handle_int(oidp, &time, 0, req); if (error != 0) return (error); if (time > 65535) return (EINVAL); TSEC_IC_LOCK(sc); if (arg2 == TSEC_IC_RX) { sc->rx_ic_time = time; tsec_set_rxic(sc); } else { sc->tx_ic_time = time; tsec_set_txic(sc); } TSEC_IC_UNLOCK(sc); return (0); } static int tsec_sysctl_ic_count(SYSCTL_HANDLER_ARGS) { int error; uint32_t count; struct tsec_softc *sc = (struct tsec_softc *)arg1; count = (arg2 == TSEC_IC_RX) ? sc->rx_ic_count : sc->tx_ic_count; error = sysctl_handle_int(oidp, &count, 0, req); if (error != 0) return (error); if (count > 255) return (EINVAL); TSEC_IC_LOCK(sc); if (arg2 == TSEC_IC_RX) { sc->rx_ic_count = count; tsec_set_rxic(sc); } else { sc->tx_ic_count = count; tsec_set_txic(sc); } TSEC_IC_UNLOCK(sc); return (0); } static void tsec_set_rxic(struct tsec_softc *sc) { uint32_t rxic_val; if (sc->rx_ic_count == 0 || sc->rx_ic_time == 0) /* Disable RX IC */ rxic_val = 0; else { rxic_val = 0x80000000; rxic_val |= (sc->rx_ic_count << 21); rxic_val |= sc->rx_ic_time; } TSEC_WRITE(sc, TSEC_REG_RXIC, rxic_val); } static void tsec_set_txic(struct tsec_softc *sc) { uint32_t txic_val; if (sc->tx_ic_count == 0 || sc->tx_ic_time == 0) /* Disable TX IC */ txic_val = 0; else { txic_val = 0x80000000; txic_val |= (sc->tx_ic_count << 21); txic_val |= sc->tx_ic_time; } TSEC_WRITE(sc, TSEC_REG_TXIC, txic_val); } static void tsec_offload_setup(struct tsec_softc *sc) { struct ifnet *ifp = sc->tsec_ifp; uint32_t reg; TSEC_GLOBAL_LOCK_ASSERT(sc); reg = TSEC_READ(sc, TSEC_REG_TCTRL); reg |= TSEC_TCTRL_IPCSEN | TSEC_TCTRL_TUCSEN; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = TSEC_CHECKSUM_FEATURES; else ifp->if_hwassist = 0; TSEC_WRITE(sc, TSEC_REG_TCTRL, reg); reg = TSEC_READ(sc, TSEC_REG_RCTRL); reg &= ~(TSEC_RCTRL_IPCSEN | TSEC_RCTRL_TUCSEN | TSEC_RCTRL_PRSDEP); reg |= TSEC_RCTRL_PRSDEP_PARSE_L2 | TSEC_RCTRL_VLEX; if (ifp->if_capenable & IFCAP_RXCSUM) reg |= TSEC_RCTRL_IPCSEN | TSEC_RCTRL_TUCSEN | TSEC_RCTRL_PRSDEP_PARSE_L234; TSEC_WRITE(sc, TSEC_REG_RCTRL, reg); } static void tsec_offload_process_frame(struct tsec_softc *sc, struct mbuf *m) { struct tsec_rx_fcb rx_fcb; int csum_flags = 0; int protocol, flags; TSEC_RECEIVE_LOCK_ASSERT(sc); m_copydata(m, 0, sizeof(struct tsec_rx_fcb), (caddr_t)(&rx_fcb)); flags = rx_fcb.flags; protocol = rx_fcb.protocol; if (TSEC_RX_FCB_IP_CSUM_CHECKED(flags)) { csum_flags |= CSUM_IP_CHECKED; if ((flags & TSEC_RX_FCB_IP_CSUM_ERROR) == 0) csum_flags |= CSUM_IP_VALID; } if ((protocol == IPPROTO_TCP || protocol == IPPROTO_UDP) && TSEC_RX_FCB_TCP_UDP_CSUM_CHECKED(flags) && (flags & TSEC_RX_FCB_TCP_UDP_CSUM_ERROR) == 0) { csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; } m->m_pkthdr.csum_flags = csum_flags; if (flags & TSEC_RX_FCB_VLAN) { m->m_pkthdr.ether_vtag = rx_fcb.vlan; m->m_flags |= M_VLANTAG; } m_adj(m, sizeof(struct tsec_rx_fcb)); } static void tsec_setup_multicast(struct tsec_softc *sc) { uint32_t hashtable[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; struct ifnet *ifp = sc->tsec_ifp; struct ifmultiaddr *ifma; uint32_t h; int i; TSEC_GLOBAL_LOCK_ASSERT(sc); if (ifp->if_flags & IFF_ALLMULTI) { for (i = 0; i < 8; i++) TSEC_WRITE(sc, TSEC_REG_GADDR(i), 0xFFFFFFFF); return; } if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; h = (ether_crc32_be(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), ETHER_ADDR_LEN) >> 24) & 0xFF; hashtable[(h >> 5)] |= 1 << (0x1F - (h & 0x1F)); } if_maddr_runlock(ifp); for (i = 0; i < 8; i++) TSEC_WRITE(sc, TSEC_REG_GADDR(i), hashtable[i]); } static int tsec_set_mtu(struct tsec_softc *sc, unsigned int mtu) { mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; TSEC_GLOBAL_LOCK_ASSERT(sc); if (mtu >= TSEC_MIN_FRAME_SIZE && mtu <= TSEC_MAX_FRAME_SIZE) { TSEC_WRITE(sc, TSEC_REG_MAXFRM, mtu); return (mtu); } return (0); } Index: head/sys/dev/xdma/xdma.c =================================================================== --- head/sys/dev/xdma/xdma.c (revision 320527) +++ head/sys/dev/xdma/xdma.c (revision 320528) @@ -1,674 +1,673 @@ /*- * Copyright (c) 2016 Ruslan Bukin * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237 * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #ifdef FDT #include #include #include #endif #include #include MALLOC_DEFINE(M_XDMA, "xdma", "xDMA framework"); /* * Multiple xDMA controllers may work with single DMA device, * so we have global lock for physical channel management. */ static struct mtx xdma_mtx; #define XDMA_LOCK() mtx_lock(&xdma_mtx) #define XDMA_UNLOCK() mtx_unlock(&xdma_mtx) #define XDMA_ASSERT_LOCKED() mtx_assert(&xdma_mtx, MA_OWNED) /* * Per channel locks. */ #define XCHAN_LOCK(xchan) mtx_lock(&(xchan)->mtx_lock) #define XCHAN_UNLOCK(xchan) mtx_unlock(&(xchan)->mtx_lock) #define XCHAN_ASSERT_LOCKED(xchan) mtx_assert(&(xchan)->mtx_lock, MA_OWNED) /* * Allocate virtual xDMA channel. */ xdma_channel_t * xdma_channel_alloc(xdma_controller_t *xdma) { xdma_channel_t *xchan; int ret; xchan = malloc(sizeof(xdma_channel_t), M_XDMA, M_WAITOK | M_ZERO); if (xchan == NULL) { device_printf(xdma->dev, "%s: Can't allocate memory for channel.\n", __func__); return (NULL); } xchan->xdma = xdma; XDMA_LOCK(); /* Request a real channel from hardware driver. */ ret = XDMA_CHANNEL_ALLOC(xdma->dma_dev, xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't request hardware channel.\n", __func__); XDMA_UNLOCK(); free(xchan, M_XDMA); return (NULL); } TAILQ_INIT(&xchan->ie_handlers); mtx_init(&xchan->mtx_lock, "xDMA", NULL, MTX_DEF); TAILQ_INSERT_TAIL(&xdma->channels, xchan, xchan_next); XDMA_UNLOCK(); return (xchan); } int xdma_channel_free(xdma_channel_t *xchan) { xdma_controller_t *xdma; int err; xdma = xchan->xdma; XDMA_LOCK(); /* Free the real DMA channel. */ err = XDMA_CHANNEL_FREE(xdma->dma_dev, xchan); if (err != 0) { device_printf(xdma->dev, "%s: Can't free real hw channel.\n", __func__); XDMA_UNLOCK(); return (-1); } xdma_teardown_all_intr(xchan); /* Deallocate descriptors, if any. */ xdma_desc_free(xchan); mtx_destroy(&xchan->mtx_lock); TAILQ_REMOVE(&xdma->channels, xchan, xchan_next); free(xchan, M_XDMA); XDMA_UNLOCK(); return (0); } int xdma_setup_intr(xdma_channel_t *xchan, int (*cb)(void *), void *arg, void **ihandler) { struct xdma_intr_handler *ih; xdma_controller_t *xdma; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); /* Sanity check. */ if (cb == NULL) { device_printf(xdma->dev, "%s: Can't setup interrupt handler.\n", __func__); return (-1); } ih = malloc(sizeof(struct xdma_intr_handler), M_XDMA, M_WAITOK | M_ZERO); if (ih == NULL) { device_printf(xdma->dev, "%s: Can't allocate memory for interrupt handler.\n", __func__); return (-1); } ih->cb = cb; ih->cb_user = arg; TAILQ_INSERT_TAIL(&xchan->ie_handlers, ih, ih_next); if (ihandler != NULL) { *ihandler = ih; } return (0); } int xdma_teardown_intr(xdma_channel_t *xchan, struct xdma_intr_handler *ih) { xdma_controller_t *xdma; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); /* Sanity check. */ if (ih == NULL) { device_printf(xdma->dev, "%s: Can't teardown interrupt.\n", __func__); return (-1); } TAILQ_REMOVE(&xchan->ie_handlers, ih, ih_next); free(ih, M_XDMA); return (0); } int xdma_teardown_all_intr(xdma_channel_t *xchan) { struct xdma_intr_handler *ih_tmp; struct xdma_intr_handler *ih; xdma_controller_t *xdma; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); TAILQ_FOREACH_SAFE(ih, &xchan->ie_handlers, ih_next, ih_tmp) { TAILQ_REMOVE(&xchan->ie_handlers, ih, ih_next); free(ih, M_XDMA); } return (0); } static void xdma_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { xdma_channel_t *xchan; int i; xchan = (xdma_channel_t *)arg; KASSERT(xchan != NULL, ("xchan is NULL")); if (err) { xchan->map_err = 1; return; } for (i = 0; i < nseg; i++) { xchan->descs_phys[i].ds_addr = segs[i].ds_addr; xchan->descs_phys[i].ds_len = segs[i].ds_len; } } static int xdma_desc_alloc_bus_dma(xdma_channel_t *xchan, uint32_t desc_size, uint32_t align) { xdma_controller_t *xdma; bus_size_t all_desc_sz; xdma_config_t *conf; int nsegments; int err; xdma = xchan->xdma; conf = &xchan->conf; nsegments = conf->block_num; all_desc_sz = (nsegments * desc_size); err = bus_dma_tag_create( bus_get_dma_tag(xdma->dev), align, desc_size, /* alignment, boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ all_desc_sz, nsegments, /* maxsize, nsegments*/ desc_size, 0, /* maxsegsize, flags */ NULL, NULL, /* lockfunc, lockarg */ &xchan->dma_tag); if (err) { device_printf(xdma->dev, "%s: Can't create bus_dma tag.\n", __func__); return (-1); } err = bus_dmamem_alloc(xchan->dma_tag, (void **)&xchan->descs, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &xchan->dma_map); if (err) { device_printf(xdma->dev, "%s: Can't allocate memory for descriptors.\n", __func__); return (-1); } xchan->descs_phys = malloc(nsegments * sizeof(xdma_descriptor_t), M_XDMA, (M_WAITOK | M_ZERO)); xchan->map_err = 0; err = bus_dmamap_load(xchan->dma_tag, xchan->dma_map, xchan->descs, all_desc_sz, xdma_dmamap_cb, xchan, BUS_DMA_WAITOK); if (err) { device_printf(xdma->dev, "%s: Can't load DMA map.\n", __func__); return (-1); } if (xchan->map_err != 0) { device_printf(xdma->dev, "%s: Can't load DMA map.\n", __func__); return (-1); } return (0); } /* * This function called by DMA controller driver. */ int xdma_desc_alloc(xdma_channel_t *xchan, uint32_t desc_size, uint32_t align) { xdma_controller_t *xdma; xdma_config_t *conf; int ret; XCHAN_ASSERT_LOCKED(xchan); xdma = xchan->xdma; if (xdma == NULL) { device_printf(xdma->dev, "%s: Channel was not allocated properly.\n", __func__); return (-1); } if (xchan->flags & XCHAN_DESC_ALLOCATED) { device_printf(xdma->dev, "%s: Descriptors already allocated.\n", __func__); return (-1); } if ((xchan->flags & XCHAN_CONFIGURED) == 0) { device_printf(xdma->dev, "%s: Channel has no configuration.\n", __func__); return (-1); } conf = &xchan->conf; XCHAN_UNLOCK(xchan); ret = xdma_desc_alloc_bus_dma(xchan, desc_size, align); XCHAN_LOCK(xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't allocate memory for descriptors.\n", __func__); return (-1); } xchan->flags |= XCHAN_DESC_ALLOCATED; /* We are going to write to descriptors. */ bus_dmamap_sync(xchan->dma_tag, xchan->dma_map, BUS_DMASYNC_PREWRITE); return (0); } int xdma_desc_free(xdma_channel_t *xchan) { if ((xchan->flags & XCHAN_DESC_ALLOCATED) == 0) { /* No descriptors allocated. */ return (-1); } bus_dmamap_unload(xchan->dma_tag, xchan->dma_map); bus_dmamem_free(xchan->dma_tag, xchan->descs, xchan->dma_map); bus_dma_tag_destroy(xchan->dma_tag); free(xchan->descs_phys, M_XDMA); xchan->flags &= ~(XCHAN_DESC_ALLOCATED); return (0); } int xdma_prep_memcpy(xdma_channel_t *xchan, uintptr_t src_addr, uintptr_t dst_addr, size_t len) { xdma_controller_t *xdma; xdma_config_t *conf; int ret; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); conf = &xchan->conf; conf->direction = XDMA_MEM_TO_MEM; conf->src_addr = src_addr; conf->dst_addr = dst_addr; conf->block_len = len; conf->block_num = 1; xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_MEMCPY); XCHAN_LOCK(xchan); /* Deallocate old descriptors, if any. */ xdma_desc_free(xchan); ret = XDMA_CHANNEL_PREP_MEMCPY(xdma->dma_dev, xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't prepare memcpy transfer.\n", __func__); XCHAN_UNLOCK(xchan); return (-1); } if (xchan->flags & XCHAN_DESC_ALLOCATED) { /* Driver created xDMA descriptors. */ bus_dmamap_sync(xchan->dma_tag, xchan->dma_map, BUS_DMASYNC_POSTWRITE); } XCHAN_UNLOCK(xchan); return (0); } int xdma_prep_cyclic(xdma_channel_t *xchan, enum xdma_direction dir, uintptr_t src_addr, uintptr_t dst_addr, int block_len, int block_num, int src_width, int dst_width) { xdma_controller_t *xdma; xdma_config_t *conf; int ret; xdma = xchan->xdma; KASSERT(xdma != NULL, ("xdma is NULL")); conf = &xchan->conf; conf->direction = dir; conf->src_addr = src_addr; conf->dst_addr = dst_addr; conf->block_len = block_len; conf->block_num = block_num; conf->src_width = src_width; conf->dst_width = dst_width; xchan->flags |= (XCHAN_CONFIGURED | XCHAN_TYPE_CYCLIC); XCHAN_LOCK(xchan); /* Deallocate old descriptors, if any. */ xdma_desc_free(xchan); ret = XDMA_CHANNEL_PREP_CYCLIC(xdma->dma_dev, xchan); if (ret != 0) { device_printf(xdma->dev, "%s: Can't prepare cyclic transfer.\n", __func__); XCHAN_UNLOCK(xchan); return (-1); } if (xchan->flags & XCHAN_DESC_ALLOCATED) { /* Driver has created xDMA descriptors. */ bus_dmamap_sync(xchan->dma_tag, xchan->dma_map, BUS_DMASYNC_POSTWRITE); } XCHAN_UNLOCK(xchan); return (0); } int xdma_begin(xdma_channel_t *xchan) { xdma_controller_t *xdma; int ret; xdma = xchan->xdma; ret = XDMA_CHANNEL_CONTROL(xdma->dma_dev, xchan, XDMA_CMD_BEGIN); if (ret != 0) { device_printf(xdma->dev, "%s: Can't begin the channel operation.\n", __func__); return (-1); } return (0); } int xdma_terminate(xdma_channel_t *xchan) { xdma_controller_t *xdma; int ret; xdma = xchan->xdma; ret = XDMA_CHANNEL_CONTROL(xdma->dma_dev, xchan, XDMA_CMD_TERMINATE); if (ret != 0) { device_printf(xdma->dev, "%s: Can't terminate the channel operation.\n", __func__); return (-1); } return (0); } int xdma_pause(xdma_channel_t *xchan) { xdma_controller_t *xdma; int ret; xdma = xchan->xdma; ret = XDMA_CHANNEL_CONTROL(xdma->dma_dev, xchan, XDMA_CMD_PAUSE); if (ret != 0) { device_printf(xdma->dev, "%s: Can't pause the channel operation.\n", __func__); return (-1); } return (ret); } int xdma_callback(xdma_channel_t *xchan) { struct xdma_intr_handler *ih_tmp; struct xdma_intr_handler *ih; TAILQ_FOREACH_SAFE(ih, &xchan->ie_handlers, ih_next, ih_tmp) { if (ih->cb != NULL) { ih->cb(ih->cb_user); } } return (0); } void xdma_assert_locked(void) { XDMA_ASSERT_LOCKED(); } #ifdef FDT /* * Notify the DMA driver we have machine-dependent data in FDT. */ static int xdma_ofw_md_data(xdma_controller_t *xdma, pcell_t *cells, int ncells) { uint32_t ret; ret = XDMA_OFW_MD_DATA(xdma->dma_dev, cells, ncells, (void **)&xdma->data); return (ret); } /* * Allocate xdma controller. */ xdma_controller_t * xdma_ofw_get(device_t dev, const char *prop) { phandle_t node, parent; xdma_controller_t *xdma; device_t dma_dev; pcell_t *cells; int ncells; int error; int ndmas; int idx; node = ofw_bus_get_node(dev); if (node <= 0) { device_printf(dev, "%s called on not ofw based device.\n", __func__); } error = ofw_bus_parse_xref_list_get_length(node, "dmas", "#dma-cells", &ndmas); if (error) { device_printf(dev, "%s can't get dmas list.\n", __func__); return (NULL); } if (ndmas == 0) { device_printf(dev, "%s dmas list is empty.\n", __func__); return (NULL); } error = ofw_bus_find_string_index(node, "dma-names", prop, &idx); if (error != 0) { device_printf(dev, "%s can't find string index.\n", __func__); return (NULL); } error = ofw_bus_parse_xref_list_alloc(node, "dmas", "#dma-cells", idx, &parent, &ncells, &cells); if (error != 0) { device_printf(dev, "%s can't get dma device xref.\n", __func__); return (NULL); } dma_dev = OF_device_from_xref(parent); if (dma_dev == NULL) { device_printf(dev, "%s can't get dma device.\n", __func__); return (NULL); } xdma = malloc(sizeof(struct xdma_controller), M_XDMA, M_WAITOK | M_ZERO); if (xdma == NULL) { device_printf(dev, "%s can't allocate memory for xdma.\n", __func__); return (NULL); } xdma->dev = dev; xdma->dma_dev = dma_dev; TAILQ_INIT(&xdma->channels); xdma_ofw_md_data(xdma, cells, ncells); free(cells, M_OFWPROP); return (xdma); } #endif /* * Free xDMA controller object. */ int xdma_put(xdma_controller_t *xdma) { XDMA_LOCK(); /* Ensure no channels allocated. */ if (!TAILQ_EMPTY(&xdma->channels)) { device_printf(xdma->dev, "%s: Can't free xDMA\n", __func__); return (-1); } free(xdma->data, M_DEVBUF); free(xdma, M_XDMA); XDMA_UNLOCK(); return (0); } static void xdma_init(void) { mtx_init(&xdma_mtx, "xDMA", NULL, MTX_DEF); } SYSINIT(xdma, SI_SUB_DRIVERS, SI_ORDER_FIRST, xdma_init, NULL); Index: head/sys/dev/xen/blkfront/blkfront.c =================================================================== --- head/sys/dev/xen/blkfront/blkfront.c (revision 320527) +++ head/sys/dev/xen/blkfront/blkfront.c (revision 320528) @@ -1,1616 +1,1615 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /*--------------------------- Forward Declarations ---------------------------*/ static void xbd_closing(device_t); static void xbd_startio(struct xbd_softc *sc); /*---------------------------------- Macros ----------------------------------*/ #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) #endif #define XBD_SECTOR_SHFT 9 /*---------------------------- Global Static Data ----------------------------*/ static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); static int xbd_enable_indirect = 1; SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters"); SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN, &xbd_enable_indirect, 0, "Enable xbd indirect segments"); /*---------------------------- Command Processing ----------------------------*/ static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) return; sc->xbd_flags |= xbd_flag; sc->xbd_qfrozen_cnt++; } static void xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) return; if (sc->xbd_qfrozen_cnt == 0) panic("%s: Thaw with flag 0x%x while not frozen.", __func__, xbd_flag); sc->xbd_flags &= ~xbd_flag; sc->xbd_qfrozen_cnt--; } static void xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) { if ((cm->cm_flags & XBDCF_FROZEN) != 0) return; cm->cm_flags |= XBDCF_FROZEN|cm_flag; xbd_freeze(sc, XBDF_NONE); } static void xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) { if ((cm->cm_flags & XBDCF_FROZEN) == 0) return; cm->cm_flags &= ~XBDCF_FROZEN; xbd_thaw(sc, XBDF_NONE); } static inline void xbd_flush_requests(struct xbd_softc *sc) { int notify; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); if (notify) xen_intr_signal(sc->xen_intr_handle); } static void xbd_free_command(struct xbd_command *cm) { KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, ("Freeing command that is still on queue %d.", cm->cm_flags & XBDCF_Q_MASK)); cm->cm_flags = XBDCF_INITIALIZER; cm->cm_bp = NULL; cm->cm_complete = NULL; xbd_enqueue_cm(cm, XBD_Q_FREE); xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); } static void xbd_mksegarray(bus_dma_segment_t *segs, int nsegs, grant_ref_t * gref_head, int otherend_id, int readonly, grant_ref_t * sg_ref, struct blkif_request_segment *sg) { struct blkif_request_segment *last_block_sg = sg + nsegs; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; while (sg < last_block_sg) { KASSERT(segs->ds_addr % (1 << XBD_SECTOR_SHFT) == 0, ("XEN disk driver I/O must be sector aligned")); KASSERT(segs->ds_len % (1 << XBD_SECTOR_SHFT) == 0, ("XEN disk driver I/Os must be a multiple of " "the sector length")); buffer_ma = segs->ds_addr; fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; KASSERT(lsect <= 7, ("XEN disk driver data cannot " "cross a page boundary")); /* install a grant reference. */ ref = gnttab_claim_grant_reference(gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private * to gnttab.c. */ KASSERT(ref != ~0, ("grant_reference failed")); gnttab_grant_foreign_access_ref( ref, otherend_id, buffer_ma >> PAGE_SHIFT, readonly); *sg_ref = ref; *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; sg++; sg_ref++; segs++; } } static void xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct xbd_softc *sc; struct xbd_command *cm; int op; cm = arg; sc = cm->cm_sc; if (error) { cm->cm_bp->bio_error = EIO; biodone(cm->cm_bp); xbd_free_command(cm); return; } KASSERT(nsegs <= sc->xbd_max_request_segments, ("Too many segments in a blkfront I/O")); if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) { blkif_request_t *ring_req; /* Fill out a blkif_request_t structure. */ ring_req = (blkif_request_t *) RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, xenbus_get_otherend_id(sc->xbd_dev), cm->cm_operation == BLKIF_OP_WRITE, cm->cm_sg_refs, ring_req->seg); } else { blkif_request_indirect_t *ring_req; /* Fill out a blkif_request_indirect_t structure. */ ring_req = (blkif_request_indirect_t *) RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = BLKIF_OP_INDIRECT; ring_req->indirect_op = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; xbd_mksegarray(segs, nsegs, &cm->cm_gref_head, xenbus_get_otherend_id(sc->xbd_dev), cm->cm_operation == BLKIF_OP_WRITE, cm->cm_sg_refs, cm->cm_indirectionpages); memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs, sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages); } if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->cm_operation == BLKIF_OP_WRITE) op = BUS_DMASYNC_PREWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); gnttab_free_grant_references(cm->cm_gref_head); xbd_enqueue_cm(cm, XBD_Q_BUSY); /* * If bus dma had to asynchronously call us back to dispatch * this command, we are no longer executing in the context of * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to * xbd_flush_requests() to publish this command to the backend * along with any other commands that it could batch. */ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) xbd_flush_requests(sc); return; } static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) { int error; if (cm->cm_bp != NULL) error = bus_dmamap_load_bio(sc->xbd_io_dmat, cm->cm_map, cm->cm_bp, xbd_queue_cb, cm, 0); else error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0); if (error == EINPROGRESS) { /* * Maintain queuing order by freezing the queue. The next * command may not require as many resources as the command * we just attempted to map, so we can't rely on bus dma * blocking for it too. */ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); return (0); } return (error); } static void xbd_restart_queue_callback(void *arg) { struct xbd_softc *sc = arg; mtx_lock(&sc->xbd_io_lock); xbd_thaw(sc, XBDF_GNT_SHORTAGE); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); } static struct xbd_command * xbd_bio_command(struct xbd_softc *sc) { struct xbd_command *cm; struct bio *bp; if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) return (NULL); bp = xbd_dequeue_bio(sc); if (bp == NULL) return (NULL); if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { xbd_freeze(sc, XBDF_CM_SHORTAGE); xbd_requeue_bio(sc, bp); return (NULL); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { gnttab_request_free_callback(&sc->xbd_callback, xbd_restart_queue_callback, sc, sc->xbd_max_request_segments); xbd_freeze(sc, XBDF_GNT_SHORTAGE); xbd_requeue_bio(sc, bp); xbd_enqueue_cm(cm, XBD_Q_FREE); return (NULL); } cm->cm_bp = bp; cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; switch (bp->bio_cmd) { case BIO_READ: cm->cm_operation = BLKIF_OP_READ; break; case BIO_WRITE: cm->cm_operation = BLKIF_OP_WRITE; if ((bp->bio_flags & BIO_ORDERED) != 0) { if ((sc->xbd_flags & XBDF_BARRIER) != 0) { cm->cm_operation = BLKIF_OP_WRITE_BARRIER; } else { /* * Single step this command. */ cm->cm_flags |= XBDCF_Q_FREEZE; if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Wait for in-flight requests to * finish. */ xbd_freeze(sc, XBDF_WAIT_IDLE); xbd_requeue_cm(cm, XBD_Q_READY); return (NULL); } } } break; case BIO_FLUSH: if ((sc->xbd_flags & XBDF_FLUSH) != 0) cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; else if ((sc->xbd_flags & XBDF_BARRIER) != 0) cm->cm_operation = BLKIF_OP_WRITE_BARRIER; else panic("flush request, but no flush support available"); break; default: panic("unknown bio command %d", bp->bio_cmd); } return (cm); } /* * Dequeue buffers and place them in the shared communication ring. * Return when no more requests can be accepted or all buffers have * been queued. * * Signal XEN once the ring has been filled out. */ static void xbd_startio(struct xbd_softc *sc) { struct xbd_command *cm; int error, queued = 0; mtx_assert(&sc->xbd_io_lock, MA_OWNED); if (sc->xbd_state != XBD_STATE_CONNECTED) return; while (!RING_FULL(&sc->xbd_ring)) { if (sc->xbd_qfrozen_cnt != 0) break; cm = xbd_dequeue_cm(sc, XBD_Q_READY); if (cm == NULL) cm = xbd_bio_command(sc); if (cm == NULL) break; if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { /* * Single step command. Future work is * held off until this command completes. */ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); } if ((error = xbd_queue_request(sc, cm)) != 0) { printf("xbd_queue_request returned %d\n", error); break; } queued++; } if (queued != 0) xbd_flush_requests(sc); } static void xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) { struct bio *bp; bp = cm->cm_bp; if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { disk_err(bp, "disk error" , -1, 0); printf(" status: %x\n", cm->cm_status); bp->bio_flags |= BIO_ERROR; } if (bp->bio_flags & BIO_ERROR) bp->bio_error = EIO; else bp->bio_resid = 0; xbd_free_command(cm); biodone(bp); } static void xbd_int(void *xsc) { struct xbd_softc *sc = xsc; struct xbd_command *cm; blkif_response_t *bret; RING_IDX i, rp; int op; mtx_lock(&sc->xbd_io_lock); if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { mtx_unlock(&sc->xbd_io_lock); return; } again: rp = sc->xbd_ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ for (i = sc->xbd_ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->xbd_ring, i); cm = &sc->xbd_shadow[bret->id]; xbd_remove_cm(cm, XBD_Q_BUSY); gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs); i++; if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; else if (cm->cm_operation == BLKIF_OP_WRITE || cm->cm_operation == BLKIF_OP_WRITE_BARRIER) op = BUS_DMASYNC_POSTWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); /* * Release any hold this command has on future command * dispatch. */ xbd_cm_thaw(sc, cm); /* * Directly call the i/o complete routine to save an * an indirection in the common case. */ cm->cm_status = bret->status; if (cm->cm_bp) xbd_bio_complete(sc, cm); else if (cm->cm_complete != NULL) cm->cm_complete(cm); else xbd_free_command(cm); } sc->xbd_ring.rsp_cons = i; if (i != sc->xbd_ring.req_prod_pvt) { int more_to_do; RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); if (more_to_do) goto again; } else { sc->xbd_ring.sring->rsp_event = i + 1; } if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) xbd_thaw(sc, XBDF_WAIT_IDLE); xbd_startio(sc); if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); mtx_unlock(&sc->xbd_io_lock); } /*------------------------------- Dump Support -------------------------------*/ /** * Quiesce the disk writes for a dump file before allowing the next buffer. */ static void xbd_quiesce(struct xbd_softc *sc) { int mtd; // While there are outstanding requests while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); if (mtd) { /* Received request completions, update queue. */ xbd_int(sc); } if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Still pending requests, wait for the disk i/o * to complete. */ HYPERVISOR_yield(); } } } /* Kernel dump function for a paravirtualized disk device */ static void xbd_dump_complete(struct xbd_command *cm) { xbd_enqueue_cm(cm, XBD_Q_COMPLETE); } static int xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct disk *dp = arg; struct xbd_softc *sc = dp->d_drv1; struct xbd_command *cm; size_t chunk; int sbp; int rc = 0; if (length <= 0) return (rc); xbd_quiesce(sc); /* All quiet on the western front. */ /* * If this lock is held, then this module is failing, and a * successful kernel dump is highly unlikely anyway. */ mtx_lock(&sc->xbd_io_lock); /* Split the 64KB block as needed */ for (sbp=0; length > 0; sbp++) { cm = xbd_dequeue_cm(sc, XBD_Q_FREE); if (cm == NULL) { mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "dump: no more commands?\n"); return (EBUSY); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { xbd_free_command(cm); mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "no more grant allocs?\n"); return (EBUSY); } chunk = length > sc->xbd_max_request_size ? sc->xbd_max_request_size : length; cm->cm_data = virtual; cm->cm_datalen = chunk; cm->cm_operation = BLKIF_OP_WRITE; cm->cm_sector_number = offset / dp->d_sectorsize; cm->cm_complete = xbd_dump_complete; xbd_enqueue_cm(cm, XBD_Q_READY); length -= chunk; offset += chunk; virtual = (char *) virtual + chunk; } /* Tell DOM0 to do the I/O */ xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); /* Poll for the completion. */ xbd_quiesce(sc); /* All quite on the eastern front */ /* If there were any errors, bail out... */ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { if (cm->cm_status != BLKIF_RSP_OKAY) { device_printf(sc->xbd_dev, "Dump I/O failed at sector %jd\n", cm->cm_sector_number); rc = EIO; } xbd_free_command(cm); } return (rc); } /*----------------------------- Disk Entrypoints -----------------------------*/ static int xbd_open(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) { printf("xbd%d: not found", dp->d_unit); return (ENXIO); } sc->xbd_flags |= XBDF_OPEN; sc->xbd_users++; return (0); } static int xbd_close(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); sc->xbd_flags &= ~XBDF_OPEN; if (--(sc->xbd_users) == 0) { /* * Check whether we have been instructed to close. We will * have ignored this request initially, as the device was * still mounted. */ if (xenbus_get_otherend_state(sc->xbd_dev) == XenbusStateClosing) xbd_closing(sc->xbd_dev); } return (0); } static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); return (ENOTTY); } /* * Read/write routine for a buffer. Finds the proper unit, place it on * the sortq and kick the controller. */ static void xbd_strategy(struct bio *bp) { struct xbd_softc *sc = bp->bio_disk->d_drv1; /* bogus disk? */ if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return; } /* * Place it in the queue of disk activities for this disk */ mtx_lock(&sc->xbd_io_lock); xbd_enqueue_bio(sc, bp); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); return; } /*------------------------------ Ring Management -----------------------------*/ static int xbd_alloc_ring(struct xbd_softc *sc) { blkif_sring_t *sring; uintptr_t sring_page_addr; int error; int i; sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); return (ENOMEM); } SHARED_RING_INIT(sring); FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); for (i = 0, sring_page_addr = (uintptr_t)sring; i < sc->xbd_ring_pages; i++, sring_page_addr += PAGE_SIZE) { error = xenbus_grant_ring(sc->xbd_dev, (vtophys(sring_page_addr) >> PAGE_SHIFT), &sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "granting ring_ref(%d)", i); return (error); } } if (sc->xbd_ring_pages == 1) { error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), "ring-ref", "%u", sc->xbd_ring_ref[0]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-ref", xenbus_get_node(sc->xbd_dev)); return (error); } } else { for (i = 0; i < sc->xbd_ring_pages; i++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), ring_ref_name, "%u", sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/%s", xenbus_get_node(sc->xbd_dev), ring_ref_name); return (error); } } } error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "xen_intr_alloc_and_bind_local_port failed"); return (error); } return (0); } static void xbd_free_ring(struct xbd_softc *sc) { int i; if (sc->xbd_ring.sring == NULL) return; for (i = 0; i < sc->xbd_ring_pages; i++) { if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); sc->xbd_ring_ref[i] = GRANT_REF_INVALID; } } free(sc->xbd_ring.sring, M_XENBLOCKFRONT); sc->xbd_ring.sring = NULL; } /*-------------------------- Initialization/Teardown -------------------------*/ static int xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) { struct sbuf sb; int feature_cnt; sbuf_new(&sb, features, len, SBUF_FIXEDLEN); feature_cnt = 0; if ((sc->xbd_flags & XBDF_FLUSH) != 0) { sbuf_printf(&sb, "flush"); feature_cnt++; } if ((sc->xbd_flags & XBDF_BARRIER) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "write_barrier"); feature_cnt++; } if ((sc->xbd_flags & XBDF_DISCARD) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "discard"); feature_cnt++; } if ((sc->xbd_flags & XBDF_PERSISTENT) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "persistent_grants"); feature_cnt++; } (void) sbuf_finish(&sb); return (sbuf_len(&sb)); } static int xbd_sysctl_features(SYSCTL_HANDLER_ARGS) { char features[80]; struct xbd_softc *sc = arg1; int error; int len; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); len = xbd_feature_string(sc, features, sizeof(features)); /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */ return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); } static void xbd_setup_sysctl(struct xbd_softc *xbd) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; struct sysctl_oid_list *children; sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); if (sysctl_tree == NULL) return; children = SYSCTL_CHILDREN(sysctl_tree); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbd->xbd_max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, "communication channel pages (negotiated)"); SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, xbd_sysctl_features, "A", "protocol features (negotiated)"); } /* * Translate Linux major/minor to an appropriate name and unit * number. For HVM guests, this allows us to use the same drive names * with blkfront as the emulated drives, easing transition slightly. */ static void xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) { static struct vdev_info { int major; int shift; int base; const char *name; } info[] = { {3, 6, 0, "ada"}, /* ide0 */ {22, 6, 2, "ada"}, /* ide1 */ {33, 6, 4, "ada"}, /* ide2 */ {34, 6, 6, "ada"}, /* ide3 */ {56, 6, 8, "ada"}, /* ide4 */ {57, 6, 10, "ada"}, /* ide5 */ {88, 6, 12, "ada"}, /* ide6 */ {89, 6, 14, "ada"}, /* ide7 */ {90, 6, 16, "ada"}, /* ide8 */ {91, 6, 18, "ada"}, /* ide9 */ {8, 4, 0, "da"}, /* scsi disk0 */ {65, 4, 16, "da"}, /* scsi disk1 */ {66, 4, 32, "da"}, /* scsi disk2 */ {67, 4, 48, "da"}, /* scsi disk3 */ {68, 4, 64, "da"}, /* scsi disk4 */ {69, 4, 80, "da"}, /* scsi disk5 */ {70, 4, 96, "da"}, /* scsi disk6 */ {71, 4, 112, "da"}, /* scsi disk7 */ {128, 4, 128, "da"}, /* scsi disk8 */ {129, 4, 144, "da"}, /* scsi disk9 */ {130, 4, 160, "da"}, /* scsi disk10 */ {131, 4, 176, "da"}, /* scsi disk11 */ {132, 4, 192, "da"}, /* scsi disk12 */ {133, 4, 208, "da"}, /* scsi disk13 */ {134, 4, 224, "da"}, /* scsi disk14 */ {135, 4, 240, "da"}, /* scsi disk15 */ {202, 4, 0, "xbd"}, /* xbd */ {0, 0, 0, NULL}, }; int major = vdevice >> 8; int minor = vdevice & 0xff; int i; if (vdevice & (1 << 28)) { *unit = (vdevice & ((1 << 28) - 1)) >> 8; *name = "xbd"; return; } for (i = 0; info[i].major; i++) { if (info[i].major == major) { *unit = info[i].base + (minor >> info[i].shift); *name = info[i].name; return; } } *unit = minor >> 4; *name = "xbd"; } int xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, int vdevice, uint16_t vdisk_info, unsigned long sector_size, unsigned long phys_sector_size) { char features[80]; int unit, error = 0; const char *name; xbd_vdevice_to_unit(vdevice, &unit, &name); sc->xbd_unit = unit; if (strcmp(name, "xbd") != 0) device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); if (xbd_feature_string(sc, features, sizeof(features)) > 0) { device_printf(sc->xbd_dev, "features: %s\n", features); } sc->xbd_disk = disk_alloc(); sc->xbd_disk->d_unit = sc->xbd_unit; sc->xbd_disk->d_open = xbd_open; sc->xbd_disk->d_close = xbd_close; sc->xbd_disk->d_ioctl = xbd_ioctl; sc->xbd_disk->d_strategy = xbd_strategy; sc->xbd_disk->d_dump = xbd_dump; sc->xbd_disk->d_name = name; sc->xbd_disk->d_drv1 = sc; sc->xbd_disk->d_sectorsize = sector_size; sc->xbd_disk->d_stripesize = phys_sector_size; sc->xbd_disk->d_stripeoffset = 0; sc->xbd_disk->d_mediasize = sectors * sector_size; sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; sc->xbd_disk->d_flags = DISKFLAG_UNMAPPED_BIO; if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; device_printf(sc->xbd_dev, "synchronize cache commands enabled.\n"); } disk_create(sc->xbd_disk, DISK_VERSION); return error; } static void xbd_free(struct xbd_softc *sc) { int i; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_DISCONNECTED; mtx_unlock(&sc->xbd_io_lock); /* Free resources associated with old device channel. */ xbd_free_ring(sc); if (sc->xbd_shadow) { for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; if (cm->cm_sg_refs != NULL) { free(cm->cm_sg_refs, M_XENBLOCKFRONT); cm->cm_sg_refs = NULL; } if (cm->cm_indirectionpages != NULL) { gnttab_end_foreign_access_references( sc->xbd_max_request_indirectpages, &cm->cm_indirectionrefs[0]); contigfree(cm->cm_indirectionpages, PAGE_SIZE * sc->xbd_max_request_indirectpages, M_XENBLOCKFRONT); cm->cm_indirectionpages = NULL; } bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); } free(sc->xbd_shadow, M_XENBLOCKFRONT); sc->xbd_shadow = NULL; bus_dma_tag_destroy(sc->xbd_io_dmat); xbd_initq_cm(sc, XBD_Q_FREE); xbd_initq_cm(sc, XBD_Q_READY); xbd_initq_cm(sc, XBD_Q_COMPLETE); } xen_intr_unbind(&sc->xen_intr_handle); } /*--------------------------- State Change Handlers --------------------------*/ static void xbd_initialize(struct xbd_softc *sc) { const char *otherend_path; const char *node_path; uint32_t max_ring_page_order; int error; if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { /* Initialization has already been performed. */ return; } /* * Protocol defaults valid even if negotiation for a * setting fails. */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; /* * Protocol negotiation. * * \note xs_gather() returns on the first encountered error, so * we must use independent calls in order to guarantee * we don't miss information in a sparsly populated back-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. */ otherend_path = xenbus_get_otherend_path(sc->xbd_dev); node_path = xenbus_get_node(sc->xbd_dev); /* Support both backend schemes for relaying ring page limits. */ (void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order", NULL, "%" PRIu32, &max_ring_page_order); sc->xbd_ring_pages = 1 << max_ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages", NULL, "%" PRIu32, &sc->xbd_ring_pages); if (sc->xbd_ring_pages < 1) sc->xbd_ring_pages = 1; if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "limited to front-end limit of %u.\n", sc->xbd_ring_pages, XBD_MAX_RING_PAGES); sc->xbd_ring_pages = XBD_MAX_RING_PAGES; } if (powerof2(sc->xbd_ring_pages) == 0) { uint32_t new_page_limit; new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "is not a power of 2. Limited to %u.\n", sc->xbd_ring_pages, new_page_limit); sc->xbd_ring_pages = new_page_limit; } sc->xbd_max_requests = BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { device_printf(sc->xbd_dev, "Back-end specified max_requests of %u " "limited to front-end limit of %zu.\n", sc->xbd_max_requests, XBD_MAX_REQUESTS); sc->xbd_max_requests = XBD_MAX_REQUESTS; } if (xbd_alloc_ring(sc) != 0) return; /* Support both backend schemes for relaying ring page limits. */ if (sc->xbd_ring_pages > 1) { error = xs_printf(XST_NIL, node_path, "num-ring-pages","%u", sc->xbd_ring_pages); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/num-ring-pages", node_path); return; } error = xs_printf(XST_NIL, node_path, "ring-page-order", "%u", fls(sc->xbd_ring_pages) - 1); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-page-order", node_path); return; } } error = xs_printf(XST_NIL, node_path, "event-channel", "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/event-channel", node_path); return; } error = xs_printf(XST_NIL, node_path, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/protocol", node_path); return; } xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); } /* * Invoked when the backend is finally 'ready' (and has published * the details about the physical device - #sectors, size, etc). */ static void xbd_connect(struct xbd_softc *sc) { device_t dev = sc->xbd_dev; unsigned long sectors, sector_size, phys_sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; int i, j; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "sectors", "%lu", §ors, "info", "%u", &binfo, "sector-size", "%lu", §or_size, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } if ((sectors == 0) || (sector_size == 0)) { xenbus_dev_fatal(dev, 0, "invalid parameters from %s:" " sectors = %lu, sector_size = %lu", xenbus_get_otherend_path(dev), sectors, sector_size); return; } err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "physical-sector-size", "%lu", &phys_sector_size, NULL); if (err || phys_sector_size <= sector_size) phys_sector_size = 0; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-barrier", "%d", &feature_barrier, NULL); if (err == 0 && feature_barrier != 0) sc->xbd_flags |= XBDF_BARRIER; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-flush-cache", "%d", &feature_flush, NULL); if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-max-indirect-segments", "%" PRIu32, &sc->xbd_max_request_segments, NULL); if ((err != 0) || (xbd_enable_indirect == 0)) sc->xbd_max_request_segments = 0; if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS) sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS; if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS)) sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS); sc->xbd_max_request_indirectpages = XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments); if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); /* Allocate datastructures based on negotiated values. */ err = bus_dma_tag_create( bus_get_dma_tag(sc->xbd_dev), /* parent */ 512, PAGE_SIZE, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->xbd_max_request_size, sc->xbd_max_request_segments, PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->xbd_io_lock, /* lockarg */ &sc->xbd_io_dmat); if (err != 0) { xenbus_dev_fatal(sc->xbd_dev, err, "Cannot allocate parent DMA tag\n"); return; } /* Per-transaction data allocation. */ sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sc->xbd_shadow == NULL) { bus_dma_tag_destroy(sc->xbd_io_dmat); xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "Cannot allocate request structures\n"); return; } for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; void * indirectpages; cm = &sc->xbd_shadow[i]; cm->cm_sg_refs = malloc( sizeof(grant_ref_t) * sc->xbd_max_request_segments, M_XENBLOCKFRONT, M_NOWAIT); if (cm->cm_sg_refs == NULL) break; cm->cm_id = i; cm->cm_flags = XBDCF_INITIALIZER; cm->cm_sc = sc; if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) break; if (sc->xbd_max_request_indirectpages > 0) { indirectpages = contigmalloc( PAGE_SIZE * sc->xbd_max_request_indirectpages, M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0); } else { indirectpages = NULL; } for (j = 0; j < sc->xbd_max_request_indirectpages; j++) { if (gnttab_grant_foreign_access( xenbus_get_otherend_id(sc->xbd_dev), (vtophys(indirectpages) >> PAGE_SHIFT) + j, 1 /* grant read-only access */, &cm->cm_indirectionrefs[j])) break; } if (j < sc->xbd_max_request_indirectpages) break; cm->cm_indirectionpages = indirectpages; xbd_free_command(cm); } if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), device_get_desc(dev), xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, sector_size, phys_sector_size); } (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_CONNECTED; xbd_startio(sc); sc->xbd_flags |= XBDF_READY; mtx_unlock(&sc->xbd_io_lock); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once this is done, we can switch to Closed in * acknowledgement. */ static void xbd_closing(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); xenbus_set_state(dev, XenbusStateClosing); DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); if (sc->xbd_disk != NULL) { disk_destroy(sc->xbd_disk); sc->xbd_disk = NULL; } xenbus_set_state(dev, XenbusStateClosed); } /*---------------------------- NewBus Entrypoints ----------------------------*/ static int xbd_probe(device_t dev) { if (strcmp(xenbus_get_type(dev), "vbd") != 0) return (ENXIO); if (xen_hvm_domain() && xen_disable_pv_disks != 0) return (ENXIO); if (xen_hvm_domain()) { int error; char *type; /* * When running in an HVM domain, IDE disk emulation is * disabled early in boot so that native drivers will * not see emulated hardware. However, CDROM device * emulation cannot be disabled. * * Through use of FreeBSD's vm_guest and xen_hvm_domain() * APIs, we could modify the native CDROM driver to fail its * probe when running under Xen. Unfortunatlely, the PV * CDROM support in XenServer (up through at least version * 6.2) isn't functional, so we instead rely on the emulated * CDROM instance, and fail to attach the PV one here in * the blkfront driver. */ error = xs_read(XST_NIL, xenbus_get_node(dev), "device-type", NULL, (void **) &type); if (error) return (ENXIO); if (strncmp(type, "cdrom", 5) == 0) { free(type, M_XENSTORE); return (ENXIO); } free(type, M_XENSTORE); } device_set_desc(dev, "Virtual Block Device"); device_quiet(dev); return (0); } /* * Setup supplies the backend dir, virtual device. We place an event * channel and shared frame entries. We watch backend to wait if it's * ok. */ static int xbd_attach(device_t dev) { struct xbd_softc *sc; const char *name; uint32_t vdevice; int error; int i; int unit; /* FIXME: Use dynamic device id if this is not set. */ error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%" PRIu32, &vdevice); if (error) error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device-ext", NULL, "%" PRIu32, &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } xbd_vdevice_to_unit(vdevice, &unit, &name); if (!strcmp(name, "xbd")) device_set_unit(dev, unit); sc = device_get_softc(dev); mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); xbd_initqs(sc); for (i = 0; i < XBD_MAX_RING_PAGES; i++) sc->xbd_ring_ref[i] = GRANT_REF_INVALID; sc->xbd_dev = dev; sc->xbd_vdevice = vdevice; sc->xbd_state = XBD_STATE_DISCONNECTED; xbd_setup_sysctl(sc); /* Wait for backend device to publish its protocol capabilities. */ xenbus_set_state(dev, XenbusStateInitialising); return (0); } static int xbd_detach(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); xbd_free(sc); mtx_destroy(&sc->xbd_io_lock); return 0; } static int xbd_suspend(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); int retval; int saved_state; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); saved_state = sc->xbd_state; sc->xbd_state = XBD_STATE_SUSPENDED; /* Wait for outstanding I/O to drain. */ retval = 0; while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { retval = EBUSY; break; } } mtx_unlock(&sc->xbd_io_lock); if (retval != 0) sc->xbd_state = saved_state; return (retval); } static int xbd_resume(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); if (xen_suspend_cancelled) { sc->xbd_state = XBD_STATE_CONNECTED; return (0); } DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); xbd_free(sc); xbd_initialize(sc); return (0); } /** * Callback received when the backend's state changes. */ static void xbd_backend_changed(device_t dev, XenbusState backend_state) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateReconfigured: case XenbusStateReconfiguring: case XenbusStateClosed: break; case XenbusStateInitWait: case XenbusStateInitialised: xbd_initialize(sc); break; case XenbusStateConnected: xbd_initialize(sc); xbd_connect(sc); break; case XenbusStateClosing: if (sc->xbd_users > 0) { device_printf(dev, "detaching with pending users\n"); KASSERT(sc->xbd_disk != NULL, ("NULL disk with pending users\n")); disk_gone(sc->xbd_disk); } else { xbd_closing(dev); } break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbd_probe), DEVMETHOD(device_attach, xbd_attach), DEVMETHOD(device_detach, xbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbd_suspend), DEVMETHOD(device_resume, xbd_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), { 0, 0 } }; static driver_t xbd_driver = { "xbd", xbd_methods, sizeof(struct xbd_softc), }; devclass_t xbd_devclass; DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); Index: head/sys/i386/include/bus_dma.h =================================================================== --- head/sys/i386/include/bus_dma.h (revision 320527) +++ head/sys/i386/include/bus_dma.h (revision 320528) @@ -1,34 +1,34 @@ /*- * Copyright (c) 2005 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_DMA_H_ #define _I386_BUS_DMA_H_ -#include +#include #endif /* _I386_BUS_DMA_H_ */ Index: head/sys/mips/include/bus_dma.h =================================================================== --- head/sys/mips/include/bus_dma.h (revision 320527) +++ head/sys/mips/include/bus_dma.h (revision 320528) @@ -1,34 +1,35 @@ /*- * Copyright (c) 2005 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MIPS_BUS_DMA_H_ #define _MIPS_BUS_DMA_H_ #include +#include #endif /* _MIPS_BUS_DMA_H_ */ Index: head/sys/mips/mips/busdma_machdep.c =================================================================== --- head/sys/mips/mips/busdma_machdep.c (revision 320527) +++ head/sys/mips/mips/busdma_machdep.c (revision 320528) @@ -1,1516 +1,1516 @@ /*- * Copyright (c) 2006 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From i386/busdma_machdep.c,v 1.26 2002/04/19 22:58:09 alfred */ #include __FBSDID("$FreeBSD$"); /* * MIPS bus dma support routines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 64 #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * On XBurst cores from Ingenic, cache-line writeback is local * only, unless accompanied by invalidation. Invalidations force * dirty line writeout and invalidation requests forwarded to * other cores if other cores have the cache line dirty. */ #if defined(SMP) && defined(CPU_XBURST) #define BUS_DMA_FORCE_WBINV #endif struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ vm_offset_t vaddr_nocache; /* kva of bounce buffer uncached */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; struct sync_list { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ bus_size_t datacount; /* client data count */ }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); #define DMAMAP_UNCACHEABLE 0x08 #define DMAMAP_CACHE_ALIGNED 0x10 struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; int flags; void *origbuffer; void *allocbuffer; TAILQ_ENTRY(bus_dmamap) freelist; STAILQ_ENTRY(bus_dmamap) links; bus_dmamap_callback_t *callback; void *callback_arg; int sync_count; struct sync_list *slist; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); /* Default tag, as most drivers provide no parent tag. */ bus_dma_tag_t mips_root_dma_tag; static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */ static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */ static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */ MALLOC_DEFINE(M_BUSDMA, "busdma", "busdma metadata"); MALLOC_DEFINE(M_BOUNCE, "bounce", "busdma bounce pages"); /* * This is the ctor function passed to uma_zcreate() for the pool of dma maps. * It'll need platform-specific changes if this code is copied. */ static int dmamap_ctor(void *mem, int size, void *arg, int flags) { bus_dmamap_t map; bus_dma_tag_t dmat; map = (bus_dmamap_t)mem; dmat = (bus_dma_tag_t)arg; dmat->map_count++; map->dmat = dmat; map->flags = 0; map->slist = NULL; map->allocbuffer = NULL; map->sync_count = 0; STAILQ_INIT(&map->bpages); return (0); } /* * This is the dtor function passed to uma_zcreate() for the pool of dma maps. * It may need platform-specific changes if this code is copied . */ static void dmamap_dtor(void *mem, int size, void *arg) { bus_dmamap_t map; map = (bus_dmamap_t)mem; map->dmat->map_count--; } static void busdma_init(void *dummy) { /* Create a cache of maps for bus_dmamap_create(). */ dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap), dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); /* Create a cache of buffers in standard (cacheable) memory. */ standard_allocator = busdma_bufalloc_create("buffer", mips_dcache_max_linesize, /* minimum_alignment */ NULL, /* uma_alloc func */ NULL, /* uma_free func */ 0); /* uma_zcreate_flags */ /* * Create a cache of buffers in uncacheable memory, to implement the * BUS_DMA_COHERENT flag. */ coherent_allocator = busdma_bufalloc_create("coherent", mips_dcache_max_linesize, /* minimum_alignment */ busdma_bufalloc_alloc_uncacheable, busdma_bufalloc_free_uncacheable, 0); /* uma_zcreate_flags */ } SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_FOURTH, busdma_init, NULL); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) || ((paddr & (dmat->alignment - 1)) != 0)) && (dmat->filter == NULL || (*dmat->filter)(dmat->filterarg, paddr) != 0)) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Check to see if the specified page is in an allowed DMA range. */ static __inline int _bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) || (lowaddr < phys_avail[i] && highaddr > phys_avail[i])) return (1); } return (0); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { #ifdef INVARIANTS panic("driver error: busdma dflt_lock called"); #else printf("DRIVER_ERROR: busdma dflt_lock called\n"); #endif } static __inline bus_dmamap_t _busdma_alloc_dmamap(bus_dma_tag_t dmat) { struct sync_list *slist; bus_dmamap_t map; slist = malloc(sizeof(*slist) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (slist == NULL) return (NULL); map = uma_zalloc_arg(dmamap_zone, dmat, M_NOWAIT); if (map != NULL) map->slist = slist; else free(slist, M_BUSDMA); return (map); } static __inline void _busdma_free_dmamap(bus_dmamap_t map) { free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); } /* * Allocate a device specific dma_tag. */ #define SEG_NB 1024 int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Return a NULL tag on failure */ *dmat = NULL; if (!parent) parent = mips_root_dma_tag; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_BUSDMA, M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; if (cpuinfo.cache_coherent_dma) newtag->flags |= BUS_DMA_COHERENT; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } newtag->segments = NULL; /* * Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if ((newtag->filter != NULL) || ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); } if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) || newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_BUSDMA); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } else newtag->bounce_zone = NULL; if (error != 0) free(newtag, M_BUSDMA); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { #ifdef KTR bus_dma_tag_t dmat_copy = dmat; #endif if (dmat != NULL) { if (dmat->map_count != 0) return (EBUSY); while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_BUSDMA); free(dmat, M_BUSDMA); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } CTR2(KTR_BUSDMA, "%s tag %p", __func__, dmat_copy); return (0); } #include /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap; int error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } *mapp = newmap; /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { _busdma_free_dmamap(newmap); *mapp = NULL; return (error); } } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ maxpages = MAX_BPAGES; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (0); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; _busdma_free_dmamap(map); CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddrp, int flags, bus_dmamap_t *mapp) { bus_dmamap_t newmap = NULL; busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; vm_memattr_t memattr; void *vaddr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_BUSDMA, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } } newmap = _busdma_alloc_dmamap(dmat); if (newmap == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } /* * If all the memory is coherent with DMA then we don't need to * do anything special for a coherent mapping request. */ if (dmat->flags & BUS_DMA_COHERENT) flags &= ~BUS_DMA_COHERENT; if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; newmap->flags |= DMAMAP_UNCACHEABLE; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; } /* All buffers we allocate are cache-aligned. */ newmap->flags |= DMAMAP_CACHE_ALIGNED; if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; /* * Try to find a bufzone in the allocator that holds a cache of buffers * of the right size for this request. If the buffer is too big to be * held in the allocator cache, this returns NULL. */ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); /* * Allocate the buffer from the uma(9) allocator if... * - It's small enough to be in the allocator (bufzone not NULL). * - The alignment constraint isn't larger than the allocation size * (the allocator aligns buffers to their size boundaries). * - There's no need to handle lowaddr/highaddr exclusion zones. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= howmany(dmat->maxsize, MIN(dmat->maxsegsz, PAGE_SIZE)) && dmat->alignment <= PAGE_SIZE && (dmat->boundary % PAGE_SIZE) == 0) { vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, memattr); } else { vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->maxsize, mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary, memattr); } if (vaddr == NULL) { _busdma_free_dmamap(newmap); newmap = NULL; } else { newmap->sync_count = 0; } *vaddrp = vaddr; *mapp = newmap; return (vaddr == NULL ? ENOMEM : 0); } /* * Free a piece of memory and it's allocated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { struct busdma_bufzone *bufzone; busdma_bufalloc_t ba; if (map->flags & DMAMAP_UNCACHEABLE) ba = coherent_allocator; else ba = standard_allocator; free(map->slist, M_BUSDMA); uma_zfree(dmamap_zone, map); bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR3(KTR_BUSDMA, "lowaddr= %d, boundary= %d, alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); paddr = pmap_kextract(vaddr); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * the previous segment if possible. */ seg = *segp; if (seg >= 0 && curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) { segs[seg].ds_len += sgsize; } else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = dmat->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. * first indicates if this is the first invocation of this function. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; struct sync_list *sl; vm_offset_t vaddr = (vm_offset_t)buf; int error = 0; if (segs == NULL) segs = dmat->segments; if ((flags & BUS_DMA_LOAD_MBUF) != 0) map->flags |= DMAMAP_CACHE_ALIGNED; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } CTR3(KTR_BUSDMA, "lowaddr= %d boundary= %d, " "alignment= %d", dmat->lowaddr, dmat->boundary, dmat->alignment); while (buflen > 0) { /* * Get the physical address for this segment. * * XXX Don't support checking for coherent mappings * XXX in user address space. */ KASSERT(kernel_pmap == pmap, ("pmap is not kernel pmap")); curaddr = pmap_kextract(vaddr); /* * Compute the segment size, and adjust counts. */ sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK); if (sgsize > dmat->maxsegsz) sgsize = dmat->maxsegsz; if (buflen < sgsize) sgsize = buflen; if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && run_filter(dmat, curaddr)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sl = &map->slist[map->sync_count - 1]; if (map->sync_count == 0 || vaddr != sl->vaddr + sl->datacount) { if (++map->sync_count > dmat->nsegments) goto cleanup; sl++; sl->vaddr = vaddr; sl->datacount = sgsize; sl->busaddr = curaddr; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } cleanup: /* * Did we fit? */ if (buflen != 0) { - _bus_dmamap_unload(dmat, map); + bus_dmamap_unload(dmat, map); error = EFBIG; /* XXX better return value here? */ } return (error); } void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { KASSERT(dmat != NULL, ("dmatag is NULL")); KASSERT(map != NULL, ("dmamap is NULL")); map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; return; } static void bus_dmamap_sync_buf(vm_offset_t buf, int len, bus_dmasync_op_t op, int aligned) { char tmp_cl[mips_dcache_max_linesize], tmp_clend[mips_dcache_max_linesize]; vm_offset_t buf_cl, buf_clend; vm_size_t size_cl, size_clend; int cache_linesize_mask = mips_dcache_max_linesize - 1; /* * dcache invalidation operates on cache line aligned addresses * and could modify areas of memory that share the same cache line * at the beginning and the ending of the buffer. In order to * prevent a data loss we save these chunks in temporary buffer * before invalidation and restore them afer it. * * If the aligned flag is set the buffer is either an mbuf or came from * our allocator caches. In both cases they are always sized and * aligned to cacheline boundaries, so we can skip preserving nearby * data if a transfer appears to overlap cachelines. An mbuf in * particular will usually appear to be overlapped because of offsetting * within the buffer to align the L3 headers, but we know that the bytes * preceeding that offset are part of the same mbuf memory and are not * unrelated adjacent data (and a rule of mbuf handling is that the cpu * is not allowed to touch the mbuf while dma is in progress, including * header fields). */ if (aligned) { size_cl = 0; size_clend = 0; } else { buf_cl = buf & ~cache_linesize_mask; size_cl = buf & cache_linesize_mask; buf_clend = buf + len; size_clend = (mips_dcache_max_linesize - (buf_clend & cache_linesize_mask)) & cache_linesize_mask; } switch (op) { case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: case BUS_DMASYNC_POSTREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void*)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void*)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void*)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void*)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE: mips_dcache_wbinv_range(buf, len); break; case BUS_DMASYNC_PREREAD: /* * Save buffers that might be modified by invalidation */ if (size_cl) memcpy (tmp_cl, (void *)buf_cl, size_cl); if (size_clend) memcpy (tmp_clend, (void *)buf_clend, size_clend); mips_dcache_inv_range(buf, len); /* * Restore them */ if (size_cl) memcpy ((void *)buf_cl, tmp_cl, size_cl); if (size_clend) memcpy ((void *)buf_clend, tmp_clend, size_clend); /* * Copies above have brought corresponding memory * cache lines back into dirty state. Write them back * out and invalidate affected cache lines again if * necessary. */ if (size_cl) mips_dcache_wbinv_range(buf_cl, size_cl); if (size_clend && (size_cl == 0 || buf_clend - buf_cl > mips_dcache_max_linesize)) mips_dcache_wbinv_range(buf_clend, size_clend); break; case BUS_DMASYNC_PREWRITE: #ifdef BUS_DMA_FORCE_WBINV mips_dcache_wbinv_range(buf, len); #else mips_dcache_wb_range(buf, len); #endif break; } } static void _bus_dmamap_sync_bp(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; STAILQ_FOREACH(bpage, &map->bpages, links) { if (op & BUS_DMASYNC_PREWRITE) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); else physcopyout(bpage->dataaddr, (void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->datacount); if (bpage->vaddr_nocache == 0) { #ifdef BUS_DMA_FORCE_WBINV mips_dcache_wbinv_range(bpage->vaddr, bpage->datacount); #else mips_dcache_wb_range(bpage->vaddr, bpage->datacount); #endif } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { if (bpage->vaddr_nocache == 0) { mips_dcache_inv_range(bpage->vaddr, bpage->datacount); } if (bpage->datavaddr != 0) bcopy((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), (void *)bpage->datavaddr, bpage->datacount); else physcopyin((void *)(bpage->vaddr_nocache != 0 ? bpage->vaddr_nocache : bpage->vaddr), bpage->dataaddr, bpage->datacount); dmat->bounce_zone->total_bounced++; } } } void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct sync_list *sl, *end; int aligned; if (op == BUS_DMASYNC_POSTWRITE) return; if (STAILQ_FIRST(&map->bpages)) _bus_dmamap_sync_bp(dmat, map, op); if ((dmat->flags & BUS_DMA_COHERENT) || (map->flags & DMAMAP_UNCACHEABLE)) { if (op & BUS_DMASYNC_PREWRITE) mips_sync(); return; } aligned = (map->flags & DMAMAP_CACHE_ALIGNED) ? 1 : 0; CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags); if (map->sync_count) { end = &map->slist[map->sync_count]; for (sl = &map->slist[0]; sl != end; sl++) bus_dmamap_sync_buf(sl->vaddr, sl->datacount, op, aligned); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_BUSDMA, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_BUSDMA, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_BOUNCE, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_BUSDMA); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); bpage->vaddr_nocache = (vm_offset_t)pmap_mapdev(bpage->busaddr, PAGE_SIZE); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } Index: head/sys/net/iflib.h =================================================================== --- head/sys/net/iflib.h (revision 320527) +++ head/sys/net/iflib.h (revision 320528) @@ -1,395 +1,394 @@ /*- * Copyright (c) 2014-2017, Matthew Macy (mmacy@nextbsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __IFLIB_H_ #define __IFLIB_H_ #include #include #include #include -#include #include #include /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t * in the future if the need arises. */ typedef uint16_t qidx_t; #define QIDX_INVALID 0xFFFF /* * Most cards can handle much larger TSO requests * but the FreeBSD TCP stack will break on larger * values */ #define FREEBSD_TSO_SIZE_MAX 65518 struct iflib_ctx; typedef struct iflib_ctx *if_ctx_t; struct if_shared_ctx; typedef struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; /* * File organization: * - public structures * - iflib accessors * - iflib utility functions * - iflib core functions */ typedef struct if_rxd_frag { uint8_t irf_flid; qidx_t irf_idx; uint16_t irf_len; } *if_rxd_frag_t; typedef struct if_rxd_info { /* set by iflib */ uint16_t iri_qsidx; /* qset index */ uint16_t iri_vtag; /* vlan tag - if flag set */ /* XXX redundant with the new irf_len field */ uint16_t iri_len; /* packet length */ qidx_t iri_cidx; /* consumer index of cq */ struct ifnet *iri_ifp; /* some drivers >1 interface per softc */ /* updated by driver */ if_rxd_frag_t iri_frags; uint32_t iri_flowid; /* RSS hash for packet */ uint32_t iri_csum_flags; /* m_pkthdr csum flags */ uint32_t iri_csum_data; /* m_pkthdr csum data */ uint8_t iri_flags; /* mbuf flags for packet */ uint8_t iri_nfrags; /* number of fragments in packet */ uint8_t iri_rsstype; /* RSS hash type */ uint8_t iri_pad; /* any padding in the received data */ } *if_rxd_info_t; typedef struct if_rxd_update { uint64_t *iru_paddrs; caddr_t *iru_vaddrs; qidx_t *iru_idxs; qidx_t iru_pidx; uint16_t iru_qsidx; uint16_t iru_count; uint16_t iru_buf_size; uint8_t iru_flidx; } *if_rxd_update_t; #define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */ #define IPI_TX_IPV4 0x2 /* ethertype IPv4 */ #define IPI_TX_IPV6 0x4 /* ethertype IPv6 */ typedef struct if_pkt_info { bus_dma_segment_t *ipi_segs; /* physical addresses */ uint32_t ipi_len; /* packet length */ uint16_t ipi_qsidx; /* queue set index */ qidx_t ipi_nsegs; /* number of segments */ qidx_t ipi_ndescs; /* number of descriptors used by encap */ uint16_t ipi_flags; /* iflib per-packet flags */ qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ uint8_t ipi_ipproto; /* ip protocol */ uint32_t ipi_csum_flags; /* packet checksum flags */ uint16_t ipi_tso_segsz; /* tso segment size */ uint16_t ipi_vtag; /* VLAN tag */ uint16_t ipi_etype; /* ether header type */ uint8_t ipi_tcp_hflags; /* tcp header flags */ uint8_t ipi_mflags; /* packet mbuf flags */ uint32_t ipi_tcp_seq; /* tcp seqno */ uint32_t ipi_tcp_sum; /* tcp csum */ } *if_pkt_info_t; typedef struct if_irq { struct resource *ii_res; int ii_rid; void *ii_tag; } *if_irq_t; struct if_int_delay_info { if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */ int iidi_offset; /* Register offset to read/write */ int iidi_value; /* Current value in usecs */ struct sysctl_oid *iidi_oidp; struct sysctl_req *iidi_req; }; typedef enum { IFLIB_INTR_LEGACY, IFLIB_INTR_MSI, IFLIB_INTR_MSIX } iflib_intr_mode_t; /* * This really belongs in pciio.h or some place more general * but this is the only consumer for now. */ typedef struct pci_vendor_info { uint32_t pvi_vendor_id; uint32_t pvi_device_id; uint32_t pvi_subvendor_id; uint32_t pvi_subdevice_id; uint32_t pvi_rev_id; uint32_t pvi_class_mask; caddr_t pvi_name; } pci_vendor_info_t; #define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name} #define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name} #define PVID_END {0, 0, 0, 0, 0, 0, NULL} typedef struct if_txrx { int (*ift_txd_encap) (void *, if_pkt_info_t); void (*ift_txd_flush) (void *, uint16_t, qidx_t pidx); int (*ift_txd_credits_update) (void *, uint16_t qsidx, bool clear); int (*ift_rxd_available) (void *, uint16_t qsidx, qidx_t pidx, qidx_t budget); int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri); void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); } *if_txrx_t; typedef struct if_softc_ctx { int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; int isc_nrxd[8]; uint32_t isc_txqsizes[8]; uint32_t isc_rxqsizes[8]; /* is there such thing as a descriptor that is more than 248 bytes ? */ uint8_t isc_txd_size[8]; uint8_t isc_rxd_size[8]; int isc_max_txqsets; int isc_max_rxqsets; int isc_tx_tso_segments_max; int isc_tx_tso_size_max; int isc_tx_tso_segsize_max; int isc_tx_csum_flags; int isc_capenable; int isc_rss_table_size; int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ uint32_t isc_pause_frames; /* set by driver for iflib_timer to detect */ pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */ int isc_disable_msix; if_txrx_t isc_txrx; } *if_softc_ctx_t; /* * Initialization values for device */ struct if_shared_ctx { int isc_magic; driver_t *isc_driver; bus_size_t isc_q_align; bus_size_t isc_tx_maxsize; bus_size_t isc_tx_maxsegsize; bus_size_t isc_rx_maxsize; bus_size_t isc_rx_maxsegsize; int isc_rx_nsegments; int isc_admin_intrcnt; /* # of admin/link interrupts */ /* fields necessary for probe */ pci_vendor_info_t *isc_vendor_info; char *isc_driver_version; /* optional function to transform the read values to match the table*/ void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id, uint16_t *subdevice_id, uint16_t *rev_id); int isc_nrxd_min[8]; int isc_nrxd_default[8]; int isc_nrxd_max[8]; int isc_ntxd_min[8]; int isc_ntxd_default[8]; int isc_ntxd_max[8]; /* actively used during operation */ int isc_nfl __aligned(CACHE_LINE_SIZE); int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */ int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */ int isc_rx_process_limit; int isc_tx_reclaim_thresh; int isc_flags; }; typedef struct iflib_dma_info { bus_addr_t idi_paddr; caddr_t idi_vaddr; bus_dma_tag_t idi_tag; bus_dmamap_t idi_map; uint32_t idi_size; } *iflib_dma_info_t; #define IFLIB_MAGIC 0xCAFEF00D typedef enum { IFLIB_INTR_RX, IFLIB_INTR_TX, IFLIB_INTR_RXTX, IFLIB_INTR_ADMIN, IFLIB_INTR_IOV, } iflib_intr_type_t; #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif /* * Interface has a separate command queue for RX */ #define IFLIB_HAS_RXCQ 0x01 /* * Driver has already allocated vectors */ #define IFLIB_SKIP_MSIX 0x02 /* * Interface is a virtual function */ #define IFLIB_IS_VF 0x04 /* * Interface has a separate command queue for TX */ #define IFLIB_HAS_TXCQ 0x08 /* * Interface does checksum in place */ #define IFLIB_NEED_SCRATCH 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ #define IFLIB_TSO_INIT_IP 0x20 /* * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 /* * field accessors */ void *iflib_get_softc(if_ctx_t ctx); device_t iflib_get_dev(if_ctx_t ctx); if_t iflib_get_ifp(if_ctx_t ctx); struct ifmedia *iflib_get_media(if_ctx_t ctx); if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx); if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx); void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]); /* * If the driver can plug cleanly in to newbus use these */ int iflib_device_probe(device_t); int iflib_device_attach(device_t); int iflib_device_detach(device_t); int iflib_device_suspend(device_t); int iflib_device_resume(device_t); int iflib_device_shutdown(device_t); int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *); void iflib_device_iov_uninit(device_t); int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *); /* * If the driver can't plug cleanly in to newbus * use these */ int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp); int iflib_device_deregister(if_ctx_t); int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, char *name); int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name); void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name); void iflib_irq_free(if_ctx_t ctx, if_irq_t irq); void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name); void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name); void iflib_config_gtask_deinit(struct grouptask *gtask); void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid); void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid); void iflib_admin_intr_deferred(if_ctx_t ctx); void iflib_iov_intr_deferred(if_ctx_t ctx); void iflib_link_state_change(if_ctx_t ctx, int linkstate, uint64_t baudrate); int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags); void iflib_dma_free(iflib_dma_info_t dma); int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count); void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); struct mtx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); #endif /* __IFLIB_H_ */ Index: head/sys/powerpc/include/bus_dma.h =================================================================== --- head/sys/powerpc/include/bus_dma.h (revision 320527) +++ head/sys/powerpc/include/bus_dma.h (revision 320528) @@ -1,37 +1,38 @@ /*- * Copyright (c) 2005 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _POWERPC_BUS_DMA_H_ #define _POWERPC_BUS_DMA_H_ #include +#include struct device; int bus_dma_tag_set_iommu(bus_dma_tag_t, struct device *iommu, void *cookie); #endif /* _POWERPC_BUS_DMA_H_ */ Index: head/sys/powerpc/powerpc/busdma_machdep.c =================================================================== --- head/sys/powerpc/powerpc/busdma_machdep.c (revision 320527) +++ head/sys/powerpc/powerpc/busdma_machdep.c (revision 320528) @@ -1,1220 +1,1220 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * From amd64/busdma_machdep.c, r204214 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iommu_if.h" #define MAX_BPAGES MIN(8192, physmem/40) struct bounce_zone; struct bus_dma_tag { bus_dma_tag_t parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; int ref_count; int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; struct bounce_zone *bounce_zone; device_t iommu; void *iommu_cookie; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dma_segment_t *segments; int nsegs; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; int contigalloc; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) { int retval; retval = 0; do { if (dmat->filter == NULL && dmat->iommu == NULL && paddr > dmat->lowaddr && paddr <= dmat->highaddr) retval = 1; if (dmat->filter == NULL && (paddr & (dmat->alignment - 1)) != 0) retval = 1; if (dmat->filter != NULL && (*dmat->filter)(dmat->filterarg, paddr) != 0) retval = 1; dmat = dmat->parent; } while (retval == 0 && dmat != NULL); return (retval); } /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ static void dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } #define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error = 0; /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) { return (EINVAL); } /* Return a NULL tag on failure */ *dmat = NULL; newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, error); return (ENOMEM); } newtag->parent = parent; newtag->alignment = alignment; newtag->boundary = boundary; newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); newtag->filter = filter; newtag->filterarg = filterarg; newtag->maxsize = maxsize; newtag->nsegments = nsegments; newtag->maxsegsz = maxsegsz; newtag->flags = flags; newtag->ref_count = 1; /* Count ourself */ newtag->map_count = 0; if (lockfunc != NULL) { newtag->lockfunc = lockfunc; newtag->lockfuncarg = lockfuncarg; } else { newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); if (newtag->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; newtag->filterarg = parent->filterarg; newtag->parent = parent->parent; } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); newtag->iommu = parent->iommu; newtag->iommu_cookie = parent->iommu_cookie; } if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; } if (error != 0) { free(newtag, M_DEVBUF); } else { *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->flags : 0), error); return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { bus_dma_tag_t parent; parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { int error; error = 0; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ struct bounce_zone *bz; int maxpages; if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = MAX(atop(dmat->maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; } else { error = 0; } } bz->map_count++; } (*mapp)->nsegs = 0; (*mapp)->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, M_NOWAIT); if ((*mapp)->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; } free(map->segments, M_DEVBUF); free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; bus_dmamap_create(dmat, flags, mapp); if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; #ifdef NOTYET if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else #endif attr = VM_MEMATTR_DEFAULT; /* * XXX: * (dmat->alignment <= dmat->maxsize) is just a quick hack; the exact * alignment guarantees of malloc need to be nailed down, and the * code below should be rewritten to take that into account. * * In the meantime, we'll warn the user if malloc gets it wrong. */ if ((dmat->maxsize <= PAGE_SIZE) && (dmat->alignment <= dmat->maxsize) && dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); } else { /* * XXX Use Contigmalloc until it is merged into this facility * and handles multi-seg allocations. Nobody is doing * multi-seg allocations yet though. * XXX Certain AGP hardware does. */ *vaddr = (void *)kmem_alloc_contig(kmem_arena, dmat->maxsize, mflags, 0ul, dmat->lowaddr, dmat->alignment ? dmat->alignment : 1ul, dmat->boundary, attr); (*mapp)->contigalloc = 1; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { if (!map->contigalloc) free(vaddr, M_DEVBUF); else kmem_free(kmem_arena, (vm_offset_t)vaddr, dmat->maxsize); bus_dmamap_destroy(dmat, map); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); if (run_filter(dmat, curaddr) != 0) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { bus_size_t sg_len; sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (run_filter(dmat, paddr) != 0) { sg_len = roundup2(sg_len, dmat->alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->boundary - 1); if (dmat->boundary > 0) { baddr = (curaddr + dmat->boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && (dmat->boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_addr_t curaddr; bus_size_t sgsize; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, segs, segp)); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrance, and the ending segment on exit. */ int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; vm_offset_t kvaddr, vaddr; int error; if (segs == NULL) segs = map->segments; if ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { bus_size_t max_sgsize; /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { sgsize = roundup2(sgsize, dmat->alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (dmat->flags & BUS_DMA_COULD_BOUNCE) { map->dmat = dmat; map->mem = *mem; map->callback = callback; map->callback_arg = callback_arg; } } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { map->nsegs = nsegs; if (segs != NULL) memcpy(map->segments, segs, map->nsegs*sizeof(segs[0])); if (dmat->iommu != NULL) IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, dmat->highaddr, dmat->alignment, dmat->boundary, dmat->iommu_cookie); if (segs != NULL) memcpy(segs, map->segments, map->nsegs*sizeof(segs[0])); else segs = map->segments; return (segs); } /* * Release the mapping held by map. */ void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (dmat->iommu) { IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie); map->nsegs = 0; } while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { /* * Handle data bouncing. We might also * want to add support for invalidating * the caches on broken hardware */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->flags, op); if (op & BUS_DMASYNC_PREWRITE) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if (op & BUS_DMASYNC_POSTREAD) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } powerpc_sync(); } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->alignment <= bz->alignment) && (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->lowaddr; bz->alignment = MAX(dmat->alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } int bus_dma_tag_set_iommu(bus_dma_tag_t tag, device_t iommu, void *cookie) { tag->iommu = iommu; tag->iommu_cookie = cookie; return (0); } Index: head/sys/riscv/include/bus_dma.h =================================================================== --- head/sys/riscv/include/bus_dma.h (revision 320527) +++ head/sys/riscv/include/bus_dma.h (revision 320528) @@ -1,8 +1,9 @@ /* $FreeBSD$ */ #ifndef _MACHINE_BUS_DMA_H_ #define _MACHINE_BUS_DMA_H_ #include +#include #endif /* !_MACHINE_BUS_DMA_H_ */ Index: head/sys/riscv/riscv/busdma_machdep.c =================================================================== --- head/sys/riscv/riscv/busdma_machdep.c (revision 320527) +++ head/sys/riscv/riscv/busdma_machdep.c (revision 320528) @@ -1,102 +1,102 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { panic("busdma"); } int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { panic("busdma"); } int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { panic("busdma"); } void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { panic("busdma"); } bus_dma_segment_t * _bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { panic("busdma"); } /* * Release the mapping held by map. */ void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { panic("busdma"); } void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { panic("busdma"); } Index: head/sys/sparc64/include/bus_dma.h =================================================================== --- head/sys/sparc64/include/bus_dma.h (revision 320527) +++ head/sys/sparc64/include/bus_dma.h (revision 320528) @@ -1,152 +1,219 @@ /*- * Copyright (c) 1996, 1997, 1998, 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1997-1999 Eduardo E. Horvath. All rights reserved. * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: NetBSD: bus.h,v 1.58 2008/04/28 20:23:36 martin Exp * and * from: FreeBSD: src/sys/alpha/include/bus.h,v 1.9 2001/01/09 * * $FreeBSD$ */ #ifndef _SPARC64_BUS_DMA_H #define _SPARC64_BUS_DMA_H +#define WANT_INLINE_DMAMAP #include /* DMA support */ /* * Method table for a bus_dma_tag. */ struct bus_dma_methods { int (*dm_dmamap_create)(bus_dma_tag_t, int, bus_dmamap_t *); int (*dm_dmamap_destroy)(bus_dma_tag_t, bus_dmamap_t); int (*dm_dmamap_load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); int (*dm_dmamap_load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp); void (*dm_dmamap_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg); bus_dma_segment_t *(*dm_dmamap_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error); void (*dm_dmamap_unload)(bus_dma_tag_t, bus_dmamap_t); void (*dm_dmamap_sync)(bus_dma_tag_t, bus_dmamap_t, bus_dmasync_op_t); int (*dm_dmamem_alloc)(bus_dma_tag_t, void **, int, bus_dmamap_t *); void (*dm_dmamem_free)(bus_dma_tag_t, void *, bus_dmamap_t); }; /* * bus_dma_tag_t * * A machine-dependent opaque type describing the implementation of * DMA for a given bus. */ struct bus_dma_tag { void *dt_cookie; /* cookie used in the guts */ bus_dma_tag_t dt_parent; bus_size_t dt_alignment; bus_addr_t dt_boundary; bus_addr_t dt_lowaddr; bus_addr_t dt_highaddr; bus_dma_filter_t *dt_filter; void *dt_filterarg; bus_size_t dt_maxsize; int dt_nsegments; bus_size_t dt_maxsegsz; int dt_flags; int dt_ref_count; int dt_map_count; bus_dma_lock_t *dt_lockfunc; void * *dt_lockfuncarg; bus_dma_segment_t *dt_segments; struct bus_dma_methods *dt_mt; }; -#define bus_dmamap_create(t, f, p) \ - ((t)->dt_mt->dm_dmamap_create((t), (f), (p))) -#define bus_dmamap_destroy(t, p) \ - ((t)->dt_mt->dm_dmamap_destroy((t), (p))) -#define _bus_dmamap_load_phys(t, m, b, l, f, s, sp) \ - ((t)->dt_mt->dm_dmamap_load_phys((t), (m), (b), (l), \ - (f), (s), (sp))) -#define _bus_dmamap_load_buffer(t, m, b, l, p, f, s, sp) \ - ((t)->dt_mt->dm_dmamap_load_buffer((t), (m), (b), (l), (p), \ - (f), (s), (sp))) -#define _bus_dmamap_waitok(t, m, mem, c, ca) \ - ((t)->dt_mt->dm_dmamap_waitok((t), (m), (mem), (c), (ca))) -#define _bus_dmamap_complete(t, m, s, n, e) \ - ((t)->dt_mt->dm_dmamap_complete((t), (m), (s), (n), (e))) -#define bus_dmamap_unload(t, p) \ - ((t)->dt_mt->dm_dmamap_unload((t), (p))) -#define bus_dmamap_sync(t, m, op) \ - ((t)->dt_mt->dm_dmamap_sync((t), (m), (op))) -#define bus_dmamem_alloc(t, v, f, m) \ - ((t)->dt_mt->dm_dmamem_alloc((t), (v), (f), (m))) -#define bus_dmamem_free(t, v, m) \ - ((t)->dt_mt->dm_dmamem_free((t), (v), (m))) -#define _bus_dmamap_load_ma(t, m, a, tt, o, f, s, p) \ - bus_dmamap_load_ma_triv((t), (m), (a), (tt), (o), (f), (s), (p)) +static inline int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + + return (dmat->dt_mt->dm_dmamap_create(dmat, flags, mapp)); +} + +static inline int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + + return (dmat->dt_mt->dm_dmamap_destroy(dmat, map)); +} + +static inline void +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + + dmat->dt_mt->dm_dmamap_sync(dmat, map, op); +} + +static inline void +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + + dmat->dt_mt->dm_dmamap_unload(dmat, map); +} + +static inline int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) +{ + + return (dmat->dt_mt->dm_dmamem_alloc(dmat, vaddr, flags, mapp)); +} + +static inline void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + + dmat->dt_mt->dm_dmamem_free(dmat, vaddr, map); +} + +static inline bus_dma_segment_t* +_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error) +{ + + return (dmat->dt_mt->dm_dmamap_complete(dmat, map, segs, + nsegs, error)); +} + +static inline int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, + void *buf, bus_size_t buflen, struct pmap *pmap, + int flags, bus_dma_segment_t *segs, int *segp) +{ + + return (dmat->dt_mt->dm_dmamap_load_buffer(dmat, map, buf, buflen, + pmap, flags, segs, segp)); +} + +static inline int +_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, + struct vm_page **ma, bus_size_t tlen, int ma_offs, + int flags, bus_dma_segment_t *segs, int *segp) +{ + + return (bus_dmamap_load_ma_triv(dmat, map, ma, tlen, ma_offs, flags, + segs, segp)); +} + +static inline int +_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t paddr, bus_size_t buflen, + int flags, bus_dma_segment_t *segs, int *segp) +{ + + return (dmat->dt_mt->dm_dmamap_load_phys(dmat, map, paddr, buflen, + flags, segs, segp)); +} + +static inline void +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, + void *callback_arg) +{ + + return (dmat->dt_mt->dm_dmamap_waitok(dmat, map, mem, callback, + callback_arg)); +} #endif /* !_SPARC64_BUS_DMA_H_ */ Index: head/sys/sys/bus_dma.h =================================================================== --- head/sys/sys/bus_dma.h (revision 320527) +++ head/sys/sys/bus_dma.h (revision 320528) @@ -1,352 +1,296 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _BUS_DMA_H_ #define _BUS_DMA_H_ #include /* * Machine independent interface for mapping physical addresses to peripheral * bus 'physical' addresses, and assisting with DMA operations. * * XXX This file is always included from and should not * (yet) be included directly. */ /* * Flags used in various bus DMA methods. */ #define BUS_DMA_WAITOK 0x00 /* safe to sleep (pseudo-flag) */ #define BUS_DMA_NOWAIT 0x01 /* not safe to sleep */ #define BUS_DMA_ALLOCNOW 0x02 /* perform resource allocation now */ #define BUS_DMA_COHERENT 0x04 /* hint: map memory in a coherent way */ #define BUS_DMA_ZERO 0x08 /* allocate zero'ed memory */ #define BUS_DMA_BUS1 0x10 /* placeholders for bus functions... */ #define BUS_DMA_BUS2 0x20 #define BUS_DMA_BUS3 0x40 #define BUS_DMA_BUS4 0x80 /* * The following two flags are non-standard or specific to only certain * architectures */ #define BUS_DMA_NOWRITE 0x100 #define BUS_DMA_NOCACHE 0x200 /* * The following flag is a DMA tag hint that the page offset of the * loaded kernel virtual address must be preserved in the first * physical segment address, when the KVA is loaded into DMA. */ #define BUS_DMA_KEEP_PG_OFFSET 0x400 #define BUS_DMA_LOAD_MBUF 0x800 /* Forwards needed by prototypes below. */ union ccb; struct bio; struct mbuf; struct memdesc; struct pmap; struct uio; /* * Operations performed by bus_dmamap_sync(). */ #define BUS_DMASYNC_PREREAD 1 #define BUS_DMASYNC_POSTREAD 2 #define BUS_DMASYNC_PREWRITE 4 #define BUS_DMASYNC_POSTWRITE 8 /* * bus_dma_segment_t * * Describes a single contiguous DMA transaction. Values * are suitable for programming into DMA registers. */ typedef struct bus_dma_segment { bus_addr_t ds_addr; /* DMA address */ bus_size_t ds_len; /* length of transfer */ } bus_dma_segment_t; /* * A function that returns 1 if the address cannot be accessed by * a device and 0 if it can be. */ typedef int bus_dma_filter_t(void *, bus_addr_t); /* * Generic helper function for manipulating mutexes. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op); /* * Allocate a device specific dma_tag encapsulating the constraints of * the parent tag in addition to other restrictions specified: * * alignment: Alignment for segments. * boundary: Boundary that segments cannot cross. * lowaddr: Low restricted address that cannot appear in a mapping. * highaddr: High restricted address that cannot appear in a mapping. * filtfunc: An optional function to further test if an address * within the range of lowaddr and highaddr cannot appear * in a mapping. * filtfuncarg: An argument that will be passed to filtfunc in addition * to the address to test. * maxsize: Maximum mapping size supported by this tag. * nsegments: Number of discontinuities allowed in maps. * maxsegsz: Maximum size of a segment in the map. * flags: Bus DMA flags. * lockfunc: An optional function to handle driver-defined lock * operations. * lockfuncarg: An argument that will be passed to lockfunc in addition * to the lock operation. * dmat: A pointer to set to a valid dma tag should the return * value of this function indicate success. */ /* XXX Should probably allow specification of alignment */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filtfunc, void *filtfuncarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat); int bus_dma_tag_destroy(bus_dma_tag_t dmat); /* * A function that processes a successfully loaded dma map or an error * from a delayed load map. */ typedef void bus_dmamap_callback_t(void *, bus_dma_segment_t *, int, int); /* * Like bus_dmamap_callback but includes map size in bytes. This is * defined as a separate interface to maintain compatibility for users * of bus_dmamap_callback_t--at some point these interfaces should be merged. */ typedef void bus_dmamap_callback2_t(void *, bus_dma_segment_t *, int, bus_size_t, int); /* * Map the buffer buf into bus space using the dmamap map. */ int bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, bus_dmamap_callback_t *callback, void *callback_arg, int flags); /* * Like bus_dmamap_load but for mbufs. Note the use of the * bus_dmamap_callback2_t interface. */ int bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *mbuf, bus_dmamap_callback2_t *callback, void *callback_arg, int flags); int bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *mbuf, bus_dma_segment_t *segs, int *nsegs, int flags); /* * Like bus_dmamap_load but for uios. Note the use of the * bus_dmamap_callback2_t interface. */ int bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *ui, bus_dmamap_callback2_t *callback, void *callback_arg, int flags); /* * Like bus_dmamap_load but for cam control blocks. */ int bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb, bus_dmamap_callback_t *callback, void *callback_arg, int flags); /* * Like bus_dmamap_load but for bios. */ int bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio, bus_dmamap_callback_t *callback, void *callback_arg, int flags); /* * Loads any memory descriptor. */ int bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg, int flags); /* * Placeholder for use by busdma implementations which do not benefit * from optimized procedure to load an array of vm_page_t. Falls back * to do _bus_dmamap_load_phys() in loop. */ int bus_dmamap_load_ma_triv(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp); -/* - * XXX sparc64 uses the same interface, but a much different implementation. - * for the sparc64 arch contains the equivalent - * declarations. - */ -#if !defined(__sparc64__) +#ifdef WANT_INLINE_DMAMAP +#define BUS_DMAMAP_OP static inline +#else +#define BUS_DMAMAP_OP +#endif /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ -int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); +BUS_DMAMAP_OP int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ -int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map); +BUS_DMAMAP_OP int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map); /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints listed in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ -int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, +BUS_DMAMAP_OP int bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp); /* * Free a piece of memory and its allocated dmamap, that was allocated * via bus_dmamem_alloc. */ -void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); +BUS_DMAMAP_OP void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); /* * Perform a synchronization operation on the given map. If the map - * is NULL we have a fully IO-coherent system. On every ARM architecture - * there must be a memory barrier placed to ensure that all data - * accesses are visible before going any further. + * is NULL we have a fully IO-coherent system. */ -void _bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_dmasync_op_t); -#if defined(__arm__) - #define __BUS_DMAMAP_SYNC_DEFAULT mb() -#elif defined(__aarch64__) - #define __BUS_DMAMAP_SYNC_DEFAULT dmb(sy) -#else - #define __BUS_DMAMAP_SYNC_DEFAULT do {} while (0) -#endif -#define bus_dmamap_sync(dmat, dmamap, op) \ - do { \ - if ((dmamap) != NULL) \ - _bus_dmamap_sync(dmat, dmamap, op); \ - else \ - __BUS_DMAMAP_SYNC_DEFAULT; \ - } while (0) +BUS_DMAMAP_OP void bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t dmamap, bus_dmasync_op_t op); /* * Release the mapping held by map. */ -void _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map); -#define bus_dmamap_unload(dmat, dmamap) \ - do { \ - if ((dmamap) != NULL) \ - _bus_dmamap_unload(dmat, dmamap); \ - } while (0) +BUS_DMAMAP_OP void bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t dmamap); -/* - * The following functions define the interface between the MD and MI - * busdma layers. These are not intended for consumption by driver - * software. - */ -void __bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, - struct memdesc *mem, - bus_dmamap_callback_t *callback, - void *callback_arg); - -#define _bus_dmamap_waitok(dmat, map, mem, callback, callback_arg) \ - do { \ - if ((map) != NULL) \ - __bus_dmamap_waitok(dmat, map, mem, callback, \ - callback_arg); \ - } while (0); - -int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, - void *buf, bus_size_t buflen, struct pmap *pmap, - int flags, bus_dma_segment_t *segs, int *segp); - -int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, - vm_paddr_t paddr, bus_size_t buflen, - int flags, bus_dma_segment_t *segs, int *segp); - -int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, - struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, - bus_dma_segment_t *segs, int *segp); - -bus_dma_segment_t *_bus_dmamap_complete(bus_dma_tag_t dmat, - bus_dmamap_t map, - bus_dma_segment_t *segs, - int nsegs, int error); - -#endif /* __sparc64__ */ +#undef BUS_DMAMAP_OP #endif /* _BUS_DMA_H_ */ Index: head/sys/sys/bus_dma_internal.h =================================================================== --- head/sys/sys/bus_dma_internal.h (nonexistent) +++ head/sys/sys/bus_dma_internal.h (revision 320528) @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2017 Jason A. Harmening. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _BUS_DMA_INTERNAL_H_ +#define _BUS_DMA_INTERNAL_H_ + +/* + * The following functions define the interface between the MD and MI + * busdma layers. These are not intended for consumption by driver + * software. + */ + +bus_dma_segment_t *_bus_dmamap_complete(bus_dma_tag_t dmat, + bus_dmamap_t map, bus_dma_segment_t *segs, + int nsegs, int error); + +int _bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, + void *buf, bus_size_t buflen, struct pmap *pmap, + int flags, bus_dma_segment_t *segs, int *segp); + +int _bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, + struct vm_page **ma, bus_size_t tlen, int ma_offs, + int flags, bus_dma_segment_t *segs, int *segp); + +int _bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, + vm_paddr_t paddr, bus_size_t buflen, + int flags, bus_dma_segment_t *segs, int *segp); + +void _bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, + void *callback_arg); + +#endif /* !_BUS_DMA_INTERNAL_H_ */ + Property changes on: head/sys/sys/bus_dma_internal.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/x86/include/bus_dma.h =================================================================== --- head/sys/x86/include/bus_dma.h (nonexistent) +++ head/sys/x86/include/bus_dma.h (revision 320528) @@ -0,0 +1,183 @@ +/*- + * Copyright (c) 2017 Jason A. Harmening. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _X86_BUS_DMA_H_ +#define _X86_BUS_DMA_H_ + +#define WANT_INLINE_DMAMAP +#include +#include + +#include + +/* + * Allocate a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static inline int +bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_create(dmat, flags, mapp)); +} + +/* + * Destroy a handle for mapping from kva/uva/physical + * address space into bus device space. + */ +static inline int +bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_destroy(dmat, map)); +} + +/* + * Allocate a piece of memory that can be efficiently mapped into + * bus device space based on the constraints lited in the dma tag. + * A dmamap to for use with dmamap_load is also allocated. + */ +static inline int +bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, + bus_dmamap_t *mapp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); +} + +/* + * Free a piece of memory and it's allociated dmamap, that was allocated + * via bus_dmamem_alloc. Make the same choice for free/contigfree. + */ +static inline void +bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->mem_free(dmat, vaddr, map); +} + +/* + * Release the mapping held by map. + */ +static inline void +bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) +{ + struct bus_dma_tag_common *tc; + + if (map != NULL) { + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_unload(dmat, map); + } +} + +static inline void +bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) +{ + struct bus_dma_tag_common *tc; + + if (map != NULL) { + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_sync(dmat, map, op); + } +} + +/* + * Utility function to load a physical buffer. segp contains + * the starting segment on entrace, and the ending segment on exit. + */ +static inline int +_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, + bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, + segp)); +} + +static inline int +_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, + bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, + segs, segp)); +} + +/* + * Utility function to load a linear buffer. segp contains + * the starting segment on entrace, and the ending segment on exit. + */ +static inline int +_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, + bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, + int *segp) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, + segp)); +} + +static inline void +_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, + struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) +{ + struct bus_dma_tag_common *tc; + + if (map != NULL) { + tc = (struct bus_dma_tag_common *)dmat; + tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); + } +} + +static inline bus_dma_segment_t * +_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, + bus_dma_segment_t *segs, int nsegs, int error) +{ + struct bus_dma_tag_common *tc; + + tc = (struct bus_dma_tag_common *)dmat; + return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); +} + +#endif /* !_X86_BUS_DMA_H_ */ + Property changes on: head/sys/x86/include/bus_dma.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/x86/include/busdma_impl.h =================================================================== --- head/sys/x86/include/busdma_impl.h (revision 320527) +++ head/sys/x86/include/busdma_impl.h (revision 320528) @@ -1,96 +1,96 @@ /*- * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __X86_BUSDMA_IMPL_H #define __X86_BUSDMA_IMPL_H struct bus_dma_tag_common { struct bus_dma_impl *impl; struct bus_dma_tag_common *parent; bus_size_t alignment; bus_addr_t boundary; bus_addr_t lowaddr; bus_addr_t highaddr; bus_dma_filter_t *filter; void *filterarg; bus_size_t maxsize; u_int nsegments; bus_size_t maxsegsz; int flags; bus_dma_lock_t *lockfunc; void *lockfuncarg; int ref_count; }; struct bus_dma_impl { int (*tag_create)(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat); int (*tag_destroy)(bus_dma_tag_t dmat); int (*map_create)(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp); int (*map_destroy)(bus_dma_tag_t dmat, bus_dmamap_t map); int (*mem_alloc)(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp); void (*mem_free)(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map); int (*load_ma)(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp); int (*load_phys)(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp); int (*load_buffer)(bus_dma_tag_t dmat, bus_dmamap_t map, - void *buf, bus_size_t buflen, pmap_t pmap, int flags, + void *buf, bus_size_t buflen, struct pmap *pmap, int flags, bus_dma_segment_t *segs, int *segp); void (*map_waitok)(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg); bus_dma_segment_t *(*map_complete)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error); void (*map_unload)(bus_dma_tag_t dmat, bus_dmamap_t map); void (*map_sync)(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op); }; void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op); int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr); int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat); extern struct bus_dma_impl bus_dma_bounce_impl; #endif Index: head/sys/x86/iommu/busdma_dmar.c =================================================================== --- head/sys/x86/iommu/busdma_dmar.c (revision 320527) +++ head/sys/x86/iommu/busdma_dmar.c (revision 320528) @@ -1,910 +1,912 @@ /*- * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * busdma_dmar.c, the implementation of the busdma(9) interface using * DMAR units from Intel VT-d. */ static bool dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func) { char str[128], *env; int default_bounce; bool ret; static const char bounce_str[] = "bounce"; static const char dmar_str[] = "dmar"; default_bounce = 0; env = kern_getenv("hw.busdma.default"); if (env != NULL) { if (strcmp(env, bounce_str) == 0) default_bounce = 1; else if (strcmp(env, dmar_str) == 0) default_bounce = 0; freeenv(env); } snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d", domain, bus, slot, func); env = kern_getenv(str); if (env == NULL) return (default_bounce != 0); if (strcmp(env, bounce_str) == 0) ret = true; else if (strcmp(env, dmar_str) == 0) ret = false; else ret = default_bounce != 0; freeenv(env); return (ret); } /* * Given original device, find the requester ID that will be seen by * the DMAR unit and used for page table lookup. PCI bridges may take * ownership of transactions from downstream devices, so it may not be * the same as the BSF of the target device. In those cases, all * devices downstream of the bridge must share a single mapping * domain, and must collectively be assigned to use either DMAR or * bounce mapping. */ device_t dmar_get_requester(device_t dev, uint16_t *rid) { devclass_t pci_class; device_t l, pci, pcib, pcip, pcibp, requester; int cap_offset; uint16_t pcie_flags; bool bridge_is_pcie; pci_class = devclass_find("pci"); l = requester = dev; *rid = pci_get_rid(dev); /* * Walk the bridge hierarchy from the target device to the * host port to find the translating bridge nearest the DMAR * unit. */ for (;;) { pci = device_get_parent(l); KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent " "for %s", device_get_name(dev), device_get_name(l))); KASSERT(device_get_devclass(pci) == pci_class, ("dmar_get_requester(%s): non-pci parent %s for %s", device_get_name(dev), device_get_name(pci), device_get_name(l))); pcib = device_get_parent(pci); KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge " "for %s", device_get_name(dev), device_get_name(pci))); /* * The parent of our "bridge" isn't another PCI bus, * so pcib isn't a PCI->PCI bridge but rather a host * port, and the requester ID won't be translated * further. */ pcip = device_get_parent(pcib); if (device_get_devclass(pcip) != pci_class) break; pcibp = device_get_parent(pcip); if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) { /* * Do not stop the loop even if the target * device is PCIe, because it is possible (but * unlikely) to have a PCI->PCIe bridge * somewhere in the hierarchy. */ l = pcib; } else { /* * Device is not PCIe, it cannot be seen as a * requester by DMAR unit. Check whether the * bridge is PCIe. */ bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS, &cap_offset) == 0; requester = pcib; /* * Check for a buggy PCIe/PCI bridge that * doesn't report the express capability. If * the bridge above it is express but isn't a * PCI bridge, then we know pcib is actually a * PCIe/PCI bridge. */ if (!bridge_is_pcie && pci_find_cap(pcibp, PCIY_EXPRESS, &cap_offset) == 0) { pcie_flags = pci_read_config(pcibp, cap_offset + PCIER_FLAGS, 2); if ((pcie_flags & PCIEM_FLAGS_TYPE) != PCIEM_TYPE_PCI_BRIDGE) bridge_is_pcie = true; } if (bridge_is_pcie) { /* * The current device is not PCIe, but * the bridge above it is. This is a * PCIe->PCI bridge. Assume that the * requester ID will be the secondary * bus number with slot and function * set to zero. * * XXX: Doesn't handle the case where * the bridge is PCIe->PCI-X, and the * bridge will only take ownership of * requests in some cases. We should * provide context entries with the * same page tables for taken and * non-taken transactions. */ *rid = PCI_RID(pci_get_bus(l), 0, 0); l = pcibp; } else { /* * Neither the device nor the bridge * above it are PCIe. This is a * conventional PCI->PCI bridge, which * will use the bridge's BSF as the * requester ID. */ *rid = pci_get_rid(pcib); l = pcib; } } } return (requester); } struct dmar_ctx * dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr) { device_t requester; struct dmar_ctx *ctx; bool disabled; uint16_t rid; requester = dmar_get_requester(dev, &rid); /* * If the user requested the IOMMU disabled for the device, we * cannot disable the DMAR, due to possibility of other * devices on the same DMAR still requiring translation. * Instead provide the identity mapping for the device * context. */ disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester), pci_get_bus(requester), pci_get_slot(requester), pci_get_function(requester)); ctx = dmar_get_ctx_for_dev(dmar, requester, rid, disabled, rmrr); if (ctx == NULL) return (NULL); if (disabled) { /* * Keep the first reference on context, release the * later refs. */ DMAR_LOCK(dmar); if ((ctx->flags & DMAR_CTX_DISABLED) == 0) { ctx->flags |= DMAR_CTX_DISABLED; DMAR_UNLOCK(dmar); } else { dmar_free_ctx_locked(dmar, ctx); } ctx = NULL; } return (ctx); } bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child) { struct dmar_unit *dmar; struct dmar_ctx *ctx; bus_dma_tag_t res; dmar = dmar_find(child); /* Not in scope of any DMAR ? */ if (dmar == NULL) return (NULL); if (!dmar->dma_enabled) return (NULL); dmar_quirks_pre_use(dmar); dmar_instantiate_rmrr_ctxs(dmar); ctx = dmar_instantiate_ctx(dmar, child, false); res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag; return (res); } static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map"); static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map); static int dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_dmar *newtag, *oldtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_dmar), (void **)&newtag); if (error != 0) goto out; oldtag = (struct bus_dma_tag_dmar *)parent; newtag->common.impl = &bus_dma_dmar_impl; newtag->ctx = oldtag->ctx; newtag->owner = oldtag->owner; *dmat = (bus_dma_tag_t)newtag; out: CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1) { struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent; int error; error = 0; dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (struct bus_dma_tag_dmar *)dmat->common.parent; if (atomic_fetchadd_int(&dmat->common.ref_count, -1) == 1) { if (dmat == &dmat->ctx->ctx_tag) dmar_free_ctx(dmat->ctx); free(dmat->segments, M_DMAR_DMAMAP); free(dmat, M_DEVBUF); dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static int dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); + tag = (struct bus_dma_tag_dmar *)dmat; map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO); if (map == NULL) { *mapp = NULL; return (ENOMEM); } if (tag->segments == NULL) { tag->segments = malloc(sizeof(bus_dma_segment_t) * tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT); if (tag->segments == NULL) { free(map, M_DMAR_DMAMAP); *mapp = NULL; return (ENOMEM); } } TAILQ_INIT(&map->map_entries); map->tag = tag; map->locked = true; map->cansleep = false; tag->map_count++; *mapp = (bus_dmamap_t)map; return (0); } static int dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_domain *domain; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if (map != NULL) { domain = tag->ctx->domain; DMAR_DOMAIN_LOCK(domain); if (!TAILQ_EMPTY(&map->map_entries)) { DMAR_DOMAIN_UNLOCK(domain); return (EBUSY); } DMAR_DOMAIN_UNLOCK(domain); free(map, M_DMAR_DMAMAP); } tag->map_count--; return (0); } static int dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; int error, mflags; vm_memattr_t attr; error = dmar_bus_dmamap_create(dmat, flags, mapp); if (error != 0) return (error); mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK; mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0; attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE : VM_MEMATTR_DEFAULT; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)*mapp; if (tag->common.maxsize < PAGE_SIZE && tag->common.alignment <= tag->common.maxsize && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags); map->flags |= BUS_DMAMAP_DMAR_MALLOC; } else { *vaddr = (void *)kmem_alloc_attr(kernel_arena, tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR, attr); map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC; } if (*vaddr == NULL) { dmar_bus_dmamap_destroy(dmat, *mapp); *mapp = NULL; return (ENOMEM); } return (0); } static void dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) { free(vaddr, M_DEVBUF); map->flags &= ~BUS_DMAMAP_DMAR_MALLOC; } else { KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0, ("dmar_bus_dmamem_free for non alloced map %p", map)); kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize); map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC; } dmar_bus_dmamap_destroy(dmat, map1); } static int dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag, struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp, struct dmar_map_entries_tailq *unroll_list) { struct dmar_ctx *ctx; struct dmar_domain *domain; struct dmar_map_entry *entry; dmar_gaddr_t size; bus_size_t buflen1; int error, idx, gas_flags, seg; KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset)); if (segs == NULL) segs = tag->segments; ctx = tag->ctx; domain = ctx->domain; seg = *segp; error = 0; idx = 0; while (buflen > 0) { seg++; if (seg >= tag->common.nsegments) { error = EFBIG; break; } buflen1 = buflen > tag->common.maxsegsz ? tag->common.maxsegsz : buflen; size = round_page(offset + buflen1); /* * (Too) optimistically allow split if there are more * then one segments left. */ gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0; if (seg + 1 < tag->common.nsegments) gas_flags |= DMAR_GM_CANSPLIT; error = dmar_gas_map(domain, &tag->common, size, offset, DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE, gas_flags, ma + idx, &entry); if (error != 0) break; if ((gas_flags & DMAR_GM_CANSPLIT) != 0) { KASSERT(size >= entry->end - entry->start, ("split increased entry size %jx %jx %jx", (uintmax_t)size, (uintmax_t)entry->start, (uintmax_t)entry->end)); size = entry->end - entry->start; if (buflen1 > size) buflen1 = size; } else { KASSERT(entry->end - entry->start == size, ("no split allowed %jx %jx %jx", (uintmax_t)size, (uintmax_t)entry->start, (uintmax_t)entry->end)); } if (offset + buflen1 > size) buflen1 = size - offset; if (buflen1 > tag->common.maxsegsz) buflen1 = tag->common.maxsegsz; KASSERT(((entry->start + offset) & (tag->common.alignment - 1)) == 0, ("alignment failed: ctx %p start 0x%jx offset %x " "align 0x%jx", ctx, (uintmax_t)entry->start, offset, (uintmax_t)tag->common.alignment)); KASSERT(entry->end <= tag->common.lowaddr || entry->start >= tag->common.highaddr, ("entry placement failed: ctx %p start 0x%jx end 0x%jx " "lowaddr 0x%jx highaddr 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.lowaddr, (uintmax_t)tag->common.highaddr)); KASSERT(dmar_test_boundary(entry->start + offset, buflen1, tag->common.boundary), ("boundary failed: ctx %p start 0x%jx end 0x%jx " "boundary 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)tag->common.boundary)); KASSERT(buflen1 <= tag->common.maxsegsz, ("segment too large: ctx %p start 0x%jx end 0x%jx " "buflen1 0x%jx maxsegsz 0x%jx", ctx, (uintmax_t)entry->start, (uintmax_t)entry->end, (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz)); DMAR_DOMAIN_LOCK(domain); TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); entry->flags |= DMAR_MAP_ENTRY_MAP; DMAR_DOMAIN_UNLOCK(domain); TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link); segs[seg].ds_addr = entry->start + offset; segs[seg].ds_len = buflen1; idx += OFF_TO_IDX(trunc_page(offset + buflen1)); offset += buflen1; offset &= DMAR_PAGE_MASK; buflen -= buflen1; } if (error == 0) *segp = seg; return (error); } static int dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag, struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct dmar_ctx *ctx; struct dmar_domain *domain; struct dmar_map_entry *entry, *entry1; struct dmar_map_entries_tailq unroll_list; int error; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->loads, 1); TAILQ_INIT(&unroll_list); error = dmar_bus_dmamap_load_something1(tag, map, ma, offset, buflen, flags, segs, segp, &unroll_list); if (error != 0) { /* * The busdma interface does not allow us to report * partial buffer load, so unfortunately we have to * revert all work done. */ DMAR_DOMAIN_LOCK(domain); TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link, entry1) { /* * No entries other than what we have created * during the failed run might have been * inserted there in between, since we own ctx * pglock. */ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link); TAILQ_REMOVE(&unroll_list, entry, unroll_link); TAILQ_INSERT_TAIL(&domain->unload_entries, entry, dmamap_link); } DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); } if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 && !map->cansleep) error = EINPROGRESS; if (error == EINPROGRESS) dmar_bus_schedule_dmamap(domain->dmar, map); return (error); } static int dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1, struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen, flags, segs, segp)); } static int dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma; vm_paddr_t pstart, pend; int error, i, ma_cnt, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page(buf); pend = round_page(buf + buflen); offset = buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? M_WAITOK : M_NOWAIT); if (ma == NULL) return (ENOMEM); for (i = 0; i < ma_cnt; i++) ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE); error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); return (error); } static int dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; vm_page_t *ma, fma; vm_paddr_t pstart, pend, paddr; int error, i, ma_cnt, offset; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; pstart = trunc_page((vm_offset_t)buf); pend = round_page((vm_offset_t)buf + buflen); offset = (vm_offset_t)buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ? M_WAITOK : M_NOWAIT); if (ma == NULL) return (ENOMEM); if (dumping) { /* * If dumping, do not attempt to call * PHYS_TO_VM_PAGE() at all. It may return non-NULL * but the vm_page returned might be not initialized, * e.g. for the kernel itself. */ KASSERT(pmap == kernel_pmap, ("non-kernel address write")); fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT)); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { paddr = pmap_kextract(pstart); vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } else { fma = NULL; for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { if (pmap == kernel_pmap) paddr = pmap_kextract(pstart); else paddr = pmap_extract(pmap, pstart); ma[i] = PHYS_TO_VM_PAGE(paddr); KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr, ("PHYS_TO_VM_PAGE failed %jx %jx m %p", (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]), ma[i])); } } error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); free(fma, M_DEVBUF); return (error); } static void dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { struct bus_dmamap_dmar *map; if (map1 == NULL) return; map = (struct bus_dmamap_dmar *)map1; map->mem = *mem; map->tag = (struct bus_dma_tag_dmar *)dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1, bus_dma_segment_t *segs, int nsegs, int error) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; if (!map->locked) { KASSERT(map->cansleep, ("map not locked and not sleepable context %p", map)); /* * We are called from the delayed context. Relock the * driver. */ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK); map->locked = true; } if (segs == NULL) segs = tag->segments; return (segs); } /* * The limitations of busdma KPI forces the dmar to perform the actual * unload, consisting of the unmapping of the map entries page tables, * from the delayed context on i386, since page table page mapping * might require a sleep to be successfull. The unfortunate * consequence is that the DMA requests can be served some time after * the bus_dmamap_unload() call returned. * * On amd64, we assume that sf allocation cannot fail. */ static void dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_ctx *ctx; struct dmar_domain *domain; #if defined(__amd64__) struct dmar_map_entries_tailq entries; #endif tag = (struct bus_dma_tag_dmar *)dmat; map = (struct bus_dmamap_dmar *)map1; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->unloads, 1); #if defined(__i386__) DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->dmar->delayed_taskqueue, &domain->unload_task); #else /* defined(__amd64__) */ TAILQ_INIT(&entries); DMAR_DOMAIN_LOCK(domain); TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link); DMAR_DOMAIN_UNLOCK(domain); THREAD_NO_SLEEPING(); dmar_domain_unload(domain, &entries, false); THREAD_SLEEPING_OK(); KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx)); #endif } static void dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { } struct bus_dma_impl bus_dma_dmar_impl = { .tag_create = dmar_bus_dma_tag_create, .tag_destroy = dmar_bus_dma_tag_destroy, .map_create = dmar_bus_dmamap_create, .map_destroy = dmar_bus_dmamap_destroy, .mem_alloc = dmar_bus_dmamem_alloc, .mem_free = dmar_bus_dmamem_free, .load_phys = dmar_bus_dmamap_load_phys, .load_buffer = dmar_bus_dmamap_load_buffer, .load_ma = dmar_bus_dmamap_load_ma, .map_waitok = dmar_bus_dmamap_waitok, .map_complete = dmar_bus_dmamap_complete, .map_unload = dmar_bus_dmamap_unload, .map_sync = dmar_bus_dmamap_sync }; static void dmar_bus_task_dmamap(void *arg, int pending) { struct bus_dma_tag_dmar *tag; struct bus_dmamap_dmar *map; struct dmar_unit *unit; unit = arg; DMAR_LOCK(unit); while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) { TAILQ_REMOVE(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); tag = map->tag; map->cansleep = true; map->locked = false; bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); map->cansleep = false; if (map->locked) { (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_UNLOCK); } else map->locked = true; map->cansleep = false; DMAR_LOCK(unit); } DMAR_UNLOCK(unit); } static void dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map) { map->locked = false; DMAR_LOCK(unit); TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link); DMAR_UNLOCK(unit); taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task); } int dmar_init_busdma(struct dmar_unit *unit) { unit->dma_enabled = 1; TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled); TAILQ_INIT(&unit->delayed_maps); TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit); unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK, taskqueue_thread_enqueue, &unit->delayed_taskqueue); taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK, "dmar%d busdma taskq", unit->unit); return (0); } void dmar_fini_busdma(struct dmar_unit *unit) { if (unit->delayed_taskqueue == NULL) return; taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task); taskqueue_free(unit->delayed_taskqueue); unit->delayed_taskqueue = NULL; } Index: head/sys/x86/x86/busdma_bounce.c =================================================================== --- head/sys/x86/x86/busdma_bounce.c (revision 320527) +++ head/sys/x86/x86/busdma_bounce.c (revision 320528) @@ -1,1279 +1,1283 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define MAX_BPAGES 512 #else #define MAX_BPAGES 8192 #endif enum { BUS_DMA_COULD_BOUNCE = 0x01, BUS_DMA_MIN_ALLOC_COMP = 0x02, BUS_DMA_KMEM_ALLOC = 0x04, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_offset_t dataoffs; /* page offset of client data */ vm_page_t datapage[2]; /* physical page(s) of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr1, bus_addr_t addr2, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if (parent != NULL && ((newtag->common.filter != NULL) || ((parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0))) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); + error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } else { *mapp = NULL; } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (map != NULL && map != &nobounce_dmamap) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free(map, M_DEVBUF); } dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; /* If we succeed, no mapping/bouncing will be required */ *mapp = NULL; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. */ if ((dmat->common.maxsize <= PAGE_SIZE) && (dmat->common.alignment <= dmat->common.maxsize) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= howmany(dmat->common.maxsize, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * dmamem does not need to be bounced, so the map should be * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. */ if (map != NULL) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, int ma_offs, bus_size_t buflen, int flags) { bus_size_t sg_len, max_sgsize; int page_index; vm_paddr_t paddr; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ page_index = 0; while (buflen > 0) { paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; sg_len = PAGE_SIZE - ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sg_len = MIN(sg_len, max_sgsize); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); sg_len = MIN(sg_len, max_sgsize); KASSERT((sg_len & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); map->pagesneeded++; } if (((ma_offs + sg_len) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sg_len) & PAGE_MASK; KASSERT(buflen >= sg_len, ("Segment length overruns original buffer")); buflen -= sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, 0, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize, max_sgsize; bus_addr_t curaddr; vm_offset_t kvaddr, vaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, 0, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static int bounce_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, bus_size_t buflen, int ma_offs, int flags, bus_dma_segment_t *segs, int *segp) { vm_paddr_t paddr, next_paddr; int error, page_index; bus_size_t sgsize, max_sgsize; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * If we have to keep the offset of each page this function * is not suitable, switch back to bus_dmamap_load_ma_triv * which is going to do the right thing in this case. */ error = bus_dmamap_load_ma_triv(dmat, map, ma, buflen, ma_offs, flags, segs, segp); return (error); } if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_ma(dmat, map, ma, ma_offs, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } page_index = 0; while (buflen > 0) { /* * Compute the segment size, and adjust counts. */ paddr = VM_PAGE_TO_PHYS(ma[page_index]) + ma_offs; max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - ma_offs; if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, paddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); KASSERT((sgsize & (dmat->common.alignment - 1)) == 0, ("Segment size is not aligned")); /* * Check if two pages of the user provided buffer * are used. */ if ((ma_offs + sgsize) > PAGE_SIZE) next_paddr = VM_PAGE_TO_PHYS(ma[page_index + 1]); else next_paddr = 0; paddr = add_bounce_page(dmat, map, 0, paddr, next_paddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, paddr, sgsize, segs, segp); if (sgsize == 0) break; KASSERT(buflen >= sgsize, ("Segment length overruns original buffer")); buflen -= sgsize; if (((ma_offs + sgsize) & ~PAGE_MASK) != 0) page_index++; ma_offs = (ma_offs + sgsize) & PAGE_MASK; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (map == NULL) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; if (map == NULL) return; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; vm_offset_t datavaddr, tempvaddr; bus_size_t datacount1, datacount2; if (map == NULL || (bpage = STAILQ_FIRST(&map->bpages)) == NULL) return; /* * Handle data bouncing. We might also want to add support for * invalidating the caches on broken hardware. */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)datavaddr, (void *)bpage->vaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_w; } /* * We are dealing with an unmapped buffer that expands * over two pages. */ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)datavaddr, (void *)(bpage->vaddr + datacount1), datacount2); pmap_quick_remove_page(datavaddr); next_w: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; datacount1 = bpage->datacount; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page(bpage->datapage[0]); datavaddr = tempvaddr | bpage->dataoffs; datacount1 = min(PAGE_SIZE - bpage->dataoffs, datacount1); } bcopy((void *)bpage->vaddr, (void *)datavaddr, datacount1); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if (bpage->datapage[1] == 0) { KASSERT(datacount1 == bpage->datacount, ("Mismatch between data size and provided memory space")); goto next_r; } /* * We are dealing with an unmapped buffer that expands * over two pages. */ datavaddr = pmap_quick_enter_page(bpage->datapage[1]); datacount2 = bpage->datacount - datacount1; bcopy((void *)(bpage->vaddr + datacount1), (void *)datavaddr, datacount2); pmap_quick_remove_page(datavaddr); next_r: bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? */ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr1, bus_addr_t addr2, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL && map != &nobounce_dmamap, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. */ bpage->vaddr |= addr1 & PAGE_MASK; bpage->busaddr |= addr1 & PAGE_MASK; KASSERT(addr2 == 0, ("Trying to bounce multiple pages with BUS_DMA_KEEP_PG_OFFSET")); } bpage->datavaddr = vaddr; bpage->datapage[0] = PHYS_TO_VM_PAGE(addr1); KASSERT((addr2 & PAGE_MASK) == 0, ("Second page is not aligned")); bpage->datapage[1] = PHYS_TO_VM_PAGE(addr2); bpage->dataoffs = addr1 & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bounce_bus_dmamap_load_ma, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; Index: head/sys/x86/x86/busdma_machdep.c =================================================================== --- head/sys/x86/x86/busdma_machdep.c (revision 320527) +++ head/sys/x86/x86/busdma_machdep.c (revision 320528) @@ -1,365 +1,223 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Convenience function for manipulating driver locks from busdma (during * busdma_swi, for example). Drivers that don't provide their own locks * should specify &Giant to dmat->lockfuncarg. Drivers that use their own * non-mutex locking scheme don't have to use this at all. */ void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) { struct mtx *dmtx; dmtx = (struct mtx *)arg; switch (op) { case BUS_DMA_LOCK: mtx_lock(dmtx); break; case BUS_DMA_UNLOCK: mtx_unlock(dmtx); break; default: panic("Unknown operation 0x%x for busdma_lock_mutex!", op); } } /* * dflt_lock should never get called. It gets put into the dma tag when * lockfunc == NULL, which is only valid if the maps that are associated * with the tag are meant to never be defered. * XXX Should have a way to identify which driver is responsible here. */ void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op) { panic("driver error: busdma dflt_lock called"); } /* * Return true if a match is made. * * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. * * If paddr is within the bounds of the dma tag then call the filter callback * to check for a match, if there is no filter callback then assume a match. */ int bus_dma_run_filter(struct bus_dma_tag_common *tc, bus_addr_t paddr) { int retval; retval = 0; do { if (((paddr > tc->lowaddr && paddr <= tc->highaddr) || ((paddr & (tc->alignment - 1)) != 0)) && (tc->filter == NULL || (*tc->filter)(tc->filterarg, paddr) != 0)) retval = 1; tc = tc->parent; } while (retval == 0 && tc != NULL); return (retval); } int common_bus_dma_tag_create(struct bus_dma_tag_common *parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, size_t sz, void **dmat) { void *newtag; struct bus_dma_tag_common *common; KASSERT(sz >= sizeof(struct bus_dma_tag_common), ("sz")); /* Basic sanity checking */ if (boundary != 0 && boundary < maxsegsz) maxsegsz = boundary; if (maxsegsz == 0) return (EINVAL); /* Return a NULL tag on failure */ *dmat = NULL; newtag = malloc(sz, M_DEVBUF, M_ZERO | M_NOWAIT); if (newtag == NULL) { CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, 0, ENOMEM); return (ENOMEM); } common = newtag; common->impl = &bus_dma_bounce_impl; common->parent = parent; common->alignment = alignment; common->boundary = boundary; common->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); common->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); common->filter = filter; common->filterarg = filterarg; common->maxsize = maxsize; common->nsegments = nsegments; common->maxsegsz = maxsegsz; common->flags = flags; common->ref_count = 1; /* Count ourself */ if (lockfunc != NULL) { common->lockfunc = lockfunc; common->lockfuncarg = lockfuncarg; } else { common->lockfunc = bus_dma_dflt_lock; common->lockfuncarg = NULL; } /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { common->impl = parent->impl; common->lowaddr = MIN(parent->lowaddr, common->lowaddr); common->highaddr = MAX(parent->highaddr, common->highaddr); if (common->boundary == 0) common->boundary = parent->boundary; else if (parent->boundary != 0) { common->boundary = MIN(parent->boundary, common->boundary); } if (common->filter == NULL) { /* * Short circuit looking at our parent directly * since we have encapsulated all of its information */ common->filter = parent->filter; common->filterarg = parent->filterarg; common->parent = parent->parent; } atomic_add_int(&parent->ref_count, 1); } *dmat = common; return (0); } /* * Allocate a device specific dma_tag. */ int bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { struct bus_dma_tag_common *tc; int error; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); if (parent == NULL) { error = bus_dma_bounce_impl.tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } else { tc = (struct bus_dma_tag_common *)parent; error = tc->impl->tag_create(parent, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, dmat); } return (error); } int bus_dma_tag_destroy(bus_dma_tag_t dmat) { struct bus_dma_tag_common *tc; tc = (struct bus_dma_tag_common *)dmat; return (tc->impl->tag_destroy(dmat)); } -/* - * Allocate a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) -{ - struct bus_dma_tag_common *tc; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_create(dmat, flags, mapp)); -} - -/* - * Destroy a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_destroy(dmat, map)); -} - - -/* - * Allocate a piece of memory that can be efficiently mapped into - * bus device space based on the constraints lited in the dma tag. - * A dmamap to for use with dmamap_load is also allocated. - */ -int -bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) -{ - struct bus_dma_tag_common *tc; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s", __func__); - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->mem_alloc(dmat, vaddr, flags, mapp)); -} - -/* - * Free a piece of memory and it's allociated dmamap, that was allocated - * via bus_dmamem_alloc. Make the same choice for free/contigfree. - */ -void -bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->mem_free(dmat, vaddr, map); -} - -/* - * Utility function to load a physical buffer. segp contains - * the starting segment on entrace, and the ending segment on exit. - */ -int -_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, - bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_phys(dmat, map, buf, buflen, flags, segs, - segp)); -} - -int -_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map, struct vm_page **ma, - bus_size_t tlen, int ma_offs, int flags, bus_dma_segment_t *segs, - int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_ma(dmat, map, ma, tlen, ma_offs, flags, - segs, segp)); -} - -/* - * Utility function to load a linear buffer. segp contains - * the starting segment on entrace, and the ending segment on exit. - */ -int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, - bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, - int *segp) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->load_buffer(dmat, map, buf, buflen, pmap, flags, segs, - segp)); -} - -void -__bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, - struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_waitok(dmat, map, mem, callback, callback_arg); -} - -bus_dma_segment_t * -_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, - bus_dma_segment_t *segs, int nsegs, int error) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - return (tc->impl->map_complete(dmat, map, segs, nsegs, error)); -} - -/* - * Release the mapping held by map. - */ -void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_unload(dmat, map); -} - -void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) -{ - struct bus_dma_tag_common *tc; - - tc = (struct bus_dma_tag_common *)dmat; - tc->impl->map_sync(dmat, map, op); -}