diff --git a/sys/arm64/arm64/busdma_bounce.c b/sys/arm64/arm64/busdma_bounce.c index d2e63d7b72d8..b2d00b9c0abd 100644 --- a/sys/arm64/arm64/busdma_bounce.c +++ b/sys/arm64/arm64/busdma_bounce.c @@ -1,1481 +1,1487 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Andrew Turner * under sponsorship of the FreeBSD Foundation. * * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_BPAGES 4096 enum { BF_COULD_BOUNCE = 0x01, BF_MIN_ALLOC_COMP = 0x02, BF_KMEM_ALLOC = 0x04, BF_COHERENT = 0x10, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; size_t alloc_size; size_t alloc_alignment; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ vm_page_t datapage; /* physical page of client data */ vm_offset_t dataoffs; /* page offset of client data */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct sync_list { vm_offset_t vaddr; /* kva of client data */ bus_addr_t paddr; /* physical address */ vm_page_t pages; /* starting page of client data */ bus_size_t datacount; /* client data count */ }; struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; u_int flags; #define DMAMAP_COHERENT (1 << 0) #define DMAMAP_FROM_DMAMEM (1 << 1) #define DMAMAP_MBUF (1 << 2) int sync_count; struct sync_list slist[]; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int *pagesneeded); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); /* * Return true if the DMA should bounce because the start or end does not fall * on a cacheline boundary (which would require a partial cacheline flush). * COHERENT memory doesn't trigger cacheline flushes. 
Memory allocated by * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a * strict rule that such memory cannot be accessed by the CPU while DMA is in * progress (or by multiple DMA engines at once), so that it's always safe to do * full cacheline flushes even if that affects memory outside the range of a * given DMA operation that doesn't involve the full allocated buffer. If we're * mapping an mbuf, that follows the same rules as a buffer we allocated. */ static bool cacheline_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, bus_size_t size) { #define DMAMAP_CACHELINE_FLAGS \ (DMAMAP_FROM_DMAMEM | DMAMAP_COHERENT | DMAMAP_MBUF) if ((dmat->bounce_flags & BF_COHERENT) != 0) return (false); if (map != NULL && (map->flags & DMAMAP_CACHELINE_FLAGS) != 0) return (false); return (((paddr | size) & (dcache_line_size - 1)) != 0); #undef DMAMAP_CACHELINE_FLAGS } /* * Return true if the given address does not fall on the alignment boundary. */ static bool alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) { return ((addr & (dmat->common.alignment - 1)) != 0); } static bool might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, bus_size_t size) { /* Memory allocated by bounce_bus_dmamem_alloc won't bounce */ if (map && (map->flags & DMAMAP_FROM_DMAMEM) != 0) return (false); if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0) return (true); if (cacheline_bounce(dmat, map, paddr, size)) return (true); if (alignment_bounce(dmat, paddr)) return (true); return (false); } static bool must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, bus_size_t size) { if (cacheline_bounce(dmat, map, paddr, size)) return (true); if (alignment_bounce(dmat, paddr)) return (true); if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0 && bus_dma_run_filter(&dmat->common, paddr)) return (true); return (false); } /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if ((flags & BUS_DMA_COHERENT) != 0) { newtag->bounce_flags |= BF_COHERENT; newtag->alloc_alignment = newtag->common.alignment; newtag->alloc_size = newtag->common.maxsize; } else { /* * Ensure the buffer is aligned to a cacheline when allocating * a non-coherent buffer. This is so we don't have any data * that another CPU may be accessing around DMA buffer * causing the cache to become dirty. 
*/ newtag->alloc_alignment = MAX(newtag->common.alignment, dcache_line_size); newtag->alloc_size = roundup2(newtag->common.maxsize, dcache_line_size); } if (parent != NULL) { if ((newtag->common.filter != NULL || (parent->bounce_flags & BF_COULD_BOUNCE) != 0)) newtag->bounce_flags |= BF_COULD_BOUNCE; /* Copy some flags from the parent */ newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT; } if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BF_COULD_BOUNCE; if ((flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* * Round size up to a full page, and add one more page because * there can always be one more boundary crossing than the * number of pages in a transfer. */ maxsize = roundup2(maxsize, PAGE_SIZE) + PAGE_SIZE; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) + 1 - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BF_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { - bus_dma_tag_t dmat_copy, parent; +#ifdef KTR + bus_dma_tag_t dmat_copy; +#endif + bus_dma_tag_t parent; int error; error = 0; +#ifdef KTR dmat_copy = dmat; +#endif + if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } static bool bounce_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen) { if (!might_bounce(dmat, NULL, buf, buflen)) return (true); return (!_bus_dmamap_pagesneeded(dmat, NULL, buf, buflen, NULL)); } static bus_dmamap_t alloc_dmamap(bus_dma_tag_t dmat, int flags) { u_long mapsize; bus_dmamap_t map; mapsize = sizeof(*map); mapsize += sizeof(struct sync_list) * dmat->common.nsegments; map = malloc(mapsize, M_DEVBUF, flags | M_ZERO); if (map == NULL) return (NULL); /* Initialize the new map */ STAILQ_INIT(&map->bpages); return (map); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
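 *
 * Illustrative aside on the BUS_DMA_ALLOCNOW sizing above (assuming 4 KiB
 * pages): for maxsize == 16384, roundup2(16384, PAGE_SIZE) + PAGE_SIZE ==
 * 20480, i.e. five bounce pages, because a four-page-sized transfer that
 * starts in the middle of a page spans five physical pages.
 *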
*/ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } *mapp = alloc_dmamap(dmat, M_NOWAIT); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) { free(*mapp, M_DEVBUF); return (error); } } bz = dmat->bounce_zone; /* * Attempt to add pages to our pool on a per-instance basis up to a sane * limit. Even if the tag isn't subject of bouncing due to alignment * and boundary constraints, it could still auto-bounce due to * cacheline alignment, which requires at most two bounce pages. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = atop(roundup2(dmat->common.maxsize, PAGE_SIZE)) + 1; pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 2); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BF_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BF_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; if (error == 0) { dmat->map_count++; if ((dmat->bounce_flags & BF_COHERENT) != 0) (*mapp)->flags |= DMAMAP_COHERENT; } else { free(*mapp, M_DEVBUF); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { /* Check we are destroying the correct map type */ if ((map->flags & DMAMAP_FROM_DMAMEM) != 0) panic("bounce_bus_dmamap_destroy: Invalid map freed\n"); if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. 
*/ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else if ((flags & BUS_DMA_COHERENT) != 0 && (dmat->bounce_flags & BF_COHERENT) == 0) /* * If we have a non-coherent tag, and are trying to allocate * a coherent block of memory it needs to be uncached. */ attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * Create the map, but don't set the could bounce flag as * this allocation should never bounce; */ *mapp = alloc_dmamap(dmat, mflags); if (*mapp == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } /* * Mark the map as coherent if we used uncacheable memory or the * tag was already marked as coherent. */ if (attr == VM_MEMATTR_UNCACHEABLE || (dmat->bounce_flags & BF_COHERENT) != 0) (*mapp)->flags |= DMAMAP_COHERENT; (*mapp)->flags |= DMAMAP_FROM_DMAMEM; /* * Allocate the buffer from the malloc(9) allocator if... * - It's small enough to fit into a single power of two sized bucket. * - The alignment is less than or equal to the maximum size * - The low address requirement is fulfilled. * else allocate non-contiguous pages if... * - The page count that could get allocated doesn't exceed * nsegments also when the maximum segment size is less * than PAGE_SIZE. * - The alignment constraint isn't larger than a page boundary. * - There are no boundary-crossing constraints. * else allocate a block of contiguous pages because one or more of the * constraints is something that only the contig allocator can fulfill. * * NOTE: The (dmat->common.alignment <= dmat->maxsize) check * below is just a quick hack. The exact alignment guarantees * of malloc(9) need to be nailed down, and the code below * should be rewritten to take that into account. * * In the meantime warn the user if malloc gets it wrong. */ if ((dmat->alloc_size <= PAGE_SIZE) && (dmat->alloc_alignment <= dmat->alloc_size) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->alloc_size, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= howmany(dmat->alloc_size, MIN(dmat->common.maxsegsz, PAGE_SIZE)) && dmat->alloc_alignment <= PAGE_SIZE && (dmat->common.boundary % PAGE_SIZE) == 0) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(dmat->alloc_size, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig(dmat->alloc_size, mflags, 0ul, dmat->common.lowaddr, dmat->alloc_alignment != 0 ? 
dmat->alloc_alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BF_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); free(*mapp, M_DEVBUF); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->alloc_alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * Check the map came from bounce_bus_dmamem_alloc, so the map * should be NULL and the BF_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. */ if ((map->flags & DMAMAP_FROM_DMAMEM) == 0) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BF_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free((vm_offset_t)vaddr, dmat->alloc_size); free(map, M_DEVBUF); dmat->map_count--; CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int *pagesneeded) { bus_addr_t curaddr; bus_size_t sgsize; int count; /* * Count the number of bounce pages needed in order to * complete this transfer */ count = 0; curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (must_bounce(dmat, map, curaddr, sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); if (pagesneeded == NULL) return (true); count++; } curaddr += sgsize; buflen -= sgsize; } if (pagesneeded != NULL) *pagesneeded = count; return (count != 0); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { if (map->pagesneeded == 0) { _bus_dmamap_pagesneeded(dmat, map, buf, buflen, &map->pagesneeded); CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if (map->pagesneeded == 0) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (must_bounce(dmat, map, paddr, min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)))) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) 
!= 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static bus_size_t _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize; bus_addr_t curaddr, sl_end; int error; if (segs == NULL) segs = dmat->segments; if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } sl = map->slist + map->sync_count - 1; sl_end = 0; while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { /* * The attempt to split a physically continuous buffer * seems very controversial, it's unclear whether we * can do this in all cases. Also, memory for bounced * buffers is allocated as pages, so we cannot * guarantee multipage alignment. */ KASSERT(dmat->common.alignment <= PAGE_SIZE, ("bounced buffer cannot have alignment bigger " "than PAGE_SIZE: %lu", dmat->common.alignment)); sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } else if ((map->flags & DMAMAP_COHERENT) == 0) { if (map->sync_count > 0) sl_end = sl->paddr + sl->datacount; if (map->sync_count == 0 || curaddr != sl_end) { if (++map->sync_count > dmat->common.nsegments) break; sl++; sl->vaddr = 0; sl->paddr = curaddr; sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not in " "vm_page_array", __func__, curaddr)); sl->datacount = sgsize; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
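 *
 * Illustrative example of the boundary clipping done in _bus_dmamap_addseg
 * above (assuming dmat->common.boundary == 0x10000): for curaddr 0x1f000
 * and sgsize 0x3000, baddr == (0x1f000 + 0x10000) & ~0xffff == 0x20000, so
 * sgsize is clipped to 0x1000 and the remainder is placed in a following
 * segment on the far side of the 64 KiB boundary.
 *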
*/ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct sync_list *sl; bus_size_t sgsize; bus_addr_t curaddr, sl_pend; vm_offset_t kvaddr, vaddr, sl_vend; int error; KASSERT((map->flags & DMAMAP_FROM_DMAMEM) != 0 || dmat->common.alignment <= PAGE_SIZE, ("loading user buffer with alignment bigger than PAGE_SIZE is not " "supported")); if (segs == NULL) segs = dmat->segments; if (flags & BUS_DMA_LOAD_MBUF) map->flags |= DMAMAP_MBUF; if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } /* * XXX Optimally we should parse input buffer for physically * continuous segments first and then pass these segment into * load loop. */ sl = map->slist + map->sync_count - 1; vaddr = (vm_offset_t)buf; sl_pend = 0; sl_vend = 0; while (buflen > 0) { /* * Get the physical address for this segment. */ if (__predict_true(pmap == kernel_pmap)) { curaddr = pmap_kextract(vaddr); kvaddr = vaddr; } else { curaddr = pmap_extract(pmap, vaddr); kvaddr = 0; } /* * Compute the segment size, and adjust counts. */ sgsize = MIN(buflen, dmat->common.maxsegsz); if ((map->flags & DMAMAP_FROM_DMAMEM) == 0) sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK)); if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, sgsize)) { /* See comment in bounce_bus_dmamap_load_phys */ KASSERT(dmat->common.alignment <= PAGE_SIZE, ("bounced buffer cannot have alignment bigger " "than PAGE_SIZE: %lu", dmat->common.alignment)); curaddr = add_bounce_page(dmat, map, kvaddr, curaddr, sgsize); } else if ((map->flags & DMAMAP_COHERENT) == 0) { if (map->sync_count > 0) { sl_pend = sl->paddr + sl->datacount; sl_vend = sl->vaddr + sl->datacount; } if (map->sync_count == 0 || (kvaddr != 0 && kvaddr != sl_vend) || (curaddr != sl_pend)) { if (++map->sync_count > dmat->common.nsegments) break; sl++; sl->vaddr = kvaddr; sl->paddr = curaddr; if (kvaddr != 0) { sl->pages = NULL; } else { sl->pages = PHYS_TO_VM_PAGE(curaddr); KASSERT(sl->pages != NULL, ("%s: page at PA:0x%08lx is not " "in vm_page_array", __func__, curaddr)); } sl->datacount = sgsize; } else sl->datacount += sgsize; } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ if (buflen != 0) { bus_dmamap_unload(dmat, map); return (EFBIG); /* XXX better return value here? */ } return (0); } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } map->sync_count = 0; map->flags &= ~DMAMAP_MBUF; } static void dma_preread_safe(vm_offset_t va, vm_size_t size) { /* * Write back any partial cachelines immediately before and * after the DMA region. 
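 *
 * Illustrative example (assuming a 64-byte dcache line): for va 0x1020 and
 * size 0x100 the lines at 0x1000 and 0x1100 are written back first, so CPU
 * stores sharing those lines (e.g. an mbuf header) are not lost, and only
 * then is the whole range invalidated ahead of the device write.
 *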
*/ if (va & (dcache_line_size - 1)) cpu_dcache_wb_range(va, 1); if ((va + size) & (dcache_line_size - 1)) cpu_dcache_wb_range(va + size, 1); cpu_dcache_inv_range(va, size); } static void dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op) { uint32_t len, offset; vm_page_t m; vm_paddr_t pa; vm_offset_t va, tempva; bus_size_t size; offset = sl->paddr & PAGE_MASK; m = sl->pages; size = sl->datacount; pa = sl->paddr; for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) { tempva = 0; if (sl->vaddr == 0) { len = min(PAGE_SIZE - offset, size); tempva = pmap_quick_enter_page(m); va = tempva | offset; KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset), ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx", VM_PAGE_TO_PHYS(m) | offset, pa)); } else { len = sl->datacount; va = sl->vaddr; } switch (op) { case BUS_DMASYNC_PREWRITE: case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD: cpu_dcache_wb_range(va, len); break; case BUS_DMASYNC_PREREAD: /* * An mbuf may start in the middle of a cacheline. There * will be no cpu writes to the beginning of that line * (which contains the mbuf header) while dma is in * progress. Handle that case by doing a writeback of * just the first cacheline before invalidating the * overall buffer. Any mbuf in a chain may have this * misalignment. Buffers which are not mbufs bounce if * they are not aligned to a cacheline. */ dma_preread_safe(va, len); break; case BUS_DMASYNC_POSTREAD: case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE: cpu_dcache_inv_range(va, len); break; default: panic("unsupported combination of sync operations: " "0x%08x\n", op); } if (tempva != 0) pmap_quick_remove_page(tempva); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; struct sync_list *sl, *end; vm_offset_t datavaddr, tempvaddr; if (op == BUS_DMASYNC_POSTWRITE) return; if ((op & BUS_DMASYNC_POSTREAD) != 0) { /* * Wait for any DMA operations to complete before the bcopy. */ dsb(sy); } if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)datavaddr, (void *)bpage->vaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); if ((map->flags & DMAMAP_COHERENT) == 0) cpu_dcache_wb_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } else if ((op & BUS_DMASYNC_PREREAD) != 0) { while (bpage != NULL) { if ((map->flags & DMAMAP_COHERENT) == 0) cpu_dcache_wbinv_range(bpage->vaddr, bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { if ((map->flags & DMAMAP_COHERENT) == 0) cpu_dcache_inv_range(bpage->vaddr, bpage->datacount); tempvaddr = 0; datavaddr = bpage->datavaddr; if (datavaddr == 0) { tempvaddr = pmap_quick_enter_page( bpage->datapage); datavaddr = tempvaddr | bpage->dataoffs; } bcopy((void *)bpage->vaddr, (void *)datavaddr, bpage->datacount); if (tempvaddr != 0) pmap_quick_remove_page(tempvaddr); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } /* * Cache maintenance for normal (non-COHERENT non-bounce) buffers. 
*/ if (map->sync_count != 0) { sl = &map->slist[0]; end = &map->slist[map->sync_count]; CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x " "performing sync", __func__, dmat, op); for ( ; sl != end; ++sl) dma_dcache_sync(sl, op); } if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) { /* * Wait for the bcopy to complete before any DMA operations. */ dsb(sy); } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
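 *
 * Illustrative example (assuming 4 KiB pages): a client buffer at
 * physical address 0x12345678 has page offset 0x678, so a bounce page
 * whose busaddr is, say, 0x800a0000 is handed back as 0x800a0678,
 * keeping the offset-within-page that the driver or device may rely on.
 *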
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->datapage = PHYS_TO_VM_PAGE(addr); bpage->dataoffs = addr & PAGE_MASK; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .id_mapped = bounce_bus_dma_id_mapped, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bus_dmamap_load_ma_triv, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index ec1afd2d7b3e..c528de5c2e62 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -1,286 +1,288 @@ /*- * Copyright (c) 2014, 2015 The FreeBSD Foundation. * Copyright (c) 2014, 2017 Andrew Turner. * Copyright (c) 2018 Olivier Houchard * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #include #define FREEBSD32_MINUSER 0x00001000 #define FREEBSD32_MAXUSER ((1ul << 32) - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (FREEBSD32_MAXUSER - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE extern const char *freebsd32_syscallnames[]; extern char aarch32_sigcode[]; extern int sz_aarch32_sigcode; static int freebsd32_fetch_syscall_args(struct thread *td); static void freebsd32_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void freebsd32_set_syscall_retval(struct thread *, int); static boolean_t elf32_arm_abi_supported(struct image_params *, int32_t *, uint32_t *); extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); u_long __read_frequently elf32_hwcap; u_long __read_frequently elf32_hwcap2; static struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_transtrap = NULL, .sv_fixup = elf32_freebsd_fixup, .sv_sendsig = freebsd32_sendsig, .sv_sigcode = aarch32_sigcode, .sv_szsigcode = &sz_aarch32_sigcode, .sv_name = "FreeBSD ELF32", .sv_coredump = elf32_coredump, .sv_elf_core_osabi = ELFOSABI_FREEBSD, .sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR, .sv_elf_core_prepare_notes = elf32_prepare_notes, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_minuser = FREEBSD32_MINUSER, .sv_maxuser = FREEBSD32_MAXUSER, .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_READ | VM_PROT_WRITE, .sv_copyout_auxargs = elf32_freebsd_copyout_auxargs, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_setregs, .sv_fixlimit = NULL, // XXX .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32 | SV_SHP | SV_TIMEKEEP | SV_RNG_SEED_VER, .sv_set_syscall_retval = freebsd32_set_syscall_retval, .sv_fetch_syscall_args = freebsd32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, .sv_hwcap = &elf32_hwcap, .sv_hwcap2 = &elf32_hwcap2, .sv_onexec_old = exec_onexec_old, .sv_onexit = exit_onexit, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd32_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_ARM, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .brand_note = &elf32_freebsd_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE, .header_supported= elf32_arm_abi_supported, }; 
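/*
 * Illustrative values for the 32-bit layout above (assuming a 4 KiB
 * PAGE_SIZE): FREEBSD32_MAXUSER == (1ul << 32) - 4096 == 0xfffff000, so
 * FREEBSD32_SHAREDPAGE == FREEBSD32_USRSTACK == 0xffffe000 and the user
 * stack grows down from just below the shared page.
 */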
SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)elf32_insert_brand_entry, &freebsd32_brand_info); static boolean_t elf32_arm_abi_supported(struct image_params *imgp, int32_t *osrel __unused, uint32_t *fctl0 __unused) { const Elf32_Ehdr *hdr; /* Check if we support AArch32 */ if (ID_AA64PFR0_EL0_VAL(READ_SPECIALREG(id_aa64pfr0_el1)) != ID_AA64PFR0_EL0_64_32) return (FALSE); #define EF_ARM_EABI_VERSION(x) (((x) & EF_ARM_EABIMASK) >> 24) #define EF_ARM_EABI_FREEBSD_MIN 4 hdr = (const Elf32_Ehdr *)imgp->image_header; if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) { if (bootverbose) uprintf("Attempting to execute non EABI binary " "(rev %d) image %s", EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname); return (FALSE); } return (TRUE); } static int freebsd32_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int error, i, nap, narg; unsigned int args[4]; nap = 4; p = td->td_proc; ap = td->td_frame->tf_x; sa = &td->td_sa; /* r7 is the syscall id */ sa->code = td->td_frame->tf_x[7]; sa->original_code = sa->code; if (sa->code == SYS_syscall) { sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { sa->code = ap[1]; nap -= 2; ap += 2; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; narg = sa->callp->sy_narg; for (i = 0; i < nap; i++) sa->args[i] = ap[i]; if (narg > nap) { if (narg - nap > nitems(args)) panic("Too many system call arguiments"); error = copyin((void *)td->td_frame->tf_x[13], args, (narg - nap) * sizeof(int)); + if (error != 0) + return (error); for (i = 0; i < (narg - nap); i++) sa->args[i + nap] = args[i]; } td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } static void freebsd32_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; break; case ERESTART: /* * Reconstruct the pc to point at the swi. */ if ((frame->tf_spsr & PSR_T) != 0) frame->tf_elr -= 2; //THUMB_INSN_SIZE; else frame->tf_elr -= 4; //INSN_SIZE; break; case EJUSTRETURN: /* nothing to do */ break; default: frame->tf_x[0] = error; frame->tf_spsr |= PSR_C; break; } } static void freebsd32_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = stack; /* SP_usr is mapped to x13 */ tf->tf_x[13] = stack; /* LR_usr is mapped to x14 */ tf->tf_x[14] = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; tf->tf_spsr = PSR_M_32; if ((uint32_t)imgp->entry_addr & 1) tf->tf_spsr |= PSR_T; #ifdef VFP vfp_reset_state(td, pcb); #endif /* * Clear debug register state. It is not applicable to the new process. */ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); } void elf32_dump_thread(struct thread *td, void *dst, size_t *off) { /* XXX: VFP */ } diff --git a/sys/arm64/arm64/exec_machdep.c b/sys/arm64/arm64/exec_machdep.c index 25fd9401df8c..b4ac8887ab72 100644 --- a/sys/arm64/arm64/exec_machdep.c +++ b/sys/arm64/arm64/exec_machdep.c @@ -1,617 +1,615 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif static void get_fpcontext(struct thread *td, mcontext_t *mcp); static void set_fpcontext(struct thread *td, mcontext_t *mcp); int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); #ifdef COMPAT_FREEBSD32 /* * We may be called here for a 32bits process, if we're using a * 64bits debugger. If so, put PC and SPSR where it expects it. */ if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { regs->x[15] = frame->tf_elr; regs->x[16] = frame->tf_spsr; } #endif return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_spsr &= ~PSR_FLAGS; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { /* * We may be called for a 32bits process if we're using * a 64bits debugger. If so, get PC and SPSR from where * it put it. */ frame->tf_elr = regs->x[15]; frame->tf_spsr |= regs->x[16] & PSR_FLAGS; } else #endif { frame->tf_elr = regs->elr; frame->tf_spsr |= regs->spsr & PSR_FLAGS; } return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ if (td == curthread) vfp_save_state(td, pcb); KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Called fill_fpregs while the kernel is using the VFP")); memcpy(regs->fp_q, pcb->pcb_fpustate.vfp_regs, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpustate.vfp_fpcr; regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr; } else #endif memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate, ("Called set_fpregs while the kernel is using the VFP")); memcpy(pcb->pcb_fpustate.vfp_regs, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr; pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { struct debug_monitor_state *monitor; int i; uint8_t debug_ver, nbkpts, nwtpts; memset(regs, 0, sizeof(*regs)); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT, &debug_ver); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT, &nbkpts); extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_WRPs_SHIFT, &nwtpts); /* * The BRPs field contains the number of breakpoints - 1. Armv8-A * allows the hardware to provide 2-16 breakpoints so this won't * overflow an 8 bit value. The same applies to the WRPs field. */ nbkpts++; nwtpts++; regs->db_debug_ver = debug_ver; regs->db_nbkpts = nbkpts; regs->db_nwtpts = nwtpts; monitor = &td->td_pcb->pcb_dbg_regs; if ((monitor->dbg_flags & DBGMON_ENABLED) != 0) { for (i = 0; i < nbkpts; i++) { regs->db_breakregs[i].dbr_addr = monitor->dbg_bvr[i]; regs->db_breakregs[i].dbr_ctrl = monitor->dbg_bcr[i]; } for (i = 0; i < nwtpts; i++) { regs->db_watchregs[i].dbw_addr = monitor->dbg_wvr[i]; regs->db_watchregs[i].dbw_ctrl = monitor->dbg_wcr[i]; } } return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { struct debug_monitor_state *monitor; uint64_t addr; uint32_t ctrl; - int count; int i; monitor = &td->td_pcb->pcb_dbg_regs; - count = 0; monitor->dbg_enable_count = 0; for (i = 0; i < DBG_BRP_MAX; i++) { addr = regs->db_breakregs[i].dbr_addr; ctrl = regs->db_breakregs[i].dbr_ctrl; /* * Don't let the user set a breakpoint on a kernel or * non-canonical user address. */ if (addr >= VM_MAXUSER_ADDRESS) return (EINVAL); /* * The lowest 2 bits are ignored, so record the effective * address. */ addr = rounddown2(addr, 4); /* * Some control fields are ignored, and other bits reserved. * Only unlinked, address-matching breakpoints are supported. * * XXX: fields that appear unvalidated, such as BAS, have * constrained undefined behaviour. If the user mis-programs * these, there is no risk to the system. */ ctrl &= DBG_BCR_EN | DBG_BCR_PMC | DBG_BCR_BAS; if ((ctrl & DBG_BCR_EN) != 0) { /* Only target EL0. */ if ((ctrl & DBG_BCR_PMC) != DBG_BCR_PMC_EL0) return (EINVAL); monitor->dbg_enable_count++; } monitor->dbg_bvr[i] = addr; monitor->dbg_bcr[i] = ctrl; } for (i = 0; i < DBG_WRP_MAX; i++) { addr = regs->db_watchregs[i].dbw_addr; ctrl = regs->db_watchregs[i].dbw_ctrl; /* * Don't let the user set a watchpoint on a kernel or * non-canonical user address. */ if (addr >= VM_MAXUSER_ADDRESS) return (EINVAL); /* * Some control fields are ignored, and other bits reserved. * Only unlinked watchpoints are supported. */ ctrl &= DBG_WCR_EN | DBG_WCR_PAC | DBG_WCR_LSC | DBG_WCR_BAS | DBG_WCR_MASK; if ((ctrl & DBG_WCR_EN) != 0) { /* Only target EL0. */ if ((ctrl & DBG_WCR_PAC) != DBG_WCR_PAC_EL0) return (EINVAL); /* Must set at least one of the load/store bits. 
*/ if ((ctrl & DBG_WCR_LSC) == 0) return (EINVAL); /* * When specifying the address range with BAS, the MASK * field must be zero. */ if ((ctrl & DBG_WCR_BAS) != DBG_WCR_BAS_MASK && (ctrl & DBG_WCR_MASK) != 0) return (EINVAL); monitor->dbg_enable_count++; } monitor->dbg_wvr[i] = addr; monitor->dbg_wcr[i] = ctrl; } if (monitor->dbg_enable_count > 0) monitor->dbg_flags |= DBGMON_ENABLED; return (0); } #ifdef COMPAT_FREEBSD32 int fill_regs32(struct thread *td, struct reg32 *regs) { int i; struct trapframe *tf; tf = td->td_frame; for (i = 0; i < 13; i++) regs->r[i] = tf->tf_x[i]; /* For arm32, SP is r13 and LR is r14 */ regs->r_sp = tf->tf_x[13]; regs->r_lr = tf->tf_x[14]; regs->r_pc = tf->tf_elr; regs->r_cpsr = tf->tf_spsr; return (0); } int set_regs32(struct thread *td, struct reg32 *regs) { int i; struct trapframe *tf; tf = td->td_frame; for (i = 0; i < 13; i++) tf->tf_x[i] = regs->r[i]; /* For arm 32, SP is r13 an LR is r14 */ tf->tf_x[13] = regs->r_sp; tf->tf_x[14] = regs->r_lr; tf->tf_elr = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } /* XXX fill/set dbregs/fpregs are stubbed on 32-bit arm. */ int fill_fpregs32(struct thread *td, struct fpreg32 *regs) { memset(regs, 0, sizeof(*regs)); return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *regs) { return (0); } int fill_dbregs32(struct thread *td, struct dbreg32 *regs) { memset(regs, 0, sizeof(*regs)); return (0); } int set_dbregs32(struct thread *td, struct dbreg32 *regs) { return (0); } #endif void exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack) { struct trapframe *tf = td->td_frame; struct pcb *pcb = td->td_pcb; memset(tf, 0, sizeof(struct trapframe)); tf->tf_x[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; td->td_pcb->pcb_tpidr_el0 = 0; td->td_pcb->pcb_tpidrro_el0 = 0; WRITE_SPECIALREG(tpidrro_el0, 0); WRITE_SPECIALREG(tpidr_el0, 0); #ifdef VFP vfp_reset_state(td, pcb); #endif /* * Clear debug register state. It is not applicable to the new process. 
*/ bzero(&pcb->pcb_dbg_regs, sizeof(pcb->pcb_dbg_regs)); } /* Sanity check these are the same size, they will be memcpy'd to and from */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; get_fpcontext(td, mcp); return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; uint32_t spsr; spsr = mcp->mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & PSR_AARCH32) != 0 || (spsr & PSR_DAIF) != (td->td_frame->tf_spsr & PSR_DAIF)) return (EINVAL); memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; set_fpcontext(td, mcp); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, ("Called get_fpcontext while the kernel is using the VFP")); KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Non-userspace FPU flags set in get_fpcontext")); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_fpustate.vfp_regs, sizeof(mcp->mc_fpregs.fp_q)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpustate.vfp_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpustate.vfp_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. */ vfp_discard(td); KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, ("Called set_fpcontext while the kernel is using the VFP")); memcpy(curpcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs.fp_q)); curpcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK; } critical_exit(); #endif } int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); /* Restore signal mask. 
*/ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ bzero(&frame, sizeof(frame)); get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack = td->td_sigstk; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ? (onstack ? SS_ONSTACK : 0) : SS_DISABLE; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } diff --git a/sys/arm64/arm64/freebsd32_machdep.c b/sys/arm64/arm64/freebsd32_machdep.c index cab778747113..81b4ef9ebedb 100644 --- a/sys/arm64/arm64/freebsd32_machdep.c +++ b/sys/arm64/arm64/freebsd32_machdep.c @@ -1,424 +1,420 @@ /*- * Copyright (c) 2018 Olivier Houchard * Copyright (c) 2017 Nuxi, https://nuxi.nl/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #include #include extern void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) #ifdef VFP static void get_fpcontext32(struct thread *td, mcontext32_vfp_t *); #endif /* * Stubs for machine dependent 32-bits system calls. */ int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; #define ARM_SYNC_ICACHE 0 #define ARM_DRAIN_WRITEBUF 1 #define ARM_SET_TP 2 #define ARM_GET_TP 3 #define ARM_GET_VFPSTATE 4 switch(uap->op) { case ARM_SET_TP: WRITE_SPECIALREG(tpidr_el0, uap->parms); WRITE_SPECIALREG(tpidrro_el0, uap->parms); return 0; case ARM_SYNC_ICACHE: { struct { uint32_t addr; uint32_t size; } args; if ((error = copyin(uap->parms, &args, sizeof(args))) != 0) return (error); if ((uint64_t)args.addr + (uint64_t)args.size > 0xffffffff) return (EINVAL); cpu_icache_sync_range_checked(args.addr, args.size); return 0; } case ARM_GET_VFPSTATE: { mcontext32_vfp_t mcontext_vfp; struct { uint32_t mc_vfp_size; uint32_t mc_vfp; } args; if ((error = copyin(uap->parms, &args, sizeof(args))) != 0) return (error); if (args.mc_vfp_size != sizeof(mcontext_vfp)) return (EINVAL); #ifdef VFP get_fpcontext32(td, &mcontext_vfp); #else bzero(&mcontext_vfp, sizeof(mcontext_vfp)); #endif error = copyout(&mcontext_vfp, (void *)(uintptr_t)args.mc_vfp, sizeof(mcontext_vfp)); return error; } } return (EINVAL); } #ifdef VFP static void get_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp) { struct pcb *curpcb; int i; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ vfp_save_state(td, curpcb); KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate, ("Called get_fpcontext while the kernel is using the VFP")); KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0, ("Non-userspace FPU flags set in get_fpcontext")); for (i = 0; i < 32; i++) mcp->mcv_reg[i] = (uint64_t)curpcb->pcb_fpustate.vfp_regs[i]; mcp->mcv_fpscr = VFP_FPSCR_FROM_SRCR(curpcb->pcb_fpustate.vfp_fpcr, curpcb->pcb_fpustate.vfp_fpsr); } critical_exit(); } static void set_fpcontext32(struct thread *td, mcontext32_vfp_t *mcp) { struct pcb *pcb; int i; critical_enter(); pcb = td->td_pcb; if (td == curthread) vfp_discard(td); for (i = 0; i < 32; i++) pcb->pcb_fpustate.vfp_regs[i] = mcp->mcv_reg[i]; pcb->pcb_fpustate.vfp_fpsr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr); pcb->pcb_fpustate.vfp_fpcr = VFP_FPSR_FROM_FPSCR(mcp->mcv_fpscr); critical_exit(); } #endif static void get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { - struct pcb *pcb; struct trapframe *tf; int i; - pcb = td->td_pcb; tf = td->td_frame; if ((flags & GET_MC_CLEAR_RET) != 0) { mcp->mc_gregset[0] = 0; mcp->mc_gregset[16] = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gregset[0] = tf->tf_x[0]; mcp->mc_gregset[16] = tf->tf_spsr; } for (i = 1; i < 15; i++) mcp->mc_gregset[i] = tf->tf_x[i]; mcp->mc_gregset[15] = tf->tf_elr; mcp->mc_vfp_size = 0; mcp->mc_vfp_ptr = 0; memset(mcp->mc_spare, 0, sizeof(mcp->mc_spare)); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { struct trapframe *tf; mcontext32_vfp_t mc_vfp; int i; tf = td->td_frame; for (i = 0; i < 15; i++) tf->tf_x[i] = mcp->mc_gregset[i]; tf->tf_elr = mcp->mc_gregset[15]; tf->tf_spsr = mcp->mc_gregset[16]; #ifdef VFP if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != 0) { if (copyin((void *)(uintptr_t)mcp->mc_vfp_ptr, &mc_vfp, sizeof(mc_vfp)) != 0) return (EFAULT); set_fpcontext32(td, &mc_vfp); } #endif return (0); } #define UC_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { memset(&uc, 0, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } return (ret); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (0); /* Restore signal mask. 
*/ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { bzero(&uc, sizeof(uc)); get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret); } void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe32 *fp, frame; struct sigacts *psp; struct siginfo32 siginfo; struct sysentvec *sysent; int onstack; int sig; - int code; siginfo_to_siginfo32(&ksi->ksi_info, &siginfo); td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; - code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_x[13]); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe32 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe32 *)td->td_frame->tf_x[13]; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe32 *)((unsigned long)(fp) &~ (8 - 1)); /* Populate the siginfo frame. */ get_mcontext32(td, &frame.sf_uc.uc_mcontext, 0); #ifdef VFP get_fpcontext32(td, &frame.sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp); frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)(uintptr_t)&fp->sf_vfp; #else frame.sf_uc.uc_mcontext.mc_vfp_size = 0; frame.sf_uc.uc_mcontext.mc_vfp_ptr = (uint32_t)NULL; #endif frame.sf_si = siginfo; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. 
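* Below, r0-r2 carry the signal number and the pointers to the siginfo and ucontext, r5 duplicates the ucontext pointer for the trampoline, r13 becomes the new stack pointer and r14 is aimed at the signal trampoline.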
*/ tf->tf_x[0] = sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_x[5] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_x[13] = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_x[14] = (register_t)sysent->sv_sigcode_base; else tf->tf_x[14] = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_x[14], tf->tf_x[13]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifdef COMPAT_43 /* * Mirror the osigreturn definition in kern_sig.c for !i386 platforms. This * mirrors what's connected to the FreeBSD/arm syscall. */ int ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap) { return (nosys(td, (struct nosys_args *)uap)); } #endif diff --git a/sys/arm64/arm64/gic_v3.c b/sys/arm64/arm64/gic_v3.c index 7d9160f8ae17..27f41c58fe92 100644 --- a/sys/arm64/arm64/gic_v3.c +++ b/sys/arm64/arm64/gic_v3.c @@ -1,1569 +1,1565 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * * This software was developed by Andrew Turner under * the sponsorship of the FreeBSD Foundation. * * This software was developed by Semihalf under * the sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_acpi.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DEV_ACPI #include #include #endif #include "gic_if.h" #include "pic_if.h" #include "msi_if.h" #include #include "gic_v3_reg.h" #include "gic_v3_var.h" static bus_get_domain_t gic_v3_get_domain; static bus_read_ivar_t gic_v3_read_ivar; static bus_write_ivar_t gic_v3_write_ivar; static pic_disable_intr_t gic_v3_disable_intr; static pic_enable_intr_t gic_v3_enable_intr; static pic_map_intr_t gic_v3_map_intr; static pic_setup_intr_t gic_v3_setup_intr; static pic_teardown_intr_t gic_v3_teardown_intr; static pic_post_filter_t gic_v3_post_filter; static pic_post_ithread_t gic_v3_post_ithread; static pic_pre_ithread_t gic_v3_pre_ithread; static pic_bind_intr_t gic_v3_bind_intr; #ifdef SMP static pic_init_secondary_t gic_v3_init_secondary; static pic_ipi_send_t gic_v3_ipi_send; static pic_ipi_setup_t gic_v3_ipi_setup; #endif static gic_reserve_msi_range_t gic_v3_reserve_msi_range; static gic_alloc_msi_t gic_v3_gic_alloc_msi; static gic_release_msi_t gic_v3_gic_release_msi; static gic_alloc_msix_t gic_v3_gic_alloc_msix; static gic_release_msix_t gic_v3_gic_release_msix; static msi_alloc_msi_t gic_v3_alloc_msi; static msi_release_msi_t gic_v3_release_msi; static msi_alloc_msix_t gic_v3_alloc_msix; static msi_release_msix_t gic_v3_release_msix; static msi_map_msi_t gic_v3_map_msi; static u_int gic_irq_cpu; #ifdef SMP static u_int sgi_to_ipi[GIC_LAST_SGI - GIC_FIRST_SGI + 1]; static u_int sgi_first_unused = GIC_FIRST_SGI; #endif static device_method_t gic_v3_methods[] = { /* Device interface */ DEVMETHOD(device_detach, gic_v3_detach), /* Bus interface */ DEVMETHOD(bus_get_domain, gic_v3_get_domain), DEVMETHOD(bus_read_ivar, gic_v3_read_ivar), DEVMETHOD(bus_write_ivar, gic_v3_write_ivar), /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, gic_v3_disable_intr), DEVMETHOD(pic_enable_intr, gic_v3_enable_intr), DEVMETHOD(pic_map_intr, gic_v3_map_intr), DEVMETHOD(pic_setup_intr, gic_v3_setup_intr), DEVMETHOD(pic_teardown_intr, gic_v3_teardown_intr), DEVMETHOD(pic_post_filter, gic_v3_post_filter), DEVMETHOD(pic_post_ithread, gic_v3_post_ithread), DEVMETHOD(pic_pre_ithread, gic_v3_pre_ithread), #ifdef SMP DEVMETHOD(pic_bind_intr, gic_v3_bind_intr), DEVMETHOD(pic_init_secondary, gic_v3_init_secondary), DEVMETHOD(pic_ipi_send, gic_v3_ipi_send), DEVMETHOD(pic_ipi_setup, gic_v3_ipi_setup), #endif /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, gic_v3_alloc_msi), DEVMETHOD(msi_release_msi, gic_v3_release_msi), DEVMETHOD(msi_alloc_msix, gic_v3_alloc_msix), DEVMETHOD(msi_release_msix, gic_v3_release_msix), DEVMETHOD(msi_map_msi, gic_v3_map_msi), /* GIC */ DEVMETHOD(gic_reserve_msi_range, gic_v3_reserve_msi_range), DEVMETHOD(gic_alloc_msi, gic_v3_gic_alloc_msi), DEVMETHOD(gic_release_msi, gic_v3_gic_release_msi), DEVMETHOD(gic_alloc_msix, gic_v3_gic_alloc_msix), DEVMETHOD(gic_release_msix, gic_v3_gic_release_msix), /* End */ DEVMETHOD_END }; DEFINE_CLASS_0(gic, gic_v3_driver, gic_v3_methods, sizeof(struct gic_v3_softc)); /* * Driver-specific definitions. */ MALLOC_DEFINE(M_GIC_V3, "GICv3", GIC_V3_DEVSTR); /* * Helper functions and definitions. 
*/ /* Destination registers, either Distributor or Re-Distributor */ enum gic_v3_xdist { DIST = 0, REDIST, }; struct gic_v3_irqsrc { struct intr_irqsrc gi_isrc; uint32_t gi_irq; enum intr_polarity gi_pol; enum intr_trigger gi_trig; #define GI_FLAG_MSI (1 << 1) /* This interrupt source should only */ /* be used for MSI/MSI-X interrupts */ #define GI_FLAG_MSI_USED (1 << 2) /* This irq is already allocated */ /* for a MSI/MSI-X interrupt */ u_int gi_flags; }; /* Helper routines starting with gic_v3_ */ static int gic_v3_dist_init(struct gic_v3_softc *); static int gic_v3_redist_alloc(struct gic_v3_softc *); static int gic_v3_redist_find(struct gic_v3_softc *); static int gic_v3_redist_init(struct gic_v3_softc *); static int gic_v3_cpu_init(struct gic_v3_softc *); static void gic_v3_wait_for_rwp(struct gic_v3_softc *, enum gic_v3_xdist); /* A sequence of init functions for primary (boot) CPU */ typedef int (*gic_v3_initseq_t) (struct gic_v3_softc *); /* Primary CPU initialization sequence */ static gic_v3_initseq_t gic_v3_primary_init[] = { gic_v3_dist_init, gic_v3_redist_alloc, gic_v3_redist_init, gic_v3_cpu_init, NULL }; #ifdef SMP /* Secondary CPU initialization sequence */ static gic_v3_initseq_t gic_v3_secondary_init[] = { gic_v3_redist_init, gic_v3_cpu_init, NULL }; #endif uint32_t gic_r_read_4(device_t dev, bus_size_t offset) { struct gic_v3_softc *sc; struct resource *rdist; sc = device_get_softc(dev); rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res; return (bus_read_4(rdist, offset)); } uint64_t gic_r_read_8(device_t dev, bus_size_t offset) { struct gic_v3_softc *sc; struct resource *rdist; sc = device_get_softc(dev); rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res; return (bus_read_8(rdist, offset)); } void gic_r_write_4(device_t dev, bus_size_t offset, uint32_t val) { struct gic_v3_softc *sc; struct resource *rdist; sc = device_get_softc(dev); rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res; bus_write_4(rdist, offset, val); } void gic_r_write_8(device_t dev, bus_size_t offset, uint64_t val) { struct gic_v3_softc *sc; struct resource *rdist; sc = device_get_softc(dev); rdist = &sc->gic_redists.pcpu[PCPU_GET(cpuid)]->res; bus_write_8(rdist, offset, val); } static void gic_v3_reserve_msi_range(device_t dev, u_int start, u_int count) { struct gic_v3_softc *sc; int i; sc = device_get_softc(dev); KASSERT((start + count) < sc->gic_nirqs, ("%s: Trying to allocate too many MSI IRQs: %d + %d > %d", __func__, start, count, sc->gic_nirqs)); for (i = 0; i < count; i++) { KASSERT(sc->gic_irqs[start + i].gi_isrc.isrc_handlers == 0, ("%s: MSI interrupt %d already has a handler", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_pol == INTR_POLARITY_CONFORM, ("%s: MSI interrupt %d already has a polarity", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_trig == INTR_TRIGGER_CONFORM, ("%s: MSI interrupt %d already has a trigger", __func__, count + i)); sc->gic_irqs[start + i].gi_pol = INTR_POLARITY_HIGH; sc->gic_irqs[start + i].gi_trig = INTR_TRIGGER_EDGE; sc->gic_irqs[start + i].gi_flags |= GI_FLAG_MSI; } } /* * Device interface. */ int gic_v3_attach(device_t dev) { struct gic_v3_softc *sc; gic_v3_initseq_t *init_func; uint32_t typer; int rid; int err; size_t i; u_int irq; const char *name; sc = device_get_softc(dev); sc->gic_registered = FALSE; sc->dev = dev; err = 0; /* Initialize mutex */ mtx_init(&sc->gic_mtx, "GICv3 lock", NULL, MTX_SPIN); /* * Allocate array of struct resource. * One entry for Distributor and all remaining for Re-Distributor. 
*/ sc->gic_res = malloc( sizeof(*sc->gic_res) * (sc->gic_redists.nregions + 1), M_GIC_V3, M_WAITOK); /* Now allocate corresponding resources */ for (i = 0, rid = 0; i < (sc->gic_redists.nregions + 1); i++, rid++) { sc->gic_res[rid] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->gic_res[rid] == NULL) return (ENXIO); } /* * Distributor interface */ sc->gic_dist = sc->gic_res[0]; /* * Re-Dristributor interface */ /* Allocate space under region descriptions */ sc->gic_redists.regions = malloc( sizeof(*sc->gic_redists.regions) * sc->gic_redists.nregions, M_GIC_V3, M_WAITOK); /* Fill-up bus_space information for each region. */ for (i = 0, rid = 1; i < sc->gic_redists.nregions; i++, rid++) sc->gic_redists.regions[i] = sc->gic_res[rid]; /* Get the number of supported SPI interrupts */ typer = gic_d_read(sc, 4, GICD_TYPER); sc->gic_nirqs = GICD_TYPER_I_NUM(typer); if (sc->gic_nirqs > GIC_I_NUM_MAX) sc->gic_nirqs = GIC_I_NUM_MAX; sc->gic_irqs = malloc(sizeof(*sc->gic_irqs) * sc->gic_nirqs, M_GIC_V3, M_WAITOK | M_ZERO); name = device_get_nameunit(dev); for (irq = 0; irq < sc->gic_nirqs; irq++) { struct intr_irqsrc *isrc; sc->gic_irqs[irq].gi_irq = irq; sc->gic_irqs[irq].gi_pol = INTR_POLARITY_CONFORM; sc->gic_irqs[irq].gi_trig = INTR_TRIGGER_CONFORM; isrc = &sc->gic_irqs[irq].gi_isrc; if (irq <= GIC_LAST_SGI) { err = intr_isrc_register(isrc, sc->dev, INTR_ISRCF_IPI, "%s,i%u", name, irq - GIC_FIRST_SGI); } else if (irq <= GIC_LAST_PPI) { err = intr_isrc_register(isrc, sc->dev, INTR_ISRCF_PPI, "%s,p%u", name, irq - GIC_FIRST_PPI); } else { err = intr_isrc_register(isrc, sc->dev, 0, "%s,s%u", name, irq - GIC_FIRST_SPI); } if (err != 0) { /* XXX call intr_isrc_deregister() */ free(sc->gic_irqs, M_DEVBUF); return (err); } } mtx_init(&sc->gic_mbi_mtx, "GICv3 mbi lock", NULL, MTX_DEF); if (sc->gic_mbi_start > 0) { gic_v3_reserve_msi_range(dev, sc->gic_mbi_start, sc->gic_mbi_end - sc->gic_mbi_start); if (bootverbose) { device_printf(dev, "using spi %u to %u\n", sc->gic_mbi_start, sc->gic_mbi_end); } } /* * Read the Peripheral ID2 register. This is an implementation * defined register, but seems to be implemented in all GICv3 * parts and Linux expects it to be there. 
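* The value read below is cached in sc->gic_pidr2 and later reported through the GIC_IVAR_HW_REV ivar.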
*/ sc->gic_pidr2 = gic_d_read(sc, 4, GICD_PIDR2); /* Get the number of supported interrupt identifier bits */ sc->gic_idbits = GICD_TYPER_IDBITS(typer); if (bootverbose) { device_printf(dev, "SPIs: %u, IDs: %u\n", sc->gic_nirqs, (1 << sc->gic_idbits) - 1); } /* Train init sequence for boot CPU */ for (init_func = gic_v3_primary_init; *init_func != NULL; init_func++) { err = (*init_func)(sc); if (err != 0) return (err); } return (0); } int gic_v3_detach(device_t dev) { struct gic_v3_softc *sc; size_t i; int rid; sc = device_get_softc(dev); if (device_is_attached(dev)) { /* * XXX: We should probably deregister PIC */ if (sc->gic_registered) panic("Trying to detach registered PIC"); } for (rid = 0; rid < (sc->gic_redists.nregions + 1); rid++) bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->gic_res[rid]); for (i = 0; i <= mp_maxid; i++) free(sc->gic_redists.pcpu[i], M_GIC_V3); free(sc->gic_res, M_GIC_V3); free(sc->gic_redists.regions, M_GIC_V3); return (0); } static int gic_v3_get_domain(device_t dev, device_t child, int *domain) { struct gic_v3_devinfo *di; di = device_get_ivars(child); if (di->gic_domain < 0) return (ENOENT); *domain = di->gic_domain; return (0); } static int gic_v3_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct gic_v3_softc *sc; sc = device_get_softc(dev); switch (which) { case GICV3_IVAR_NIRQS: *result = (intr_nirq - sc->gic_nirqs) / sc->gic_nchildren; return (0); case GICV3_IVAR_REDIST: *result = (uintptr_t)sc->gic_redists.pcpu[PCPU_GET(cpuid)]; return (0); case GIC_IVAR_HW_REV: KASSERT( GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv3 || GICR_PIDR2_ARCH(sc->gic_pidr2) == GICR_PIDR2_ARCH_GICv4, ("gic_v3_read_ivar: Invalid GIC architecture: %d (%.08X)", GICR_PIDR2_ARCH(sc->gic_pidr2), sc->gic_pidr2)); *result = GICR_PIDR2_ARCH(sc->gic_pidr2); return (0); case GIC_IVAR_BUS: KASSERT(sc->gic_bus != GIC_BUS_UNKNOWN, ("gic_v3_read_ivar: Unknown bus type")); KASSERT(sc->gic_bus <= GIC_BUS_MAX, ("gic_v3_read_ivar: Invalid bus type %u", sc->gic_bus)); *result = sc->gic_bus; return (0); } return (ENOENT); } static int gic_v3_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { - struct gic_v3_softc *sc; - - sc = device_get_softc(dev); - switch(which) { case GICV3_IVAR_NIRQS: case GICV3_IVAR_REDIST: case GIC_IVAR_HW_REV: case GIC_IVAR_BUS: return (EINVAL); } return (ENOENT); } int arm_gic_v3_intr(void *arg) { struct gic_v3_softc *sc = arg; struct gic_v3_irqsrc *gi; struct intr_pic *pic; uint64_t active_irq; struct trapframe *tf; pic = sc->gic_pic; while (1) { if (CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1) { /* * Hardware: Cavium ThunderX * Chip revision: Pass 1.0 (early version) * Pass 1.1 (production) * ERRATUM: 22978, 23154 */ __asm __volatile( "nop;nop;nop;nop;nop;nop;nop;nop; \n" "mrs %0, ICC_IAR1_EL1 \n" "nop;nop;nop;nop; \n" "dsb sy \n" : "=&r" (active_irq)); } else { active_irq = gic_icc_read(IAR1); } if (active_irq >= GIC_FIRST_LPI) { intr_child_irq_handler(pic, active_irq); continue; } if (__predict_false(active_irq >= sc->gic_nirqs)) return (FILTER_HANDLED); tf = curthread->td_intr_frame; gi = &sc->gic_irqs[active_irq]; if (active_irq <= GIC_LAST_SGI) { /* Call EOI for all IPI before dispatch. 
*/ gic_icc_write(EOIR1, (uint64_t)active_irq); #ifdef SMP intr_ipi_dispatch(sgi_to_ipi[gi->gi_irq], tf); #else device_printf(sc->dev, "SGI %ju on UP system detected\n", (uintmax_t)(active_irq - GIC_FIRST_SGI)); #endif } else if (active_irq >= GIC_FIRST_PPI && active_irq <= GIC_LAST_SPI) { if (gi->gi_trig == INTR_TRIGGER_EDGE) gic_icc_write(EOIR1, gi->gi_irq); if (intr_isrc_dispatch(&gi->gi_isrc, tf) != 0) { if (gi->gi_trig != INTR_TRIGGER_EDGE) gic_icc_write(EOIR1, gi->gi_irq); gic_v3_disable_intr(sc->dev, &gi->gi_isrc); device_printf(sc->dev, "Stray irq %lu disabled\n", active_irq); } } } } #ifdef FDT static int gic_map_fdt(device_t dev, u_int ncells, pcell_t *cells, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { u_int irq; if (ncells < 3) return (EINVAL); /* * The 1st cell is the interrupt type: * 0 = SPI * 1 = PPI * The 2nd cell contains the interrupt number: * [0 - 987] for SPI * [0 - 15] for PPI * The 3rd cell is the flags, encoded as follows: * bits[3:0] trigger type and level flags * 1 = edge triggered * 2 = edge triggered (PPI only) * 4 = level-sensitive * 8 = level-sensitive (PPI only) */ switch (cells[0]) { case 0: irq = GIC_FIRST_SPI + cells[1]; /* SPI irq is checked later. */ break; case 1: irq = GIC_FIRST_PPI + cells[1]; if (irq > GIC_LAST_PPI) { device_printf(dev, "unsupported PPI interrupt " "number %u\n", cells[1]); return (EINVAL); } break; default: device_printf(dev, "unsupported interrupt type " "configuration %u\n", cells[0]); return (EINVAL); } switch (cells[2] & FDT_INTR_MASK) { case FDT_INTR_EDGE_RISING: *trigp = INTR_TRIGGER_EDGE; *polp = INTR_POLARITY_HIGH; break; case FDT_INTR_EDGE_FALLING: *trigp = INTR_TRIGGER_EDGE; *polp = INTR_POLARITY_LOW; break; case FDT_INTR_LEVEL_HIGH: *trigp = INTR_TRIGGER_LEVEL; *polp = INTR_POLARITY_HIGH; break; case FDT_INTR_LEVEL_LOW: *trigp = INTR_TRIGGER_LEVEL; *polp = INTR_POLARITY_LOW; break; default: device_printf(dev, "unsupported trigger/polarity " "configuration 0x%02x\n", cells[2]); return (EINVAL); } /* Check the interrupt is valid */ if (irq >= GIC_FIRST_SPI && *polp != INTR_POLARITY_HIGH) return (EINVAL); *irqp = irq; return (0); } #endif static int gic_map_msi(device_t dev, struct intr_map_data_msi *msi_data, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { struct gic_v3_irqsrc *gi; /* SPI-mapped MSI */ gi = (struct gic_v3_irqsrc *)msi_data->isrc; if (gi == NULL) return (ENXIO); *irqp = gi->gi_irq; /* MSI/MSI-X interrupts are always edge triggered with high polarity */ *polp = INTR_POLARITY_HIGH; *trigp = INTR_TRIGGER_EDGE; return (0); } static int do_gic_v3_map_intr(device_t dev, struct intr_map_data *data, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { struct gic_v3_softc *sc; enum intr_polarity pol; enum intr_trigger trig; struct intr_map_data_msi *dam; #ifdef FDT struct intr_map_data_fdt *daf; #endif #ifdef DEV_ACPI struct intr_map_data_acpi *daa; #endif u_int irq; sc = device_get_softc(dev); switch (data->type) { #ifdef FDT case INTR_MAP_DATA_FDT: daf = (struct intr_map_data_fdt *)data; if (gic_map_fdt(dev, daf->ncells, daf->cells, &irq, &pol, &trig) != 0) return (EINVAL); break; #endif #ifdef DEV_ACPI case INTR_MAP_DATA_ACPI: daa = (struct intr_map_data_acpi *)data; irq = daa->irq; pol = daa->pol; trig = daa->trig; break; #endif case INTR_MAP_DATA_MSI: /* SPI-mapped MSI */ dam = (struct intr_map_data_msi *)data; if (gic_map_msi(dev, dam, &irq, &pol, &trig) != 0) return (EINVAL); break; default: return (EINVAL); } if (irq >= sc->gic_nirqs) return (EINVAL); 
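/*
 * Illustrative sketch only (not part of this patch): a minimal userspace
 * rendering of the three-cell devicetree decoding performed by gic_map_fdt()
 * above.  The SKETCH_* constants and the simplified flag handling are
 * assumptions made for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define	SKETCH_FIRST_PPI	16	/* assumed value of GIC_FIRST_PPI */
#define	SKETCH_FIRST_SPI	32	/* assumed value of GIC_FIRST_SPI */

/* Decode "type, number, flags" cells into a global IRQ, trigger and polarity. */
static int
sketch_decode_gic_cells(const uint32_t cells[3], unsigned *irq, int *edge,
    int *high)
{

	switch (cells[0]) {
	case 0:					/* SPI */
		*irq = SKETCH_FIRST_SPI + cells[1];
		break;
	case 1:					/* PPI */
		*irq = SKETCH_FIRST_PPI + cells[1];
		break;
	default:
		return (-1);
	}
	/* bits[3:0] of the third cell: 1/2 = edge, 4/8 = level. */
	*edge = (cells[2] & 0x3) != 0;
	/* 2 (falling) and 8 (active-low) are the low-polarity encodings. */
	*high = (cells[2] & 0xa) == 0;
	return (0);
}

int
main(void)
{
	const uint32_t cells[3] = { 0, 10, 4 };	/* SPI 10, level, active-high */
	unsigned irq;
	int edge, high;

	if (sketch_decode_gic_cells(cells, &irq, &edge, &high) == 0)
		printf("irq %u, %s-triggered, active-%s\n", irq,
		    edge ? "edge" : "level", high ? "high" : "low");
	return (0);
}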
switch (pol) { case INTR_POLARITY_CONFORM: case INTR_POLARITY_LOW: case INTR_POLARITY_HIGH: break; default: return (EINVAL); } switch (trig) { case INTR_TRIGGER_CONFORM: case INTR_TRIGGER_EDGE: case INTR_TRIGGER_LEVEL: break; default: return (EINVAL); } *irqp = irq; if (polp != NULL) *polp = pol; if (trigp != NULL) *trigp = trig; return (0); } static int gic_v3_map_intr(device_t dev, struct intr_map_data *data, struct intr_irqsrc **isrcp) { struct gic_v3_softc *sc; int error; u_int irq; error = do_gic_v3_map_intr(dev, data, &irq, NULL, NULL); if (error == 0) { sc = device_get_softc(dev); *isrcp = GIC_INTR_ISRC(sc, irq); } return (error); } static int gic_v3_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct gic_v3_softc *sc = device_get_softc(dev); struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; enum intr_trigger trig; enum intr_polarity pol; uint32_t reg; u_int irq; int error; if (data == NULL) return (ENOTSUP); error = do_gic_v3_map_intr(dev, data, &irq, &pol, &trig); if (error != 0) return (error); if (gi->gi_irq != irq || pol == INTR_POLARITY_CONFORM || trig == INTR_TRIGGER_CONFORM) return (EINVAL); /* Compare config if this is not first setup. */ if (isrc->isrc_handlers != 0) { if (pol != gi->gi_pol || trig != gi->gi_trig) return (EINVAL); else return (0); } /* For MSI/MSI-X we should have already configured these */ if ((gi->gi_flags & GI_FLAG_MSI) == 0) { gi->gi_pol = pol; gi->gi_trig = trig; } /* * XXX - In case that per CPU interrupt is going to be enabled in time * when SMP is already started, we need some IPI call which * enables it on others CPUs. Further, it's more complicated as * pic_enable_source() and pic_disable_source() should act on * per CPU basis only. Thus, it should be solved here somehow. 
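* (For now a PPI is simply marked as bound to the CPU performing the setup; see the CPU_SET() on isrc_cpu below.)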
*/ if (isrc->isrc_flags & INTR_ISRCF_PPI) CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); if (irq >= GIC_FIRST_PPI && irq <= GIC_LAST_SPI) { mtx_lock_spin(&sc->gic_mtx); /* Set the trigger and polarity */ if (irq <= GIC_LAST_PPI) reg = gic_r_read(sc, 4, GICR_SGI_BASE_SIZE + GICD_ICFGR(irq)); else reg = gic_d_read(sc, 4, GICD_ICFGR(irq)); if (trig == INTR_TRIGGER_LEVEL) reg &= ~(2 << ((irq % 16) * 2)); else reg |= 2 << ((irq % 16) * 2); if (irq <= GIC_LAST_PPI) { gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ICFGR(irq), reg); gic_v3_wait_for_rwp(sc, REDIST); } else { gic_d_write(sc, 4, GICD_ICFGR(irq), reg); gic_v3_wait_for_rwp(sc, DIST); } mtx_unlock_spin(&sc->gic_mtx); gic_v3_bind_intr(dev, isrc); } return (0); } static int gic_v3_teardown_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; if (isrc->isrc_handlers == 0 && (gi->gi_flags & GI_FLAG_MSI) == 0) { gi->gi_pol = INTR_POLARITY_CONFORM; gi->gi_trig = INTR_TRIGGER_CONFORM; } return (0); } static void gic_v3_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_softc *sc; struct gic_v3_irqsrc *gi; u_int irq; sc = device_get_softc(dev); gi = (struct gic_v3_irqsrc *)isrc; irq = gi->gi_irq; if (irq <= GIC_LAST_PPI) { /* SGIs and PPIs in corresponding Re-Distributor */ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ICENABLER(irq), GICD_I_MASK(irq)); gic_v3_wait_for_rwp(sc, REDIST); } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) { /* SPIs in distributor */ gic_d_write(sc, 4, GICD_ICENABLER(irq), GICD_I_MASK(irq)); gic_v3_wait_for_rwp(sc, DIST); } else panic("%s: Unsupported IRQ %u", __func__, irq); } static void gic_v3_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_softc *sc; struct gic_v3_irqsrc *gi; u_int irq; sc = device_get_softc(dev); gi = (struct gic_v3_irqsrc *)isrc; irq = gi->gi_irq; if (irq <= GIC_LAST_PPI) { /* SGIs and PPIs in corresponding Re-Distributor */ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_ISENABLER(irq), GICD_I_MASK(irq)); gic_v3_wait_for_rwp(sc, REDIST); } else if (irq >= GIC_FIRST_SPI && irq <= GIC_LAST_SPI) { /* SPIs in distributor */ gic_d_write(sc, 4, GICD_ISENABLER(irq), GICD_I_MASK(irq)); gic_v3_wait_for_rwp(sc, DIST); } else panic("%s: Unsupported IRQ %u", __func__, irq); } static void gic_v3_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; gic_v3_disable_intr(dev, isrc); gic_icc_write(EOIR1, gi->gi_irq); } static void gic_v3_post_ithread(device_t dev, struct intr_irqsrc *isrc) { gic_v3_enable_intr(dev, isrc); } static void gic_v3_post_filter(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; if (gi->gi_trig == INTR_TRIGGER_EDGE) return; gic_icc_write(EOIR1, gi->gi_irq); } static int gic_v3_bind_intr(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_softc *sc; struct gic_v3_irqsrc *gi; int cpu; gi = (struct gic_v3_irqsrc *)isrc; if (gi->gi_irq <= GIC_LAST_PPI) return (EINVAL); KASSERT(gi->gi_irq >= GIC_FIRST_SPI && gi->gi_irq <= GIC_LAST_SPI, ("%s: Attempting to bind an invalid IRQ", __func__)); sc = device_get_softc(dev); if (CPU_EMPTY(&isrc->isrc_cpu)) { gic_irq_cpu = intr_irq_next_cpu(gic_irq_cpu, &all_cpus); CPU_SETOF(gic_irq_cpu, &isrc->isrc_cpu); gic_d_write(sc, 8, GICD_IROUTER(gi->gi_irq), CPU_AFFINITY(gic_irq_cpu)); } else { /* * We can only bind to a single CPU so select * the first CPU found. 
*/ cpu = CPU_FFS(&isrc->isrc_cpu) - 1; gic_d_write(sc, 8, GICD_IROUTER(gi->gi_irq), CPU_AFFINITY(cpu)); } return (0); } #ifdef SMP static void gic_v3_init_secondary(device_t dev) { device_t child; struct gic_v3_softc *sc; gic_v3_initseq_t *init_func; struct intr_irqsrc *isrc; u_int cpu, irq; int err, i; sc = device_get_softc(dev); cpu = PCPU_GET(cpuid); /* Train init sequence for boot CPU */ for (init_func = gic_v3_secondary_init; *init_func != NULL; init_func++) { err = (*init_func)(sc); if (err != 0) { device_printf(dev, "Could not initialize GIC for CPU%u\n", cpu); return; } } /* Unmask attached SGI interrupts. */ for (irq = GIC_FIRST_SGI; irq <= GIC_LAST_SGI; irq++) { isrc = GIC_INTR_ISRC(sc, irq); if (intr_isrc_init_on_cpu(isrc, cpu)) gic_v3_enable_intr(dev, isrc); } /* Unmask attached PPI interrupts. */ for (irq = GIC_FIRST_PPI; irq <= GIC_LAST_PPI; irq++) { isrc = GIC_INTR_ISRC(sc, irq); if (intr_isrc_init_on_cpu(isrc, cpu)) gic_v3_enable_intr(dev, isrc); } for (i = 0; i < sc->gic_nchildren; i++) { child = sc->gic_children[i]; PIC_INIT_SECONDARY(child); } } static void gic_v3_ipi_send(device_t dev, struct intr_irqsrc *isrc, cpuset_t cpus, u_int ipi) { struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; uint64_t aff, val, irq; int i; #define GIC_AFF_MASK (CPU_AFF3_MASK | CPU_AFF2_MASK | CPU_AFF1_MASK) #define GIC_AFFINITY(i) (CPU_AFFINITY(i) & GIC_AFF_MASK) aff = GIC_AFFINITY(0); irq = gi->gi_irq; val = 0; /* Iterate through all CPUs in set */ for (i = 0; i <= mp_maxid; i++) { /* Move to the next affinity group */ if (aff != GIC_AFFINITY(i)) { /* Send the IPI */ if (val != 0) { gic_icc_write(SGI1R, val); val = 0; } aff = GIC_AFFINITY(i); } /* Send the IPI to this cpu */ if (CPU_ISSET(i, &cpus)) { #define ICC_SGI1R_AFFINITY(aff) \ (((uint64_t)CPU_AFF3(aff) << ICC_SGI1R_EL1_AFF3_SHIFT) | \ ((uint64_t)CPU_AFF2(aff) << ICC_SGI1R_EL1_AFF2_SHIFT) | \ ((uint64_t)CPU_AFF1(aff) << ICC_SGI1R_EL1_AFF1_SHIFT)) /* Set the affinity when the first at this level */ if (val == 0) val = ICC_SGI1R_AFFINITY(aff) | irq << ICC_SGI1R_EL1_SGIID_SHIFT; /* Set the bit to send the IPI to te CPU */ val |= 1 << CPU_AFF0(CPU_AFFINITY(i)); } } /* Send the IPI to the last cpu affinity group */ if (val != 0) gic_icc_write(SGI1R, val); #undef GIC_AFF_MASK #undef GIC_AFFINITY } static int gic_v3_ipi_setup(device_t dev, u_int ipi, struct intr_irqsrc **isrcp) { struct intr_irqsrc *isrc; struct gic_v3_softc *sc = device_get_softc(dev); if (sgi_first_unused > GIC_LAST_SGI) return (ENOSPC); isrc = GIC_INTR_ISRC(sc, sgi_first_unused); sgi_to_ipi[sgi_first_unused++] = ipi; CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); *isrcp = isrc; return (0); } #endif /* SMP */ /* * Helper routines */ static void gic_v3_wait_for_rwp(struct gic_v3_softc *sc, enum gic_v3_xdist xdist) { struct resource *res; u_int cpuid; size_t us_left = 1000000; cpuid = PCPU_GET(cpuid); switch (xdist) { case DIST: res = sc->gic_dist; break; case REDIST: res = &sc->gic_redists.pcpu[cpuid]->res; break; default: KASSERT(0, ("%s: Attempt to wait for unknown RWP", __func__)); return; } while ((bus_read_4(res, GICD_CTLR) & GICD_CTLR_RWP) != 0) { DELAY(1); if (us_left-- == 0) panic("GICD Register write pending for too long"); } } /* CPU interface. 
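* The routines below program the per-CPU interface through the ICC_* system registers (SRE, PMR, CTLR and IGRPEN1).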
*/ static __inline void gic_v3_cpu_priority(uint64_t mask) { /* Set prority mask */ gic_icc_write(PMR, mask & ICC_PMR_EL1_PRIO_MASK); } static int gic_v3_cpu_enable_sre(struct gic_v3_softc *sc) { uint64_t sre; u_int cpuid; cpuid = PCPU_GET(cpuid); /* * Set the SRE bit to enable access to GIC CPU interface * via system registers. */ sre = READ_SPECIALREG(icc_sre_el1); sre |= ICC_SRE_EL1_SRE; WRITE_SPECIALREG(icc_sre_el1, sre); isb(); /* * Now ensure that the bit is set. */ sre = READ_SPECIALREG(icc_sre_el1); if ((sre & ICC_SRE_EL1_SRE) == 0) { /* We are done. This was disabled in EL2 */ device_printf(sc->dev, "ERROR: CPU%u cannot enable CPU interface " "via system registers\n", cpuid); return (ENXIO); } else if (bootverbose) { device_printf(sc->dev, "CPU%u enabled CPU interface via system registers\n", cpuid); } return (0); } static int gic_v3_cpu_init(struct gic_v3_softc *sc) { int err; /* Enable access to CPU interface via system registers */ err = gic_v3_cpu_enable_sre(sc); if (err != 0) return (err); /* Priority mask to minimum - accept all interrupts */ gic_v3_cpu_priority(GIC_PRIORITY_MIN); /* Disable EOI mode */ gic_icc_clear(CTLR, ICC_CTLR_EL1_EOIMODE); /* Enable group 1 (insecure) interrups */ gic_icc_set(IGRPEN1, ICC_IGRPEN0_EL1_EN); return (0); } /* Distributor */ static int gic_v3_dist_init(struct gic_v3_softc *sc) { uint64_t aff; u_int i; /* * 1. Disable the Distributor */ gic_d_write(sc, 4, GICD_CTLR, 0); gic_v3_wait_for_rwp(sc, DIST); /* * 2. Configure the Distributor */ /* Set all SPIs to be Group 1 Non-secure */ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_IGROUPRn) gic_d_write(sc, 4, GICD_IGROUPR(i), 0xFFFFFFFF); /* Set all global interrupts to be level triggered, active low. */ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ICFGRn) gic_d_write(sc, 4, GICD_ICFGR(i), 0x00000000); /* Set priority to all shared interrupts */ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_IPRIORITYn) { /* Set highest priority */ gic_d_write(sc, 4, GICD_IPRIORITYR(i), GIC_PRIORITY_MAX); } /* * Disable all interrupts. Leave PPI and SGIs as they are enabled in * Re-Distributor registers. */ for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i += GICD_I_PER_ISENABLERn) gic_d_write(sc, 4, GICD_ICENABLER(i), 0xFFFFFFFF); gic_v3_wait_for_rwp(sc, DIST); /* * 3. Enable Distributor */ /* Enable Distributor with ARE, Group 1 */ gic_d_write(sc, 4, GICD_CTLR, GICD_CTLR_ARE_NS | GICD_CTLR_G1A | GICD_CTLR_G1); /* * 4. Route all interrupts to boot CPU. 
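* The loop below writes the boot CPU's affinity to GICD_IROUTER for every SPI; gic_v3_bind_intr() re-routes individual SPIs later.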
*/ aff = CPU_AFFINITY(0); for (i = GIC_FIRST_SPI; i < sc->gic_nirqs; i++) gic_d_write(sc, 8, GICD_IROUTER(i), aff); return (0); } /* Re-Distributor */ static int gic_v3_redist_alloc(struct gic_v3_softc *sc) { u_int cpuid; /* Allocate struct resource for all CPU's Re-Distributor registers */ for (cpuid = 0; cpuid <= mp_maxid; cpuid++) if (CPU_ISSET(cpuid, &all_cpus) != 0) sc->gic_redists.pcpu[cpuid] = malloc(sizeof(*sc->gic_redists.pcpu[0]), M_GIC_V3, M_WAITOK); else sc->gic_redists.pcpu[cpuid] = NULL; return (0); } static int gic_v3_redist_find(struct gic_v3_softc *sc) { struct resource r_res; bus_space_handle_t r_bsh; uint64_t aff; uint64_t typer; uint32_t pidr2; u_int cpuid; size_t i; cpuid = PCPU_GET(cpuid); aff = CPU_AFFINITY(cpuid); /* Affinity in format for comparison with typer */ aff = (CPU_AFF3(aff) << 24) | (CPU_AFF2(aff) << 16) | (CPU_AFF1(aff) << 8) | CPU_AFF0(aff); if (bootverbose) { device_printf(sc->dev, "Start searching for Re-Distributor\n"); } /* Iterate through Re-Distributor regions */ for (i = 0; i < sc->gic_redists.nregions; i++) { /* Take a copy of the region's resource */ r_res = *sc->gic_redists.regions[i]; r_bsh = rman_get_bushandle(&r_res); pidr2 = bus_read_4(&r_res, GICR_PIDR2); switch (GICR_PIDR2_ARCH(pidr2)) { case GICR_PIDR2_ARCH_GICv3: /* fall through */ case GICR_PIDR2_ARCH_GICv4: break; default: device_printf(sc->dev, "No Re-Distributor found for CPU%u\n", cpuid); return (ENODEV); } do { typer = bus_read_8(&r_res, GICR_TYPER); if ((typer >> GICR_TYPER_AFF_SHIFT) == aff) { KASSERT(sc->gic_redists.pcpu[cpuid] != NULL, ("Invalid pointer to per-CPU redistributor")); /* Copy res contents to its final destination */ sc->gic_redists.pcpu[cpuid]->res = r_res; sc->gic_redists.pcpu[cpuid]->lpi_enabled = false; if (bootverbose) { device_printf(sc->dev, "CPU%u Re-Distributor has been found\n", cpuid); } return (0); } r_bsh += (GICR_RD_BASE_SIZE + GICR_SGI_BASE_SIZE); if ((typer & GICR_TYPER_VLPIS) != 0) { r_bsh += (GICR_VLPI_BASE_SIZE + GICR_RESERVED_SIZE); } rman_set_bushandle(&r_res, r_bsh); } while ((typer & GICR_TYPER_LAST) == 0); } device_printf(sc->dev, "No Re-Distributor found for CPU%u\n", cpuid); return (ENXIO); } static int gic_v3_redist_wake(struct gic_v3_softc *sc) { uint32_t waker; size_t us_left = 1000000; waker = gic_r_read(sc, 4, GICR_WAKER); /* Wake up Re-Distributor for this CPU */ waker &= ~GICR_WAKER_PS; gic_r_write(sc, 4, GICR_WAKER, waker); /* * When clearing ProcessorSleep bit it is required to wait for * ChildrenAsleep to become zero following the processor power-on. 
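* The loop below polls GICR_WAKER.ChildrenAsleep in 1 us steps for up to a second before panicking.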
*/ while ((gic_r_read(sc, 4, GICR_WAKER) & GICR_WAKER_CA) != 0) { DELAY(1); if (us_left-- == 0) { panic("Could not wake Re-Distributor for CPU%u", PCPU_GET(cpuid)); } } if (bootverbose) { device_printf(sc->dev, "CPU%u Re-Distributor woke up\n", PCPU_GET(cpuid)); } return (0); } static int gic_v3_redist_init(struct gic_v3_softc *sc) { int err; size_t i; err = gic_v3_redist_find(sc); if (err != 0) return (err); err = gic_v3_redist_wake(sc); if (err != 0) return (err); /* Configure SGIs and PPIs to be Group1 Non-secure */ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_IGROUPR0, 0xFFFFFFFF); /* Disable SPIs */ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ICENABLER0, GICR_I_ENABLER_PPI_MASK); /* Enable SGIs */ gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICR_ISENABLER0, GICR_I_ENABLER_SGI_MASK); /* Set priority for SGIs and PPIs */ for (i = 0; i <= GIC_LAST_PPI; i += GICR_I_PER_IPRIORITYn) { gic_r_write(sc, 4, GICR_SGI_BASE_SIZE + GICD_IPRIORITYR(i), GIC_PRIORITY_MAX); } gic_v3_wait_for_rwp(sc, REDIST); return (0); } /* * SPI-mapped Message Based Interrupts -- a GICv3 MSI/MSI-X controller. */ static int gic_v3_gic_alloc_msi(device_t dev, u_int mbi_start, u_int mbi_count, int count, int maxcount, struct intr_irqsrc **isrc) { struct gic_v3_softc *sc; int i, irq, end_irq; bool found; KASSERT(powerof2(count), ("%s: bad count", __func__)); KASSERT(powerof2(maxcount), ("%s: bad maxcount", __func__)); sc = device_get_softc(dev); mtx_lock(&sc->gic_mbi_mtx); found = false; for (irq = mbi_start; irq < mbi_start + mbi_count; irq++) { /* Start on an aligned interrupt */ if ((irq & (maxcount - 1)) != 0) continue; /* Assume we found a valid range until shown otherwise */ found = true; /* Check this range is valid */ for (end_irq = irq; end_irq != irq + count; end_irq++) { /* No free interrupts */ if (end_irq == mbi_start + mbi_count) { found = false; break; } KASSERT((sc->gic_irqs[end_irq].gi_flags & GI_FLAG_MSI)!= 0, ("%s: Non-MSI interrupt found", __func__)); /* This is already used */ if ((sc->gic_irqs[end_irq].gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED) { found = false; break; } } if (found) break; } /* Not enough interrupts were found */ if (!found || irq == mbi_start + mbi_count) { mtx_unlock(&sc->gic_mbi_mtx); return (ENXIO); } for (i = 0; i < count; i++) { /* Mark the interrupt as used */ sc->gic_irqs[irq + i].gi_flags |= GI_FLAG_MSI_USED; } mtx_unlock(&sc->gic_mbi_mtx); for (i = 0; i < count; i++) isrc[i] = (struct intr_irqsrc *)&sc->gic_irqs[irq + i]; return (0); } static int gic_v3_gic_release_msi(device_t dev, int count, struct intr_irqsrc **isrc) { struct gic_v3_softc *sc; struct gic_v3_irqsrc *gi; int i; sc = device_get_softc(dev); mtx_lock(&sc->gic_mbi_mtx); for (i = 0; i < count; i++) { gi = (struct gic_v3_irqsrc *)isrc[i]; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); gi->gi_flags &= ~GI_FLAG_MSI_USED; } mtx_unlock(&sc->gic_mbi_mtx); return (0); } static int gic_v3_gic_alloc_msix(device_t dev, u_int mbi_start, u_int mbi_count, struct intr_irqsrc **isrcp) { struct gic_v3_softc *sc; int irq; sc = device_get_softc(dev); mtx_lock(&sc->gic_mbi_mtx); /* Find an unused interrupt */ for (irq = mbi_start; irq < mbi_start + mbi_count; irq++) { KASSERT((sc->gic_irqs[irq].gi_flags & GI_FLAG_MSI) != 0, ("%s: Non-MSI interrupt found", __func__)); if ((sc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == 0) break; } /* No free interrupt was found */ if (irq == mbi_start + mbi_count) { mtx_unlock(&sc->gic_mbi_mtx); return (ENXIO); 
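/*
 * Illustrative sketch only (not part of this patch): a userspace version of
 * the aligned range search that gic_v3_gic_alloc_msi() above performs over
 * the GI_FLAG_MSI_USED flags.  The bitmap, its size and the sketch_* names
 * are assumptions made for the example; count and maxcount are powers of two
 * as the KASSERTs above require.
 */
#include <stdbool.h>
#include <stdio.h>

#define	SKETCH_NIRQS	32

/* Find `count` consecutive free slots starting on a maxcount-aligned slot. */
static int
sketch_alloc_aligned_range(bool used[SKETCH_NIRQS], int count, int maxcount)
{
	int irq, i;

	for (irq = 0; irq + count <= SKETCH_NIRQS; irq++) {
		if ((irq & (maxcount - 1)) != 0)
			continue;		/* not aligned to maxcount */
		for (i = 0; i < count; i++)
			if (used[irq + i])
				break;		/* range already in use */
		if (i == count) {
			for (i = 0; i < count; i++)
				used[irq + i] = true;
			return (irq);		/* first fitting range */
		}
	}
	return (-1);				/* nothing free: ENXIO above */
}

int
main(void)
{
	bool used[SKETCH_NIRQS] = { [0] = true, [4] = true };

	/* Four aligned interrupts: 0..3 and 4..7 collide, so 8 is returned. */
	printf("allocated at %d\n", sketch_alloc_aligned_range(used, 4, 4));
	return (0);
}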
} /* Mark the interrupt as used */ sc->gic_irqs[irq].gi_flags |= GI_FLAG_MSI_USED; mtx_unlock(&sc->gic_mbi_mtx); *isrcp = (struct intr_irqsrc *)&sc->gic_irqs[irq]; return (0); } static int gic_v3_gic_release_msix(device_t dev, struct intr_irqsrc *isrc) { struct gic_v3_softc *sc; struct gic_v3_irqsrc *gi; sc = device_get_softc(dev); gi = (struct gic_v3_irqsrc *)isrc; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); mtx_lock(&sc->gic_mbi_mtx); gi->gi_flags &= ~GI_FLAG_MSI_USED; mtx_unlock(&sc->gic_mbi_mtx); return (0); } static int gic_v3_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **isrc) { struct gic_v3_softc *sc; int error; sc = device_get_softc(dev); error = gic_v3_gic_alloc_msi(dev, sc->gic_mbi_start, sc->gic_mbi_end - sc->gic_mbi_start, count, maxcount, isrc); if (error != 0) return (error); *pic = dev; return (0); } static int gic_v3_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { return (gic_v3_gic_release_msi(dev, count, isrc)); } static int gic_v3_alloc_msix(device_t dev, device_t child, device_t *pic, struct intr_irqsrc **isrc) { struct gic_v3_softc *sc; int error; sc = device_get_softc(dev); error = gic_v3_gic_alloc_msix(dev, sc->gic_mbi_start, sc->gic_mbi_end - sc->gic_mbi_start, isrc); if (error != 0) return (error); *pic = dev; return (0); } static int gic_v3_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) { return (gic_v3_gic_release_msix(dev, isrc)); } static int gic_v3_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct gic_v3_softc *sc = device_get_softc(dev); struct gic_v3_irqsrc *gi = (struct gic_v3_irqsrc *)isrc; *addr = vtophys(rman_get_virtual(sc->gic_dist)) + GICD_SETSPI_NSR; *data = gi->gi_irq; return (0); } diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c index 0f30ca746e9e..516cfbd9e358 100644 --- a/sys/arm64/arm64/gicv3_its.c +++ b/sys/arm64/arm64/gicv3_its.c @@ -1,2032 +1,2028 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * * This software was developed by Andrew Turner under * the sponsorship of the FreeBSD Foundation. * * This software was developed by Semihalf under * the sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_acpi.h" #include "opt_platform.h" #include "opt_iommu.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include #ifdef IOMMU #include #include #endif #include "pcib_if.h" #include "pic_if.h" #include "msi_if.h" MALLOC_DEFINE(M_GICV3_ITS, "GICv3 ITS", "ARM GICv3 Interrupt Translation Service"); #define LPI_NIRQS (64 * 1024) /* The size and alignment of the command circular buffer */ #define ITS_CMDQ_SIZE (64 * 1024) /* Must be a multiple of 4K */ #define ITS_CMDQ_ALIGN (64 * 1024) #define LPI_CONFTAB_SIZE LPI_NIRQS #define LPI_CONFTAB_ALIGN (64 * 1024) #define LPI_CONFTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */ /* 1 bit per SPI, PPI, and SGI (8k), and 1 bit per LPI (LPI_CONFTAB_SIZE) */ #define LPI_PENDTAB_SIZE ((LPI_NIRQS + GIC_FIRST_LPI) / 8) #define LPI_PENDTAB_ALIGN (64 * 1024) #define LPI_PENDTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */ #define LPI_INT_TRANS_TAB_ALIGN 256 #define LPI_INT_TRANS_TAB_MAX_ADDR ((1ul << 48) - 1) /* ITS commands encoding */ #define ITS_CMD_MOVI (0x01) #define ITS_CMD_SYNC (0x05) #define ITS_CMD_MAPD (0x08) #define ITS_CMD_MAPC (0x09) #define ITS_CMD_MAPTI (0x0a) #define ITS_CMD_MAPI (0x0b) #define ITS_CMD_INV (0x0c) #define ITS_CMD_INVALL (0x0d) /* Command */ #define CMD_COMMAND_MASK (0xFFUL) /* PCI device ID */ #define CMD_DEVID_SHIFT (32) #define CMD_DEVID_MASK (0xFFFFFFFFUL << CMD_DEVID_SHIFT) /* Size of IRQ ID bitfield */ #define CMD_SIZE_MASK (0xFFUL) /* Virtual LPI ID */ #define CMD_ID_MASK (0xFFFFFFFFUL) /* Physical LPI ID */ #define CMD_PID_SHIFT (32) #define CMD_PID_MASK (0xFFFFFFFFUL << CMD_PID_SHIFT) /* Collection */ #define CMD_COL_MASK (0xFFFFUL) /* Target (CPU or Re-Distributor) */ #define CMD_TARGET_SHIFT (16) #define CMD_TARGET_MASK (0xFFFFFFFFUL << CMD_TARGET_SHIFT) /* Interrupt Translation Table address */ #define CMD_ITT_MASK (0xFFFFFFFFFF00UL) /* Valid command bit */ #define CMD_VALID_SHIFT (63) #define CMD_VALID_MASK (1UL << CMD_VALID_SHIFT) #define ITS_TARGET_NONE 0xFBADBEEF /* LPI chunk owned by ITS device */ struct lpi_chunk { u_int lpi_base; u_int lpi_free; /* First free LPI in set */ u_int lpi_num; /* Total number of LPIs in chunk */ u_int lpi_busy; /* Number of busy LPIs in chink */ }; /* ITS device */ struct its_dev { TAILQ_ENTRY(its_dev) entry; /* PCI device */ device_t pci_dev; /* Device ID (i.e. PCI device ID) */ uint32_t devid; /* List of assigned LPIs */ struct lpi_chunk lpis; /* Virtual address of ITT */ vm_offset_t itt; size_t itt_size; }; /* * ITS command descriptor. * Idea for command description passing taken from Linux. 
*/ struct its_cmd_desc { uint8_t cmd_type; union { struct { struct its_dev *its_dev; struct its_col *col; uint32_t id; } cmd_desc_movi; struct { struct its_col *col; } cmd_desc_sync; struct { struct its_col *col; uint8_t valid; } cmd_desc_mapc; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; uint32_t id; } cmd_desc_mapvi; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; } cmd_desc_mapi; struct { struct its_dev *its_dev; uint8_t valid; } cmd_desc_mapd; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; } cmd_desc_inv; struct { struct its_col *col; } cmd_desc_invall; }; }; /* ITS command. Each command is 32 bytes long */ struct its_cmd { uint64_t cmd_dword[4]; /* ITS command double word */ }; /* An ITS private table */ struct its_ptable { vm_offset_t ptab_vaddr; unsigned long ptab_size; }; /* ITS collection description. */ struct its_col { uint64_t col_target; /* Target Re-Distributor */ uint64_t col_id; /* Collection ID */ }; struct gicv3_its_irqsrc { struct intr_irqsrc gi_isrc; u_int gi_id; u_int gi_lpi; struct its_dev *gi_its_dev; TAILQ_ENTRY(gicv3_its_irqsrc) gi_link; }; struct gicv3_its_softc { device_t dev; struct intr_pic *sc_pic; struct resource *sc_its_res; cpuset_t sc_cpus; struct domainset *sc_ds; u_int gic_irq_cpu; struct its_ptable sc_its_ptab[GITS_BASER_NUM]; struct its_col *sc_its_cols[MAXCPU]; /* Per-CPU collections */ /* * TODO: We should get these from the parent as we only want a * single copy of each across the interrupt controller. */ uint8_t *sc_conf_base; vm_offset_t sc_pend_base[MAXCPU]; /* Command handling */ struct mtx sc_its_cmd_lock; struct its_cmd *sc_its_cmd_base; /* Command circular buffer address */ size_t sc_its_cmd_next_idx; vmem_t *sc_irq_alloc; struct gicv3_its_irqsrc **sc_irqs; u_int sc_irq_base; u_int sc_irq_length; u_int sc_irq_count; struct mtx sc_its_dev_lock; TAILQ_HEAD(its_dev_list, its_dev) sc_its_dev_list; TAILQ_HEAD(free_irqs, gicv3_its_irqsrc) sc_free_irqs; #define ITS_FLAGS_CMDQ_FLUSH 0x00000001 #define ITS_FLAGS_LPI_CONF_FLUSH 0x00000002 #define ITS_FLAGS_ERRATA_CAVIUM_22375 0x00000004 u_int sc_its_flags; bool trace_enable; vm_page_t ma; /* fake msi page */ }; static void *conf_base; typedef void (its_quirk_func_t)(device_t); static its_quirk_func_t its_quirk_cavium_22375; static const struct { const char *desc; uint32_t iidr; uint32_t iidr_mask; its_quirk_func_t *func; } its_quirks[] = { { /* Cavium ThunderX Pass 1.x */ .desc = "Cavium ThunderX errata: 22375, 24313", .iidr = GITS_IIDR_RAW(GITS_IIDR_IMPL_CAVIUM, GITS_IIDR_PROD_THUNDER, GITS_IIDR_VAR_THUNDER_1, 0), .iidr_mask = ~GITS_IIDR_REVISION_MASK, .func = its_quirk_cavium_22375, }, }; #define gic_its_read_4(sc, reg) \ bus_read_4((sc)->sc_its_res, (reg)) #define gic_its_read_8(sc, reg) \ bus_read_8((sc)->sc_its_res, (reg)) #define gic_its_write_4(sc, reg, val) \ bus_write_4((sc)->sc_its_res, (reg), (val)) #define gic_its_write_8(sc, reg, val) \ bus_write_8((sc)->sc_its_res, (reg), (val)) static device_attach_t gicv3_its_attach; static device_detach_t gicv3_its_detach; static pic_disable_intr_t gicv3_its_disable_intr; static pic_enable_intr_t gicv3_its_enable_intr; static pic_map_intr_t gicv3_its_map_intr; static pic_setup_intr_t gicv3_its_setup_intr; static pic_post_filter_t gicv3_its_post_filter; static pic_post_ithread_t gicv3_its_post_ithread; static pic_pre_ithread_t gicv3_its_pre_ithread; static pic_bind_intr_t gicv3_its_bind_intr; #ifdef SMP static pic_init_secondary_t gicv3_its_init_secondary; #endif static msi_alloc_msi_t 
gicv3_its_alloc_msi; static msi_release_msi_t gicv3_its_release_msi; static msi_alloc_msix_t gicv3_its_alloc_msix; static msi_release_msix_t gicv3_its_release_msix; static msi_map_msi_t gicv3_its_map_msi; #ifdef IOMMU static msi_iommu_init_t gicv3_iommu_init; static msi_iommu_deinit_t gicv3_iommu_deinit; #endif static void its_cmd_movi(device_t, struct gicv3_its_irqsrc *); static void its_cmd_mapc(device_t, struct its_col *, uint8_t); static void its_cmd_mapti(device_t, struct gicv3_its_irqsrc *); static void its_cmd_mapd(device_t, struct its_dev *, uint8_t); static void its_cmd_inv(device_t, struct its_dev *, struct gicv3_its_irqsrc *); static void its_cmd_invall(device_t, struct its_col *); static device_method_t gicv3_its_methods[] = { /* Device interface */ DEVMETHOD(device_detach, gicv3_its_detach), /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, gicv3_its_disable_intr), DEVMETHOD(pic_enable_intr, gicv3_its_enable_intr), DEVMETHOD(pic_map_intr, gicv3_its_map_intr), DEVMETHOD(pic_setup_intr, gicv3_its_setup_intr), DEVMETHOD(pic_post_filter, gicv3_its_post_filter), DEVMETHOD(pic_post_ithread, gicv3_its_post_ithread), DEVMETHOD(pic_pre_ithread, gicv3_its_pre_ithread), #ifdef SMP DEVMETHOD(pic_bind_intr, gicv3_its_bind_intr), DEVMETHOD(pic_init_secondary, gicv3_its_init_secondary), #endif /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, gicv3_its_alloc_msi), DEVMETHOD(msi_release_msi, gicv3_its_release_msi), DEVMETHOD(msi_alloc_msix, gicv3_its_alloc_msix), DEVMETHOD(msi_release_msix, gicv3_its_release_msix), DEVMETHOD(msi_map_msi, gicv3_its_map_msi), #ifdef IOMMU DEVMETHOD(msi_iommu_init, gicv3_iommu_init), DEVMETHOD(msi_iommu_deinit, gicv3_iommu_deinit), #endif /* End */ DEVMETHOD_END }; static DEFINE_CLASS_0(gic, gicv3_its_driver, gicv3_its_methods, sizeof(struct gicv3_its_softc)); static void gicv3_its_cmdq_init(struct gicv3_its_softc *sc) { vm_paddr_t cmd_paddr; uint64_t reg, tmp; /* Set up the command circular buffer */ sc->sc_its_cmd_base = contigmalloc_domainset(ITS_CMDQ_SIZE, M_GICV3_ITS, sc->sc_ds, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, ITS_CMDQ_ALIGN, 0); sc->sc_its_cmd_next_idx = 0; cmd_paddr = vtophys(sc->sc_its_cmd_base); /* Set the base of the command buffer */ reg = GITS_CBASER_VALID | (GITS_CBASER_CACHE_NIWAWB << GITS_CBASER_CACHE_SHIFT) | cmd_paddr | (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT) | (ITS_CMDQ_SIZE / 4096 - 1); gic_its_write_8(sc, GITS_CBASER, reg); /* Read back to check for fixed value fields */ tmp = gic_its_read_8(sc, GITS_CBASER); if ((tmp & GITS_CBASER_SHARE_MASK) != (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT)) { /* Check if the hardware reported non-shareable */ if ((tmp & GITS_CBASER_SHARE_MASK) == (GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT)) { /* If so remove the cache attribute */ reg &= ~GITS_CBASER_CACHE_MASK; reg &= ~GITS_CBASER_SHARE_MASK; /* Set to Non-cacheable, Non-shareable */ reg |= GITS_CBASER_CACHE_NIN << GITS_CBASER_CACHE_SHIFT; reg |= GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT; gic_its_write_8(sc, GITS_CBASER, reg); } /* The command queue has to be flushed after each command */ sc->sc_its_flags |= ITS_FLAGS_CMDQ_FLUSH; } /* Get the next command from the start of the buffer */ gic_its_write_8(sc, GITS_CWRITER, 0x0); } static int gicv3_its_table_init(device_t dev, struct gicv3_its_softc *sc) { vm_offset_t table; vm_paddr_t paddr; uint64_t cache, reg, share, tmp, type; size_t esize, its_tbl_size, nidents, nitspages, npages; int i, page_size; int devbits; if ((sc->sc_its_flags & ITS_FLAGS_ERRATA_CAVIUM_22375) 
!= 0) { /* * GITS_TYPER[17:13] of ThunderX reports that device IDs * are to be 21 bits in length. The entry size of the ITS * table can be read from GITS_BASERn[52:48] and on ThunderX * is supposed to be 8 bytes in length (for device table). * Finally the page size that is to be used by ITS to access * this table will be set to 64KB. * * This gives 0x200000 entries of size 0x8 bytes covered by * 256 pages each of which 64KB in size. The number of pages * (minus 1) should then be written to GITS_BASERn[7:0]. In * that case this value would be 0xFF but on ThunderX the * maximum value that HW accepts is 0xFD. * * Set an arbitrary number of device ID bits to 20 in order * to limit the number of entries in ITS device table to * 0x100000 and the table size to 8MB. */ devbits = 20; cache = 0; } else { devbits = GITS_TYPER_DEVB(gic_its_read_8(sc, GITS_TYPER)); cache = GITS_BASER_CACHE_WAWB; } share = GITS_BASER_SHARE_IS; page_size = PAGE_SIZE_64K; for (i = 0; i < GITS_BASER_NUM; i++) { reg = gic_its_read_8(sc, GITS_BASER(i)); /* The type of table */ type = GITS_BASER_TYPE(reg); /* The table entry size */ esize = GITS_BASER_ESIZE(reg); switch(type) { case GITS_BASER_TYPE_DEV: nidents = (1 << devbits); its_tbl_size = esize * nidents; its_tbl_size = roundup2(its_tbl_size, PAGE_SIZE_64K); break; case GITS_BASER_TYPE_VP: case GITS_BASER_TYPE_PP: /* Undocumented? */ case GITS_BASER_TYPE_IC: its_tbl_size = page_size; break; default: continue; } npages = howmany(its_tbl_size, PAGE_SIZE); /* Allocate the table */ table = (vm_offset_t)contigmalloc_domainset(npages * PAGE_SIZE, M_GICV3_ITS, sc->sc_ds, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, PAGE_SIZE_64K, 0); sc->sc_its_ptab[i].ptab_vaddr = table; sc->sc_its_ptab[i].ptab_size = npages * PAGE_SIZE; paddr = vtophys(table); while (1) { nitspages = howmany(its_tbl_size, page_size); /* Clear the fields we will be setting */ reg &= ~(GITS_BASER_VALID | GITS_BASER_CACHE_MASK | GITS_BASER_TYPE_MASK | GITS_BASER_ESIZE_MASK | GITS_BASER_PA_MASK | GITS_BASER_SHARE_MASK | GITS_BASER_PSZ_MASK | GITS_BASER_SIZE_MASK); /* Set the new values */ reg |= GITS_BASER_VALID | (cache << GITS_BASER_CACHE_SHIFT) | (type << GITS_BASER_TYPE_SHIFT) | ((esize - 1) << GITS_BASER_ESIZE_SHIFT) | paddr | (share << GITS_BASER_SHARE_SHIFT) | (nitspages - 1); switch (page_size) { case PAGE_SIZE_4K: /* 4KB */ reg |= GITS_BASER_PSZ_4K << GITS_BASER_PSZ_SHIFT; break; case PAGE_SIZE_16K: /* 16KB */ reg |= GITS_BASER_PSZ_16K << GITS_BASER_PSZ_SHIFT; break; case PAGE_SIZE_64K: /* 64KB */ reg |= GITS_BASER_PSZ_64K << GITS_BASER_PSZ_SHIFT; break; } gic_its_write_8(sc, GITS_BASER(i), reg); /* Read back to check */ tmp = gic_its_read_8(sc, GITS_BASER(i)); /* Do the shareability masks line up? 
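 * If not, adopt the shareability the hardware reported and retry the write; a rejected page size is likewise retried with the next smaller size.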
*/ if ((tmp & GITS_BASER_SHARE_MASK) != (reg & GITS_BASER_SHARE_MASK)) { share = (tmp & GITS_BASER_SHARE_MASK) >> GITS_BASER_SHARE_SHIFT; continue; } if ((tmp & GITS_BASER_PSZ_MASK) != (reg & GITS_BASER_PSZ_MASK)) { switch (page_size) { case PAGE_SIZE_16K: page_size = PAGE_SIZE_4K; continue; case PAGE_SIZE_64K: page_size = PAGE_SIZE_16K; continue; } } if (tmp != reg) { device_printf(dev, "GITS_BASER%d: " "unable to be updated: %lx != %lx\n", i, reg, tmp); return (ENXIO); } /* We should have made all needed changes */ break; } } return (0); } static void gicv3_its_conftable_init(struct gicv3_its_softc *sc) { void *conf_table; conf_table = atomic_load_ptr(&conf_base); if (conf_table == NULL) { conf_table = contigmalloc(LPI_CONFTAB_SIZE, M_GICV3_ITS, M_WAITOK, 0, LPI_CONFTAB_MAX_ADDR, LPI_CONFTAB_ALIGN, 0); if (atomic_cmpset_ptr((uintptr_t *)&conf_base, (uintptr_t)NULL, (uintptr_t)conf_table) == 0) { contigfree(conf_table, LPI_CONFTAB_SIZE, M_GICV3_ITS); conf_table = atomic_load_ptr(&conf_base); } } sc->sc_conf_base = conf_table; /* Set the default configuration */ memset(sc->sc_conf_base, GIC_PRIORITY_MAX | LPI_CONF_GROUP1, LPI_CONFTAB_SIZE); /* Flush the table to memory */ cpu_dcache_wb_range((vm_offset_t)sc->sc_conf_base, LPI_CONFTAB_SIZE); } static void gicv3_its_pendtables_init(struct gicv3_its_softc *sc) { int i; for (i = 0; i <= mp_maxid; i++) { if (CPU_ISSET(i, &sc->sc_cpus) == 0) continue; sc->sc_pend_base[i] = (vm_offset_t)contigmalloc( LPI_PENDTAB_SIZE, M_GICV3_ITS, M_WAITOK | M_ZERO, 0, LPI_PENDTAB_MAX_ADDR, LPI_PENDTAB_ALIGN, 0); /* Flush so the ITS can see the memory */ cpu_dcache_wb_range((vm_offset_t)sc->sc_pend_base[i], LPI_PENDTAB_SIZE); } } static void its_init_cpu_lpi(device_t dev, struct gicv3_its_softc *sc) { device_t gicv3; uint64_t xbaser, tmp; uint32_t ctlr; u_int cpuid; gicv3 = device_get_parent(dev); cpuid = PCPU_GET(cpuid); /* Disable LPIs */ ctlr = gic_r_read_4(gicv3, GICR_CTLR); ctlr &= ~GICR_CTLR_LPI_ENABLE; gic_r_write_4(gicv3, GICR_CTLR, ctlr); /* Make sure changes are observable by the GIC */ dsb(sy); /* * Set the redistributor base */ xbaser = vtophys(sc->sc_conf_base) | (GICR_PROPBASER_SHARE_IS << GICR_PROPBASER_SHARE_SHIFT) | (GICR_PROPBASER_CACHE_NIWAWB << GICR_PROPBASER_CACHE_SHIFT) | (flsl(LPI_CONFTAB_SIZE | GIC_FIRST_LPI) - 1); gic_r_write_8(gicv3, GICR_PROPBASER, xbaser); /* Check the cache attributes we set */ tmp = gic_r_read_8(gicv3, GICR_PROPBASER); if ((tmp & GICR_PROPBASER_SHARE_MASK) != (xbaser & GICR_PROPBASER_SHARE_MASK)) { if ((tmp & GICR_PROPBASER_SHARE_MASK) == (GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT)) { /* We need to mark as non-cacheable */ xbaser &= ~(GICR_PROPBASER_SHARE_MASK | GICR_PROPBASER_CACHE_MASK); /* Non-cacheable */ xbaser |= GICR_PROPBASER_CACHE_NIN << GICR_PROPBASER_CACHE_SHIFT; /* Non-shareable */ xbaser |= GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT; gic_r_write_8(gicv3, GICR_PROPBASER, xbaser); } sc->sc_its_flags |= ITS_FLAGS_LPI_CONF_FLUSH; } /* * Set the LPI pending table base */ xbaser = vtophys(sc->sc_pend_base[cpuid]) | (GICR_PENDBASER_CACHE_NIWAWB << GICR_PENDBASER_CACHE_SHIFT) | (GICR_PENDBASER_SHARE_IS << GICR_PENDBASER_SHARE_SHIFT); gic_r_write_8(gicv3, GICR_PENDBASER, xbaser); tmp = gic_r_read_8(gicv3, GICR_PENDBASER); if ((tmp & GICR_PENDBASER_SHARE_MASK) == (GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT)) { /* Clear the cache and shareability bits */ xbaser &= ~(GICR_PENDBASER_CACHE_MASK | GICR_PENDBASER_SHARE_MASK); /* Mark as non-shareable */ xbaser |= GICR_PENDBASER_SHARE_NS
<< GICR_PENDBASER_SHARE_SHIFT; /* And non-cacheable */ xbaser |= GICR_PENDBASER_CACHE_NIN << GICR_PENDBASER_CACHE_SHIFT; } /* Enable LPIs */ ctlr = gic_r_read_4(gicv3, GICR_CTLR); ctlr |= GICR_CTLR_LPI_ENABLE; gic_r_write_4(gicv3, GICR_CTLR, ctlr); /* Make sure the GIC has seen everything */ dsb(sy); } static int its_init_cpu(device_t dev, struct gicv3_its_softc *sc) { device_t gicv3; vm_paddr_t target; u_int cpuid; struct redist_pcpu *rpcpu; gicv3 = device_get_parent(dev); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sc->sc_cpus)) return (0); /* Check if the ITS is enabled on this CPU */ if ((gic_r_read_8(gicv3, GICR_TYPER) & GICR_TYPER_PLPIS) == 0) return (ENXIO); rpcpu = gicv3_get_redist(dev); /* Do per-cpu LPI init once */ if (!rpcpu->lpi_enabled) { its_init_cpu_lpi(dev, sc); rpcpu->lpi_enabled = true; } if ((gic_its_read_8(sc, GITS_TYPER) & GITS_TYPER_PTA) != 0) { /* This ITS wants the redistributor physical address */ target = vtophys(rman_get_virtual(&rpcpu->res)); } else { /* This ITS wants the unique processor number */ target = GICR_TYPER_CPUNUM(gic_r_read_8(gicv3, GICR_TYPER)) << CMD_TARGET_SHIFT; } sc->sc_its_cols[cpuid]->col_target = target; sc->sc_its_cols[cpuid]->col_id = cpuid; its_cmd_mapc(dev, sc->sc_its_cols[cpuid], 1); its_cmd_invall(dev, sc->sc_its_cols[cpuid]); return (0); } static int gicv3_its_sysctl_trace_enable(SYSCTL_HANDLER_ARGS) { struct gicv3_its_softc *sc; int rv; sc = arg1; rv = sysctl_handle_bool(oidp, &sc->trace_enable, 0, req); if (rv != 0 || req->newptr == NULL) return (rv); if (sc->trace_enable) gic_its_write_8(sc, GITS_TRKCTLR, 3); else gic_its_write_8(sc, GITS_TRKCTLR, 0); return (0); } static int gicv3_its_sysctl_trace_regs(SYSCTL_HANDLER_ARGS) { struct gicv3_its_softc *sc; struct sbuf *sb; int err; sc = arg1; sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) { device_printf(sc->dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(sb, "\n"); sbuf_printf(sb, "GITS_TRKCTLR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKCTLR)); sbuf_printf(sb, "GITS_TRKR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKR)); sbuf_printf(sb, "GITS_TRKDIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKDIDR)); sbuf_printf(sb, "GITS_TRKPIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKPIDR)); sbuf_printf(sb, "GITS_TRKVIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKVIDR)); sbuf_printf(sb, "GITS_TRKTGTR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKTGTR)); err = sbuf_finish(sb); if (err) device_printf(sc->dev, "Error finishing sbuf: %d\n", err); sbuf_delete(sb); return(err); } static int gicv3_its_init_sysctl(struct gicv3_its_softc *sc) { struct sysctl_oid *oid, *child; struct sysctl_ctx_list *ctx_list; ctx_list = device_get_sysctl_ctx(sc->dev); child = device_get_sysctl_tree(sc->dev); oid = SYSCTL_ADD_NODE(ctx_list, SYSCTL_CHILDREN(child), OID_AUTO, "tracing", CTLFLAG_RD| CTLFLAG_MPSAFE, NULL, "Messages tracing"); if (oid == NULL) return (ENXIO); /* Add registers */ SYSCTL_ADD_PROC(ctx_list, SYSCTL_CHILDREN(oid), OID_AUTO, "enable", CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, gicv3_its_sysctl_trace_enable, "CU", "Enable tracing"); SYSCTL_ADD_PROC(ctx_list, SYSCTL_CHILDREN(oid), OID_AUTO, "capture", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, gicv3_its_sysctl_trace_regs, "", "Captured tracing registers."); return (0); } static int gicv3_its_attach(device_t dev) { struct gicv3_its_softc *sc; int domain, err, i, rid; uint64_t phys; uint32_t iidr; sc = device_get_softc(dev); sc->sc_irq_length = gicv3_get_nirqs(dev); sc->sc_irq_base = GIC_FIRST_LPI; sc->sc_irq_base += 
device_get_unit(dev) * sc->sc_irq_length; rid = 0; sc->sc_its_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_its_res == NULL) { device_printf(dev, "Could not allocate memory\n"); return (ENXIO); } phys = rounddown2(vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER, PAGE_SIZE); sc->ma = malloc(sizeof(struct vm_page), M_DEVBUF, M_WAITOK | M_ZERO); vm_page_initfake(sc->ma, phys, VM_MEMATTR_DEFAULT); CPU_COPY(&all_cpus, &sc->sc_cpus); iidr = gic_its_read_4(sc, GITS_IIDR); for (i = 0; i < nitems(its_quirks); i++) { if ((iidr & its_quirks[i].iidr_mask) == its_quirks[i].iidr) { if (bootverbose) { device_printf(dev, "Applying %s\n", its_quirks[i].desc); } its_quirks[i].func(dev); break; } } if (bus_get_domain(dev, &domain) == 0 && domain < MAXMEMDOM) { sc->sc_ds = DOMAINSET_PREF(domain); } else { sc->sc_ds = DOMAINSET_RR(); } /* Allocate the private tables */ err = gicv3_its_table_init(dev, sc); if (err != 0) return (err); /* Protects access to the device list */ mtx_init(&sc->sc_its_dev_lock, "ITS device lock", NULL, MTX_SPIN); /* Protects access to the ITS command circular buffer. */ mtx_init(&sc->sc_its_cmd_lock, "ITS cmd lock", NULL, MTX_SPIN); /* Allocate the command circular buffer */ gicv3_its_cmdq_init(sc); /* Allocate the per-CPU collections */ for (int cpu = 0; cpu <= mp_maxid; cpu++) if (CPU_ISSET(cpu, &sc->sc_cpus) != 0) sc->sc_its_cols[cpu] = malloc_domainset( sizeof(*sc->sc_its_cols[0]), M_GICV3_ITS, DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK | M_ZERO); else sc->sc_its_cols[cpu] = NULL; /* Enable the ITS */ gic_its_write_4(sc, GITS_CTLR, gic_its_read_4(sc, GITS_CTLR) | GITS_CTLR_EN); /* Create the LPI configuration table */ gicv3_its_conftable_init(sc); /* And the pending tebles */ gicv3_its_pendtables_init(sc); /* Enable LPIs on this CPU */ its_init_cpu(dev, sc); TAILQ_INIT(&sc->sc_its_dev_list); TAILQ_INIT(&sc->sc_free_irqs); /* * Create the vmem object to allocate INTRNG IRQs from. We try to * use all IRQs not already used by the GICv3. * XXX: This assumes there are no other interrupt controllers in the * system. */ sc->sc_irq_alloc = vmem_create(device_get_nameunit(dev), 0, gicv3_get_nirqs(dev), 1, 0, M_FIRSTFIT | M_WAITOK); sc->sc_irqs = malloc(sizeof(*sc->sc_irqs) * sc->sc_irq_length, M_GICV3_ITS, M_WAITOK | M_ZERO); /* For GIC-500 install tracking sysctls. */ if ((iidr & (GITS_IIDR_PRODUCT_MASK | GITS_IIDR_IMPLEMENTOR_MASK)) == GITS_IIDR_RAW(GITS_IIDR_IMPL_ARM, GITS_IIDR_PROD_GIC500, 0, 0)) gicv3_its_init_sysctl(sc); return (0); } static int gicv3_its_detach(device_t dev) { return (ENXIO); } static void its_quirk_cavium_22375(device_t dev) { struct gicv3_its_softc *sc; int domain; sc = device_get_softc(dev); sc->sc_its_flags |= ITS_FLAGS_ERRATA_CAVIUM_22375; /* * We need to limit which CPUs we send these interrupts to on * the original dual socket ThunderX as it is unable to * forward them between the two sockets. */ if (bus_get_domain(dev, &domain) == 0) { if (domain < MAXMEMDOM) { CPU_COPY(&cpuset_domain[domain], &sc->sc_cpus); } else { CPU_ZERO(&sc->sc_cpus); } } } static void gicv3_its_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; uint8_t *conf; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; conf = sc->sc_conf_base; conf[girq->gi_lpi] &= ~LPI_CONF_ENABLE; if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { /* Clean D-cache under command. 
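 * The write-back publishes the updated configuration byte to the ITS when the LPI configuration table is not coherently shared; otherwise the store barrier below is sufficient.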
*/ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); } else { /* DSB inner shareable, store */ dsb(ishst); } its_cmd_inv(dev, girq->gi_its_dev, girq); } static void gicv3_its_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; uint8_t *conf; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; conf = sc->sc_conf_base; conf[girq->gi_lpi] |= LPI_CONF_ENABLE; if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { /* Clean D-cache under command. */ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); } else { /* DSB inner shareable, store */ dsb(ishst); } its_cmd_inv(dev, girq->gi_its_dev, girq); } static int gicv3_its_intr(void *arg, uintptr_t irq) { struct gicv3_its_softc *sc = arg; struct gicv3_its_irqsrc *girq; struct trapframe *tf; irq -= sc->sc_irq_base; girq = sc->sc_irqs[irq]; if (girq == NULL) panic("gicv3_its_intr: Invalid interrupt %ld", irq + sc->sc_irq_base); tf = curthread->td_intr_frame; intr_isrc_dispatch(&girq->gi_isrc, tf); return (FILTER_HANDLED); } static void gicv3_its_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; - struct gicv3_its_softc *sc; - sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); } static void gicv3_its_post_ithread(device_t dev, struct intr_irqsrc *isrc) { } static void gicv3_its_post_filter(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; - struct gicv3_its_softc *sc; - sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); } static int gicv3_its_select_cpu(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; sc = device_get_softc(dev); if (CPU_EMPTY(&isrc->isrc_cpu)) { sc->gic_irq_cpu = intr_irq_next_cpu(sc->gic_irq_cpu, &sc->sc_cpus); CPU_SETOF(sc->gic_irq_cpu, &isrc->isrc_cpu); } return (0); } static int gicv3_its_bind_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; gicv3_its_select_cpu(dev, isrc); girq = (struct gicv3_its_irqsrc *)isrc; its_cmd_movi(dev, girq); return (0); } static int gicv3_its_map_intr(device_t dev, struct intr_map_data *data, struct intr_irqsrc **isrcp) { /* * This should never happen, we only call this function to map * interrupts found before the controller driver is ready. */ panic("gicv3_its_map_intr: Unable to map a MSI interrupt"); } static int gicv3_its_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { /* Bind the interrupt to a CPU */ gicv3_its_bind_intr(dev, isrc); return (0); } #ifdef SMP static void gicv3_its_init_secondary(device_t dev) { struct gicv3_its_softc *sc; sc = device_get_softc(dev); /* * This is fatal as otherwise we may bind interrupts to this CPU. * We need a way to tell the interrupt framework to only bind to a * subset of given CPUs when it performs the shuffle. 
*/ if (its_init_cpu(dev, sc) != 0) panic("gicv3_its_init_secondary: No usable ITS on CPU%d", PCPU_GET(cpuid)); } #endif static uint32_t its_get_devid(device_t pci_dev) { uintptr_t id; if (pci_get_id(pci_dev, PCI_ID_MSI, &id) != 0) panic("%s: %s: Unable to get the MSI DeviceID", __func__, device_get_nameunit(pci_dev)); return (id); } static struct its_dev * its_device_find(device_t dev, device_t child) { struct gicv3_its_softc *sc; struct its_dev *its_dev = NULL; sc = device_get_softc(dev); mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_FOREACH(its_dev, &sc->sc_its_dev_list, entry) { if (its_dev->pci_dev == child) break; } mtx_unlock_spin(&sc->sc_its_dev_lock); return (its_dev); } static struct its_dev * its_device_get(device_t dev, device_t child, u_int nvecs) { struct gicv3_its_softc *sc; struct its_dev *its_dev; vmem_addr_t irq_base; size_t esize; sc = device_get_softc(dev); its_dev = its_device_find(dev, child); if (its_dev != NULL) return (its_dev); its_dev = malloc(sizeof(*its_dev), M_GICV3_ITS, M_NOWAIT | M_ZERO); if (its_dev == NULL) return (NULL); its_dev->pci_dev = child; its_dev->devid = its_get_devid(child); its_dev->lpis.lpi_busy = 0; its_dev->lpis.lpi_num = nvecs; its_dev->lpis.lpi_free = nvecs; if (vmem_alloc(sc->sc_irq_alloc, nvecs, M_FIRSTFIT | M_NOWAIT, &irq_base) != 0) { free(its_dev, M_GICV3_ITS); return (NULL); } its_dev->lpis.lpi_base = irq_base; /* Get ITT entry size */ esize = GITS_TYPER_ITTES(gic_its_read_8(sc, GITS_TYPER)); /* * Allocate ITT for this device. * PA has to be 256 B aligned. At least two entries for device. */ its_dev->itt_size = roundup2(MAX(nvecs, 2) * esize, 256); its_dev->itt = (vm_offset_t)contigmalloc_domainset(its_dev->itt_size, M_GICV3_ITS, sc->sc_ds, M_NOWAIT | M_ZERO, 0, LPI_INT_TRANS_TAB_MAX_ADDR, LPI_INT_TRANS_TAB_ALIGN, 0); if (its_dev->itt == 0) { vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, nvecs); free(its_dev, M_GICV3_ITS); return (NULL); } /* Make sure device sees zeroed ITT. 
*/ if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) cpu_dcache_wb_range(its_dev->itt, its_dev->itt_size); mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_INSERT_TAIL(&sc->sc_its_dev_list, its_dev, entry); mtx_unlock_spin(&sc->sc_its_dev_lock); /* Map device to its ITT */ its_cmd_mapd(dev, its_dev, 1); return (its_dev); } static void its_device_release(device_t dev, struct its_dev *its_dev) { struct gicv3_its_softc *sc; KASSERT(its_dev->lpis.lpi_busy == 0, ("its_device_release: Trying to release an inuse ITS device")); /* Unmap device in ITS */ its_cmd_mapd(dev, its_dev, 0); sc = device_get_softc(dev); /* Remove the device from the list of devices */ mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_REMOVE(&sc->sc_its_dev_list, its_dev, entry); mtx_unlock_spin(&sc->sc_its_dev_lock); /* Free ITT */ KASSERT(its_dev->itt != 0, ("Invalid ITT in valid ITS device")); contigfree((void *)its_dev->itt, its_dev->itt_size, M_GICV3_ITS); /* Free the IRQ allocation */ vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, its_dev->lpis.lpi_num); free(its_dev, M_GICV3_ITS); } static struct gicv3_its_irqsrc * gicv3_its_alloc_irqsrc(device_t dev, struct gicv3_its_softc *sc, u_int irq) { struct gicv3_its_irqsrc *girq = NULL; KASSERT(sc->sc_irqs[irq] == NULL, ("%s: Interrupt %u already allocated", __func__, irq)); mtx_lock_spin(&sc->sc_its_dev_lock); if (!TAILQ_EMPTY(&sc->sc_free_irqs)) { girq = TAILQ_FIRST(&sc->sc_free_irqs); TAILQ_REMOVE(&sc->sc_free_irqs, girq, gi_link); } mtx_unlock_spin(&sc->sc_its_dev_lock); if (girq == NULL) { girq = malloc(sizeof(*girq), M_GICV3_ITS, M_NOWAIT | M_ZERO); if (girq == NULL) return (NULL); girq->gi_id = -1; if (intr_isrc_register(&girq->gi_isrc, dev, 0, "%s,%u", device_get_nameunit(dev), irq) != 0) { free(girq, M_GICV3_ITS); return (NULL); } } girq->gi_lpi = irq + sc->sc_irq_base - GIC_FIRST_LPI; sc->sc_irqs[irq] = girq; return (girq); } static void gicv3_its_release_irqsrc(struct gicv3_its_softc *sc, struct gicv3_its_irqsrc *girq) { u_int irq; mtx_assert(&sc->sc_its_dev_lock, MA_OWNED); irq = girq->gi_lpi + GIC_FIRST_LPI - sc->sc_irq_base; sc->sc_irqs[irq] = NULL; girq->gi_id = -1; girq->gi_its_dev = NULL; TAILQ_INSERT_TAIL(&sc->sc_free_irqs, girq, gi_link); } static int gicv3_its_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **srcs) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; u_int irq; int i; its_dev = its_device_get(dev, child, count); if (its_dev == NULL) return (ENXIO); KASSERT(its_dev->lpis.lpi_free >= count, ("gicv3_its_alloc_msi: No free LPIs")); sc = device_get_softc(dev); irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - its_dev->lpis.lpi_free; /* Allocate the irqsrc for each MSI */ for (i = 0; i < count; i++, irq++) { its_dev->lpis.lpi_free--; srcs[i] = (struct intr_irqsrc *)gicv3_its_alloc_irqsrc(dev, sc, irq); if (srcs[i] == NULL) break; } /* The allocation failed, release them */ if (i != count) { mtx_lock_spin(&sc->sc_its_dev_lock); for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)srcs[i]; if (girq == NULL) break; gicv3_its_release_irqsrc(sc, girq); srcs[i] = NULL; } mtx_unlock_spin(&sc->sc_its_dev_lock); return (ENXIO); } /* Finish the allocation now we have all MSI irqsrcs */ for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)srcs[i]; girq->gi_id = i; girq->gi_its_dev = its_dev; /* Map the message to the given IRQ */ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); its_cmd_mapti(dev, girq); } its_dev->lpis.lpi_busy += count; *pic = 
dev; return (0); } static int gicv3_its_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; int i; its_dev = its_device_find(dev, child); KASSERT(its_dev != NULL, ("gicv3_its_release_msi: Releasing a MSI interrupt with " "no ITS device")); KASSERT(its_dev->lpis.lpi_busy >= count, ("gicv3_its_release_msi: Releasing more interrupts than " "were allocated: releasing %d, allocated %d", count, its_dev->lpis.lpi_busy)); sc = device_get_softc(dev); mtx_lock_spin(&sc->sc_its_dev_lock); for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)isrc[i]; gicv3_its_release_irqsrc(sc, girq); } mtx_unlock_spin(&sc->sc_its_dev_lock); its_dev->lpis.lpi_busy -= count; if (its_dev->lpis.lpi_busy == 0) its_device_release(dev, its_dev); return (0); } static int gicv3_its_alloc_msix(device_t dev, device_t child, device_t *pic, struct intr_irqsrc **isrcp) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; u_int nvecs, irq; nvecs = pci_msix_count(child); its_dev = its_device_get(dev, child, nvecs); if (its_dev == NULL) return (ENXIO); KASSERT(its_dev->lpis.lpi_free > 0, ("gicv3_its_alloc_msix: No free LPIs")); sc = device_get_softc(dev); irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - its_dev->lpis.lpi_free; girq = gicv3_its_alloc_irqsrc(dev, sc, irq); if (girq == NULL) return (ENXIO); girq->gi_id = its_dev->lpis.lpi_busy; girq->gi_its_dev = its_dev; its_dev->lpis.lpi_free--; its_dev->lpis.lpi_busy++; /* Map the message to the given IRQ */ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); its_cmd_mapti(dev, girq); *pic = dev; *isrcp = (struct intr_irqsrc *)girq; return (0); } static int gicv3_its_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; its_dev = its_device_find(dev, child); KASSERT(its_dev != NULL, ("gicv3_its_release_msix: Releasing a MSI-X interrupt with " "no ITS device")); KASSERT(its_dev->lpis.lpi_busy > 0, ("gicv3_its_release_msix: Releasing more interrupts than " "were allocated: allocated %d", its_dev->lpis.lpi_busy)); sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; mtx_lock_spin(&sc->sc_its_dev_lock); gicv3_its_release_irqsrc(sc, girq); mtx_unlock_spin(&sc->sc_its_dev_lock); its_dev->lpis.lpi_busy--; if (its_dev->lpis.lpi_busy == 0) its_device_release(dev, its_dev); return (0); } static int gicv3_its_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; *addr = vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER; *data = girq->gi_id; return (0); } #ifdef IOMMU static int gicv3_iommu_init(device_t dev, device_t child, struct iommu_domain **domain) { struct gicv3_its_softc *sc; struct iommu_ctx *ctx; int error; sc = device_get_softc(dev); ctx = iommu_get_dev_ctx(child); error = iommu_map_msi(ctx, PAGE_SIZE, GITS_TRANSLATER, IOMMU_MAP_ENTRY_WRITE, IOMMU_MF_CANWAIT, &sc->ma); *domain = iommu_get_ctx_domain(ctx); return (error); } static void gicv3_iommu_deinit(device_t dev, device_t child) { struct iommu_ctx *ctx; ctx = iommu_get_dev_ctx(child); iommu_unmap_msi(ctx); } #endif /* * Commands handling. 
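 * Each command is four little-endian doublewords filled in place by the cmd_format_*() helpers; its_cmd_send() claims a slot in the circular buffer, publishes it through GITS_CWRITER and then polls GITS_CREADR until the ITS has consumed it.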
*/ static __inline void cmd_format_command(struct its_cmd *cmd, uint8_t cmd_type) { /* Command field: DW0 [7:0] */ cmd->cmd_dword[0] &= htole64(~CMD_COMMAND_MASK); cmd->cmd_dword[0] |= htole64(cmd_type); } static __inline void cmd_format_devid(struct its_cmd *cmd, uint32_t devid) { /* Device ID field: DW0 [63:32] */ cmd->cmd_dword[0] &= htole64(~CMD_DEVID_MASK); cmd->cmd_dword[0] |= htole64((uint64_t)devid << CMD_DEVID_SHIFT); } static __inline void cmd_format_size(struct its_cmd *cmd, uint16_t size) { /* Size field: DW1 [4:0] */ cmd->cmd_dword[1] &= htole64(~CMD_SIZE_MASK); cmd->cmd_dword[1] |= htole64((size & CMD_SIZE_MASK)); } static __inline void cmd_format_id(struct its_cmd *cmd, uint32_t id) { /* ID field: DW1 [31:0] */ cmd->cmd_dword[1] &= htole64(~CMD_ID_MASK); cmd->cmd_dword[1] |= htole64(id); } static __inline void cmd_format_pid(struct its_cmd *cmd, uint32_t pid) { /* Physical ID field: DW1 [63:32] */ cmd->cmd_dword[1] &= htole64(~CMD_PID_MASK); cmd->cmd_dword[1] |= htole64((uint64_t)pid << CMD_PID_SHIFT); } static __inline void cmd_format_col(struct its_cmd *cmd, uint16_t col_id) { /* Collection field: DW2 [16:0] */ cmd->cmd_dword[2] &= htole64(~CMD_COL_MASK); cmd->cmd_dword[2] |= htole64(col_id); } static __inline void cmd_format_target(struct its_cmd *cmd, uint64_t target) { /* Target Address field: DW2 [47:16] */ cmd->cmd_dword[2] &= htole64(~CMD_TARGET_MASK); cmd->cmd_dword[2] |= htole64(target & CMD_TARGET_MASK); } static __inline void cmd_format_itt(struct its_cmd *cmd, uint64_t itt) { /* ITT Address field: DW2 [47:8] */ cmd->cmd_dword[2] &= htole64(~CMD_ITT_MASK); cmd->cmd_dword[2] |= htole64(itt & CMD_ITT_MASK); } static __inline void cmd_format_valid(struct its_cmd *cmd, uint8_t valid) { /* Valid field: DW2 [63] */ cmd->cmd_dword[2] &= htole64(~CMD_VALID_MASK); cmd->cmd_dword[2] |= htole64((uint64_t)valid << CMD_VALID_SHIFT); } static inline bool its_cmd_queue_full(struct gicv3_its_softc *sc) { size_t read_idx, next_write_idx; /* Get the index of the next command */ next_write_idx = (sc->sc_its_cmd_next_idx + 1) % (ITS_CMDQ_SIZE / sizeof(struct its_cmd)); /* And the index of the current command being read */ read_idx = gic_its_read_4(sc, GITS_CREADR) / sizeof(struct its_cmd); /* * The queue is full when the write offset points * at the command before the current read offset. */ return (next_write_idx == read_idx); } static inline void its_cmd_sync(struct gicv3_its_softc *sc, struct its_cmd *cmd) { if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) { /* Clean D-cache under command. */ cpu_dcache_wb_range((vm_offset_t)cmd, sizeof(*cmd)); } else { /* DSB inner shareable, store */ dsb(ishst); } } static inline uint64_t its_cmd_cwriter_offset(struct gicv3_its_softc *sc, struct its_cmd *cmd) { uint64_t off; off = (cmd - sc->sc_its_cmd_base) * sizeof(*cmd); return (off); } static void its_cmd_wait_completion(device_t dev, struct its_cmd *cmd_first, struct its_cmd *cmd_last) { struct gicv3_its_softc *sc; uint64_t first, last, read; size_t us_left; sc = device_get_softc(dev); /* * XXX ARM64TODO: This is obviously a significant delay. * The reason for that is that currently the time frames for * the command to complete are not known. 
*/ us_left = 1000000; first = its_cmd_cwriter_offset(sc, cmd_first); last = its_cmd_cwriter_offset(sc, cmd_last); for (;;) { read = gic_its_read_8(sc, GITS_CREADR); if (first < last) { if (read < first || read >= last) break; } else if (read < first && read >= last) break; if (us_left-- == 0) { /* This means timeout */ device_printf(dev, "Timeout while waiting for CMD completion.\n"); return; } DELAY(1); } } static struct its_cmd * its_cmd_alloc_locked(device_t dev) { struct gicv3_its_softc *sc; struct its_cmd *cmd; size_t us_left; sc = device_get_softc(dev); /* * XXX ARM64TODO: This is obviously a significant delay. * The reason for that is that currently the time frames for * the command to complete (and therefore free the descriptor) * are not known. */ us_left = 1000000; mtx_assert(&sc->sc_its_cmd_lock, MA_OWNED); while (its_cmd_queue_full(sc)) { if (us_left-- == 0) { /* Timeout while waiting for free command */ device_printf(dev, "Timeout while waiting for free command\n"); return (NULL); } DELAY(1); } cmd = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx]; sc->sc_its_cmd_next_idx++; sc->sc_its_cmd_next_idx %= ITS_CMDQ_SIZE / sizeof(struct its_cmd); return (cmd); } static uint64_t its_cmd_prepare(struct its_cmd *cmd, struct its_cmd_desc *desc) { uint64_t target; uint8_t cmd_type; u_int size; cmd_type = desc->cmd_type; target = ITS_TARGET_NONE; switch (cmd_type) { case ITS_CMD_MOVI: /* Move interrupt ID to another collection */ target = desc->cmd_desc_movi.col->col_target; cmd_format_command(cmd, ITS_CMD_MOVI); cmd_format_id(cmd, desc->cmd_desc_movi.id); cmd_format_col(cmd, desc->cmd_desc_movi.col->col_id); cmd_format_devid(cmd, desc->cmd_desc_movi.its_dev->devid); break; case ITS_CMD_SYNC: /* Wait for previous commands completion */ target = desc->cmd_desc_sync.col->col_target; cmd_format_command(cmd, ITS_CMD_SYNC); cmd_format_target(cmd, target); break; case ITS_CMD_MAPD: /* Assign ITT to device */ cmd_format_command(cmd, ITS_CMD_MAPD); cmd_format_itt(cmd, vtophys(desc->cmd_desc_mapd.its_dev->itt)); /* * Size describes number of bits to encode interrupt IDs * supported by the device minus one. * When V (valid) bit is zero, this field should be written * as zero. 
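 * For example, a device that allocated three MSI vectors gets size = fls(3) - 1 = 1, i.e. two EventID bits and room for four events.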
*/ if (desc->cmd_desc_mapd.valid != 0) { size = fls(desc->cmd_desc_mapd.its_dev->lpis.lpi_num); size = MAX(1, size) - 1; } else size = 0; cmd_format_size(cmd, size); cmd_format_devid(cmd, desc->cmd_desc_mapd.its_dev->devid); cmd_format_valid(cmd, desc->cmd_desc_mapd.valid); break; case ITS_CMD_MAPC: /* Map collection to Re-Distributor */ target = desc->cmd_desc_mapc.col->col_target; cmd_format_command(cmd, ITS_CMD_MAPC); cmd_format_col(cmd, desc->cmd_desc_mapc.col->col_id); cmd_format_valid(cmd, desc->cmd_desc_mapc.valid); cmd_format_target(cmd, target); break; case ITS_CMD_MAPTI: target = desc->cmd_desc_mapvi.col->col_target; cmd_format_command(cmd, ITS_CMD_MAPTI); cmd_format_devid(cmd, desc->cmd_desc_mapvi.its_dev->devid); cmd_format_id(cmd, desc->cmd_desc_mapvi.id); cmd_format_pid(cmd, desc->cmd_desc_mapvi.pid); cmd_format_col(cmd, desc->cmd_desc_mapvi.col->col_id); break; case ITS_CMD_MAPI: target = desc->cmd_desc_mapi.col->col_target; cmd_format_command(cmd, ITS_CMD_MAPI); cmd_format_devid(cmd, desc->cmd_desc_mapi.its_dev->devid); cmd_format_id(cmd, desc->cmd_desc_mapi.pid); cmd_format_col(cmd, desc->cmd_desc_mapi.col->col_id); break; case ITS_CMD_INV: target = desc->cmd_desc_inv.col->col_target; cmd_format_command(cmd, ITS_CMD_INV); cmd_format_devid(cmd, desc->cmd_desc_inv.its_dev->devid); cmd_format_id(cmd, desc->cmd_desc_inv.pid); break; case ITS_CMD_INVALL: cmd_format_command(cmd, ITS_CMD_INVALL); cmd_format_col(cmd, desc->cmd_desc_invall.col->col_id); break; default: panic("its_cmd_prepare: Invalid command: %x", cmd_type); } return (target); } static int its_cmd_send(device_t dev, struct its_cmd_desc *desc) { struct gicv3_its_softc *sc; struct its_cmd *cmd, *cmd_sync, *cmd_write; struct its_col col_sync; struct its_cmd_desc desc_sync; uint64_t target, cwriter; sc = device_get_softc(dev); mtx_lock_spin(&sc->sc_its_cmd_lock); cmd = its_cmd_alloc_locked(dev); if (cmd == NULL) { device_printf(dev, "could not allocate ITS command\n"); mtx_unlock_spin(&sc->sc_its_cmd_lock); return (EBUSY); } target = its_cmd_prepare(cmd, desc); its_cmd_sync(sc, cmd); if (target != ITS_TARGET_NONE) { cmd_sync = its_cmd_alloc_locked(dev); if (cmd_sync != NULL) { desc_sync.cmd_type = ITS_CMD_SYNC; col_sync.col_target = target; desc_sync.cmd_desc_sync.col = &col_sync; its_cmd_prepare(cmd_sync, &desc_sync); its_cmd_sync(sc, cmd_sync); } } /* Update GITS_CWRITER */ cwriter = sc->sc_its_cmd_next_idx * sizeof(struct its_cmd); gic_its_write_8(sc, GITS_CWRITER, cwriter); cmd_write = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx]; mtx_unlock_spin(&sc->sc_its_cmd_lock); its_cmd_wait_completion(dev, cmd, cmd_write); return (0); } /* Handlers to send commands */ static void its_cmd_movi(device_t dev, struct gicv3_its_irqsrc *girq) { struct gicv3_its_softc *sc; struct its_cmd_desc desc; struct its_col *col; sc = device_get_softc(dev); col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1]; desc.cmd_type = ITS_CMD_MOVI; desc.cmd_desc_movi.its_dev = girq->gi_its_dev; desc.cmd_desc_movi.col = col; desc.cmd_desc_movi.id = girq->gi_id; its_cmd_send(dev, &desc); } static void its_cmd_mapc(device_t dev, struct its_col *col, uint8_t valid) { struct its_cmd_desc desc; desc.cmd_type = ITS_CMD_MAPC; desc.cmd_desc_mapc.col = col; /* * Valid bit set - map the collection. * Valid bit cleared - unmap the collection. 
*/ desc.cmd_desc_mapc.valid = valid; its_cmd_send(dev, &desc); } static void its_cmd_mapti(device_t dev, struct gicv3_its_irqsrc *girq) { struct gicv3_its_softc *sc; struct its_cmd_desc desc; struct its_col *col; u_int col_id; sc = device_get_softc(dev); col_id = CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1; col = sc->sc_its_cols[col_id]; desc.cmd_type = ITS_CMD_MAPTI; desc.cmd_desc_mapvi.its_dev = girq->gi_its_dev; desc.cmd_desc_mapvi.col = col; /* The EventID sent to the device */ desc.cmd_desc_mapvi.id = girq->gi_id; /* The physical interrupt presented to software */ desc.cmd_desc_mapvi.pid = girq->gi_lpi + GIC_FIRST_LPI; its_cmd_send(dev, &desc); } static void its_cmd_mapd(device_t dev, struct its_dev *its_dev, uint8_t valid) { struct its_cmd_desc desc; desc.cmd_type = ITS_CMD_MAPD; desc.cmd_desc_mapd.its_dev = its_dev; desc.cmd_desc_mapd.valid = valid; its_cmd_send(dev, &desc); } static void its_cmd_inv(device_t dev, struct its_dev *its_dev, struct gicv3_its_irqsrc *girq) { struct gicv3_its_softc *sc; struct its_cmd_desc desc; struct its_col *col; sc = device_get_softc(dev); col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1]; desc.cmd_type = ITS_CMD_INV; /* The EventID sent to the device */ desc.cmd_desc_inv.pid = girq->gi_id; desc.cmd_desc_inv.its_dev = its_dev; desc.cmd_desc_inv.col = col; its_cmd_send(dev, &desc); } static void its_cmd_invall(device_t dev, struct its_col *col) { struct its_cmd_desc desc; desc.cmd_type = ITS_CMD_INVALL; desc.cmd_desc_invall.col = col; its_cmd_send(dev, &desc); } #ifdef FDT static device_probe_t gicv3_its_fdt_probe; static device_attach_t gicv3_its_fdt_attach; static device_method_t gicv3_its_fdt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, gicv3_its_fdt_probe), DEVMETHOD(device_attach, gicv3_its_fdt_attach), /* End */ DEVMETHOD_END }; #define its_baseclasses its_fdt_baseclasses DEFINE_CLASS_1(its, gicv3_its_fdt_driver, gicv3_its_fdt_methods, sizeof(struct gicv3_its_softc), gicv3_its_driver); #undef its_baseclasses static devclass_t gicv3_its_fdt_devclass; EARLY_DRIVER_MODULE(its_fdt, gic, gicv3_its_fdt_driver, gicv3_its_fdt_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); static int gicv3_its_fdt_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its")) return (ENXIO); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); return (BUS_PROBE_DEFAULT); } static int gicv3_its_fdt_attach(device_t dev) { struct gicv3_its_softc *sc; phandle_t xref; int err; sc = device_get_softc(dev); sc->dev = dev; err = gicv3_its_attach(dev); if (err != 0) return (err); /* Register this device as an interrupt controller */ xref = OF_xref_from_node(ofw_bus_get_node(dev)); sc->sc_pic = intr_pic_register(dev, xref); intr_pic_add_handler(device_get_parent(dev), sc->sc_pic, gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length); /* Register this device to handle MSI interrupts */ intr_msi_register(dev, xref); return (0); } #endif #ifdef DEV_ACPI static device_probe_t gicv3_its_acpi_probe; static device_attach_t gicv3_its_acpi_attach; static device_method_t gicv3_its_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, gicv3_its_acpi_probe), DEVMETHOD(device_attach, gicv3_its_acpi_attach), /* End */ DEVMETHOD_END }; #define its_baseclasses its_acpi_baseclasses DEFINE_CLASS_1(its, gicv3_its_acpi_driver, gicv3_its_acpi_methods, sizeof(struct gicv3_its_softc), gicv3_its_driver); #undef its_baseclasses static devclass_t gicv3_its_acpi_devclass;
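/* * The ACPI front end mirrors the FDT one above: both attach under the parent * gic driver at BUS_PASS_INTERRUPT, defer the real work to gicv3_its_attach() * and then register the PIC and MSI handlers under the bus-specific xref. */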
EARLY_DRIVER_MODULE(its_acpi, gic, gicv3_its_acpi_driver, gicv3_its_acpi_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); static int gicv3_its_acpi_probe(device_t dev) { if (gic_get_bus(dev) != GIC_BUS_ACPI) return (EINVAL); if (gic_get_hw_rev(dev) < 3) return (EINVAL); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); return (BUS_PROBE_DEFAULT); } static int gicv3_its_acpi_attach(device_t dev) { struct gicv3_its_softc *sc; struct gic_v3_devinfo *di; int err; sc = device_get_softc(dev); sc->dev = dev; err = gicv3_its_attach(dev); if (err != 0) return (err); di = device_get_ivars(dev); sc->sc_pic = intr_pic_register(dev, di->msi_xref); intr_pic_add_handler(device_get_parent(dev), sc->sc_pic, gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length); /* Register this device to handle MSI interrupts */ intr_msi_register(dev, di->msi_xref); return (0); } #endif diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index 4eebfe219934..05e8c8efe5b4 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -1,944 +1,942 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include "opt_acpi.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #include #include #endif #include #include "pic_if.h" #define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */ /* don't panic if one fails to start */ static uint32_t mp_quirks; #ifdef FDT static struct { const char *compat; uint32_t quirks; } fdt_quirks[] = { { "arm,foundation-aarch64", MP_QUIRK_CPULIST }, { "arm,fvp-base", MP_QUIRK_CPULIST }, /* This is incorrect in some DTS files */ { "arm,vfp-base", MP_QUIRK_CPULIST }, { NULL, 0 }, }; #endif typedef void intr_ipi_send_t(void *, cpuset_t, u_int); typedef void intr_ipi_handler_t(void *); #define INTR_IPI_NAMELEN (MAXCOMLEN + 1) struct intr_ipi { intr_ipi_handler_t * ii_handler; void * ii_handler_arg; intr_ipi_send_t * ii_send; void * ii_send_arg; char ii_name[INTR_IPI_NAMELEN]; u_long * ii_count; }; static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; static struct intr_ipi *intr_ipi_lookup(u_int); static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, void *); static void ipi_ast(void *); static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); struct pcb stoppcbs[MAXCPU]; #ifdef FDT static u_int fdt_cpuid; #endif void mpentry(unsigned long cpuid); void init_secondary(uint64_t); /* Synchronize AP startup. */ static struct mtx ap_boot_mtx; /* Stacks for AP initialization, discarded once idle threads are started. */ void *bootstack; static void *bootstacks[MAXCPU]; /* Count of started APs, used to synchronize access to bootstack. */ static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static bool is_boot_cpu(uint64_t target_cpu) { return (cpuid_to_pcpu[0]->pc_mpidr == (target_cpu & CPU_AFF_MASK)); } static void release_aps(void *dummy __unused) { int i, started; /* Only release CPUs if they exist */ if (mp_ncpus == 1) return; intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ __asm __volatile( "dsb ishst \n" "sev \n" ::: "memory"); printf("Release APs..."); started = 0; for (i = 0; i < 2000; i++) { if (smp_started) { printf("done\n"); return; } /* * Don't time out while we are making progress. Some large * systems can take a while to start all CPUs. */ if (smp_cpus > started) { i = 0; started = smp_cpus; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; pmap_t pmap0; u_int mpidr; /* * Verify that the value passed in 'cpu' argument (aka context_id) is * valid. Some older U-Boot based PSCI implementations are buggy, * they can pass random value in it. 
*/ mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK; if (cpu >= MAXCPU || cpuid_to_pcpu[cpu] == NULL || cpuid_to_pcpu[cpu]->pc_mpidr != mpidr) { for (cpu = 0; cpu < mp_maxid; cpu++) if (cpuid_to_pcpu[cpu] != NULL && cpuid_to_pcpu[cpu]->pc_mpidr == mpidr) break; if ( cpu >= MAXCPU) panic("MPIDR for this CPU is not in pcpu table"); } pcpup = cpuid_to_pcpu[cpu]; /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. * * We need this before signalling the CPU is ready to * let the boot CPU use the results. */ pcpup->pc_midr = get_midr(); identify_cpu(cpu); /* Ensure the stores in identify_cpu have completed */ atomic_thread_fence_acq_rel(); /* Signal the BSP and spin until it has released all APs. */ atomic_add_int(&aps_started, 1); while (!atomic_load_int(&aps_ready)) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; schedinit_ap(); /* Initialize curpmap to match TTBR0's current setting. */ pmap0 = vmspace_pmap(&vmspace0); KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); pcpup->pc_curpmap = pmap0; install_cpu_errata(); intr_pic_init_secondary(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); #ifdef VFP vfp_init(); #endif dbg_init(); pan_enable(); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); kcsan_cpu_init(cpu); /* * Assert that smp_after_idle_runnable condition is reasonable. */ MPASS(PCPU_GET(curpcb) == NULL); /* Enter the scheduler */ sched_ap_entry(); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } static void smp_after_idle_runnable(void *arg __unused) { struct pcpu *pc; int cpu; for (cpu = 1; cpu < mp_ncpus; cpu++) { if (bootstacks[cpu] != NULL) { pc = pcpu_find(cpu); while (atomic_load_ptr(&pc->pc_curpcb) == NULL) cpu_spinwait(); kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); } } } SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, smp_after_idle_runnable, NULL); /* * Send IPI thru interrupt controller. */ static void pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) { KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); /* * Ensure that this CPU's stores will be visible to IPI * recipients before starting to send the interrupts. */ dsb(ishst); PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); } /* * Setup IPI handler on interrupt controller. * * Not SMP coherent. 
*/ static void intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, void *arg) { struct intr_irqsrc *isrc; struct intr_ipi *ii; int error; KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); if (error != 0) return; isrc->isrc_handlers++; ii = intr_ipi_lookup(ipi); KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); ii->ii_handler = hand; ii->ii_handler_arg = arg; ii->ii_send = pic_ipi_send; ii->ii_send_arg = isrc; strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); ii->ii_count = intr_ipi_setup_counters(name); PIC_ENABLE_INTR(intr_irq_root_dev, isrc); } static void intr_ipi_send(cpuset_t cpus, u_int ipi) { struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); ii->ii_send(ii->ii_send_arg, cpus, ipi); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_hardclock(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); } static void ipi_preempt(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); } static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_stop(void *dummy __unused) { u_int cpu; CTR0(KTR_SMP, "IPI_STOP"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); #ifdef DDB dbg_register_sync(NULL); #endif CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); } struct cpu_group * cpu_topo(void) { struct cpu_group *dom, *root; int i; root = smp_topo_alloc(1); dom = smp_topo_alloc(vm_ndomains); root->cg_parent = NULL; root->cg_child = dom; CPU_COPY(&all_cpus, &root->cg_mask); root->cg_count = mp_ncpus; root->cg_children = vm_ndomains; root->cg_level = CG_SHARE_NONE; root->cg_flags = 0; /* * Redundant layers will be collapsed by the caller so we don't need a * special case for a single domain. */ for (i = 0; i < vm_ndomains; i++, dom++) { dom->cg_parent = root; dom->cg_child = NULL; CPU_COPY(&cpuset_domain[i], &dom->cg_mask); dom->cg_count = CPU_COUNT(&dom->cg_mask); dom->cg_children = 0; dom->cg_level = CG_SHARE_L3; dom->cg_flags = 0; } return (root); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ return (1); } /* * Starts a given CPU. If the CPU is already running, i.e. it is the boot CPU, * do nothing. Returns true if the CPU is present and running. 
*/ static bool start_cpu(u_int cpuid, uint64_t target_cpu, int domain) { struct pcpu *pcpup; vm_paddr_t pa; int err, naps; /* Check we are able to start this cpu */ if (cpuid > mp_maxid) return (false); /* Skip boot CPU */ if (is_boot_cpu(target_cpu)) return (true); KASSERT(cpuid < MAXCPU, ("Too many CPUs")); pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain), sizeof(*pcpup), M_WAITOK | M_ZERO); pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK; dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset( DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); bootstacks[cpuid] = (void *)kmem_malloc_domainset( DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO); naps = atomic_load_int(&aps_started); bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* * Panic here if INVARIANTS are enabled and PSCI failed to * start the requested CPU. psci_cpu_on() returns PSCI_MISSING * to indicate we are unable to use it to start the given CPU. */ KASSERT(err == PSCI_MISSING || (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, ("Failed to start CPU %u (%lx), error %d\n", cpuid, target_cpu, err)); pcpu_destroy(pcpup); kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); bootstacks[cpuid] = NULL; mp_ncpus--; return (false); } /* Wait for the AP to switch to its boot stack. */ while (atomic_load_int(&aps_started) < naps + 1) cpu_spinwait(); CPU_SET(cpuid, &all_cpus); return (true); } #ifdef DEV_ACPI static void madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cpuid; u_int id; int domain; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; cpuid = arg; if (is_boot_cpu(intr->ArmMpidr)) id = 0; else id = *cpuid; domain = 0; #ifdef NUMA if (vm_ndomains > 1) domain = acpi_pxm_get_cpu_locality(intr->Uid); #endif if (start_cpu(id, intr->ArmMpidr, domain)) { MPASS(cpuid_to_pcpu[id] != NULL); cpuid_to_pcpu[id]->pc_acpi_id = intr->Uid; /* * Don't increment for the boot CPU, its CPU ID is * reserved. */ if (!is_boot_cpu(intr->ArmMpidr)) (*cpuid)++; } break; default: break; } } static void cpu_init_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cpuid; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return; madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return; } /* Boot CPU is always 0 */ cpuid = 1; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, madt_handler, &cpuid); acpi_unmap_table(madt); #if MAXMEMDOM > 1 acpi_pxm_set_cpu_locality(); #endif } #endif #ifdef FDT static boolean_t start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; int domain; int cpuid; target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } if (is_boot_cpu(target_cpu)) cpuid = 0; else cpuid = fdt_cpuid; if (!start_cpu(cpuid, target_cpu, 0)) return (FALSE); /* * Don't increment for the boot CPU, its CPU ID is reserved. 
*/ if (!is_boot_cpu(target_cpu)) fdt_cpuid++; /* Try to read the numa node of this cpu */ if (vm_ndomains == 1 || OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0) domain = 0; cpuid_to_pcpu[cpuid]->pc_domain = domain; if (domain < MAXMEMDOM) CPU_SET(cpuid, &cpuset_domain[domain]); return (TRUE); } static void cpu_init_fdt(void) { phandle_t node; int i; node = OF_peer(0); for (i = 0; fdt_quirks[i].compat != NULL; i++) { if (ofw_bus_node_is_compatible(node, fdt_quirks[i].compat) != 0) { mp_quirks = fdt_quirks[i].quirks; } } fdt_cpuid = 1; ofw_cpu_early_foreach(start_cpu_fdt, true); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* CPU 0 is always boot CPU. */ CPU_SET(0, &all_cpus); cpuid_to_pcpu[0]->pc_mpidr = READ_SPECIALREG(mpidr_el1) & CPU_AFF_MASK; switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: mp_quirks = MP_QUIRK_CPULIST; cpu_init_acpi(); break; #endif #ifdef FDT case ARM64_BUS_FDT: cpu_init_fdt(); break; #endif default: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } #ifdef DEV_ACPI static void cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { - ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cores = arg; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: - intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; (*cores)++; break; default: break; } } static u_int cpu_count_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cores; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return (0); madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return (0); } cores = 0; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, cpu_count_acpi_handler, &cores); acpi_unmap_table(madt); return (cores); } #endif void cpu_mp_setmaxid(void) { int cores; mp_ncpus = 1; mp_maxid = 0; switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: cores = cpu_count_acpi(); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the ACPI tables\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; } break; #endif #ifdef FDT case ARM64_BUS_FDT: cores = ofw_cpu_early_foreach(NULL, false); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the device tree\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; } break; #endif default: if (bootverbose) printf("No CPU data, limiting to 1 core\n"); break; } if (TUNABLE_INT_FETCH("hw.ncpu", &cores)) { if (cores > 0 && cores < mp_ncpus) { mp_ncpus = cores; mp_maxid = cores - 1; } } } /* * Lookup IPI source. */ static struct intr_ipi * intr_ipi_lookup(u_int ipi) { if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); return (&ipi_sources[ipi]); } /* * interrupt controller dispatch function for IPIs. It should * be called straight from the interrupt controller, when associated * interrupt source is learned. Or from anybody who has an interrupt * source mapped. */ void intr_ipi_dispatch(u_int ipi, struct trapframe *tf) { void *arg; struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); /* * Supply ipi filter with trapframe argument * if none is registered. */ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; ii->ii_handler(arg); } #ifdef notyet /* * Map IPI into interrupt controller. 
* * Not SMP coherent. */ static int ipi_map(struct intr_irqsrc *isrc, u_int ipi) { boolean_t is_percpu; int error; if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); isrc->isrc_type = INTR_ISRCT_NAMESPACE; isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; isrc->isrc_nspc_num = ipi_next_num; error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); if (error == 0) { isrc->isrc_dev = intr_irq_root_dev; ipi_next_num++; } return (error); } /* * Setup IPI handler to interrupt source. * * Note that there could be more ways how to send and receive IPIs * on a platform like fast interrupts for example. In that case, * one can call this function with ASIF_NOALLOC flag set and then * call intr_ipi_dispatch() when appropriate. * * Not SMP coherent. */ int intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, void *arg, u_int flags) { struct intr_irqsrc *isrc; int error; if (filter == NULL) return(EINVAL); isrc = intr_ipi_lookup(ipi); if (isrc->isrc_ipifilter != NULL) return (EEXIST); if ((flags & AISHF_NOALLOC) == 0) { error = ipi_map(isrc, ipi); if (error != 0) return (error); } isrc->isrc_ipifilter = filter; isrc->isrc_arg = arg; isrc->isrc_handlers = 1; isrc->isrc_count = intr_ipi_setup_counters(name); isrc->isrc_index = 0; /* it should not be used in IPI case */ if (isrc->isrc_dev != NULL) { PIC_ENABLE_INTR(isrc->isrc_dev, isrc); PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); } return (0); } #endif /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t cpus; cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); intr_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); }
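/*
 * Illustrative sketch, not part of this change: a hypothetical caller that
 * wants to force a reschedule on CPU 2 and then on every other CPU could use
 * the helpers above directly, e.g.:
 *
 *	ipi_cpu(2, IPI_PREEMPT);
 *	ipi_all_but_self(IPI_PREEMPT);
 *
 * In practice these entry points are driven by MI scheduler and SMP code.
 */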