Index: sys/arm64/arm64/mp_machdep.c
===================================================================
--- sys/arm64/arm64/mp_machdep.c
+++ sys/arm64/arm64/mp_machdep.c
@@ -146,6 +146,13 @@
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready;
 
+static vm_offset_t pcpu_mem[MAXMEMDOM];
+static vm_offset_t pcpu_off[MAXMEMDOM];
+#ifdef INVARIANTS
+static int pcpu_alloc_id;
+vm_offset_t pcpu_allocations[MAXCPU];
+#endif
+
 /* Temporary variables for init_secondary() */
 void *dpcpu[MAXCPU - 1];
 
@@ -484,6 +491,64 @@
 	return (1);
 }
 
+/*
+ * Allocate memory for the PCPU and DPCPU data. We allocate a level 2
+ * sized and aligned region so later allocations won't cause this memory
+ * to be promoted to a level 2 block. This can cause recursive exceptions
+ * as the exception handler needs to dereference the PCPU region and
+ * the promotion code needs to temporarily mark the mapping invalid in the
+ * page table before marking the new level 2 block valid. This is normally
+ * fine as this is fast so the memory has been remapped by the time the
+ * exception handler is run, however when running on a VM we may exit to the
+ * host while this mapping is invalid meaning the memory is not remapped
+ * in time.
+ *
+ * Work around this by making sure the allocation takes an entire level 2
+ * block so won't be promoted. To reduce the wasted memory reuse this for
+ * as many CPUs in the same domain as possible.
+ *
+ * Memory is handed out by bumping a per-domain offset and is never returned
+ * individually, so a failed CPU start leaves its slice unused.
+ */
+static vm_offset_t
+alloc_pcpu(int domain, vm_size_t size)
+{
+	vm_offset_t addr;
+
+	KASSERT(size <= L2_SIZE,
+	    ("%s: allocation too large: %lx", __func__, size));
+
+	domain = domain % MAXMEMDOM;
+
+	/* Start a new level 2 block if there is none or size won't fit */
+	if (pcpu_mem[domain] == 0 ||
+	    (pcpu_off[domain] + size) > (pcpu_mem[domain] + L2_SIZE)) {
+		pcpu_mem[domain] = kmem_alloc_contig_domainset(
+		    DOMAINSET_PREF(domain), L2_SIZE, M_WAITOK | M_ZERO,
+		    0, ~(vm_paddr_t)0, L2_SIZE, 0, VM_MEMATTR_DEFAULT);
+		/* Don't hand out address 0 if the allocation failed */
+		if (pcpu_mem[domain] == 0)
+			panic("%s: unable to allocate pcpu memory", __func__);
+		pcpu_off[domain] = pcpu_mem[domain];
+#ifdef INVARIANTS
+		/* Record the allocation to check we don't promote it */
+		KASSERT(pcpu_alloc_id < MAXCPU,
+		    ("%s: too many pcpu allocations", __func__));
+		pcpu_allocations[pcpu_alloc_id] = pcpu_mem[domain];
+		pcpu_alloc_id++;
+#endif
+	}
+
+	addr = pcpu_off[domain];
+	/*
+	 * Align the next allocation for struct pcpu, which may need more
+	 * than pointer alignment.
+	 */
+	pcpu_off[domain] = roundup2(addr + size, _Alignof(struct pcpu));
+
+	return (addr);
+}
+
 /*
  * Starts a given CPU. If the CPU is already running, i.e. it is the boot CPU,
  * do nothing. Returns true if the CPU is present and running.
@@ -505,13 +570,11 @@
 
 	KASSERT(cpuid < MAXCPU, ("Too many CPUs"));
 
-	pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain),
-	    sizeof(*pcpup), M_WAITOK | M_ZERO);
+	pcpup = (void *)alloc_pcpu(domain, sizeof(*pcpup));
 	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
 	pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK;
 
-	dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset(
-	    DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO);
+	dpcpu[cpuid - 1] = (void *)alloc_pcpu(domain, DPCPU_SIZE);
 	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
 	bootstacks[cpuid] = (void *)kmem_malloc_domainset(
@@ -535,7 +598,6 @@
 		    cpuid, target_cpu, err));
 
 		pcpu_destroy(pcpup);
-		kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE);
 		dpcpu[cpuid - 1] = NULL;
 		kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE);
 		bootstacks[cpuid] = NULL;
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -3639,6 +3639,21 @@
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+#ifdef INVARIANTS
+	/* The range [va, va + size) must not overlap any pcpu level 2 block */
+	for (int i = 0; i < MAXCPU && pcpu_allocations[i] != 0; i++) {
+		if (va < pcpu_allocations[i]) {
+			KASSERT(va + size <= pcpu_allocations[i],
+			    ("%s: Updating start of pcpu: %lx-%lx %lx",
+			    __func__, va, va + size, pcpu_allocations[i]));
+		} else {
+			KASSERT(va >= pcpu_allocations[i] + L2_SIZE,
+			    ("%s: Updating end of pcpu: %lx-%lx %lx", __func__,
+			    va, va + size, pcpu_allocations[i] + L2_SIZE));
+		}
+	}
+#endif
+
 	/*
 	 * Ensure we don't get switched out with the page table in an
 	 * inconsistent state. We also need to ensure no interrupts fire
Index: sys/arm64/include/machdep.h
===================================================================
--- sys/arm64/include/machdep.h
+++ sys/arm64/include/machdep.h
@@ -48,6 +48,11 @@
 	ARM64_BUS_ACPI,
 };
 
+/* Used by pmap.c to check we don't promote/demote any pcpu memory */
+#ifdef INVARIANTS
+extern vm_offset_t pcpu_allocations[MAXCPU];
+#endif
+
 extern enum arm64_bus arm64_bus_method;
 
 void dbg_init(void);