Index: sys/arm64/arm64/locore.S =================================================================== --- sys/arm64/arm64/locore.S +++ sys/arm64/arm64/locore.S @@ -214,11 +214,10 @@ br x15 mp_virtdone: - ldr x4, =secondary_stacks - mov x5, #(PAGE_SIZE * KSTACK_PAGES) - mul x5, x0, x5 - add sp, x4, x5 - + /* Start using the AP boot stack */ + ldr x4, =bootstack + ldr x4, [x4] /* x4 now holds the pointer value stored in bootstack */ + mov sp, x4 b init_secondary END(mpentry) #endif Index: sys/arm64/arm64/mp_machdep.c =================================================================== --- sys/arm64/arm64/mp_machdep.c +++ sys/arm64/arm64/mp_machdep.c @@ -123,7 +123,6 @@ static void ipi_rendezvous(void *); static void ipi_stop(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; /* @@ -136,10 +135,18 @@ void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +/* Synchronize AP startup. */ +static struct mtx ap_boot_mtx; + +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; /* not static: locore.S refers to this symbol by name */ +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. */ +static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; @@ -205,14 +212,14 @@ "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); /* the BSP-side start code spins until it observes this increment */ + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* Initialize curpmap to match TTBR0's current setting. 
*/ pmap0 = vmspace_pmap(&vmspace0); @@ -250,6 +257,11 @@ kcsan_cpu_init(cpu); + /* + * Assert that smp_after_idle_runnable condition is reasonable. + */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ sched_throw(NULL); @@ -257,6 +269,24 @@ /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) /* SYSINIT hook that frees the AP boot stacks */ +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { /* begins at 1, so CPU 0 is never examined; NOTE(review): the failed-start path decrements mp_ncpus, confirm no started AP can have an id at or above this bound */ + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) /* wait until this CPU has a non-NULL pc_curpcb; the AP startup path asserts it is NULL just before sched_throw */ + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); /* PAGE_SIZE matches the kmem_malloc that filled bootstacks[cpu] */ + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + /* * Send IPI thru interrupt controller. */ @@ -391,7 +421,7 @@ struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; - int err; + int err, naps; /* Check we are able to start this cpu */ if (id > mp_maxid) @@ -405,7 +435,7 @@ /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other - * CPUs ordered as the are likely grouped into clusters so it can be + * CPUs ordered as they are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. */ @@ -420,29 +450,41 @@ dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); /* snapshot taken before the AP is started */ + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; /* address PAGE_SIZE bytes past the start of the new allocation; mpentry loads it into sp */ + printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); - err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* * Panic here if INVARIANTS are enabled and PSCI failed to - * start the requested CPU. If psci_cpu_on returns PSCI_MISSING + * start the requested CPU. psci_cpu_on() returns PSCI_MISSING * to indicate we are unable to use it to start the given CPU. 
*/ KASSERT(err == PSCI_MISSING || (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, - ("Failed to start CPU %u (%lx)\n", id, target_cpu)); + ("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err)); pcpu_destroy(pcpup); kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; + kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); + bootstacks[cpuid] = NULL; /* smp_after_idle_runnable skips NULL slots */ mp_ncpus--; /* Notify the user that the CPU failed to start */ - printf("Failed to start CPU %u (%lx)\n", id, target_cpu); - } else + printf("Failed to start CPU %u (%lx), error %d\n", + id, target_cpu, err); + } else { + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) /* exits once aps_started exceeds the earlier snapshot; NOTE(review): no timeout is visible here, confirm a stuck AP cannot hang the BSP */ + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); + } return (true); } Index: sys/riscv/riscv/locore.S =================================================================== --- sys/riscv/riscv/locore.S +++ sys/riscv/riscv/locore.S @@ -301,14 +301,8 @@ beqz t1, 1b /* Setup stack pointer */ - lla t0, secondary_stacks - li t1, (PAGE_SIZE * KSTACK_PAGES) - mulw t2, t1, a0 - add t0, t0, t2 - add t0, t0, t1 - sub t0, t0, s9 - li t1, KERNBASE - add sp, t0, t1 + lla t0, bootstack + ld sp, 0(t0) /* sp = pointer value stored in bootstack */ /* Setup supervisor trap vector */ lla t0, mpva Index: sys/riscv/riscv/mp_machdep.c =================================================================== --- sys/riscv/riscv/mp_machdep.c +++ sys/riscv/riscv/mp_machdep.c @@ -87,7 +87,6 @@ static int ipi_handler(void *); -struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; extern uint32_t boot_hart; @@ -98,13 +97,19 @@ #endif static device_t cpu_list[MAXCPU]; -void mpentry(unsigned long cpuid); void init_secondary(uint64_t); -uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16); +static struct mtx ap_boot_mtx; + +/* Stacks for AP initialization, discarded once idle threads are started. */ +void *bootstack; /* not static: locore.S refers to this symbol by name */ +static void *bootstacks[MAXCPU]; + +/* Count of started APs, used to synchronize access to bootstack. 
*/ +static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; +static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; @@ -233,14 +238,14 @@ csr_set(sie, SIE_SSIE); csr_set(sip, SIE_SSIE); - /* Spin until the BSP releases the APs */ - while (!aps_ready) + /* Signal the BSP and spin until it has released all APs. */ + atomic_add_int(&aps_started, 1); /* the BSP-side start code spins until it observes this increment */ + while (!atomic_load_int(&aps_ready)) __asm __volatile("wfi"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; - pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup @@ -274,6 +279,11 @@ mtx_unlock_spin(&ap_boot_mtx); + /* + * Assert that smp_after_idle_runnable condition is reasonable. + */ + MPASS(PCPU_GET(curpcb) == NULL); + /* Enter the scheduler */ sched_throw(NULL); @@ -281,6 +291,24 @@ /* NOTREACHED */ } +static void +smp_after_idle_runnable(void *arg __unused) /* SYSINIT hook that frees the AP boot stacks */ +{ + struct pcpu *pc; + int cpu; + + for (cpu = 1; cpu < mp_ncpus; cpu++) { /* begins at 1, so CPU 0 is never examined; NOTE(review): confirm every started AP has an id below mp_ncpus */ + if (bootstacks[cpu] != NULL) { + pc = pcpu_find(cpu); + while (atomic_load_ptr(&pc->pc_curpcb) == NULL) /* wait until this CPU has a non-NULL pc_curpcb; the AP startup path asserts it is NULL just before sched_throw */ + cpu_spinwait(); + kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); /* PAGE_SIZE matches the kmem_malloc that filled bootstacks[cpu] */ + } + } +} +SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, + smp_after_idle_runnable, NULL); + static int ipi_handler(void *arg) { @@ -373,6 +401,7 @@ struct pcpu *pcpup; uint64_t hart; u_int cpuid; + int naps; /* Check if this hart supports MMU. 
*/ if (OF_getproplen(node, "mmu-type") < 0) @@ -419,8 +448,17 @@ dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); + bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); + + naps = atomic_load_int(&aps_started); /* snapshot taken before the hart is released */ + bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; /* address PAGE_SIZE bytes past the start of the new allocation; locore.S loads it into sp */ + printf("Starting CPU %u (hart %lx)\n", cpuid, hart); - __riscv_boot_ap[hart] = 1; + atomic_store_32(&__riscv_boot_ap[hart], 1); /* presumably the flag this hart polls in locore.S; confirm against the spin loop there */ + + /* Wait for the AP to switch to its boot stack. */ + while (atomic_load_int(&aps_started) < naps + 1) /* exits once aps_started exceeds the earlier snapshot; NOTE(review): no timeout is visible here, confirm a stuck hart cannot hang the BSP */ + cpu_spinwait(); CPU_SET(cpuid, &all_cpus); CPU_SET(hart, &all_harts);