Changeset View
Changeset View
Standalone View
Standalone View
head/sys/arm64/arm64/mp_machdep.c
Show First 20 Lines • Show All 117 Lines • ▼ Show 20 Lines | static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, | ||||
void *); | void *); | ||||
static void ipi_ast(void *); | static void ipi_ast(void *); | ||||
static void ipi_hardclock(void *); | static void ipi_hardclock(void *); | ||||
static void ipi_preempt(void *); | static void ipi_preempt(void *); | ||||
static void ipi_rendezvous(void *); | static void ipi_rendezvous(void *); | ||||
static void ipi_stop(void *); | static void ipi_stop(void *); | ||||
struct mtx ap_boot_mtx; | |||||
struct pcb stoppcbs[MAXCPU]; | struct pcb stoppcbs[MAXCPU]; | ||||
/* | /* | ||||
* Not all systems boot from the first CPU in the device tree. To work around | * Not all systems boot from the first CPU in the device tree. To work around | ||||
* this we need to find which CPU we have booted from so when we later | * this we need to find which CPU we have booted from so when we later | ||||
* enable the secondary CPUs we skip this one. | * enable the secondary CPUs we skip this one. | ||||
*/ | */ | ||||
static int cpu0 = -1; | static int cpu0 = -1; | ||||
void mpentry(unsigned long cpuid); | void mpentry(unsigned long cpuid); | ||||
void init_secondary(uint64_t); | void init_secondary(uint64_t); | ||||
uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); | /* Synchronize AP startup. */ | ||||
static struct mtx ap_boot_mtx; | |||||
/* Stacks for AP initialization, discarded once idle threads are started. */ | |||||
void *bootstack; | |||||
static void *bootstacks[MAXCPU]; | |||||
/* Count of started APs, used to synchronize access to bootstack. */ | |||||
static volatile int aps_started; | |||||
/* Set to 1 once we're ready to let the APs out of the pen. */ | /* Set to 1 once we're ready to let the APs out of the pen. */ | ||||
volatile int aps_ready = 0; | static volatile int aps_ready; | ||||
/* Temporary variables for init_secondary() */ | /* Temporary variables for init_secondary() */ | ||||
void *dpcpu[MAXCPU - 1]; | void *dpcpu[MAXCPU - 1]; | ||||
static void | static void | ||||
release_aps(void *dummy __unused) | release_aps(void *dummy __unused) | ||||
{ | { | ||||
int i, started; | int i, started; | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | init_secondary(uint64_t cpu) | ||||
/* | /* | ||||
* Set the pcpu pointer with a backup in tpidr_el1 to be | * Set the pcpu pointer with a backup in tpidr_el1 to be | ||||
* loaded when entering the kernel from userland. | * loaded when entering the kernel from userland. | ||||
*/ | */ | ||||
__asm __volatile( | __asm __volatile( | ||||
"mov x18, %0 \n" | "mov x18, %0 \n" | ||||
"msr tpidr_el1, %0" :: "r"(pcpup)); | "msr tpidr_el1, %0" :: "r"(pcpup)); | ||||
/* Spin until the BSP releases the APs */ | /* Signal the BSP and spin until it has released all APs. */ | ||||
while (!aps_ready) | atomic_add_int(&aps_started, 1); | ||||
while (!atomic_load_int(&aps_ready)) | |||||
__asm __volatile("wfe"); | __asm __volatile("wfe"); | ||||
/* Initialize curthread */ | /* Initialize curthread */ | ||||
KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); | KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); | ||||
pcpup->pc_curthread = pcpup->pc_idlethread; | pcpup->pc_curthread = pcpup->pc_idlethread; | ||||
pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; | |||||
/* Initialize curpmap to match TTBR0's current setting. */ | /* Initialize curpmap to match TTBR0's current setting. */ | ||||
pmap0 = vmspace_pmap(&vmspace0); | pmap0 = vmspace_pmap(&vmspace0); | ||||
KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), | KASSERT(pmap_to_ttbr0(pmap0) == READ_SPECIALREG(ttbr0_el1), | ||||
("pmap0 doesn't match cpu %ld's ttbr0", cpu)); | ("pmap0 doesn't match cpu %ld's ttbr0", cpu)); | ||||
pcpup->pc_curpmap = pmap0; | pcpup->pc_curpmap = pmap0; | ||||
/* | /* | ||||
Show All 21 Lines | #endif | ||||
if (smp_cpus == mp_ncpus) { | if (smp_cpus == mp_ncpus) { | ||||
/* enable IPI's, tlb shootdown, freezes etc */ | /* enable IPI's, tlb shootdown, freezes etc */ | ||||
atomic_store_rel_int(&smp_started, 1); | atomic_store_rel_int(&smp_started, 1); | ||||
} | } | ||||
mtx_unlock_spin(&ap_boot_mtx); | mtx_unlock_spin(&ap_boot_mtx); | ||||
kcsan_cpu_init(cpu); | kcsan_cpu_init(cpu); | ||||
/* | |||||
* Assert that smp_after_idle_runnable condition is reasonable. | |||||
*/ | |||||
MPASS(PCPU_GET(curpcb) == NULL); | |||||
/* Enter the scheduler */ | /* Enter the scheduler */ | ||||
sched_throw(NULL); | sched_throw(NULL); | ||||
panic("scheduler returned us to init_secondary"); | panic("scheduler returned us to init_secondary"); | ||||
/* NOTREACHED */ | /* NOTREACHED */ | ||||
} | } | ||||
static void | |||||
smp_after_idle_runnable(void *arg __unused) | |||||
{ | |||||
struct pcpu *pc; | |||||
int cpu; | |||||
for (cpu = 1; cpu < mp_ncpus; cpu++) { | |||||
if (bootstacks[cpu] != NULL) { | |||||
pc = pcpu_find(cpu); | |||||
while (atomic_load_ptr(&pc->pc_curpcb) == NULL) | |||||
mmel: Hi Mark,
Unfortunately, this creates a race condition. In cpu_throw(), pc_curpcb[1] is set… | |||||
markjAuthorUnsubmitted Done Inline ActionsI see. Can you confirm whether this approach works on your hardware? https://reviews.freebsd.org/D35435 markj: I see. Can you confirm whether this approach works on your hardware? https://reviews.freebsd. | |||||
cpu_spinwait(); | |||||
kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); | |||||
} | |||||
} | |||||
} | |||||
SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, | |||||
smp_after_idle_runnable, NULL); | |||||
/* | /* | ||||
* Send IPI thru interrupt controller. | * Send IPI thru interrupt controller. | ||||
*/ | */ | ||||
static void | static void | ||||
pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) | pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) | ||||
{ | { | ||||
KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); | KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); | ||||
▲ Show 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
static bool | static bool | ||||
start_cpu(u_int id, uint64_t target_cpu) | start_cpu(u_int id, uint64_t target_cpu) | ||||
{ | { | ||||
struct pcpu *pcpup; | struct pcpu *pcpup; | ||||
vm_paddr_t pa; | vm_paddr_t pa; | ||||
u_int cpuid; | u_int cpuid; | ||||
int err; | int err, naps; | ||||
/* Check we are able to start this cpu */ | /* Check we are able to start this cpu */ | ||||
if (id > mp_maxid) | if (id > mp_maxid) | ||||
return (false); | return (false); | ||||
KASSERT(id < MAXCPU, ("Too many CPUs")); | KASSERT(id < MAXCPU, ("Too many CPUs")); | ||||
/* We are already running on cpu 0 */ | /* We are already running on cpu 0 */ | ||||
if (id == cpu0) | if (id == cpu0) | ||||
return (true); | return (true); | ||||
/* | /* | ||||
* Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other | * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other | ||||
* CPUs ordered as the are likely grouped into clusters so it can be | * CPUs ordered as they are likely grouped into clusters so it can be | ||||
* useful to keep that property, e.g. for the GICv3 driver to send | * useful to keep that property, e.g. for the GICv3 driver to send | ||||
* an IPI to all CPUs in the cluster. | * an IPI to all CPUs in the cluster. | ||||
*/ | */ | ||||
cpuid = id; | cpuid = id; | ||||
if (cpuid < cpu0) | if (cpuid < cpu0) | ||||
cpuid += mp_maxid + 1; | cpuid += mp_maxid + 1; | ||||
cpuid -= cpu0; | cpuid -= cpu0; | ||||
pcpup = &__pcpu[cpuid]; | pcpup = &__pcpu[cpuid]; | ||||
pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); | pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); | ||||
dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); | dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); | ||||
dpcpu_init(dpcpu[cpuid - 1], cpuid); | dpcpu_init(dpcpu[cpuid - 1], cpuid); | ||||
bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); | |||||
naps = atomic_load_int(&aps_started); | |||||
bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; | |||||
printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); | printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); | ||||
pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); | pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); | ||||
err = psci_cpu_on(target_cpu, pa, cpuid); | err = psci_cpu_on(target_cpu, pa, cpuid); | ||||
if (err != PSCI_RETVAL_SUCCESS) { | if (err != PSCI_RETVAL_SUCCESS) { | ||||
/* | /* | ||||
* Panic here if INVARIANTS are enabled and PSCI failed to | * Panic here if INVARIANTS are enabled and PSCI failed to | ||||
* start the requested CPU. If psci_cpu_on returns PSCI_MISSING | * start the requested CPU. psci_cpu_on() returns PSCI_MISSING | ||||
* to indicate we are unable to use it to start the given CPU. | * to indicate we are unable to use it to start the given CPU. | ||||
*/ | */ | ||||
KASSERT(err == PSCI_MISSING || | KASSERT(err == PSCI_MISSING || | ||||
(mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, | (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, | ||||
("Failed to start CPU %u (%lx)\n", id, target_cpu)); | ("Failed to start CPU %u (%lx), error %d\n", | ||||
id, target_cpu, err)); | |||||
pcpu_destroy(pcpup); | pcpu_destroy(pcpup); | ||||
kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); | kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); | ||||
dpcpu[cpuid - 1] = NULL; | dpcpu[cpuid - 1] = NULL; | ||||
kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); | |||||
bootstacks[cpuid] = NULL; | |||||
mp_ncpus--; | mp_ncpus--; | ||||
/* Notify the user that the CPU failed to start */ | /* Notify the user that the CPU failed to start */ | ||||
printf("Failed to start CPU %u (%lx)\n", id, target_cpu); | printf("Failed to start CPU %u (%lx), error %d\n", | ||||
} else | id, target_cpu, err); | ||||
} else { | |||||
/* Wait for the AP to switch to its boot stack. */ | |||||
while (atomic_load_int(&aps_started) < naps + 1) | |||||
cpu_spinwait(); | |||||
CPU_SET(cpuid, &all_cpus); | CPU_SET(cpuid, &all_cpus); | ||||
} | |||||
return (true); | return (true); | ||||
} | } | ||||
#ifdef DEV_ACPI | #ifdef DEV_ACPI | ||||
static void | static void | ||||
madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) | madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 387 Lines • Show Last 20 Lines |
Hi Mark,
Unfortunately, this creates a race condition. In cpu_throw(), pc_curpcb is set [1] before the switch to the kernel stack happens [2], and moreover the bootstrap stack is still used between these two points (because pmap_switch() is implemented in C). This means that the bootstrap stack can be freed while the AP is still using it.
I've encountered this problem on my own RK3399 boards, where the primary CPU cluster (i.e. bootstrap CPU) starts at a fast clock speed, but the secondary CPU cluster starts at a very low frequency (tens of MHz)...
[1] https://cgit.freebsd.org/src/tree/sys/arm64/arm64/swtch.S#n84
[2] https://cgit.freebsd.org/src/tree/sys/arm64/arm64/swtch.S#n93