diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c --- a/sys/arm64/arm64/genassym.c +++ b/sys/arm64/arm64/genassym.c @@ -76,3 +76,4 @@ ASSYM(TF_SPSR, offsetof(struct trapframe, tf_spsr)); ASSYM(TF_ESR, offsetof(struct trapframe, tf_esr)); ASSYM(TF_X, offsetof(struct trapframe, tf_x)); +ASSYM(CPU_AFF_MASK, CPU_AFF_MASK); diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S --- a/sys/arm64/arm64/locore.S +++ b/sys/arm64/arm64/locore.S @@ -191,6 +191,33 @@ END(_start) #ifdef SMP +/* + * mpentry_spin(void) + * + * Called by a core when it is brought online via the spin-table method. Since + * u-boot fills `x0` with the entry pointer (here), we ignore it, and check + * validity from `ap_cpuid` set by the BSP. Each core loops, re-reading + * `ap_cpuid` and comparing it against its own masked `mpidr_el1` value. It is + * assumed that `ap_cpuid` is filled with a value that matches the masked + * `mpidr_el1` (CPU_AFF_MASK), provided by the device tree. + * + * `x0` is zeroed before branching to mpentry(); the value loaded from + * `ap_cpuid` is only used for the comparison and is not passed on. + */ +ENTRY(mpentry_spin) + mrs x1, mpidr_el1 + ldr x2, =CPU_AFF_MASK + and x1, x1, x2 + adrp x2, ap_cpuid + add x2, x2, :lo12:ap_cpuid +1: + dc cvac, x2 /* clean ap_cpuid's cache line by VA to PoC */ + ldr x0, [x2] + cmp x0, x1 + mov x0, #0 /* mov does not alter the flags set by cmp */ + beq mpentry + b 1b +END(mpentry_spin) /* * mpentry(unsigned long) * diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -107,6 +107,7 @@ #endif void mpentry(unsigned long cpuid); +void mpentry_spin(unsigned long cpuid); void init_secondary(uint64_t); /* Synchronize AP startup. */ @@ -115,6 +116,8 @@ /* Used to initialize the PCPU ahead of calling init_secondary(). */ void *bootpcpu; +uint64_t ap_cpuid; + /* Stacks for AP initialization, discarded once idle threads are started. 
*/ void *bootstack; static void *bootstacks[MAXCPU]; @@ -419,13 +422,21 @@ enable_cpu_spin(uint64_t cpu, vm_paddr_t entry, vm_paddr_t release_paddr) { vm_paddr_t *release_addr; + vm_offset_t release_vaddr; - release_addr = pmap_mapdev(release_paddr, sizeof(*release_addr)); - if (release_addr == NULL) + cpu_dcache_wb_range(&bootpcpu, sizeof(bootpcpu)); + ap_cpuid = cpu & CPU_AFF_MASK; + cpu_dcache_wb_range(&ap_cpuid, sizeof(ap_cpuid)); + release_vaddr = kva_alloc(PAGE_SIZE); + if (release_vaddr == 0) return (ENOMEM); + pmap_kenter(release_vaddr, PAGE_SIZE, trunc_page(release_paddr), + VM_MEMATTR_DEFAULT); + release_addr = (vm_paddr_t *)(release_vaddr + (release_paddr & PAGE_MASK)); *release_addr = entry; - pmap_unmapdev(release_addr, sizeof(*release_addr)); + pmap_kremove(release_vaddr); + kva_free(release_vaddr, PAGE_SIZE); __asm __volatile( "dsb sy \n" @@ -475,7 +486,10 @@ bootstack = (char *)bootstacks[cpuid] + MP_BOOTSTACK_SIZE; printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); - pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); + if (release_addr != 0) + pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry_spin); + else + pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); /* * A limited set of hardware we support can only do spintables and @@ -595,9 +609,9 @@ static bool start_cpu_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { + char table_prop[sizeof("spin-table")]; uint64_t target_cpu; vm_paddr_t release_addr; - char *enable_method; int domain; int cpuid; @@ -619,16 +633,14 @@ */ release_addr = 0; if (!psci_present && cpuid != 0) { - if (OF_getprop_alloc(node, "enable-method", - (void **)&enable_method) <= 0) + if (OF_getprop(node, "enable-method", + (void *)table_prop, sizeof(table_prop)) <= 0) return (false); - if (strcmp(enable_method, "spin-table") != 0) { - OF_prop_free(enable_method); + if (strcmp(table_prop, "spin-table") != 0) { return (false); } - OF_prop_free(enable_method); populate_release_addr(node, &release_addr); if 
(release_addr == 0) { printf("Failed to fetch release address for CPU %u",