Index: sys/riscv/riscv/intr_machdep.c
===================================================================
--- sys/riscv/riscv/intr_machdep.c
+++ sys/riscv/riscv/intr_machdep.c
@@ -60,6 +60,8 @@
 #include <machine/smp.h>
 #endif
 
+extern uint32_t cpuid_to_hart[MAXCPU];
+
 void intr_irq_handler(struct trapframe *tf);
 
 struct intc_irqsrc {
@@ -207,7 +209,7 @@
 	CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi);
 
 	atomic_set_32(&pc->pc_pending_ipis, ipi);
-	mask = (1 << (pc->pc_cpuid));
+	mask = (1 << cpuid_to_hart[pc->pc_cpuid]);
 
 	sbi_send_ipi(&mask);
 
@@ -252,7 +254,7 @@
 			CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc,
 			    ipi);
 			atomic_set_32(&pc->pc_pending_ipis, ipi);
-			mask |= (1 << (pc->pc_cpuid));
+			mask |= (1 << (cpuid_to_hart[pc->pc_cpuid]));
 		}
 	}
 	sbi_send_ipi(&mask);
Index: sys/riscv/riscv/locore.S
===================================================================
--- sys/riscv/riscv/locore.S
+++ sys/riscv/riscv/locore.S
@@ -59,11 +59,17 @@
 	sub	t1, t1, t0
 	li	t2, KERNBASE
 	sub	s9, t2, t1	/* s9 = physmem base */
-	mv	s10, a0		/* s10 = hart id */
-	mv	s11, a1		/* s11 = dtbp */
 
-	/* Direct secondary cores to mpentry */
-	bnez	s10, mpentry
+	/*
+	 * a0 = hart id
+	 * a1 = dtbp
+	 */
+
+	/* Pick a cpu to run the boot process. */
+	la	t0, cpu_lottery
+	li	t1, 1
+	amoadd.w t0, t1, 0(t0)
+	bnez	t0, mpentry
 
 /*
  * Page tables
@@ -123,7 +129,7 @@
 
 	/* Create an L2 page superpage for DTB */
 	la	s1, pagetable_l2_devmap
-	mv	s2, s11
+	mv	s2, a1
 	srli	s2, s2, PAGE_SHIFT
 
 	li	t0, (PTE_KERN)
@@ -171,12 +177,18 @@
 	addi	sp, sp, -PCB_SIZE
 
 	/* Clear BSS */
-	la	a0, _C_LABEL(__bss_start)
+	la	s0, _C_LABEL(__bss_start)
 	la	s1, _C_LABEL(_end)
 1:
-	sd	zero, 0(a0)
-	addi	a0, a0, 8
-	bltu	a0, s1, 1b
+	sd	zero, 0(s0)
+	addi	s0, s0, 8
+	bltu	s0, s1, 1b
+
+#ifdef SMP
+	/* Store boot hart id. */
+	la	t0, boot_hart
+	sw	a0, 0(t0)
+#endif
 
 	/* Fill riscv_bootparams */
 	addi	sp, sp, -40
@@ -190,7 +202,7 @@
 
 	li	t0, (VM_MAX_KERNEL_ADDRESS - 2 * L2_SIZE)
 	sd	t0, 24(sp)	/* dtbp_virt */
-	sd	s11, 32(sp)	/* dtbp_phys */
+	sd	a1, 32(sp)	/* dtbp_phys */
 
 	mv	a0, sp
 	call	_C_LABEL(initriscv)	/* Off we go */
@@ -233,9 +245,11 @@
 pagetable_l2_devmap:
 	.space	PAGE_SIZE
 
-        .align 3
+	.align 3
 virt_map:
-        .quad virt_map
+	.quad virt_map
+cpu_lottery:
+	.space 4
 
 	/* Not in use, but required for linking. */
 	.align 3
@@ -278,7 +292,8 @@
 	/* Setup stack pointer */
 	la	t0, secondary_stacks
 	li	t1, (PAGE_SIZE * KSTACK_PAGES)
-	mulw	t1, t1, s10
+	mulw	t2, t1, a0
+	add	t0, t0, t2
 	add	t0, t0, t1
 	sub	t0, t0, s9
 	li	t1, KERNBASE
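The cpu_lottery sequence above removes the old assumption that hart 0 is the boot hart: every hart atomically fetch-and-adds 1 to a shared counter, and only the hart that reads back 0 continues down the boot path; all others branch to mpentry. The userland C sketch below models that election with C11 atomics. The names cpu_lottery and boot_lottery are illustrative only, not kernel API.

/*
 * Sketch of the boot-hart lottery: atomic_fetch_add() has the same
 * fetch-then-add semantics as the amoadd.w instruction in locore.S.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint cpu_lottery;

/* Returns 1 on the single winning hart, 0 on all others. */
static int
boot_lottery(void)
{

	return (atomic_fetch_add(&cpu_lottery, 1) == 0);
}

int
main(void)
{

	if (boot_lottery())
		printf("this hart runs the boot path\n");
	else
		printf("this hart would jump to mpentry\n");
	return (0);
}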
Index: sys/riscv/riscv/mp_machdep.c
===================================================================
--- sys/riscv/riscv/mp_machdep.c
+++ sys/riscv/riscv/mp_machdep.c
@@ -74,6 +74,8 @@
 extern struct pcpu __pcpu[];
 
 uint32_t __riscv_boot_ap[MAXCPU];
+uint32_t boot_hart;	/* The hart we booted on. */
+uint32_t cpuid_to_hart[MAXCPU];
 
 static enum {
 	CPUS_UNKNOWN,
@@ -99,7 +101,7 @@
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
-uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
+uint8_t secondary_stacks[MAXCPU][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
@@ -196,8 +198,11 @@
 
 	/* Wake up the other CPUs */
 	mask = 0;
-	for (i = 1; i < mp_ncpus; i++)
-		mask |= (1 << i);
+	for (i = 0; i < mp_ncpus; i++) {
+		if (cpuid_to_hart[i] == boot_hart)
+			continue;
+		mask |= (1 << cpuid_to_hart[i]);
+	}
 
 	sbi_send_ipi(&mask);
 
@@ -219,12 +224,19 @@
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 void
-init_secondary(uint64_t cpu)
+init_secondary(uint64_t hart)
 {
 	struct pcpu *pcpup;
+	u_int cpuid;
+
+	/* Renumber this cpu */
+	cpuid = hart;
+	if (cpuid < boot_hart)
+		cpuid += mp_maxid + 1;
+	cpuid -= boot_hart;
 
 	/* Setup the pcpu pointer */
-	pcpup = &__pcpu[cpu];
+	pcpup = &__pcpu[cpuid];
 	__asm __volatile("mv gp, %0" :: "r"(pcpup));
 
 	/* Workaround: make sure wfi doesn't halt the hart */
@@ -366,11 +378,12 @@
 static boolean_t
 cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
-	uint64_t target_cpu;
 	struct pcpu *pcpup;
+	uint64_t hart;
+	u_int cpuid;
 
-	/* Check we are able to start this cpu */
-	if (id > mp_maxid)
+	/* Check if this hart supports MMU. */
+	if (OF_getproplen(node, "mmu-type") < 0)
 		return (0);
 
 	KASSERT(id < MAXCPU, ("Too many CPUs"));
@@ -382,28 +395,40 @@
 	cpu_reg[id][1] = reg[1];
#endif
 
-	target_cpu = reg[0];
+	hart = reg[0];
 	if (addr_size == 2) {
-		target_cpu <<= 32;
-		target_cpu |= reg[1];
+		hart <<= 32;
+		hart |= reg[1];
 	}
 
-	pcpup = &__pcpu[id];
-
-	/* We are already running on cpu 0 */
-	if (id == 0) {
+	/* We are already running on this cpu */
+	if (hart == boot_hart)
 		return (1);
-	}
 
-	pcpu_init(pcpup, id, sizeof(struct pcpu));
+	/*
+	 * Rotate the CPU IDs to put the boot CPU as CPU 0.
+	 * We keep the other CPUs ordered.
+	 */
+	cpuid = hart;
+	if (cpuid < boot_hart)
+		cpuid += mp_maxid + 1;
+	cpuid -= boot_hart;
+	cpuid_to_hart[cpuid] = hart;
+
+	/* Check if we are able to start this cpu */
+	if (cpuid > mp_maxid)
+		return (0);
+
+	pcpup = &__pcpu[cpuid];
+	pcpu_init(pcpup, cpuid, sizeof(struct pcpu));
 
-	dpcpu[id - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
-	dpcpu_init(dpcpu[id - 1], id);
+	dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
+	dpcpu_init(dpcpu[cpuid - 1], cpuid);
 
-	printf("Starting CPU %u (%lx)\n", id, target_cpu);
-	__riscv_boot_ap[id] = 1;
+	printf("Starting CPU %u (hart %lx)\n", cpuid, hart);
+	__riscv_boot_ap[hart] = 1;
 
-	CPU_SET(id, &all_cpus);
+	CPU_SET(cpuid, &all_cpus);
 
 	return (1);
 }
@@ -416,6 +441,7 @@
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
+	cpuid_to_hart[0] = boot_hart;
 	CPU_SET(0, &all_cpus);
 
 	switch(cpu_enum_method) {
@@ -435,13 +461,24 @@
 {
 }
 
+static boolean_t
+cpu_check_mmu(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
+{
+
+	/* Check if this hart supports MMU. */
+	if (OF_getproplen(node, "mmu-type") < 0)
+		return (0);
+
+	return (1);
+}
+
 void
 cpu_mp_setmaxid(void)
 {
#ifdef FDT
 	int cores;
 
-	cores = ofw_cpu_early_foreach(NULL, false);
+	cores = ofw_cpu_early_foreach(cpu_check_mmu, true);
 	if (cores > 0) {
 		cores = MIN(cores, MAXCPU);
 		if (bootverbose)
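init_secondary() and cpu_init_fdt() above share the same renumbering arithmetic: logical CPU IDs are the hart IDs rotated modulo mp_maxid + 1 so that the boot hart becomes CPU 0 while the remaining harts keep their relative order. A minimal standalone sketch of that rotation follows; the values (four harts, boot hart 2) are made up for illustration.

/*
 * Sketch of the hart-to-cpuid rotation used in init_secondary()
 * and cpu_init_fdt().
 */
#include <stdio.h>

#define	NHARTS		4
#define	MP_MAXID	(NHARTS - 1)

static unsigned
hart_to_cpuid(unsigned hart, unsigned boot_hart)
{
	unsigned cpuid;

	cpuid = hart;
	if (cpuid < boot_hart)
		cpuid += MP_MAXID + 1;
	cpuid -= boot_hart;
	return (cpuid);
}

int
main(void)
{
	unsigned hart;

	/* With boot_hart = 2: harts 2,3,0,1 map to cpuids 0,1,2,3. */
	for (hart = 0; hart < NHARTS; hart++)
		printf("hart %u -> cpuid %u\n", hart, hart_to_cpuid(hart, 2));
	return (0);
}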
Index: sys/riscv/riscv/pmap.c
===================================================================
--- sys/riscv/riscv/pmap.c
+++ sys/riscv/riscv/pmap.c
@@ -233,6 +233,8 @@
 static struct rwlock_padalign pvh_global_lock;
 static struct mtx_padalign allpmaps_lock;
 
+extern uint32_t cpuid_to_hart[MAXCPU];
+
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0,
     "VM/pmap parameters");
 
@@ -723,6 +725,21 @@
 }
 
#ifdef SMP
+static cpuset_t
+pmap_cpu_to_hart(cpuset_t mask)
+{
+	cpuset_t harts;
+	u_int cpu;
+
+	CPU_ZERO(&harts);
+
+	CPU_FOREACH(cpu)
+		if (CPU_ISSET(cpu, &mask))
+			CPU_SET(cpuid_to_hart[cpu], &harts);
+
+	return (harts);
+}
+
 /*
  * For SMP, these functions have to use IPIs for coherence.
  *
@@ -734,13 +751,15 @@
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	cpuset_t mask;
+	cpuset_t harts;
 
 	sched_pin();
 	mask = pmap->pm_active;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	if (!CPU_EMPTY(&mask) && smp_started)
-		sbi_remote_sfence_vma(mask.__bits, va, 1);
+	harts = pmap_cpu_to_hart(mask);
+	if (!CPU_EMPTY(&harts) && smp_started)
+		sbi_remote_sfence_vma(harts.__bits, va, 1);
 	sfence_vma_page(va);
 	sched_unpin();
 }
@@ -749,13 +768,15 @@
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	cpuset_t mask;
+	cpuset_t harts;
 
 	sched_pin();
 	mask = pmap->pm_active;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	if (!CPU_EMPTY(&mask) && smp_started)
-		sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
+	harts = pmap_cpu_to_hart(mask);
+	if (!CPU_EMPTY(&harts) && smp_started)
+		sbi_remote_sfence_vma(harts.__bits, sva, eva - sva + 1);
 
 	/*
 	 * Might consider a loop of sfence_vma_page() for a small
@@ -769,6 +790,7 @@
 pmap_invalidate_all(pmap_t pmap)
 {
 	cpuset_t mask;
+	cpuset_t harts;
 
 	sched_pin();
 	mask = pmap->pm_active;
@@ -780,8 +802,9 @@
 	 * all sfence_vma requests as global however.
 	 */
 	fence();
-	if (!CPU_EMPTY(&mask) && smp_started)
-		sbi_remote_sfence_vma(mask.__bits, 0, 0);
+	harts = pmap_cpu_to_hart(mask);
+	if (!CPU_EMPTY(&harts) && smp_started)
+		sbi_remote_sfence_vma(harts.__bits, 0, 0);
 	sfence_vma();
 	sched_unpin();
 }
@@ -4302,7 +4325,9 @@
 void
 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
 {
+#ifdef SMP
 	cpuset_t mask;
+	cpuset_t harts;
 
 	/*
 	 * From the RISC-V User-Level ISA V2.2:
@@ -4316,9 +4341,13 @@
 	mask = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &mask);
 	fence();
-	if (!CPU_EMPTY(&mask) && smp_started)
-		sbi_remote_fence_i(mask.__bits);
+	harts = pmap_cpu_to_hart(mask);
+	if (!CPU_EMPTY(&harts) && smp_started)
+		sbi_remote_fence_i(harts.__bits);
 	sched_unpin();
+#else
+	fence();
+#endif
 }
 
 /*
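pmap_cpu_to_hart() exists because the SBI remote-fence calls address physical harts, while pm_active and all_cpus are keyed by logical CPU IDs, so every mask must be translated before it reaches sbi_remote_sfence_vma() or sbi_remote_fence_i(). Below is a userland sketch of the same translation, using plain bitmasks in place of cpuset_t; the mapping table is made up and matches the boot_hart = 2 example above.

/*
 * Sketch of the cpuid-mask to hart-mask translation done by
 * pmap_cpu_to_hart(). With boot_hart = 2, cpuids 0-3 are harts 2,3,0,1.
 */
#include <stdio.h>

static const unsigned cpuid_to_hart[4] = { 2, 3, 0, 1 };

static unsigned long
cpu_mask_to_hart_mask(unsigned long cpus)
{
	unsigned long harts;
	unsigned cpu;

	harts = 0;
	for (cpu = 0; cpu < 4; cpu++)
		if (cpus & (1UL << cpu))
			harts |= 1UL << cpuid_to_hart[cpu];
	return (harts);
}

int
main(void)
{

	/* cpuids {1, 2} are harts {3, 0}: mask 0x06 becomes 0x09. */
	printf("harts mask = %#lx\n", cpu_mask_to_hart_mask(0x06));
	return (0);
}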