diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -290,29 +290,33 @@ init_secondary_tail(); } -/******************************************************************* - * local functions and data - */ - -#ifdef NUMA static void -mp_realloc_pcpu(int cpuid, int domain) +amd64_mp_alloc_pcpu(void) { vm_page_t m; - vm_offset_t oa, na; - - oa = (vm_offset_t)&__pcpu[cpuid]; - if (vm_phys_domain(pmap_kextract(oa)) == domain) - return; - m = vm_page_alloc_noobj_domain(domain, 0); - if (m == NULL) - return; - na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - pagecopy((void *)oa, (void *)na); - pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1); - /* XXX old pcpu page leaked. */ -} + int cpu; + + /* Allocate pcpu areas to the correct domain. */ + for (cpu = 1; cpu < mp_ncpus; cpu++) { +#ifdef NUMA + m = NULL; + if (vm_ndomains > 1) { + m = vm_page_alloc_noobj_domain( + acpi_pxm_get_cpu_locality(cpu_apic_ids[cpu]), 0); + } + if (m == NULL) +#else + m = vm_page_alloc_noobj(0); #endif + if (m == NULL) + panic("cannot alloc pcpu page for cpu %d", cpu); + pmap_qenter((vm_offset_t)&__pcpu[cpu], &m, 1); + } +} + +/******************************************************************* + * local functions and data + */ /* * start each AP in our list @@ -330,6 +334,7 @@ int apic_id, cpu, domain, i; u_char mpbiosreason; + amd64_mp_alloc_pcpu(); mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); MPASS(bootMP_size <= PAGE_SIZE); @@ -403,16 +408,6 @@ outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ - /* Relocate pcpu areas to the correct domain. */ -#ifdef NUMA - if (vm_ndomains > 1) - for (cpu = 1; cpu < mp_ncpus; cpu++) { - apic_id = cpu_apic_ids[cpu]; - domain = acpi_pxm_get_cpu_locality(apic_id); - mp_realloc_pcpu(cpu, domain); - } -#endif - /* start each AP */ domain = 0; for (cpu = 1; cpu < mp_ncpus; cpu++) { diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1902,7 +1902,7 @@ vm_offset_t va; pt_entry_t *pte, *pcpu_pte; struct region_descriptor r_gdt; - uint64_t cr4, pcpu_phys; + uint64_t cr4, pcpu0_phys; u_long res; int i; @@ -1917,7 +1917,7 @@ */ create_pagetables(firstaddr); - pcpu_phys = allocpages(firstaddr, MAXCPU); + pcpu0_phys = allocpages(firstaddr, 1); /* * Add a physical memory segment (vm_phys_seg) corresponding to the @@ -1995,10 +1995,15 @@ SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU); virtual_avail = va; - for (i = 0; i < MAXCPU; i++) { - pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW | - pg_g | pg_nx | X86_PG_M | X86_PG_A; - } + /* + * Map the BSP PCPU now, rest PCPUs are mapped by + * amd64_mp_alloc_pcpu()/start_all_aps() when we know the + * number of CPUs and NUMA affinity. + */ + pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx | + X86_PG_M | X86_PG_A; + for (i = 1; i < MAXCPU; i++) + pcpu_pte[i] = 0; /* * Re-initialize PCPU area for BSP after switching.