Index: stable/12/sys/amd64/amd64/mp_machdep.c =================================================================== --- stable/12/sys/amd64/amd64/mp_machdep.c (revision 361333) +++ stable/12/sys/amd64/amd64/mp_machdep.c (revision 361334) @@ -1,754 +1,755 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) #define GiB(v) (v ## ULL << 30) #define AP_BOOTPT_SZ (PAGE_SIZE * 3) /* Temporary variables for init_secondary() */ char *doublefault_stack; char *mce_stack; char *nmi_stack; char *dbg_stack; /* * Local data and functions. */ static int start_ap(int apic_id); static bool is_kernel_paddr(vm_paddr_t pa) { return (pa >= trunc_2mpage(btext - KERNBASE) && pa < round_page(_end - KERNBASE)); } static bool is_mpboot_good(vm_paddr_t start, vm_paddr_t end) { return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem); } /* * Calculate usable address in base memory for AP trampoline code. */ void mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx) { vm_paddr_t start, end; unsigned int i; bool allocated; alloc_ap_trampoline(physmap, physmap_idx); /* * Find a memory region big enough below the 4GB boundary to * store the initial page tables. Region must be mapped by * the direct map. * * Note that it needs to be aligned to a page boundary. */ allocated = false; for (i = *physmap_idx; i <= *physmap_idx; i -= 2) { /* * First, try to chomp at the start of the physmap region. * Kernel binary might claim it already. */ start = round_page(physmap[i]); end = start + AP_BOOTPT_SZ; if (start < end && end <= physmap[i + 1] && is_mpboot_good(start, end) && !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) { allocated = true; physmap[i] = end; break; } /* * Second, try to chomp at the end. Again, check * against kernel. */ end = trunc_page(physmap[i + 1]); start = end - AP_BOOTPT_SZ; if (start < end && start >= physmap[i] && is_mpboot_good(start, end) && !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) { allocated = true; physmap[i + 1] = start; break; } } if (allocated) { mptramp_pagetables = start; if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) { memmove(&physmap[i], &physmap[i + 2], sizeof(*physmap) * (*physmap_idx - i + 2)); *physmap_idx -= 2; } } else { mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ; if (bootverbose) printf( "Cannot find enough space for the initial AP page tables, placing them at %#x", mptramp_pagetables); } } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ if (pmap_pcid_enabled) { if (invpcid_works) { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_invpcid_pti_pti) : IDTVEC(invltlb_invpcid_nopti), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_invpcid_pti) : IDTVEC(invlpg_invpcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_invpcid_pti) : IDTVEC(invlrng_invpcid), SDT_SYSIGT, SEL_KPL, 0); } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pcid_pti) : IDTVEC(invltlb_pcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pcid_pti) : IDTVEC(invlpg_pcid), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pcid_pti) : IDTVEC(invlrng_pcid), SDT_SYSIGT, SEL_KPL, 0); } } else { setidt(IPI_INVLTLB, pti ? IDTVEC(invltlb_pti) : IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, pti ? IDTVEC(invlpg_pti) : IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, pti ? IDTVEC(invlrng_pti) : IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); } /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, pti ? IDTVEC(invlcache_pti) : IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, pti ? IDTVEC(rendezvous_pti) : IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, pti ? IDTVEC(ipi_intr_bitmap_handler_pti) : IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, pti ? IDTVEC(cpustop_pti) : IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ init_ops.start_all_aps(); set_interrupt_apic_ids(); -} + acpi_pxm_set_cpu_locality(); +} /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct nmi_pcpu *np; u_int64_t cr0; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; /* Set by the startup code for us to use */ cpu = bootAP; /* Update microcode before doing anything else. */ ucode_load_ap(cpu); /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* The NMI stack runs on IST2. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; /* The MC# stack runs on IST3. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist3 = (long) np; /* The DB# stack runs on IST4. */ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist4 = (long) np; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ /* Get per-cpu data */ pc = &__pcpu[cpu]; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; pc->pc_pti_rsp0 = (((vm_offset_t)&pc->pc_pti_stack + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful); pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]; pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; /* See comment in pmap_bootstrap(). */ pc->pc_pcid_next = PMAP_PCID_KERN + 2; pc->pc_pcid_gen = 1; common_tss[cpu].tss_rsp0 = 0; /* Save the per-cpu pointer for use by the NMI handler. */ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; /* Save the per-cpu pointer for use by the MC# handler. */ np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; /* Save the per-cpu pointer for use by the DB# handler. */ np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ fix_cpuid(); lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); amd64_conf_fast_syscall(); /* signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); init_secondary_tail(); } /******************************************************************* * local functions and data */ #ifdef NUMA static void mp_realloc_pcpu(int cpuid, int domain) { vm_page_t m; vm_offset_t oa, na; oa = (vm_offset_t)&__pcpu[cpuid]; if (_vm_phys_domain(pmap_kextract(oa)) == domain) return; m = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ); if (m == NULL) return; na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagecopy((void *)oa, (void *)na); pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1); /* XXX old pcpu page leaked. */ } #endif /* * start each AP in our list */ int native_start_all_aps(void) { u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; int apic_id, cpu, domain, i; u_char mpbiosreason; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* copy the AP 1st level boot code */ bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size); /* Locate the page tables, they'll be below the trampoline */ pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables); pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* Each slot of the level 4 pages points to the same level 3 page */ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* Each slot of the level 3 pages points to the same level 2 page */ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* Relocate pcpu areas to the correct domain. */ #ifdef NUMA if (vm_ndomains > 1) for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; domain = acpi_pxm_get_cpu_locality(apic_id); mp_realloc_pcpu(cpu, domain); } #endif /* start each AP */ domain = 0; for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; #ifdef NUMA if (vm_ndomains > 1) domain = acpi_pxm_get_cpu_locality(apic_id); #endif /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); mce_stack = (char *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc_domainset( DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO); dbg_stack = (char *)kmem_malloc_domainset( DOMAINSET_PREF(domain), PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } void invltlb_invpcid_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, smp_tlb_pmap == kernel_pmap ? INVPCID_CTXGLOB : INVPCID_CTX); PCPU_SET(smp_tlb_done, generation); } void invltlb_invpcid_pti_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; d.pad = 0; d.addr = 0; if (smp_tlb_pmap == kernel_pmap) { /* * This invalidation actually needs to clear kernel * mappings from the TLB in the current pmap, but * since we were asked for the flush in the kernel * pmap, achieve it by performing global flush. */ invpcid(&d, INVPCID_CTXGLOB); } else { invpcid(&d, INVPCID_CTX); d.pcid |= PMAP_PCID_USER_PT; invpcid(&d, INVPCID_CTX); } PCPU_SET(smp_tlb_done, generation); } void invltlb_pcid_handler(void) { uint64_t kcr3, ucr3; uint32_t generation, pcid; #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ if (smp_tlb_pmap == kernel_pmap) { invltlb_glob(); } else { /* * The current pmap might not be equal to * smp_tlb_pmap. The clearing of the pm_gen in * pmap_invalidate_all() takes care of TLB * invalidation when switching to the pmap on this * CPU. */ if (PCPU_GET(curpmap) == smp_tlb_pmap) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid; ucr3 = smp_tlb_pmap->pm_ucr3; if (ucr3 != PMAP_NO_CR3) { ucr3 |= PMAP_PCID_USER_PT | pcid; pmap_pti_pcid_invalidate(ucr3, kcr3); } else load_cr3(kcr3); } } PCPU_SET(smp_tlb_done, generation); } void invlpg_invpcid_handler(void) { struct invpcid_descr d; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = smp_tlb_addr1; invpcid(&d, INVPCID_ADDR); } PCPU_SET(smp_tlb_done, generation); } void invlpg_pcid_handler(void) { uint64_t kcr3, ucr3; uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ generation = smp_tlb_generation; /* Overlap with serialization */ invlpg(smp_tlb_addr1); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlpg(ucr3, kcr3, smp_tlb_addr1); } PCPU_SET(smp_tlb_done, generation); } void invlrng_invpcid_handler(void) { struct invpcid_descr d; vm_offset_t addr, addr2; uint32_t generation; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; } while (addr < addr2); if (smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = smp_tlb_addr1; do { invpcid(&d, INVPCID_ADDR); d.addr += PAGE_SIZE; } while (d.addr < addr2); } PCPU_SET(smp_tlb_done, generation); } void invlrng_pcid_handler(void) { vm_offset_t addr, addr2; uint64_t kcr3, ucr3; uint32_t generation; uint32_t pcid; #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ addr = smp_tlb_addr1; addr2 = smp_tlb_addr2; generation = smp_tlb_generation; /* Overlap with serialization */ do { invlpg(addr); addr += PAGE_SIZE; } while (addr < addr2); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3) { pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; kcr3 = smp_tlb_pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 |= pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlrng(ucr3, kcr3, smp_tlb_addr1, addr2); } PCPU_SET(smp_tlb_done, generation); } Index: stable/12/sys/arm64/arm64/mp_machdep.c =================================================================== --- stable/12/sys/arm64/arm64/mp_machdep.c (revision 361333) +++ stable/12/sys/arm64/arm64/mp_machdep.c (revision 361334) @@ -1,938 +1,936 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_acpi.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #include #include #endif #include #include "pic_if.h" #define MP_QUIRK_CPULIST 0x01 /* The list of cpus may be wrong, */ /* don't panic if one fails to start */ static uint32_t mp_quirks; #ifdef FDT static struct { const char *compat; uint32_t quirks; } fdt_quirks[] = { { "arm,foundation-aarch64", MP_QUIRK_CPULIST }, { "arm,fvp-base", MP_QUIRK_CPULIST }, /* This is incorrect in some DTS files */ { "arm,vfp-base", MP_QUIRK_CPULIST }, { NULL, 0 }, }; #endif typedef void intr_ipi_send_t(void *, cpuset_t, u_int); typedef void intr_ipi_handler_t(void *); #define INTR_IPI_NAMELEN (MAXCOMLEN + 1) struct intr_ipi { intr_ipi_handler_t * ii_handler; void * ii_handler_arg; intr_ipi_send_t * ii_send; void * ii_send_arg; char ii_name[INTR_IPI_NAMELEN]; u_long * ii_count; }; static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; static struct intr_ipi *intr_ipi_lookup(u_int); static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, void *); static device_identify_t arm64_cpu_identify; static device_probe_t arm64_cpu_probe; static device_attach_t arm64_cpu_attach; static void ipi_ast(void *); static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); struct pcb stoppcbs[MAXCPU]; static device_t cpu_list[MAXCPU]; /* * Not all systems boot from the first CPU in the device tree. To work around * this we need to find which CPU we have booted from so when we later * enable the secondary CPUs we skip this one. */ static int cpu0 = -1; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); /* Synchronize AP startup. */ static struct mtx ap_boot_mtx; /* Stacks for AP initialization, discarded once idle threads are started. */ void *bootstack; static void *bootstacks[MAXCPU]; /* Count of started APs, used to synchronize access to bootstack. */ static volatile int aps_started; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static device_method_t arm64_cpu_methods[] = { /* Device interface */ DEVMETHOD(device_identify, arm64_cpu_identify), DEVMETHOD(device_probe, arm64_cpu_probe), DEVMETHOD(device_attach, arm64_cpu_attach), DEVMETHOD_END }; static devclass_t arm64_cpu_devclass; static driver_t arm64_cpu_driver = { "arm64_cpu", arm64_cpu_methods, 0 }; DRIVER_MODULE(arm64_cpu, cpu, arm64_cpu_driver, arm64_cpu_devclass, 0, 0); static void arm64_cpu_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "arm64_cpu", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "arm64_cpu", -1) == NULL) device_printf(parent, "add child failed\n"); } static int arm64_cpu_probe(device_t dev) { u_int cpuid; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); device_quiet(dev); return (0); } static int arm64_cpu_attach(device_t dev) { const uint32_t *reg; size_t reg_size; u_int cpuid; int i; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); reg = cpu_get_cpuid(dev, ®_size); if (reg == NULL) return (EINVAL); if (bootverbose) { device_printf(dev, "register <"); for (i = 0; i < reg_size; i++) printf("%s%x", (i == 0) ? "" : " ", reg[i]); printf(">\n"); } /* Set the device to start it later */ cpu_list[cpuid] = dev; return (0); } static void release_aps(void *dummy __unused) { int i, started; /* Only release CPUs if they exist */ if (mp_ncpus == 1) return; intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ __asm __volatile( "dsb ishst \n" "sev \n" ::: "memory"); printf("Release APs..."); started = 0; for (i = 0; i < 2000; i++) { if (smp_started) { printf("done\n"); return; } /* * Don't time out while we are making progress. Some large * systems can take a while to start all CPUs. */ if (smp_cpus > started) { i = 0; started = smp_cpus; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; pcpup = &__pcpu[cpu]; /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); /* Signal the BSP and spin until it has released all APs. */ atomic_add_int(&aps_started, 1); while (!atomic_load_int(&aps_ready)) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); install_cpu_errata(); intr_pic_init_secondary(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); #ifdef VFP vfp_init(); #endif dbg_init(); pan_enable(); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); /* * Assert that smp_after_idle_runnable condition is reasonable. */ MPASS(PCPU_GET(curpcb) == NULL); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } static void smp_after_idle_runnable(void *arg __unused) { struct pcpu *pc; int cpu; for (cpu = 1; cpu < mp_ncpus; cpu++) { if (bootstacks[cpu] != NULL) { pc = pcpu_find(cpu); while ((void *)atomic_load_ptr(&pc->pc_curpcb) == NULL) cpu_spinwait(); kmem_free((vm_offset_t)bootstacks[cpu], PAGE_SIZE); } } } SYSINIT(smp_after_idle_runnable, SI_SUB_SMP, SI_ORDER_ANY, smp_after_idle_runnable, NULL); /* * Send IPI thru interrupt controller. */ static void pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) { KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); } /* * Setup IPI handler on interrupt controller. * * Not SMP coherent. */ static void intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, void *arg) { struct intr_irqsrc *isrc; struct intr_ipi *ii; int error; KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); if (error != 0) return; isrc->isrc_handlers++; ii = intr_ipi_lookup(ipi); KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); ii->ii_handler = hand; ii->ii_handler_arg = arg; ii->ii_send = pic_ipi_send; ii->ii_send_arg = isrc; strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); ii->ii_count = intr_ipi_setup_counters(name); } static void intr_ipi_send(cpuset_t cpus, u_int ipi) { struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); ii->ii_send(ii->ii_send_arg, cpus, ipi); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_hardclock(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); } static void ipi_preempt(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); } static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_stop(void *dummy __unused) { u_int cpu; CTR0(KTR_SMP, "IPI_STOP"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ return (1); } static bool start_cpu(u_int id, uint64_t target_cpu) { struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; int err, naps; /* Check we are able to start this cpu */ if (id > mp_maxid) return (false); KASSERT(id < MAXCPU, ("Too many CPUs")); /* We are already running on cpu 0 */ if (id == cpu0) return (true); /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other * CPUs ordered as they are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. */ cpuid = id; if (cpuid < cpu0) cpuid += mp_maxid + 1; cpuid -= cpu0; pcpup = &__pcpu[cpuid]; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); dpcpu[cpuid - 1] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); bootstacks[cpuid] = (void *)kmem_malloc(PAGE_SIZE, M_WAITOK | M_ZERO); naps = atomic_load_int(&aps_started); bootstack = (char *)bootstacks[cpuid] + PAGE_SIZE; printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* * Panic here if INVARIANTS are enabled and PSCI failed to * start the requested CPU. psci_cpu_on() returns PSCI_MISSING * to indicate we are unable to use it to start the given CPU. */ KASSERT(err == PSCI_MISSING || (mp_quirks & MP_QUIRK_CPULIST) == MP_QUIRK_CPULIST, ("Failed to start CPU %u (%lx), error %d\n", id, target_cpu, err)); pcpu_destroy(pcpup); kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); bootstacks[cpuid] = NULL; mp_ncpus--; /* Notify the user that the CPU failed to start */ printf("Failed to start CPU %u (%lx), error %d\n", id, target_cpu, err); } else { /* Wait for the AP to switch to its boot stack. */ while (atomic_load_int(&aps_started) < naps + 1) cpu_spinwait(); CPU_SET(cpuid, &all_cpus); } return (true); } #ifdef DEV_ACPI static void madt_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cpuid; u_int id; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; cpuid = arg; id = *cpuid; start_cpu(id, intr->ArmMpidr); __pcpu[id].pc_acpi_id = intr->Uid; (*cpuid)++; break; default: break; } } static void cpu_init_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cpuid; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return; madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return; } cpuid = 0; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, madt_handler, &cpuid); acpi_unmap_table(madt); #if MAXMEMDOM > 1 - /* set proximity info */ acpi_pxm_set_cpu_locality(); - acpi_pxm_free(); #endif } #endif #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; int domain; target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } if (!start_cpu(id, target_cpu)) return (FALSE); /* Try to read the numa node of this cpu */ if (vm_ndomains == 1 || OF_getencprop(node, "numa-node-id", &domain, sizeof(domain)) <= 0) domain = 0; __pcpu[id].pc_domain = domain; if (domain < MAXMEMDOM) CPU_SET(id, &cpuset_domain[domain]); return (TRUE); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { #ifdef FDT phandle_t node; int i; #endif mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: KASSERT(cpu0 >= 0, ("Current CPU was not found")); cpu_init_acpi(); break; #endif #ifdef FDT case ARM64_BUS_FDT: node = OF_peer(0); for (i = 0; fdt_quirks[i].compat != NULL; i++) { if (ofw_bus_node_is_compatible(node, fdt_quirks[i].compat) != 0) { mp_quirks = fdt_quirks[i].quirks; } } KASSERT(cpu0 >= 0, ("Current CPU was not found")); ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif default: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } #ifdef DEV_ACPI static void cpu_count_acpi_handler(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_MADT_GENERIC_INTERRUPT *intr; u_int *cores = arg; uint64_t mpidr_reg; switch(entry->Type) { case ACPI_MADT_TYPE_GENERIC_INTERRUPT: intr = (ACPI_MADT_GENERIC_INTERRUPT *)entry; if (cpu0 < 0) { mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == intr->ArmMpidr) cpu0 = *cores; } (*cores)++; break; default: break; } } static u_int cpu_count_acpi(void) { ACPI_TABLE_MADT *madt; vm_paddr_t physaddr; u_int cores; physaddr = acpi_find_table(ACPI_SIG_MADT); if (physaddr == 0) return (0); madt = acpi_map_table(physaddr, ACPI_SIG_MADT); if (madt == NULL) { printf("Unable to map the MADT, not starting APs\n"); return (0); } cores = 0; acpi_walk_subtables(madt + 1, (char *)madt + madt->Header.Length, cpu_count_acpi_handler, &cores); acpi_unmap_table(madt); return (cores); } #endif #ifdef FDT static boolean_t cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t mpidr_fdt, mpidr_reg; if (cpu0 < 0) { mpidr_fdt = reg[0]; if (addr_size == 2) { mpidr_fdt <<= 32; mpidr_fdt |= reg[1]; } mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt) cpu0 = id; } return (TRUE); } #endif void cpu_mp_setmaxid(void) { #if defined(DEV_ACPI) || defined(FDT) int cores; #endif switch(arm64_bus_method) { #ifdef DEV_ACPI case ARM64_BUS_ACPI: cores = cpu_count_acpi(); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the ACPI tables\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; return; } break; #endif #ifdef FDT case ARM64_BUS_FDT: cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); if (cores > 0) { cores = MIN(cores, MAXCPU); if (bootverbose) printf("Found %d CPUs in the device tree\n", cores); mp_ncpus = cores; mp_maxid = cores - 1; return; } break; #endif default: break; } if (bootverbose) printf("No CPU data, limiting to 1 core\n"); mp_ncpus = 1; mp_maxid = 0; } /* * Lookup IPI source. */ static struct intr_ipi * intr_ipi_lookup(u_int ipi) { if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); return (&ipi_sources[ipi]); } /* * interrupt controller dispatch function for IPIs. It should * be called straight from the interrupt controller, when associated * interrupt source is learned. Or from anybody who has an interrupt * source mapped. */ void intr_ipi_dispatch(u_int ipi, struct trapframe *tf) { void *arg; struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); /* * Supply ipi filter with trapframe argument * if none is registered. */ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; ii->ii_handler(arg); } #ifdef notyet /* * Map IPI into interrupt controller. * * Not SMP coherent. */ static int ipi_map(struct intr_irqsrc *isrc, u_int ipi) { boolean_t is_percpu; int error; if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); isrc->isrc_type = INTR_ISRCT_NAMESPACE; isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; isrc->isrc_nspc_num = ipi_next_num; error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); if (error == 0) { isrc->isrc_dev = intr_irq_root_dev; ipi_next_num++; } return (error); } /* * Setup IPI handler to interrupt source. * * Note that there could be more ways how to send and receive IPIs * on a platform like fast interrupts for example. In that case, * one can call this function with ASIF_NOALLOC flag set and then * call intr_ipi_dispatch() when appropriate. * * Not SMP coherent. */ int intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, void *arg, u_int flags) { struct intr_irqsrc *isrc; int error; if (filter == NULL) return(EINVAL); isrc = intr_ipi_lookup(ipi); if (isrc->isrc_ipifilter != NULL) return (EEXIST); if ((flags & AISHF_NOALLOC) == 0) { error = ipi_map(isrc, ipi); if (error != 0) return (error); } isrc->isrc_ipifilter = filter; isrc->isrc_arg = arg; isrc->isrc_handlers = 1; isrc->isrc_count = intr_ipi_setup_counters(name); isrc->isrc_index = 0; /* it should not be used in IPI case */ if (isrc->isrc_dev != NULL) { PIC_ENABLE_INTR(isrc->isrc_dev, isrc); PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); } return (0); } #endif /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t cpus; cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); intr_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } Index: stable/12/sys/dev/acpica/acpi_pxm.c =================================================================== --- stable/12/sys/dev/acpica/acpi_pxm.c (revision 361333) +++ stable/12/sys/dev/acpica/acpi_pxm.c (revision 361334) @@ -1,708 +1,699 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if MAXMEMDOM > 1 static struct cpu_info { int enabled:1; int has_memory:1; int domain; int id; } *cpus; static int max_cpus; static int last_cpu; struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; int num_mem; static ACPI_TABLE_SRAT *srat; static vm_paddr_t srat_physaddr; static int domain_pxm[MAXMEMDOM]; static int ndomain; static vm_paddr_t maxphyaddr; static ACPI_TABLE_SLIT *slit; static vm_paddr_t slit_physaddr; static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; static void srat_walk_table(acpi_subtable_handler *handler, void *arg); /* * SLIT parsing. */ static void slit_parse_table(ACPI_TABLE_SLIT *s) { int i, j; int i_domain, j_domain; int offset = 0; uint8_t e; /* * This maps the SLIT data into the VM-domain centric view. * There may be sparse entries in the PXM namespace, so * remap them to a VM-domain ID and if it doesn't exist, * skip it. * * It should result in a packed 2d array of VM-domain * locality information entries. */ if (bootverbose) printf("SLIT.Localities: %d\n", (int) s->LocalityCount); for (i = 0; i < s->LocalityCount; i++) { i_domain = acpi_map_pxm_to_vm_domainid(i); if (i_domain < 0) continue; if (bootverbose) printf("%d: ", i); for (j = 0; j < s->LocalityCount; j++) { j_domain = acpi_map_pxm_to_vm_domainid(j); if (j_domain < 0) continue; e = s->Entry[i * s->LocalityCount + j]; if (bootverbose) printf("%d ", (int) e); /* 255 == "no locality information" */ if (e == 255) vm_locality_table[offset] = -1; else vm_locality_table[offset] = e; offset++; } if (bootverbose) printf("\n"); } } /* * Look for an ACPI System Locality Distance Information Table ("SLIT") */ static int parse_slit(void) { if (resource_disabled("slit", 0)) { return (-1); } slit_physaddr = acpi_find_table(ACPI_SIG_SLIT); if (slit_physaddr == 0) { return (-1); } /* * Make a pass over the table to populate the cpus[] and * mem_info[] tables. */ slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT); slit_parse_table(slit); acpi_unmap_table(slit); slit = NULL; return (0); } /* * SRAT parsing. */ /* * Returns true if a memory range overlaps with at least one range in * phys_avail[]. */ static int overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end) { int i; for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) { if (phys_avail[i + 1] <= start) continue; if (phys_avail[i] < end) return (1); break; } return (0); } /* * On x86 we can use the cpuid to index the cpus array, but on arm64 * we have an ACPI Processor UID with a larger range. * * Use this variable to indicate if the cpus can be stored by index. */ #ifdef __aarch64__ static const int cpus_use_indexing = 0; #else static const int cpus_use_indexing = 1; #endif /* * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64) */ static struct cpu_info * cpu_find(int cpuid) { int i; if (cpus_use_indexing) { if (cpuid <= last_cpu && cpus[cpuid].enabled) return (&cpus[cpuid]); } else { for (i = 0; i <= last_cpu; i++) if (cpus[i].id == cpuid) return (&cpus[i]); } return (NULL); } /* * Find CPU by pcpu pointer. */ static struct cpu_info * cpu_get_info(struct pcpu *pc) { struct cpu_info *cpup; int id; #ifdef __aarch64__ id = pc->pc_acpi_id; #else id = pc->pc_apic_id; #endif cpup = cpu_find(id); if (cpup == NULL) panic("SRAT: CPU with ID %u is not known", id); return (cpup); } /* * Add proximity information for a new CPU. */ static struct cpu_info * cpu_add(int cpuid, int domain) { struct cpu_info *cpup; if (cpus_use_indexing) { if (cpuid >= max_cpus) return (NULL); last_cpu = imax(last_cpu, cpuid); cpup = &cpus[cpuid]; } else { if (last_cpu >= max_cpus - 1) return (NULL); cpup = &cpus[++last_cpu]; } cpup->domain = domain; cpup->id = cpuid; cpup->enabled = 1; return (cpup); } static void srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg) { ACPI_SRAT_CPU_AFFINITY *cpu; ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; ACPI_SRAT_MEM_AFFINITY *mem; ACPI_SRAT_GICC_AFFINITY *gicc; static struct cpu_info *cpup; int domain, i, slot; switch (entry->Type) { case ACPI_SRAT_TYPE_CPU_AFFINITY: cpu = (ACPI_SRAT_CPU_AFFINITY *)entry; domain = cpu->ProximityDomainLo | cpu->ProximityDomainHi[0] << 8 | cpu->ProximityDomainHi[1] << 16 | cpu->ProximityDomainHi[2] << 24; if (bootverbose) printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", cpu->ApicId, domain, (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ? "enabled" : "disabled"); if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) break; cpup = cpu_find(cpu->ApicId); if (cpup != NULL) { printf("SRAT: Duplicate local APIC ID %u\n", cpu->ApicId); *(int *)arg = ENXIO; break; } cpup = cpu_add(cpu->ApicId, domain); if (cpup == NULL) printf("SRAT: Ignoring local APIC ID %u (too high)\n", cpu->ApicId); break; case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry; if (bootverbose) printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", x2apic->ApicId, x2apic->ProximityDomain, (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ? "enabled" : "disabled"); if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED)) break; KASSERT(cpu_find(x2apic->ApicId) == NULL, ("Duplicate local APIC ID %u", x2apic->ApicId)); cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain); if (cpup == NULL) printf("SRAT: Ignoring local APIC ID %u (too high)\n", x2apic->ApicId); break; case ACPI_SRAT_TYPE_GICC_AFFINITY: gicc = (ACPI_SRAT_GICC_AFFINITY *)entry; if (bootverbose) printf("SRAT: Found CPU UID %u domain %d: %s\n", gicc->AcpiProcessorUid, gicc->ProximityDomain, (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ? "enabled" : "disabled"); if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED)) break; KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL, ("Duplicate CPU UID %u", gicc->AcpiProcessorUid)); cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain); if (cpup == NULL) printf("SRAT: Ignoring CPU UID %u (too high)\n", gicc->AcpiProcessorUid); break; case ACPI_SRAT_TYPE_MEMORY_AFFINITY: mem = (ACPI_SRAT_MEM_AFFINITY *)entry; if (bootverbose) printf( "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n", mem->ProximityDomain, (uintmax_t)mem->BaseAddress, (uintmax_t)mem->Length, (mem->Flags & ACPI_SRAT_MEM_ENABLED) ? "enabled" : "disabled"); if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) break; if (mem->BaseAddress >= maxphyaddr || !overlaps_phys_avail(mem->BaseAddress, mem->BaseAddress + mem->Length)) { printf("SRAT: Ignoring memory at addr 0x%jx\n", (uintmax_t)mem->BaseAddress); break; } if (num_mem == VM_PHYSSEG_MAX) { printf("SRAT: Too many memory regions\n"); *(int *)arg = ENXIO; break; } slot = num_mem; for (i = 0; i < num_mem; i++) { if (mem_info[i].end <= mem->BaseAddress) continue; if (mem_info[i].start < (mem->BaseAddress + mem->Length)) { printf("SRAT: Overlapping memory entries\n"); *(int *)arg = ENXIO; return; } slot = i; } for (i = num_mem; i > slot; i--) mem_info[i] = mem_info[i - 1]; mem_info[slot].start = mem->BaseAddress; mem_info[slot].end = mem->BaseAddress + mem->Length; mem_info[slot].domain = mem->ProximityDomain; num_mem++; break; } } /* * Ensure each memory domain has at least one CPU and that each CPU * has at least one memory domain. */ static int check_domains(void) { int found, i, j; for (i = 0; i < num_mem; i++) { found = 0; for (j = 0; j <= last_cpu; j++) if (cpus[j].enabled && cpus[j].domain == mem_info[i].domain) { cpus[j].has_memory = 1; found++; } if (!found) { printf("SRAT: No CPU found for memory domain %d\n", mem_info[i].domain); return (ENXIO); } } for (i = 0; i <= last_cpu; i++) if (cpus[i].enabled && !cpus[i].has_memory) { found = 0; for (j = 0; j < num_mem && !found; j++) { if (mem_info[j].domain == cpus[i].domain) found = 1; } if (!found) { if (bootverbose) printf("SRAT: mem dom %d is empty\n", cpus[i].domain); mem_info[num_mem].start = 0; mem_info[num_mem].end = 0; mem_info[num_mem].domain = cpus[i].domain; num_mem++; } } return (0); } /* * Check that the SRAT memory regions cover all of the regions in * phys_avail[]. */ static int check_phys_avail(void) { vm_paddr_t address; int i, j; /* j is the current offset into phys_avail[]. */ address = phys_avail[0]; j = 0; for (i = 0; i < num_mem; i++) { /* * Consume as many phys_avail[] entries as fit in this * region. */ while (address >= mem_info[i].start && address <= mem_info[i].end) { /* * If we cover the rest of this phys_avail[] entry, * advance to the next entry. */ if (phys_avail[j + 1] <= mem_info[i].end) { j += 2; if (phys_avail[j] == 0 && phys_avail[j + 1] == 0) { return (0); } address = phys_avail[j]; } else address = mem_info[i].end + 1; } } printf("SRAT: No memory region found for 0x%jx - 0x%jx\n", (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]); return (ENXIO); } /* * Renumber the memory domains to be compact and zero-based if not * already. Returns an error if there are too many domains. */ static int renumber_domains(void) { int i, j, slot; /* Enumerate all the domains. */ ndomain = 0; for (i = 0; i < num_mem; i++) { /* See if this domain is already known. */ for (j = 0; j < ndomain; j++) { if (domain_pxm[j] >= mem_info[i].domain) break; } if (j < ndomain && domain_pxm[j] == mem_info[i].domain) continue; if (ndomain >= MAXMEMDOM) { ndomain = 1; printf("SRAT: Too many memory domains\n"); return (EFBIG); } /* Insert the new domain at slot 'j'. */ slot = j; for (j = ndomain; j > slot; j--) domain_pxm[j] = domain_pxm[j - 1]; domain_pxm[slot] = mem_info[i].domain; ndomain++; } /* Renumber each domain to its index in the sorted 'domain_pxm' list. */ for (i = 0; i < ndomain; i++) { /* * If the domain is already the right value, no need * to renumber. */ if (domain_pxm[i] == i) continue; /* Walk the cpu[] and mem_info[] arrays to renumber. */ for (j = 0; j < num_mem; j++) if (mem_info[j].domain == domain_pxm[i]) mem_info[j].domain = i; for (j = 0; j <= last_cpu; j++) if (cpus[j].enabled && cpus[j].domain == domain_pxm[i]) cpus[j].domain = i; } return (0); } /* * Look for an ACPI System Resource Affinity Table ("SRAT"), * allocate space for cpu information, and initialize globals. */ int acpi_pxm_init(int ncpus, vm_paddr_t maxphys) { unsigned int idx, size; vm_paddr_t addr; if (resource_disabled("srat", 0)) return (-1); max_cpus = ncpus; last_cpu = -1; maxphyaddr = maxphys; srat_physaddr = acpi_find_table(ACPI_SIG_SRAT); if (srat_physaddr == 0) return (-1); /* * Allocate data structure: * * Find the last physical memory region and steal some memory from * it. This is done because at this point in the boot process * malloc is still not usable. */ for (idx = 0; phys_avail[idx + 1] != 0; idx += 2); KASSERT(idx != 0, ("phys_avail is empty!")); idx -= 2; size = sizeof(*cpus) * max_cpus; addr = trunc_page(phys_avail[idx + 1] - size); KASSERT(addr >= phys_avail[idx], ("Not enough memory for SRAT table items")); phys_avail[idx + 1] = addr - 1; /* * We cannot rely on PHYS_TO_DMAP because this code is also used in * i386, so use pmap_mapbios to map the memory, this will end up using * the default memory attribute (WB), and the DMAP when available. */ cpus = (struct cpu_info *)pmap_mapbios(addr, size); bzero(cpus, size); return (0); } static int parse_srat(void) { int error; /* * Make a pass over the table to populate the cpus[] and * mem_info[] tables. */ srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT); error = 0; srat_walk_table(srat_parse_entry, &error); acpi_unmap_table(srat); srat = NULL; if (error || check_domains() != 0 || check_phys_avail() != 0 || renumber_domains() != 0) { srat_physaddr = 0; return (-1); } return (0); } static void init_mem_locality(void) { int i; /* * For now, assume -1 == "no locality information for * this pairing. */ for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++) vm_locality_table[i] = -1; } /* * Parse SRAT and SLIT to save proximity info. Don't do * anything if SRAT is not available. */ void acpi_pxm_parse_tables(void) { if (srat_physaddr == 0) return; if (parse_srat() < 0) return; init_mem_locality(); (void)parse_slit(); } /* * Use saved data from SRAT/SLIT to update memory locality. */ void acpi_pxm_set_mem_locality(void) { if (srat_physaddr == 0) return; vm_phys_register_domains(ndomain, mem_info, vm_locality_table); } static void srat_walk_table(acpi_subtable_handler *handler, void *arg) { acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length, handler, arg); } /* - * Setup per-CPU domain IDs from information saved in 'cpus'. + * Set up per-CPU domain IDs from information saved in 'cpus' and tear down data + * structures allocated by acpi_pxm_init(). */ void acpi_pxm_set_cpu_locality(void) { struct cpu_info *cpu; struct pcpu *pc; u_int i; if (srat_physaddr == 0) return; for (i = 0; i < MAXCPU; i++) { if (CPU_ABSENT(i)) continue; pc = pcpu_find(i); KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); cpu = cpu_get_info(pc); pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0; CPU_SET(i, &cpuset_domain[pc->pc_domain]); if (bootverbose) printf("SRAT: CPU %u has memory domain %d\n", i, pc->pc_domain); } + /* XXXMJ the page is leaked. */ + pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * max_cpus); + srat_physaddr = 0; + cpus = NULL; } int acpi_pxm_get_cpu_locality(int apic_id) { struct cpu_info *cpu; cpu = cpu_find(apic_id); if (cpu == NULL) panic("SRAT: CPU with ID %u is not known", apic_id); return (cpu->domain); -} - -/* - * Free data structures allocated during acpi_pxm_init. - */ -void -acpi_pxm_free(void) -{ - - if (srat_physaddr == 0) - return; - pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * max_cpus); - srat_physaddr = 0; - cpus = NULL; } /* * Map a _PXM value to a VM domain ID. * * Returns the domain ID, or -1 if no domain ID was found. */ int acpi_map_pxm_to_vm_domainid(int pxm) { int i; for (i = 0; i < ndomain; i++) { if (domain_pxm[i] == pxm) return (vm_ndomains > 1 ? i : 0); } return (-1); } #else /* MAXMEMDOM == 1 */ int acpi_map_pxm_to_vm_domainid(int pxm) { return (-1); } #endif /* MAXMEMDOM > 1 */ Index: stable/12/sys/dev/acpica/acpivar.h =================================================================== --- stable/12/sys/dev/acpica/acpivar.h (revision 361333) +++ stable/12/sys/dev/acpica/acpivar.h (revision 361334) @@ -1,559 +1,558 @@ /*- * Copyright (c) 2000 Mitsuru IWASAKI * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ACPIVAR_H_ #define _ACPIVAR_H_ #ifdef _KERNEL #include "acpi_if.h" #include "bus_if.h" #include #ifdef INTRNG #include #endif #include #include #include #include #include #include #include #include struct apm_clone_data; struct acpi_softc { device_t acpi_dev; struct cdev *acpi_dev_t; int acpi_enabled; int acpi_sstate; int acpi_sleep_disabled; int acpi_resources_reserved; struct sysctl_ctx_list acpi_sysctl_ctx; struct sysctl_oid *acpi_sysctl_tree; int acpi_power_button_sx; int acpi_sleep_button_sx; int acpi_lid_switch_sx; int acpi_standby_sx; int acpi_suspend_sx; int acpi_sleep_delay; int acpi_s4bios; int acpi_do_disable; int acpi_verbose; int acpi_handle_reboot; vm_offset_t acpi_wakeaddr; vm_paddr_t acpi_wakephys; int acpi_next_sstate; /* Next suspend Sx state. */ struct apm_clone_data *acpi_clone; /* Pseudo-dev for devd(8). */ STAILQ_HEAD(,apm_clone_data) apm_cdevs; /* All apm/apmctl/acpi cdevs. */ struct callout susp_force_to; /* Force suspend if no acks. */ }; struct acpi_device { /* ACPI ivars */ ACPI_HANDLE ad_handle; void *ad_private; int ad_flags; int ad_cls_class; /* Resources */ struct resource_list ad_rl; }; #ifdef INTRNG struct intr_map_data_acpi { struct intr_map_data hdr; u_int irq; u_int pol; u_int trig; }; #endif /* Track device (/dev/{apm,apmctl} and /dev/acpi) notification status. */ struct apm_clone_data { STAILQ_ENTRY(apm_clone_data) entries; struct cdev *cdev; int flags; #define ACPI_EVF_NONE 0 /* /dev/apm semantics */ #define ACPI_EVF_DEVD 1 /* /dev/acpi is handled via devd(8) */ #define ACPI_EVF_WRITE 2 /* Device instance is opened writable. */ int notify_status; #define APM_EV_NONE 0 /* Device not yet aware of pending sleep. */ #define APM_EV_NOTIFIED 1 /* Device saw next sleep state. */ #define APM_EV_ACKED 2 /* Device agreed sleep can occur. */ struct acpi_softc *acpi_sc; struct selinfo sel_read; }; #define ACPI_PRW_MAX_POWERRES 8 struct acpi_prw_data { ACPI_HANDLE gpe_handle; int gpe_bit; int lowest_wake; ACPI_OBJECT power_res[ACPI_PRW_MAX_POWERRES]; int power_res_count; }; /* Flags for each device defined in the AML namespace. */ #define ACPI_FLAG_WAKE_ENABLED 0x1 /* Macros for extracting parts of a PCI address from an _ADR value. */ #define ACPI_ADR_PCI_SLOT(adr) (((adr) & 0xffff0000) >> 16) #define ACPI_ADR_PCI_FUNC(adr) ((adr) & 0xffff) /* * Entry points to ACPI from above are global functions defined in this * file, sysctls, and I/O on the control device. Entry points from below * are interrupts (the SCI), notifies, task queue threads, and the thermal * zone polling thread. * * ACPI tables and global shared data are protected by a global lock * (acpi_mutex). * * Each ACPI device can have its own driver-specific mutex for protecting * shared access to local data. The ACPI_LOCK macros handle mutexes. * * Drivers that need to serialize access to functions (e.g., to route * interrupts, get/set control paths, etc.) should use the sx lock macros * (ACPI_SERIAL). * * ACPI-CA handles its own locking and should not be called with locks held. * * The most complicated path is: * GPE -> EC runs _Qxx -> _Qxx reads EC space -> GPE */ extern struct mtx acpi_mutex; #define ACPI_LOCK(sys) mtx_lock(&sys##_mutex) #define ACPI_UNLOCK(sys) mtx_unlock(&sys##_mutex) #define ACPI_LOCK_ASSERT(sys) mtx_assert(&sys##_mutex, MA_OWNED); #define ACPI_LOCK_DECL(sys, name) \ static struct mtx sys##_mutex; \ MTX_SYSINIT(sys##_mutex, &sys##_mutex, name, MTX_DEF) #define ACPI_SERIAL_BEGIN(sys) sx_xlock(&sys##_sxlock) #define ACPI_SERIAL_END(sys) sx_xunlock(&sys##_sxlock) #define ACPI_SERIAL_ASSERT(sys) sx_assert(&sys##_sxlock, SX_XLOCKED); #define ACPI_SERIAL_DECL(sys, name) \ static struct sx sys##_sxlock; \ SX_SYSINIT(sys##_sxlock, &sys##_sxlock, name) /* * ACPI CA does not define layers for non-ACPI CA drivers. * We define some here within the range provided. */ #define ACPI_AC_ADAPTER 0x00010000 #define ACPI_BATTERY 0x00020000 #define ACPI_BUS 0x00040000 #define ACPI_BUTTON 0x00080000 #define ACPI_EC 0x00100000 #define ACPI_FAN 0x00200000 #define ACPI_POWERRES 0x00400000 #define ACPI_PROCESSOR 0x00800000 #define ACPI_THERMAL 0x01000000 #define ACPI_TIMER 0x02000000 #define ACPI_OEM 0x04000000 /* * Constants for different interrupt models used with acpi_SetIntrModel(). */ #define ACPI_INTR_PIC 0 #define ACPI_INTR_APIC 1 #define ACPI_INTR_SAPIC 2 /* * Various features and capabilities for the acpi_get_features() method. * In particular, these are used for the ACPI 3.0 _PDC and _OSC methods. * See the Intel document titled "Intel Processor Vendor-Specific ACPI", * number 302223-007. */ #define ACPI_CAP_PERF_MSRS (1 << 0) /* Intel SpeedStep PERF_CTL MSRs */ #define ACPI_CAP_C1_IO_HALT (1 << 1) /* Intel C1 "IO then halt" sequence */ #define ACPI_CAP_THR_MSRS (1 << 2) /* Intel OnDemand throttling MSRs */ #define ACPI_CAP_SMP_SAME (1 << 3) /* MP C1, Px, and Tx (all the same) */ #define ACPI_CAP_SMP_SAME_C3 (1 << 4) /* MP C2 and C3 (all the same) */ #define ACPI_CAP_SMP_DIFF_PX (1 << 5) /* MP Px (different, using _PSD) */ #define ACPI_CAP_SMP_DIFF_CX (1 << 6) /* MP Cx (different, using _CSD) */ #define ACPI_CAP_SMP_DIFF_TX (1 << 7) /* MP Tx (different, using _TSD) */ #define ACPI_CAP_SMP_C1_NATIVE (1 << 8) /* MP C1 support other than halt */ #define ACPI_CAP_SMP_C3_NATIVE (1 << 9) /* MP C2 and C3 support */ #define ACPI_CAP_PX_HW_COORD (1 << 11) /* Intel P-state HW coordination */ #define ACPI_CAP_INTR_CPPC (1 << 12) /* Native Interrupt Handling for Collaborative Processor Performance Control notifications */ #define ACPI_CAP_HW_DUTY_C (1 << 13) /* Hardware Duty Cycling */ /* * Quirk flags. * * ACPI_Q_BROKEN: Disables all ACPI support. * ACPI_Q_TIMER: Disables support for the ACPI timer. * ACPI_Q_MADT_IRQ0: Specifies that ISA IRQ 0 is wired up to pin 0 of the * first APIC and that the MADT should force that by ignoring the PC-AT * compatible flag and ignoring overrides that redirect IRQ 0 to pin 2. */ extern int acpi_quirks; #define ACPI_Q_OK 0 #define ACPI_Q_BROKEN (1 << 0) #define ACPI_Q_TIMER (1 << 1) #define ACPI_Q_MADT_IRQ0 (1 << 2) /* * Note that the low ivar values are reserved to provide * interface compatibility with ISA drivers which can also * attach to ACPI. */ #define ACPI_IVAR_HANDLE 0x100 #define ACPI_IVAR_UNUSED 0x101 /* Unused/reserved. */ #define ACPI_IVAR_PRIVATE 0x102 #define ACPI_IVAR_FLAGS 0x103 /* * Accessor functions for our ivars. Default value for BUS_READ_IVAR is * (type) 0. The accessor functions don't check return values. */ #define __ACPI_BUS_ACCESSOR(varp, var, ivarp, ivar, type) \ \ static __inline type varp ## _get_ ## var(device_t dev) \ { \ uintptr_t v = 0; \ BUS_READ_IVAR(device_get_parent(dev), dev, \ ivarp ## _IVAR_ ## ivar, &v); \ return ((type) v); \ } \ \ static __inline void varp ## _set_ ## var(device_t dev, type t) \ { \ uintptr_t v = (uintptr_t) t; \ BUS_WRITE_IVAR(device_get_parent(dev), dev, \ ivarp ## _IVAR_ ## ivar, v); \ } __ACPI_BUS_ACCESSOR(acpi, handle, ACPI, HANDLE, ACPI_HANDLE) __ACPI_BUS_ACCESSOR(acpi, private, ACPI, PRIVATE, void *) __ACPI_BUS_ACCESSOR(acpi, flags, ACPI, FLAGS, int) void acpi_fake_objhandler(ACPI_HANDLE h, void *data); static __inline device_t acpi_get_device(ACPI_HANDLE handle) { void *dev = NULL; AcpiGetData(handle, acpi_fake_objhandler, &dev); return ((device_t)dev); } static __inline ACPI_OBJECT_TYPE acpi_get_type(device_t dev) { ACPI_HANDLE h; ACPI_OBJECT_TYPE t; if ((h = acpi_get_handle(dev)) == NULL) return (ACPI_TYPE_NOT_FOUND); if (ACPI_FAILURE(AcpiGetType(h, &t))) return (ACPI_TYPE_NOT_FOUND); return (t); } /* Find the difference between two PM tick counts. */ static __inline uint32_t acpi_TimerDelta(uint32_t end, uint32_t start) { if (end < start && (AcpiGbl_FADT.Flags & ACPI_FADT_32BIT_TIMER) == 0) end |= 0x01000000; return (end - start); } #ifdef ACPI_DEBUGGER void acpi_EnterDebugger(void); #endif #ifdef ACPI_DEBUG #include #define STEP(x) do {printf x, printf("\n"); cngetc();} while (0) #else #define STEP(x) #endif #define ACPI_VPRINT(dev, acpi_sc, x...) do { \ if (acpi_get_verbose(acpi_sc)) \ device_printf(dev, x); \ } while (0) /* Values for the first status word returned by _OSC. */ #define ACPI_OSC_FAILURE (1 << 1) #define ACPI_OSC_BAD_UUID (1 << 2) #define ACPI_OSC_BAD_REVISION (1 << 3) #define ACPI_OSC_CAPS_MASKED (1 << 4) #define ACPI_DEVINFO_PRESENT(x, flags) \ (((x) & (flags)) == (flags)) #define ACPI_DEVICE_PRESENT(x) \ ACPI_DEVINFO_PRESENT(x, ACPI_STA_DEVICE_PRESENT | \ ACPI_STA_DEVICE_FUNCTIONING) #define ACPI_BATTERY_PRESENT(x) \ ACPI_DEVINFO_PRESENT(x, ACPI_STA_DEVICE_PRESENT | \ ACPI_STA_DEVICE_FUNCTIONING | ACPI_STA_BATTERY_PRESENT) /* Callback function type for walking subtables within a table. */ typedef void acpi_subtable_handler(ACPI_SUBTABLE_HEADER *, void *); BOOLEAN acpi_DeviceIsPresent(device_t dev); BOOLEAN acpi_BatteryIsPresent(device_t dev); ACPI_STATUS acpi_GetHandleInScope(ACPI_HANDLE parent, char *path, ACPI_HANDLE *result); ACPI_BUFFER *acpi_AllocBuffer(int size); ACPI_STATUS acpi_ConvertBufferToInteger(ACPI_BUFFER *bufp, UINT32 *number); ACPI_STATUS acpi_GetInteger(ACPI_HANDLE handle, char *path, UINT32 *number); ACPI_STATUS acpi_SetInteger(ACPI_HANDLE handle, char *path, UINT32 number); ACPI_STATUS acpi_ForeachPackageObject(ACPI_OBJECT *obj, void (*func)(ACPI_OBJECT *comp, void *arg), void *arg); ACPI_STATUS acpi_FindIndexedResource(ACPI_BUFFER *buf, int index, ACPI_RESOURCE **resp); ACPI_STATUS acpi_AppendBufferResource(ACPI_BUFFER *buf, ACPI_RESOURCE *res); UINT8 acpi_DSMQuery(ACPI_HANDLE h, uint8_t *uuid, int revision); ACPI_STATUS acpi_EvaluateDSM(ACPI_HANDLE handle, uint8_t *uuid, int revision, uint64_t function, union acpi_object *package, ACPI_BUFFER *out_buf); ACPI_STATUS acpi_EvaluateOSC(ACPI_HANDLE handle, uint8_t *uuid, int revision, int count, uint32_t *caps_in, uint32_t *caps_out, bool query); ACPI_STATUS acpi_OverrideInterruptLevel(UINT32 InterruptNumber); ACPI_STATUS acpi_SetIntrModel(int model); int acpi_ReqSleepState(struct acpi_softc *sc, int state); int acpi_AckSleepState(struct apm_clone_data *clone, int error); ACPI_STATUS acpi_SetSleepState(struct acpi_softc *sc, int state); int acpi_wake_set_enable(device_t dev, int enable); int acpi_parse_prw(ACPI_HANDLE h, struct acpi_prw_data *prw); ACPI_STATUS acpi_Startup(void); void acpi_UserNotify(const char *subsystem, ACPI_HANDLE h, uint8_t notify); int acpi_bus_alloc_gas(device_t dev, int *type, int *rid, ACPI_GENERIC_ADDRESS *gas, struct resource **res, u_int flags); void acpi_walk_subtables(void *first, void *end, acpi_subtable_handler *handler, void *arg); BOOLEAN acpi_has_hid(ACPI_HANDLE handle); BOOLEAN acpi_MatchHid(ACPI_HANDLE h, const char *hid); struct acpi_parse_resource_set { void (*set_init)(device_t dev, void *arg, void **context); void (*set_done)(device_t dev, void *context); void (*set_ioport)(device_t dev, void *context, uint64_t base, uint64_t length); void (*set_iorange)(device_t dev, void *context, uint64_t low, uint64_t high, uint64_t length, uint64_t align); void (*set_memory)(device_t dev, void *context, uint64_t base, uint64_t length); void (*set_memoryrange)(device_t dev, void *context, uint64_t low, uint64_t high, uint64_t length, uint64_t align); void (*set_irq)(device_t dev, void *context, uint8_t *irq, int count, int trig, int pol); void (*set_ext_irq)(device_t dev, void *context, uint32_t *irq, int count, int trig, int pol); void (*set_drq)(device_t dev, void *context, uint8_t *drq, int count); void (*set_start_dependent)(device_t dev, void *context, int preference); void (*set_end_dependent)(device_t dev, void *context); }; extern struct acpi_parse_resource_set acpi_res_parse_set; int acpi_identify(void); void acpi_config_intr(device_t dev, ACPI_RESOURCE *res); #ifdef INTRNG int acpi_map_intr(device_t dev, u_int irq, ACPI_HANDLE handle); #endif ACPI_STATUS acpi_lookup_irq_resource(device_t dev, int rid, struct resource *res, ACPI_RESOURCE *acpi_res); ACPI_STATUS acpi_parse_resources(device_t dev, ACPI_HANDLE handle, struct acpi_parse_resource_set *set, void *arg); struct resource *acpi_alloc_sysres(device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); /* ACPI event handling */ UINT32 acpi_event_power_button_sleep(void *context); UINT32 acpi_event_power_button_wake(void *context); UINT32 acpi_event_sleep_button_sleep(void *context); UINT32 acpi_event_sleep_button_wake(void *context); #define ACPI_EVENT_PRI_FIRST 0 #define ACPI_EVENT_PRI_DEFAULT 10000 #define ACPI_EVENT_PRI_LAST 20000 typedef void (*acpi_event_handler_t)(void *, int); EVENTHANDLER_DECLARE(acpi_sleep_event, acpi_event_handler_t); EVENTHANDLER_DECLARE(acpi_wakeup_event, acpi_event_handler_t); /* Device power control. */ ACPI_STATUS acpi_pwr_wake_enable(ACPI_HANDLE consumer, int enable); ACPI_STATUS acpi_pwr_switch_consumer(ACPI_HANDLE consumer, int state); int acpi_device_pwr_for_sleep(device_t bus, device_t dev, int *dstate); int acpi_set_powerstate(device_t child, int state); /* APM emulation */ void acpi_apm_init(struct acpi_softc *); /* Misc. */ static __inline struct acpi_softc * acpi_device_get_parent_softc(device_t child) { device_t parent; parent = device_get_parent(child); if (parent == NULL) return (NULL); return (device_get_softc(parent)); } static __inline int acpi_get_verbose(struct acpi_softc *sc) { if (sc) return (sc->acpi_verbose); return (0); } char *acpi_name(ACPI_HANDLE handle); int acpi_avoid(ACPI_HANDLE handle); int acpi_disabled(char *subsys); int acpi_machdep_init(device_t dev); void acpi_install_wakeup_handler(struct acpi_softc *sc); int acpi_sleep_machdep(struct acpi_softc *sc, int state); int acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, int intr_enabled); int acpi_table_quirks(int *quirks); int acpi_machdep_quirks(int *quirks); int acpi_pnpinfo_str(ACPI_HANDLE handle, char *buf, size_t buflen); uint32_t hpet_get_uid(device_t dev); /* Battery Abstraction. */ struct acpi_battinfo; int acpi_battery_register(device_t dev); int acpi_battery_remove(device_t dev); int acpi_battery_get_units(void); int acpi_battery_get_info_expire(void); int acpi_battery_bst_valid(struct acpi_bst *bst); int acpi_battery_bix_valid(struct acpi_bix *bix); int acpi_battery_get_battinfo(device_t dev, struct acpi_battinfo *info); /* Embedded controller. */ void acpi_ec_ecdt_probe(device_t); /* AC adapter interface. */ int acpi_acad_get_acline(int *); /* Package manipulation convenience functions. */ #define ACPI_PKG_VALID(pkg, size) \ ((pkg) != NULL && (pkg)->Type == ACPI_TYPE_PACKAGE && \ (pkg)->Package.Count >= (size)) #define ACPI_PKG_VALID_EQ(pkg, size) \ ((pkg) != NULL && (pkg)->Type == ACPI_TYPE_PACKAGE && \ (pkg)->Package.Count == (size)) int acpi_PkgInt(ACPI_OBJECT *res, int idx, UINT64 *dst); int acpi_PkgInt32(ACPI_OBJECT *res, int idx, uint32_t *dst); int acpi_PkgInt16(ACPI_OBJECT *res, int idx, uint16_t *dst); int acpi_PkgStr(ACPI_OBJECT *res, int idx, void *dst, size_t size); int acpi_PkgGas(device_t dev, ACPI_OBJECT *res, int idx, int *type, int *rid, struct resource **dst, u_int flags); int acpi_PkgFFH_IntelCpu(ACPI_OBJECT *res, int idx, int *vendor, int *class, uint64_t *address, int *accsize); ACPI_HANDLE acpi_GetReference(ACPI_HANDLE scope, ACPI_OBJECT *obj); /* * Base level for BUS_ADD_CHILD. Special devices are added at orders less * than this, and normal devices at or above this level. This keeps the * probe order sorted so that things like sysresource are available before * their children need them. */ #define ACPI_DEV_BASE_ORDER 100 /* Default maximum number of tasks to enqueue. */ #ifndef ACPI_MAX_TASKS #define ACPI_MAX_TASKS MAX(32, MAXCPU * 4) #endif /* Default number of task queue threads to start. */ #ifndef ACPI_MAX_THREADS #define ACPI_MAX_THREADS 3 #endif /* Use the device logging level for ktr(4). */ #define KTR_ACPI KTR_DEV SYSCTL_DECL(_debug_acpi); /* * Parse and use proximity information in SRAT and SLIT. */ int acpi_pxm_init(int ncpus, vm_paddr_t maxphys); void acpi_pxm_parse_tables(void); void acpi_pxm_set_mem_locality(void); void acpi_pxm_set_cpu_locality(void); int acpi_pxm_get_cpu_locality(int apic_id); -void acpi_pxm_free(void); /* * Map a PXM to a VM domain. * * Returns the VM domain ID if found, or -1 if not found / invalid. */ int acpi_map_pxm_to_vm_domainid(int pxm); int acpi_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize, cpuset_t *cpuset); int acpi_get_domain(device_t dev, device_t child, int *domain); #ifdef __aarch64__ /* * ARM specific ACPI interfaces, relating to IORT table. */ int acpi_iort_map_pci_msi(u_int seg, u_int rid, u_int *xref, u_int *devid); int acpi_iort_its_lookup(u_int its_id, u_int *xref, int *pxm); #endif #endif /* _KERNEL */ #endif /* !_ACPIVAR_H_ */ Index: stable/12/sys/i386/i386/mp_machdep.c =================================================================== --- stable/12/sys/i386/i386/mp_machdep.c (revision 361333) +++ stable/12/sys/i386/i386/mp_machdep.c (revision 361334) @@ -1,464 +1,466 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? #endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467) #define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) #define CHECK_INIT(D); \ CHECK_WRITE(0x34, (D)); \ CHECK_WRITE(0x35, (D)); \ CHECK_WRITE(0x36, (D)); \ CHECK_WRITE(0x37, (D)); \ CHECK_WRITE(0x38, (D)); \ CHECK_WRITE(0x39, (D)); #define CHECK_PRINT(S); \ printf("%s: %d, %d, %d, %d, %d, %d\n", \ (S), \ CHECK_READ(0x34), \ CHECK_READ(0x35), \ CHECK_READ(0x36), \ CHECK_READ(0x37), \ CHECK_READ(0x38), \ CHECK_READ(0x39)); #else /* CHECK_POINTS */ #define CHECK_INIT(D) #define CHECK_PRINT(S) #define CHECK_WRITE(A, D) #endif /* CHECK_POINTS */ /* * Local data and functions. */ static void install_ap_tramp(void); static int start_all_aps(void); static int start_ap(int apic_id); static char *ap_copyout_buf; static char *ap_tramp_stack_base; /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); + + acpi_pxm_set_cpu_locality(); } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct i386tss *common_tssp; struct region_descriptor r_gdt, r_idt; int gsel_tss, myid, x; u_int cr0; /* bootAP is set in start_ap() to our ID. */ myid = bootAP; /* Update microcode before doing anything else. */ ucode_load_ap(myid); /* Get per-cpu data */ pc = &__pcpu[myid]; /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid]; fix_cpuid(); gdt_segs[GPRIV_SEL].ssd_base = (int)pc; gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp; gdt_segs[GLDT_SEL].ssd_base = (int)ldt; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1; r_idt.rd_base = (int)idt; lidt(&r_idt); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ - VM86_STACK_SPACE); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; common_tssp->tss_esp0 = PCPU_GET(trampstk); common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); common_tssp->tss_ioopt = sizeof(struct i386tss) << 16; PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); PCPU_SET(copyout_buf, ap_copyout_buf); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); CHECK_WRITE(0x38, 5); /* signal our startup to the BSP. */ mp_naps++; CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (atomic_load_acq_int(&aps_ready) == 0) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif init_secondary_tail(); } /* * start each AP in our list */ #define TMPMAP_START 1 static int start_all_aps(void) { u_char mpbiosreason; u_int32_t mpbioswarmvec; int apic_id, cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* Remap lowest 1MB */ IdlePTD[0] = IdlePTD[1]; load_cr3(rcr3()); /* invalidate TLB */ /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* take advantage of the P==V mapping for PTD[0] for AP boot */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ bootstacks[cpu] = (char *)kmem_malloc(kstack_pages * PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 4; bootAP = cpu; ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT); ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT); /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CHECK_PRINT("trace"); /* show checkpoints */ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* Unmap lowest 1MB again */ IdlePTD[0] = 0; load_cr3(rcr3()); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * load the 1st level AP boot code into base memory. */ /* targets for relocation */ extern void bigJump(void); extern void bootCodeSeg(void); extern void bootDataSeg(void); extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; static void install_ap_tramp(void) { int x; int size = *(int *) ((u_long) & bootMP_size); vm_offset_t va = boot_address; u_char *src = (u_char *) ((u_long) bootMP); u_char *dst = (u_char *) va; u_int boot_base = (u_int) bootMP; u_int8_t *dst8; u_int16_t *dst16; u_int32_t *dst32; KASSERT (size <= PAGE_SIZE, ("'size' do not fit into PAGE_SIZE, as expected.")); pmap_kenter(va, boot_address); pmap_invalidate_page (kernel_pmap, va); for (x = 0; x < size; ++x) *dst++ = *src++; /* * modify addresses in code we just moved to basemem. unfortunately we * need fairly detailed info about mpboot.s for this to work. changes * to mpboot.s might require changes here. */ /* boot code is located in KERNEL space */ dst = (u_char *) va; /* modify the lgdt arg */ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); /* modify the ljmp target for MPentry() */ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); *dst32 = (u_int)MPentry; /* modify the target for boot code segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; /* modify the target for boot data segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } Index: stable/12/sys/x86/acpica/srat.c =================================================================== --- stable/12/sys/x86/acpica/srat.c (revision 361333) +++ stable/12/sys/x86/acpica/srat.c (revision 361334) @@ -1,68 +1,59 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #if MAXMEMDOM > 1 static void parse_acpi_tables(void *dummy) { acpi_pxm_init(max_apic_id + 1, cpu_getmaxphyaddr()); acpi_pxm_parse_tables(); acpi_pxm_set_mem_locality(); } SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables, NULL); -static void -srat_set_cpus(void *dummy) -{ - - acpi_pxm_set_cpu_locality(); - acpi_pxm_free(); -} -SYSINIT(srat_set_cpus, SI_SUB_CPU, SI_ORDER_ANY, srat_set_cpus, NULL); - #endif /* MAXMEMDOM > 1 */ Index: stable/12 =================================================================== --- stable/12 (revision 361333) +++ stable/12 (revision 361334) Property changes on: stable/12 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r361033