diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -255,6 +255,8 @@
 {
	uint64_t msr;
	uint32_t cr4;
+	u_int r[4];
+	int use_invlpg_pmap;
 
	cr4 = rcr4();
	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
@@ -317,6 +319,21 @@
	if ((amd_feature & AMDID_RDTSCP) != 0 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
		wrmsr(MSR_TSC_AUX, cpu_auxmsr());
+
+	if (cpu_high >= 0x1a) {
+		cpuid_count(0x1a, 0, r);
+		if ((r[0] & CPUID_HYBRID_CORE_MASK) ==
+		    CPUID_HYBRID_SMALL_CORE) {
+			PCPU_SET(small, 1);
+			if (pmap_pcid_enabled) {
+				use_invlpg_pmap = 1;
+				TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_bug",
+				    &use_invlpg_pmap);
+				PCPU_SET(invlpg_pmap, use_invlpg_pmap);
+				pmap_pcid_invlpg_bug = use_invlpg_pmap;
+			}
+		}
+	}
 }
 
 void
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -861,7 +861,7 @@
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	invlpg(smp_tlb_addr1);
+	pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
	    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -886,7 +886,7 @@
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	invlpg(smp_tlb_addr1);
+	pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
	    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -932,7 +932,7 @@
 
	addr = smp_tlb_addr1;
	do {
-		invlpg(addr);
+		pmap_invlpg(smp_tlb_pmap, addr);
		addr += PAGE_SIZE;
	} while (addr < smp_tlb_addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
@@ -966,7 +966,7 @@
 
	addr = smp_tlb_addr1;
	do {
-		invlpg(addr);
+		pmap_invlpg(smp_tlb_pmap, addr);
		addr += PAGE_SIZE;
	} while (addr < smp_tlb_addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -529,6 +529,10 @@
 int invpcid_works = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
     "Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_bug = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_bug, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+    &pmap_pcid_invlpg_bug, 0,
+    "Enable the INVLPG-with-PCID workaround for hybrid small cores");
 
 int __read_frequently pti = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
@@ -2797,7 +2801,7 @@
	if ((newpde & PG_PS) == 0)
		/* Demotion: flush a specific 2MB page mapping. */
-		invlpg(va);
+		pmap_invlpg(pmap, va);
	else if ((newpde & PG_G) == 0)
		/*
		 * Promotion: flush every 4KB page mapping from the TLB
		 * because there are too many to flush individually.
@@ -3136,7 +3140,7 @@
     vm_offset_t addr2 __unused)
 {
	if (pmap == kernel_pmap) {
-		invlpg(va);
+		pmap_invlpg(kernel_pmap, va);
	} else if (pmap == PCPU_GET(curpmap)) {
		invlpg(va);
		pmap_invalidate_page_cb(pmap, va);
@@ -3228,7 +3232,7 @@
 
	if (pmap == kernel_pmap) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
+			pmap_invlpg(kernel_pmap, addr);
	} else if (pmap == PCPU_GET(curpmap)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
@@ -7674,7 +7678,7 @@
 
	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
-	invlpg(va);
+	pmap_invlpg(kernel_pmap, va);
	return ((void *)crashdumpmap);
 }
 
@@ -10377,7 +10381,7 @@
			    page[i]->md.pat_mode, 0);
			pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
			    cache_bits);
-			invlpg(vaddr[i]);
+			pmap_invlpg(kernel_pmap, vaddr[i]);
		}
	}
 }
@@ -10426,7 +10430,7 @@
 
	if (addr != qframe)
		return;
	pte_store(vtopte(qframe), 0);
-	invlpg(qframe);
+	pmap_invlpg(kernel_pmap, qframe);
	mtx_unlock_spin(&qframe_mtx);
 }
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -99,7 +99,9 @@
	uint32_t pc_smp_tlb_gen;				\
	u_int	pc_smp_tlb_op;					\
	uint64_t pc_ucr3_load_mask;				\
-	char	__pad[2916]	/* pad to UMA_PCPU_ALLOC_SIZE */
+	u_int	pc_small;					\
+	u_int	pc_invlpg_pmap;					\
+	char	__pad[2908]	/* pad to UMA_PCPU_ALLOC_SIZE */
 
 #define	PC_DBREG_CMD_NONE	0
 #define	PC_DBREG_CMD_LOAD	1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -431,6 +431,7 @@
 extern vm_paddr_t dmaplimit;
 extern int pmap_pcid_enabled;
 extern int invpcid_works;
+extern int pmap_pcid_invlpg_bug;
 
 #define	pmap_page_get_memattr(m)	((vm_memattr_t)(m)->md.pat_mode)
 #define	pmap_page_is_write_mapped(m)	(((m)->a.flags & PGA_WRITEABLE) != 0)
@@ -514,6 +515,24 @@
	return (&pmap->pm_active);
 }
 
+/*
+ * It seems that AlderLake+ small cores have some microarchitectural
+ * bug that can prevent the INVLPG instruction from flushing all
+ * global TLB entries when PCID is enabled.  Work around it for now
+ * by doing a global invalidation on small cores instead of INVLPG.
+ */
+static __inline void
+pmap_invlpg(pmap_t pmap, vm_offset_t va)
+{
+	if (pmap == kernel_pmap && PCPU_GET(invlpg_pmap)) {
+		struct invpcid_descr d = { 0 };
+
+		invpcid(&d, INVPCID_CTXGLOB);
+	} else {
+		invlpg(va);
+	}
+}
+
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -490,6 +490,12 @@
 #define	CPUID_STDEXT3_CORE_CAP	0x40000000
 #define	CPUID_STDEXT3_SSBD	0x80000000
 
+/* CPUID leaf 0x1a: hybrid core identification */
+#define	CPUID_HYBRID_ID		0x1a
+#define	CPUID_HYBRID_CORE_MASK	0xff000000
+#define	CPUID_HYBRID_SMALL_CORE	0x20000000
+#define	CPUID_HYBRID_LARGE_CORE	0x40000000
+
 /* MSR IA32_ARCH_CAP(ABILITIES) bits */
 #define	IA32_ARCH_CAP_RDCL_NO	0x00000001
 #define	IA32_ARCH_CAP_IBRS_ALL	0x00000002
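
For context, the core-type check the patch adds to initializecpu() amounts to reading %eax of CPUID
leaf 0x1a and comparing its top byte against CPUID_HYBRID_SMALL_CORE. Below is a minimal, hypothetical
userland sketch of the same probe; it is not part of the patch, and it uses the __get_cpuid_count()
wrapper from <cpuid.h> (GCC/Clang) in place of the kernel's cpuid_count().

/*
 * Illustrative only: report whether the core this program runs on
 * identifies itself as a hybrid small (E) core via CPUID leaf 0x1a,
 * using the same mask and values the kernel check relies on.
 */
#include <cpuid.h>
#include <stdio.h>

#define	CPUID_HYBRID_CORE_MASK	0xff000000
#define	CPUID_HYBRID_SMALL_CORE	0x20000000
#define	CPUID_HYBRID_LARGE_CORE	0x40000000

int
main(void)
{
	unsigned int eax, ebx, ecx, edx, type;

	/* __get_cpuid_count() returns 0 if the CPU does not report leaf 0x1a. */
	if (__get_cpuid_count(0x1a, 0, &eax, &ebx, &ecx, &edx) == 0) {
		printf("CPUID leaf 0x1a not supported (not a hybrid part?)\n");
		return (0);
	}
	type = eax & CPUID_HYBRID_CORE_MASK;
	if (type == CPUID_HYBRID_SMALL_CORE)
		printf("this core reports itself as a small (E) core\n");
	else if (type == CPUID_HYBRID_LARGE_CORE)
		printf("this core reports itself as a large (P) core\n");
	else
		printf("unrecognized core type: %#x\n", type);
	return (0);
}

On a hybrid part the answer depends on which core the thread happens to be scheduled on (pin it,
e.g. with cpuset(1), to probe a specific core), which is why the patch records the result per CPU
(pc_small, pc_invlpg_pmap) as each processor runs initializecpu().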