Page MenuHomeFreeBSD

D37770.id114381.diff
No One · Temporary

D37770.id114381.diff

diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -255,6 +255,8 @@
{
uint64_t msr;
uint32_t cr4;
+ u_int r[4];
+ int use_invlpg_pmap;
cr4 = rcr4();
if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
@@ -317,6 +319,21 @@
if ((amd_feature & AMDID_RDTSCP) != 0 ||
(cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
wrmsr(MSR_TSC_AUX, cpu_auxmsr());
+
+ if (cpu_high >= 0x1a) {
+ cpuid_count(0x1a, 0, r);
+ if ((r[0] & CPUID_HYBRID_CORE_MASK) ==
+ CPUID_HYBRID_SMALL_CORE) {
+ PCPU_SET(small_core, 1);
+ if (pmap_pcid_enabled) {
+ use_invlpg_pmap = 1;
+ TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_bug",
+ &use_invlpg_pmap);
+ PCPU_SET(pcid_invlpg_bug, use_invlpg_pmap);
+ pmap_pcid_invlpg_workaround = use_invlpg_pmap;
+ }
+ }
+ }
}
void
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -861,7 +861,7 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- invlpg(smp_tlb_addr1);
+ pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -886,7 +886,7 @@
(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- invlpg(smp_tlb_addr1);
+ pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
(ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -931,10 +931,16 @@
#endif /* COUNT_IPIS */
addr = smp_tlb_addr1;
- do {
- invlpg(addr);
- addr += PAGE_SIZE;
- } while (addr < smp_tlb_addr2);
+ if (smp_tlb_pmap == kernel_pmap && PCPU_GET(pcid_invlpg_bug)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ do {
+ invlpg(addr);
+ addr += PAGE_SIZE;
+ } while (addr < smp_tlb_addr2);
+ }
if (smp_tlb_pmap == PCPU_GET(curpmap) &&
smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -529,6 +529,10 @@
int invpcid_works = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
"Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_workaround = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pmap_pcid_invlpg_workaround, 0,
+ "Enable small core PCID/INVLPG workaround");
int __read_frequently pti = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
@@ -2797,7 +2801,7 @@
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
- invlpg(va);
+ pmap_invlpg(pmap, va);
else if ((newpde & PG_G) == 0)
/*
* Promotion: flush every 4KB page mapping from the TLB
@@ -3136,7 +3140,7 @@
vm_offset_t addr2 __unused)
{
if (pmap == kernel_pmap) {
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
} else if (pmap == PCPU_GET(curpmap)) {
invlpg(va);
pmap_invalidate_page_cb(pmap, va);
@@ -3227,8 +3231,14 @@
vm_offset_t addr;
if (pmap == kernel_pmap) {
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
+ if (PCPU_GET(pcid_invlpg_bug)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ pmap_invlpg(kernel_pmap, addr);
+ }
} else if (pmap == PCPU_GET(curpmap)) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
@@ -7674,7 +7684,7 @@
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
pmap_kenter(va, pa);
- invlpg(va);
+ pmap_invlpg(kernel_pmap, va);
return ((void *)crashdumpmap);
}
@@ -10377,7 +10387,7 @@
page[i]->md.pat_mode, 0);
pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
cache_bits);
- invlpg(vaddr[i]);
+ pmap_invlpg(kernel_pmap, vaddr[i]);
}
}
}
@@ -10426,7 +10436,13 @@
if (addr != qframe)
return;
pte_store(vtopte(qframe), 0);
+
+ /*
+ * pmap_quick_enter_page() doesn't set PG_G, so we can use
+ * INVLPG there.
+ */
invlpg(qframe);
+
mtx_unlock_spin(&qframe_mtx);
}
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -99,7 +99,9 @@
uint32_t pc_smp_tlb_gen; \
u_int pc_smp_tlb_op; \
uint64_t pc_ucr3_load_mask; \
- char __pad[2916] /* pad to UMA_PCPU_ALLOC_SIZE */
+ u_int pc_small_core; \
+ u_int pc_pcid_invlpg_bug; \
+ char __pad[2908] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -431,6 +431,7 @@
extern vm_paddr_t dmaplimit;
extern int pmap_pcid_enabled;
extern int invpcid_works;
+extern int pmap_pcid_invlpg_workaround;
#define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode)
#define pmap_page_is_write_mapped(m) (((m)->a.flags & PGA_WRITEABLE) != 0)
@@ -514,6 +515,24 @@
return (&pmap->pm_active);
}
+/*
+ * It seems that AlderLake+ small cores have some microarchitectural
+ * bug, which results in the INVLPG instruction failing to flush all
+ * global TLB entries when PCID is enabled. Work around it for now,
+ * by doing global invalidation on small cores instead of INVLPG.
+ */
+static __inline void
+pmap_invlpg(pmap_t pmap, vm_offset_t va)
+{
+ if (pmap == kernel_pmap && PCPU_GET(pcid_invlpg_bug)) {
+ struct invpcid_descr d = { 0 };
+
+ invpcid(&d, INVPCID_CTXGLOB);
+ } else {
+ invlpg(va);
+ }
+}
+
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -490,6 +490,12 @@
#define CPUID_STDEXT3_CORE_CAP 0x40000000
#define CPUID_STDEXT3_SSBD 0x80000000
+/* CPUID leaf 0x1a -- Hybrid Information Enumeration (EAX) */
+/* CPUID_HYBRID_ID: core-type field reported by leaf 0x1a */
+#define CPUID_HYBRID_CORE_MASK 0xff000000
+#define CPUID_HYBRID_SMALL_CORE 0x20000000
+#define CPUID_HYBRID_LARGE_CORE 0x40000000
+
/* MSR IA32_ARCH_CAP(ABILITIES) bits */
#define IA32_ARCH_CAP_RDCL_NO 0x00000001
#define IA32_ARCH_CAP_IBRS_ALL 0x00000002

File Metadata

Mime Type
text/plain
Expires
Wed, Nov 26, 9:00 PM (11 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26226237
Default Alt Text
D37770.id114381.diff (6 KB)

Event Timeline