diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -255,6 +255,8 @@
 {
	uint64_t msr;
	uint32_t cr4;
+	u_int r[4];
+	int use_invlpg_pmap;
 
	cr4 = rcr4();
	if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
@@ -317,6 +319,21 @@
	if ((amd_feature & AMDID_RDTSCP) != 0 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
		wrmsr(MSR_TSC_AUX, cpu_auxmsr());
+
+	if (cpu_high >= 0x1a) {
+		cpuid_count(0x1a, 0, r);
+		if ((r[0] & CPUID_HYBRID_CORE_MASK) ==
+		    CPUID_HYBRID_SMALL_CORE) {
+			PCPU_SET(small, 1);
+			if (pmap_pcid_enabled) {
+				use_invlpg_pmap = 1;
+				TUNABLE_INT_FETCH("vm.pmap.pcid_invlpg_bug",
+				    &use_invlpg_pmap);
+				PCPU_SET(invlpg_pmap, use_invlpg_pmap);
+				pmap_pcid_invlpg_bug = use_invlpg_pmap;
+			}
+		}
+	}
 }
 
 void
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -861,7 +861,7 @@
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	invlpg(smp_tlb_addr1);
+	pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    smp_tlb_pmap->pm_ucr3 != PMAP_NO_CR3 &&
	    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -886,7 +886,7 @@
	(*ipi_invlpg_counts[PCPU_GET(cpuid)])++;
 #endif /* COUNT_IPIS */
 
-	invlpg(smp_tlb_addr1);
+	pmap_invlpg(smp_tlb_pmap, smp_tlb_addr1);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
	    (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 &&
	    PCPU_GET(ucr3_load_mask) == PMAP_UCR3_NOMASK) {
@@ -932,7 +932,7 @@
 
	addr = smp_tlb_addr1;
	do {
-		invlpg(addr);
+		pmap_invlpg(smp_tlb_pmap, addr);
		addr += PAGE_SIZE;
	} while (addr < smp_tlb_addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
@@ -966,7 +966,7 @@
 
	addr = smp_tlb_addr1;
	do {
-		invlpg(addr);
+		pmap_invlpg(smp_tlb_pmap, addr);
		addr += PAGE_SIZE;
	} while (addr < smp_tlb_addr2);
	if (smp_tlb_pmap == PCPU_GET(curpmap) &&
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -529,6 +529,10 @@
 int invpcid_works = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
     "Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_bug = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_bug, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+    &pmap_pcid_invlpg_bug, 0,
+    "Enable the INVLPG-with-PCID workaround for hybrid small cores");
 
 int __read_frequently pti = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
@@ -2797,7 +2801,7 @@
	if ((newpde & PG_PS) == 0)
		/* Demotion: flush a specific 2MB page mapping. */
-		invlpg(va);
+		pmap_invlpg(pmap, va);
	else if ((newpde & PG_G) == 0)
		/*
		 * Promotion: flush every 4KB page mapping from the TLB
		 * because there are too many to flush individually.
@@ -3136,7 +3140,7 @@
     vm_offset_t addr2 __unused)
 {
	if (pmap == kernel_pmap) {
-		invlpg(va);
+		pmap_invlpg(kernel_pmap, va);
	} else if (pmap == PCPU_GET(curpmap)) {
		invlpg(va);
		pmap_invalidate_page_cb(pmap, va);
@@ -3228,7 +3232,7 @@
 
	if (pmap == kernel_pmap) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
+			pmap_invlpg(kernel_pmap, addr);
	} else if (pmap == PCPU_GET(curpmap)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
@@ -7674,7 +7678,7 @@
 
	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
-	invlpg(va);
+	pmap_invlpg(kernel_pmap, va);
	return ((void *)crashdumpmap);
 }
 
@@ -10377,7 +10381,7 @@
			    page[i]->md.pat_mode, 0);
			pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
			    cache_bits);
-			invlpg(vaddr[i]);
+			pmap_invlpg(kernel_pmap, vaddr[i]);
		}
	}
 }
@@ -10426,7 +10430,7 @@
 
	if (addr != qframe)
		return;
	pte_store(vtopte(qframe), 0);
-	invlpg(qframe);
+	pmap_invlpg(kernel_pmap, qframe);
	mtx_unlock_spin(&qframe_mtx);
 }
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -99,7 +99,9 @@
	uint32_t pc_smp_tlb_gen;				\
	u_int	pc_smp_tlb_op;					\
	uint64_t pc_ucr3_load_mask;				\
-	char	__pad[2916]	/* pad to UMA_PCPU_ALLOC_SIZE */
+	u_int	pc_small;					\
+	u_int	pc_invlpg_pmap;					\
+	char	__pad[2908]	/* pad to UMA_PCPU_ALLOC_SIZE */
 
 #define	PC_DBREG_CMD_NONE	0
 #define	PC_DBREG_CMD_LOAD	1
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -431,6 +431,7 @@
 extern vm_paddr_t dmaplimit;
 extern int pmap_pcid_enabled;
 extern int invpcid_works;
+extern int pmap_pcid_invlpg_bug;
 
 #define	pmap_page_get_memattr(m)	((vm_memattr_t)(m)->md.pat_mode)
 #define	pmap_page_is_write_mapped(m)	(((m)->a.flags & PGA_WRITEABLE) != 0)
@@ -514,6 +515,24 @@
	return (&pmap->pm_active);
 }
 
+/*
+ * It seems that AlderLake+ small cores have some microarchitectural
+ * bug that can prevent the INVLPG instruction from flushing all
+ * global TLB entries when PCID is enabled.  Work around it for now
+ * by doing a global invalidation on small cores instead of INVLPG.
+ */
+static __inline void
+pmap_invlpg(pmap_t pmap, vm_offset_t va)
+{
+	if (pmap == kernel_pmap && PCPU_GET(invlpg_pmap)) {
+		struct invpcid_descr d = { 0 };
+
+		invpcid(&d, INVPCID_CTXGLOB);
+	} else {
+		invlpg(va);
+	}
+}
+
 #endif /* _KERNEL */
 
 /* Return various clipped indexes for a given VA */
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -490,6 +490,12 @@
 #define	CPUID_STDEXT3_CORE_CAP	0x40000000
 #define	CPUID_STDEXT3_SSBD	0x80000000
 
+/* CPUID leaf 0x1a: hybrid core identification */
+#define	CPUID_HYBRID_ID		0x1a
+#define	CPUID_HYBRID_CORE_MASK	0xff000000
+#define	CPUID_HYBRID_SMALL_CORE	0x20000000
+#define	CPUID_HYBRID_LARGE_CORE	0x40000000
+
 /* MSR IA32_ARCH_CAP(ABILITIES) bits */
 #define	IA32_ARCH_CAP_RDCL_NO	0x00000001
 #define	IA32_ARCH_CAP_IBRS_ALL	0x00000002
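
For context, the core-type check the patch adds to initializecpu() amounts to reading %eax of CPUID
leaf 0x1a and comparing its top byte against CPUID_HYBRID_SMALL_CORE. Below is a minimal, hypothetical
userland sketch of the same probe; it is not part of the patch, and it uses the __get_cpuid_count()
wrapper from <cpuid.h> (GCC/Clang) in place of the kernel's cpuid_count().

/*
 * Illustrative only: report whether the core this program runs on
 * identifies itself as a hybrid small (E) core via CPUID leaf 0x1a,
 * using the same mask and values the kernel check relies on.
 */
#include <cpuid.h>
#include <stdio.h>

#define	CPUID_HYBRID_CORE_MASK	0xff000000
#define	CPUID_HYBRID_SMALL_CORE	0x20000000
#define	CPUID_HYBRID_LARGE_CORE	0x40000000

int
main(void)
{
	unsigned int eax, ebx, ecx, edx, type;

	/* __get_cpuid_count() returns 0 if the CPU does not report leaf 0x1a. */
	if (__get_cpuid_count(0x1a, 0, &eax, &ebx, &ecx, &edx) == 0) {
		printf("CPUID leaf 0x1a not supported (not a hybrid part?)\n");
		return (0);
	}
	type = eax & CPUID_HYBRID_CORE_MASK;
	if (type == CPUID_HYBRID_SMALL_CORE)
		printf("this core reports itself as a small (E) core\n");
	else if (type == CPUID_HYBRID_LARGE_CORE)
		printf("this core reports itself as a large (P) core\n");
	else
		printf("unrecognized core type: %#x\n", type);
	return (0);
}

On a hybrid part the answer depends on which core the thread happens to be scheduled on (pin it,
e.g. with cpuset(1), to probe a specific core), which is why the patch records the result per CPU
(pc_small, pc_invlpg_pmap) as each processor runs initializecpu().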