diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1336,6 +1336,15 @@
 		pmap_pcid_enabled = 0;
 	}
 
+	TUNABLE_INT_FETCH("vm.pmap.invlpgb_works", &invlpgb_works);
+	if ((amd_extended_feature_extensions & AMDFEID_INVLPGB) != 0 &&
+	    invlpgb_works) {
+		invlpgb_works = 1;
+		invlpgb_maxcnt = cpu_procinfo3 & AMDID_INVLPGB_MAXCNT;
+	} else {
+		invlpgb_works = 0;
+	}
+
 	/*
 	 * Now we can do small core initialization, after the PCID
 	 * CPU features and user knobs are evaluated.
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -694,6 +694,12 @@
 void
 smp_masked_invltlb(pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
+	if (invlpgb_works && pmap == kernel_pmap) {
+		invlpgb(INVLPGB_GLOB, 0, 0);
+		tlbsync();
+		return;
+	}
+
 	smp_targeted_tlb_shootdown(pmap, 0, 0, curcpu_cb, invl_op_tlb);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_global++;
@@ -703,6 +709,12 @@
 void
 smp_masked_invlpg(vm_offset_t addr, pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
+	if (invlpgb_works && pmap == kernel_pmap) {
+		invlpgb(INVLPGB_GLOB | INVLPGB_VA | addr, 0, 0);
+		tlbsync();
+		return;
+	}
+
 	smp_targeted_tlb_shootdown(pmap, addr, 0, curcpu_cb, invl_op_pg);
 #ifdef COUNT_XINVLTLB_HITS
 	ipi_page++;
@@ -713,6 +725,38 @@
 smp_masked_invlpg_range(vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap,
     smp_invl_cb_t curcpu_cb)
 {
+	if (invlpgb_works && pmap == kernel_pmap) {
+		vm_offset_t va;
+		uint64_t cnt, total;
+
+		total = atop(addr2 - addr1);
+		for (va = addr1; total > 0;) {
+			if ((va & PDRMASK) != 0 || total < NPDEPG) {
+				/* 4KB pages, up to the next 2MB boundary. */
+				cnt = atop(NBPDR - (va & PDRMASK));
+				if (cnt > total)
+					cnt = total;
+				if (cnt > invlpgb_maxcnt + 1)
+					cnt = invlpgb_maxcnt + 1;
+				invlpgb(INVLPGB_GLOB | INVLPGB_VA | va, 0,
+				    cnt - 1);
+				va += ptoa(cnt);
+				total -= cnt;
+			} else {
+				/* A 2MB-aligned run, use 2MB strides. */
+				cnt = total / NPDEPG;
+				if (cnt > invlpgb_maxcnt + 1)
+					cnt = invlpgb_maxcnt + 1;
+				invlpgb(INVLPGB_GLOB | INVLPGB_VA | va, 0,
+				    INVLPGB_2M_CNT | (cnt - 1));
+				va += cnt << PDRSHIFT;
+				total -= cnt << NPDEPGSHIFT;
+			}
+		}
+		tlbsync();
+		return;
+	}
+
 	smp_targeted_tlb_shootdown(pmap, addr1, addr2, curcpu_cb,
 	    invl_op_pgrng);
 #ifdef COUNT_XINVLTLB_HITS
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -550,6 +550,10 @@
 int invpcid_works = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
     "Is the invpcid instruction available ?");
+int invlpgb_works = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, invlpgb_works, CTLFLAG_RD, &invlpgb_works, 0,
+    "Is the invlpgb instruction available ?");
+int invlpgb_maxcnt;
 int pmap_pcid_invlpg_workaround = 0;
 SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -529,6 +529,31 @@
 	    : : "r" (d), "r" ((u_long)type) : "memory");
 }
 
+#define	INVLPGB_VA		0x0001
+#define	INVLPGB_PCID		0x0002
+#define	INVLPGB_ASID		0x0004
+#define	INVLPGB_GLOB		0x0008
+#define	INVLPGB_FIN		0x0010
+#define	INVLPGB_NEST		0x0020
+
+#define	INVLPGB_DESCR(asid, pcid)	(((pcid) << 16) | (asid))
+
+#define	INVLPGB_2M_CNT		(1u << 31)	/* ECX: 2MB, not 4KB, stride */
+
+/* Broadcast a TLB invalidation to all processors in the system. */
+static __inline void
+invlpgb(uint64_t rax, uint32_t edx, uint32_t ecx)
+{
+	__asm __volatile("invlpgb" : : "a" (rax), "d" (edx), "c" (ecx));
+}
+
+/* Wait until broadcast invalidations issued by this CPU have completed. */
+static __inline void
+tlbsync(void)
+{
+	__asm __volatile("tlbsync");
+}
+
 static __inline u_short
 rfs(void)
 {
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -424,6 +424,8 @@
 extern vm_paddr_t dmaplimit;
 extern int pmap_pcid_enabled;
 extern int invpcid_works;
+extern int invlpgb_works;
+extern int invlpgb_maxcnt;
 extern int pmap_pcid_invlpg_workaround;
 extern int pmap_pcid_invlpg_workaround_uena;
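
A note on the smp_masked_invlpg_range() fast path: the loop walks [addr1,
addr2) in two gears, 4KB invalidations up to the next 2MB boundary, then
2MB strides while at least a full superpage (NPDEPG base pages) remains.
Since the ECX count field of INVLPGB encodes the page count minus one, a
single call can cover up to invlpgb_maxcnt + 1 pages, which is what the
clamps above assume.  Below is a minimal userspace sketch of the same
arithmetic, not part of the patch: the amd64 constants are restated
locally and invlpgb() is replaced by printf(), so the chunking can be
checked by eye to tile the range exactly.

/*
 * Userspace sketch mirroring the smp_masked_invlpg_range() chunking
 * loop above.  Not kernel code: constants are restated with their
 * amd64 values, and "maxcnt" stands in for invlpgb_maxcnt (the
 * CPUID-reported maximum value of the ECX count field).
 */
#include <stdint.h>
#include <stdio.h>

#define	PAGE_SHIFT	12
#define	PAGE_SIZE	(1UL << PAGE_SHIFT)	/* 4KB base pages */
#define	PDRSHIFT	21
#define	NBPDR		(1UL << PDRSHIFT)	/* 2MB superpages */
#define	PDRMASK		(NBPDR - 1)
#define	NPDEPG		512
#define	NPDEPGSHIFT	9

#define	atop(x)		((x) >> PAGE_SHIFT)
#define	ptoa(x)		((uint64_t)(x) << PAGE_SHIFT)

static void
chunk(uint64_t addr1, uint64_t addr2, uint64_t maxcnt)
{
	uint64_t va, cnt, total;

	total = atop(addr2 - addr1);
	for (va = addr1; total > 0;) {
		if ((va & PDRMASK) != 0 || total < NPDEPG) {
			/* 4KB pages, up to the next 2MB boundary. */
			cnt = atop(NBPDR - (va & PDRMASK));
			if (cnt > total)
				cnt = total;
			if (cnt > maxcnt + 1)
				cnt = maxcnt + 1;
			printf("4K va %#10jx cnt %4ju\n",
			    (uintmax_t)va, (uintmax_t)cnt);
			va += ptoa(cnt);
			total -= cnt;
		} else {
			/* A 2MB-aligned run, use 2MB strides. */
			cnt = total / NPDEPG;
			if (cnt > maxcnt + 1)
				cnt = maxcnt + 1;
			printf("2M va %#10jx cnt %4ju\n",
			    (uintmax_t)va, (uintmax_t)cnt);
			va += cnt << PDRSHIFT;
			total -= cnt << NPDEPGSHIFT;
		}
	}
}

int
main(void)
{
	/* 4KB head below a 2MB boundary, 5 superpages, 3-page tail. */
	chunk(0x1ff000, 0x1ff000 + 5 * NBPDR + 3 * PAGE_SIZE, 3);
	return (0);
}

With this example range and maxcnt 3, it emits one 4KB call, two clamped
2MB calls, and a final 4KB call, ending exactly at addr2 with no overlap.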
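
For completeness, the new cpufunc.h primitives are not limited to the
global/kernel case the patch exercises.  A hypothetical sketch of a
PCID-tagged, single-page broadcast invalidation, composed purely from
the macros the patch adds (field layout per INVLPGB_DESCR(): ASID in
the low 16 bits of EDX, PCID above it); this helper is an illustration
and does not appear in the diff:

/*
 * Illustration only, not from the patch: flush one 4KB page tagged
 * with a given PCID on all processors.
 */
static __inline void
invlpgb_pcid_page(uint64_t va, uint16_t pcid)
{
	/*
	 * rax: match on both VA and PCID; the page-aligned address
	 * rides in the upper bits of rax.
	 * edx: ASID 0 (no SVM guest), PCID per INVLPGB_DESCR().
	 * ecx: 0, i.e. a single page at 4KB stride.
	 */
	invlpgb(INVLPGB_VA | INVLPGB_PCID | (va & ~(uint64_t)0xfff),
	    INVLPGB_DESCR(0, pcid), 0);
}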