Index: head/sys/powerpc/aim/mmu_oea64.h =================================================================== --- head/sys/powerpc/aim/mmu_oea64.h +++ head/sys/powerpc/aim/mmu_oea64.h @@ -82,12 +82,18 @@ int64_t moea64_pte_unset(struct pvo_entry *); int64_t moea64_pte_clear(struct pvo_entry *, uint64_t); int64_t moea64_pte_synch(struct pvo_entry *); +int64_t moea64_pte_insert_sp(struct pvo_entry *); +int64_t moea64_pte_unset_sp(struct pvo_entry *); +int64_t moea64_pte_replace_sp(struct pvo_entry *); typedef int64_t (*moea64_pte_replace_t)(struct pvo_entry *, int); typedef int64_t (*moea64_pte_insert_t)(struct pvo_entry *); typedef int64_t (*moea64_pte_unset_t)(struct pvo_entry *); typedef int64_t (*moea64_pte_clear_t)(struct pvo_entry *, uint64_t); typedef int64_t (*moea64_pte_synch_t)(struct pvo_entry *); +typedef int64_t (*moea64_pte_insert_sp_t)(struct pvo_entry *); +typedef int64_t (*moea64_pte_unset_sp_t)(struct pvo_entry *); +typedef int64_t (*moea64_pte_replace_sp_t)(struct pvo_entry *); struct moea64_funcs { moea64_pte_replace_t pte_replace; @@ -95,6 +101,9 @@ moea64_pte_unset_t pte_unset; moea64_pte_clear_t pte_clear; moea64_pte_synch_t pte_synch; + moea64_pte_insert_sp_t pte_insert_sp; + moea64_pte_unset_sp_t pte_unset_sp; + moea64_pte_replace_sp_t pte_replace_sp; }; extern struct moea64_funcs *moea64_ops; @@ -128,5 +137,6 @@ extern u_long moea64_pteg_count; extern u_long moea64_pteg_mask; extern int n_slbs; +extern bool moea64_has_lp_4k_16m; #endif /* _POWERPC_AIM_MMU_OEA64_H */ Index: head/sys/powerpc/aim/mmu_oea64.c =================================================================== --- head/sys/powerpc/aim/mmu_oea64.c +++ head/sys/powerpc/aim/mmu_oea64.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include @@ -111,9 +112,6 @@ #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL -/* Get physical address from PVO. */ -#define PVO_PADDR(pvo) ((pvo)->pvo_pte.pa & LPTE_RPGN) - /* * Locking semantics: * @@ -146,6 +144,48 @@ #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m)) #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m)) +/* Superpage PV lock */ + +#define PV_LOCK_SIZE (1<pvo_vaddr & PVO_LARGE) && \ + (pvo)->pvo_pmap != kernel_pmap) + +/* Get physical address from PVO. */ +#define PVO_PADDR(pvo) moea64_pvo_paddr(pvo) + +/* MD page flag indicating that the page is a superpage. 
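+ *
+ * It is set on the first (lowest-address) vm_page of a superpage, where it
+ * marks the cached LPTE_REF/LPTE_CHG state of the whole 16MB mapping, so
+ * that moea64_query_bit() can answer REF/CHG queries without walking every
+ * constituent 4KB mapping (see moea64_sp_query_locked()).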
*/ +#define MDPG_ATTR_SP 0x40000000 + +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, + "VM/pmap parameters"); + +static int superpages_enabled = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN, + &superpages_enabled, 0, "Enable support for transparent superpages"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, sp, CTLFLAG_RD, 0, + "SP page mapping counters"); + +static u_long sp_demotions; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, demotions, CTLFLAG_RD, + &sp_demotions, 0, "SP page demotions"); + +static u_long sp_mappings; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, mappings, CTLFLAG_RD, + &sp_mappings, 0, "SP page mappings"); + +static u_long sp_p_failures; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_failures, CTLFLAG_RD, + &sp_p_failures, 0, "SP page promotion failures"); + +static u_long sp_p_fail_pa; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_pa, CTLFLAG_RD, + &sp_p_fail_pa, 0, "SP page promotion failure: PAs don't match"); + +static u_long sp_p_fail_flags; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_flags, CTLFLAG_RD, + &sp_p_fail_flags, 0, "SP page promotion failure: page flags don't match"); + +static u_long sp_p_fail_prot; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_prot, CTLFLAG_RD, + &sp_p_fail_prot, 0, + "SP page promotion failure: page protections don't match"); + +static u_long sp_p_fail_wimg; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_wimg, CTLFLAG_RD, + &sp_p_fail_wimg, 0, "SP page promotion failure: WIMG bits don't match"); + +static u_long sp_promotions; +SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, promotions, CTLFLAG_RD, + &sp_promotions, 0, "SP page promotions"); + +static bool moea64_ps_enabled(pmap_t); +static void moea64_align_superpage(vm_object_t, vm_ooffset_t, + vm_offset_t *, vm_size_t); + +static int moea64_sp_enter(pmap_t pmap, vm_offset_t va, + vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind); +static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp, + struct pvo_dlist *tofree); + +static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m); +static void moea64_sp_demote_aligned(struct pvo_entry *sp); +static void moea64_sp_demote(struct pvo_entry *pvo); + +static struct pvo_entry *moea64_sp_unwire(struct pvo_entry *sp); +static struct pvo_entry *moea64_sp_protect(struct pvo_entry *sp, + vm_prot_t prot); + +static int64_t moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit); +static int64_t moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, + uint64_t ptebit); + +static __inline bool moea64_sp_pvo_in_range(struct pvo_entry *pvo, + vm_offset_t sva, vm_offset_t eva); + +/* * Kernel MMU interface */ void moea64_clear_modify(vm_page_t); @@ -362,6 +492,8 @@ #ifdef __powerpc64__ .page_array_startup = moea64_page_array_startup, #endif + .ps_enabled = moea64_ps_enabled, + .align_superpage = moea64_align_superpage, /* Internal interfaces */ .mapdev = moea64_mapdev, @@ -381,6 +513,26 @@ MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods); +/* + * Get physical address from PVO. + * + * For superpages, the lower bits are not stored on pvo_pte.pa and must be + * obtained from VA. + */ +static __always_inline vm_paddr_t +moea64_pvo_paddr(struct pvo_entry *pvo) +{ + vm_paddr_t pa; + + pa = (pvo)->pvo_pte.pa & LPTE_RPGN; + + if (PVO_IS_SP(pvo)) { + pa &= ~HPT_SP_MASK; /* This is needed to clear LPTE_LP bits. 
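+		 * For a superpage PVO the LPTE_LP encoding occupies low bits
+		 * of pvo_pte.pa, so only the 16MB-aligned part is meaningful
+		 * here; the low 24 bits are taken from the VA below.
+		 * E.g. (illustrative addresses only): a PVO at VA 0x10123000
+		 * backed by a superpage at PA 0x02000000 yields
+		 * 0x02000000 | 0x123000 = 0x02123000.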
*/ + pa |= PVO_VADDR(pvo) & HPT_SP_MASK; + } + return (pa); +} + static struct pvo_head * vm_page_to_pvoh(vm_page_t m) { @@ -428,8 +580,10 @@ pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); - shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift : - ADDR_PIDX_SHFT; + if (pmap == kernel_pmap && (pvo->pvo_vaddr & PVO_LARGE) != 0) + shift = moea64_large_page_shift; + else + shift = ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift); pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3; } @@ -773,6 +927,9 @@ vm_paddr_t kernelphysstart, kernelphysend; int rm_pavail; + /* Level 0 reservations consist of 4096 pages (16MB superpage). */ + vm_level_0_order = 12; + #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; @@ -1204,6 +1361,17 @@ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + if (PVO_IS_SP(pvo)) { + if (moea64_sp_pvo_in_range(pvo, sva, eva)) { + pvo = moea64_sp_unwire(pvo); + continue; + } else { + CTR1(KTR_PMAP, "%s: demote before unwire", + __func__); + moea64_sp_demote(pvo); + } + } + if ((pvo->pvo_vaddr & PVO_WIRED) == 0) panic("moea64_unwire: pvo %p is missing PVO_WIRED", pvo); @@ -1489,10 +1657,11 @@ moea64_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { - struct pvo_entry *pvo, *oldpvo; + struct pvo_entry *pvo, *oldpvo, *tpvo; struct pvo_head *pvo_head; uint64_t pte_lo; int error; + vm_paddr_t pa; if ((m->oflags & VPO_UNMANAGED) == 0) { if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0) @@ -1501,14 +1670,18 @@ VM_OBJECT_ASSERT_LOCKED(m->object); } + if (psind > 0) + return (moea64_sp_enter(pmap, va, m, prot, flags, psind)); + pvo = alloc_pvo_entry(0); if (pvo == NULL) return (KERN_RESOURCE_SHORTAGE); pvo->pvo_pmap = NULL; /* to be filled in later */ pvo->pvo_pte.prot = prot; - pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); - pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo; + pa = VM_PAGE_TO_PHYS(m); + pte_lo = moea64_calc_wimg(pa, pmap_page_get_memattr(m)); + pvo->pvo_pte.pa = pa | pte_lo; if ((flags & PMAP_ENTER_WIRED) != 0) pvo->pvo_vaddr |= PVO_WIRED; @@ -1520,10 +1693,20 @@ pvo->pvo_vaddr |= PVO_MANAGED; } - PV_PAGE_LOCK(m); + PV_LOCK(pa); PMAP_LOCK(pmap); if (pvo->pvo_pmap == NULL) init_pvo_entry(pvo, pmap, va); + + if (moea64_ps_enabled(pmap) && + (tpvo = moea64_pvo_find_va(pmap, va & ~HPT_SP_MASK)) != NULL && + PVO_IS_SP(tpvo)) { + /* Demote SP before entering a regular page */ + CTR2(KTR_PMAP, "%s: demote before enter: va=%#jx", + __func__, (uintmax_t)va); + moea64_sp_demote_aligned(tpvo); + } + if (prot & VM_PROT_WRITE) if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) @@ -1544,9 +1727,10 @@ } /* Then just clean up and go home */ - PV_PAGE_UNLOCK(m); PMAP_UNLOCK(pmap); + PV_UNLOCK(pa); free_pvo_entry(pvo); + pvo = NULL; goto out; } else { /* Otherwise, need to kill it first */ @@ -1557,7 +1741,7 @@ } } PMAP_UNLOCK(pmap); - PV_PAGE_UNLOCK(m); + PV_UNLOCK(pa); /* Free any dead pages */ if (error == EEXIST) { @@ -1573,8 +1757,23 @@ if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); - moea64_syncicache(pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); + moea64_syncicache(pmap, va, pa, PAGE_SIZE); } + + /* + * Try to promote pages. 
+ * + * If the VA of the entered page is not aligned with its PA, + * don't try page promotion as it is not possible. + * This reduces the number of promotion failures dramatically. + */ + if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL && + (pvo->pvo_vaddr & PVO_MANAGED) != 0 && + (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) && + (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) + moea64_sp_promote(pmap, va, m); + return (KERN_SUCCESS); } @@ -1633,15 +1832,25 @@ { vm_page_t m; vm_pindex_t diff, psize; + vm_offset_t va; + int8_t psind; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - moea64_enter(pm, start + ptoa(diff), m, prot & - (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | - PMAP_ENTER_QUICK_LOCKED, 0); + va = start + ptoa(diff); + if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end && + m->psind == 1 && moea64_ps_enabled(pm)) + psind = 1; + else + psind = 0; + moea64_enter(pm, va, m, prot & + (VM_PROT_READ | VM_PROT_EXECUTE), + PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind); + if (psind == 1) + m = &m[HPT_SP_SIZE / PAGE_SIZE - 1]; m = TAILQ_NEXT(m, listq); } } @@ -1755,6 +1964,27 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + /* + * Are large page mappings enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); + if (superpages_enabled) { + KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, + ("moea64_init: can't assign to pagesizes[1]")); + + if (moea64_large_page_size == 0) { + printf("mmu_oea64: HW does not support large pages. " + "Disabling superpages...\n"); + superpages_enabled = 0; + } else if (!moea64_has_lp_4k_16m) { + printf("mmu_oea64: " + "HW does not support mixed 4KB/16MB page sizes. 
" + "Disabling superpages...\n"); + superpages_enabled = 0; + } else + pagesizes[1] = HPT_SP_SIZE; + } + if (!hw_direct_map) { uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc); } @@ -1834,7 +2064,7 @@ vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) - return + return; powerpc_sync(); PV_PAGE_LOCK(m); @@ -1844,6 +2074,11 @@ PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD) && (pvo->pvo_pte.prot & VM_PROT_WRITE)) { + if (PVO_IS_SP(pvo)) { + CTR1(KTR_PMAP, "%s: demote before remwr", + __func__); + moea64_sp_demote(pvo); + } pvo->pvo_pte.prot &= ~VM_PROT_WRITE; ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE); if (ret < 0) @@ -1892,6 +2127,9 @@ pmap_t pmap; uint64_t lo; + CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x", + __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma); + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; @@ -1904,6 +2142,11 @@ pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if (!(pvo->pvo_vaddr & PVO_DEAD)) { + if (PVO_IS_SP(pvo)) { + CTR1(KTR_PMAP, + "%s: demote before set_memattr", __func__); + moea64_sp_demote(pvo); + } pvo->pvo_pte.pa &= ~LPTE_WIMG; pvo->pvo_pte.pa |= lo; refchg = moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE); @@ -2356,7 +2599,7 @@ moea64_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { - struct pvo_entry *pvo, *tpvo, key; + struct pvo_entry *pvo, key; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); @@ -2372,8 +2615,18 @@ PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); - pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { - tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); + pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + if (PVO_IS_SP(pvo)) { + if (moea64_sp_pvo_in_range(pvo, sva, eva)) { + pvo = moea64_sp_protect(pvo, prot); + continue; + } else { + CTR1(KTR_PMAP, "%s: demote before protect", + __func__); + moea64_sp_demote(pvo); + } + } moea64_pvo_protect(pm, pvo, prot); } PMAP_UNLOCK(pm); @@ -2473,13 +2726,46 @@ } } +static void +moea64_remove_locked(pmap_t pm, vm_offset_t sva, vm_offset_t eva, + struct pvo_dlist *tofree) +{ + struct pvo_entry *pvo, *tpvo, key; + + PMAP_LOCK_ASSERT(pm, MA_OWNED); + + key.pvo_vaddr = sva; + for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); + pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { + if (PVO_IS_SP(pvo)) { + if (moea64_sp_pvo_in_range(pvo, sva, eva)) { + tpvo = moea64_sp_remove(pvo, tofree); + continue; + } else { + CTR1(KTR_PMAP, "%s: demote before remove", + __func__); + moea64_sp_demote(pvo); + } + } + tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); + + /* + * For locking reasons, remove this from the page table and + * pmap, but save delinking from the vm_page for a second + * pass + */ + moea64_pvo_remove_from_pmap(pvo); + SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink); + } +} + /* * Remove the given range of addresses from the specified map. 
*/ void moea64_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva) { - struct pvo_entry *pvo, *tpvo, key; + struct pvo_entry *pvo; struct pvo_dlist tofree; /* @@ -2488,23 +2774,9 @@ if (pm->pm_stats.resident_count == 0) return; - key.pvo_vaddr = sva; - SLIST_INIT(&tofree); - PMAP_LOCK(pm); - for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); - pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { - tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); - - /* - * For locking reasons, remove this from the page table and - * pmap, but save delinking from the vm_page for a second - * pass - */ - moea64_pvo_remove_from_pmap(pvo); - SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink); - } + moea64_remove_locked(pm, sva, eva, &tofree); PMAP_UNLOCK(pm); while (!SLIST_EMPTY(&tofree)) { @@ -2534,8 +2806,14 @@ pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); wasdead = (pvo->pvo_vaddr & PVO_DEAD); - if (!wasdead) + if (!wasdead) { + if (PVO_IS_SP(pvo)) { + CTR1(KTR_PMAP, "%s: demote before remove_all", + __func__); + moea64_sp_demote(pvo); + } moea64_pvo_remove_from_pmap(pvo); + } moea64_pvo_remove_from_page_locked(pvo, m); if (!wasdead) LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink); @@ -2768,11 +3046,17 @@ struct pvo_entry *pvo; int64_t ret; boolean_t rv; + vm_page_t sp; /* * See if this bit is stored in the page already. + * + * For superpages, the bit is stored in the first vm page. */ - if (m->md.mdpg_attrs & ptebit) + if ((m->md.mdpg_attrs & ptebit) != 0 || + ((sp = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK)) != NULL && + (sp->md.mdpg_attrs & (ptebit | MDPG_ATTR_SP)) == + (ptebit | MDPG_ATTR_SP))) return (TRUE); /* @@ -2783,6 +3067,21 @@ powerpc_sync(); PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { + if (PVO_IS_SP(pvo)) { + ret = moea64_sp_query(pvo, ptebit); + /* + * If SP was not demoted, check its REF/CHG bits here. 
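+			 * moea64_sp_query() returns -1 when the superpage was
+			 * demoted or removed before the pmap lock was taken;
+			 * in that case fall through and inspect this PVO as a
+			 * regular 4KB mapping.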
+ */ + if (ret != -1) { + if ((ret & ptebit) != 0) { + rv = TRUE; + break; + } + continue; + } + /* else, fallthrough */ + } + ret = 0; /* @@ -2828,6 +3127,12 @@ count = 0; PV_PAGE_LOCK(m); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { + if (PVO_IS_SP(pvo)) { + if ((ret = moea64_sp_clear(pvo, m, ptebit)) != -1) { + count += ret; + continue; + } + } ret = 0; PMAP_LOCK(pvo->pvo_pmap); @@ -3231,3 +3536,770 @@ DEFINE_OEA64_IFUNC(int64_t, pte_clear, (struct pvo_entry *, uint64_t), moea64_null_method) DEFINE_OEA64_IFUNC(int64_t, pte_synch, (struct pvo_entry *), moea64_null_method) +DEFINE_OEA64_IFUNC(int64_t, pte_insert_sp, (struct pvo_entry *), moea64_null_method) +DEFINE_OEA64_IFUNC(int64_t, pte_unset_sp, (struct pvo_entry *), moea64_null_method) +DEFINE_OEA64_IFUNC(int64_t, pte_replace_sp, (struct pvo_entry *), moea64_null_method) + +/* Superpage functions */ + +/* MMU interface */ + +static bool +moea64_ps_enabled(pmap_t pmap) +{ + return (superpages_enabled); +} + +static void +moea64_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ + vm_offset_t sp_offset; + + if (size < HPT_SP_SIZE) + return; + + CTR4(KTR_PMAP, "%s: offs=%#jx, addr=%p, size=%#jx", + __func__, (uintmax_t)offset, addr, (uintmax_t)size); + + if (object != NULL && (object->flags & OBJ_COLORED) != 0) + offset += ptoa(object->pg_color); + sp_offset = offset & HPT_SP_MASK; + if (size - ((HPT_SP_SIZE - sp_offset) & HPT_SP_MASK) < HPT_SP_SIZE || + (*addr & HPT_SP_MASK) == sp_offset) + return; + if ((*addr & HPT_SP_MASK) < sp_offset) + *addr = (*addr & ~HPT_SP_MASK) + sp_offset; + else + *addr = ((*addr + HPT_SP_MASK) & ~HPT_SP_MASK) + sp_offset; +} + +/* Helpers */ + +static __inline void +moea64_pvo_cleanup(struct pvo_dlist *tofree) +{ + struct pvo_entry *pvo; + + /* clean up */ + while (!SLIST_EMPTY(tofree)) { + pvo = SLIST_FIRST(tofree); + SLIST_REMOVE_HEAD(tofree, pvo_dlink); + if (pvo->pvo_vaddr & PVO_DEAD) + moea64_pvo_remove_from_page(pvo); + free_pvo_entry(pvo); + } +} + +static __inline uint16_t +pvo_to_vmpage_flags(struct pvo_entry *pvo) +{ + uint16_t flags; + + flags = 0; + if ((pvo->pvo_pte.prot & VM_PROT_WRITE) != 0) + flags |= PGA_WRITEABLE; + if ((pvo->pvo_pte.prot & VM_PROT_EXECUTE) != 0) + flags |= PGA_EXECUTABLE; + + return (flags); +} + +/* + * Check if the given pvo and its superpage are in sva-eva range. + */ +static __inline bool +moea64_sp_pvo_in_range(struct pvo_entry *pvo, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t spva; + + spva = PVO_VADDR(pvo) & ~HPT_SP_MASK; + if (spva >= sva && spva + HPT_SP_SIZE <= eva) { + /* + * Because this function is intended to be called from loops + * that iterate over ordered pvo entries, if the condition + * above is true then the pvo must be the first of its + * superpage. + */ + KASSERT(PVO_VADDR(pvo) == spva, + ("%s: unexpected unaligned superpage pvo", __func__)); + return (true); + } + return (false); +} + +/* + * Update vm about the REF/CHG bits if the superpage is managed and + * has (or had) write access. 
+ */ +static void +moea64_sp_refchg_process(struct pvo_entry *sp, vm_page_t m, + int64_t sp_refchg, vm_prot_t prot) +{ + vm_page_t m_end; + int64_t refchg; + + if ((sp->pvo_vaddr & PVO_MANAGED) != 0 && (prot & VM_PROT_WRITE) != 0) { + for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) { + refchg = sp_refchg | + atomic_readandclear_32(&m->md.mdpg_attrs); + if (refchg & LPTE_CHG) + vm_page_dirty(m); + if (refchg & LPTE_REF) + vm_page_aflag_set(m, PGA_REFERENCED); + } + } +} + +/* Superpage ops */ + +static int +moea64_sp_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, u_int flags, int8_t psind) +{ + struct pvo_entry *pvo, **pvos; + struct pvo_head *pvo_head; + vm_offset_t sva; + vm_page_t sm; + vm_paddr_t pa, spa; + bool sync; + struct pvo_dlist tofree; + int error, i; + uint16_t aflags; + + KASSERT((va & HPT_SP_MASK) == 0, ("%s: va %#jx unaligned", + __func__, (uintmax_t)va)); + KASSERT(psind == 1, ("%s: invalid psind: %d", __func__, psind)); + KASSERT(m->psind == 1, ("%s: invalid m->psind: %d", + __func__, m->psind)); + KASSERT(pmap != kernel_pmap, + ("%s: function called with kernel pmap", __func__)); + + CTR5(KTR_PMAP, "%s: va=%#jx, pa=%#jx, prot=%#x, flags=%#x, psind=1", + __func__, (uintmax_t)va, (uintmax_t)VM_PAGE_TO_PHYS(m), + prot, flags); + + SLIST_INIT(&tofree); + + sva = va; + sm = m; + spa = pa = VM_PAGE_TO_PHYS(sm); + + /* Try to allocate all PVOs first, to make failure handling easier. */ + pvos = malloc(HPT_SP_PAGES * sizeof(struct pvo_entry *), M_TEMP, + M_NOWAIT); + if (pvos == NULL) { + CTR1(KTR_PMAP, "%s: failed to alloc pvo array", __func__); + return (KERN_RESOURCE_SHORTAGE); + } + + for (i = 0; i < HPT_SP_PAGES; i++) { + pvos[i] = alloc_pvo_entry(0); + if (pvos[i] == NULL) { + CTR1(KTR_PMAP, "%s: failed to alloc pvo", __func__); + for (i = i - 1; i >= 0; i--) + free_pvo_entry(pvos[i]); + free(pvos, M_TEMP); + return (KERN_RESOURCE_SHORTAGE); + } + } + + SP_PV_LOCK_ALIGNED(spa); + PMAP_LOCK(pmap); + + /* Note: moea64_remove_locked() also clears cached REF/CHG bits. */ + moea64_remove_locked(pmap, va, va + HPT_SP_SIZE, &tofree); + + /* Enter pages */ + for (i = 0; i < HPT_SP_PAGES; + i++, va += PAGE_SIZE, pa += PAGE_SIZE, m++) { + pvo = pvos[i]; + + pvo->pvo_pte.prot = prot; + pvo->pvo_pte.pa = (pa & ~LPTE_LP_MASK) | LPTE_LP_4K_16M | + moea64_calc_wimg(pa, pmap_page_get_memattr(m)); + + if ((flags & PMAP_ENTER_WIRED) != 0) + pvo->pvo_vaddr |= PVO_WIRED; + pvo->pvo_vaddr |= PVO_LARGE; + + if ((m->oflags & VPO_UNMANAGED) != 0) + pvo_head = NULL; + else { + pvo_head = &m->md.mdpg_pvoh; + pvo->pvo_vaddr |= PVO_MANAGED; + } + + init_pvo_entry(pvo, pmap, va); + + error = moea64_pvo_enter(pvo, pvo_head, NULL); + /* + * All superpage PVOs were previously removed, so no errors + * should occur while inserting the new ones. + */ + KASSERT(error == 0, ("%s: unexpected error " + "when inserting superpage PVO: %d", + __func__, error)); + } + + PMAP_UNLOCK(pmap); + SP_PV_UNLOCK_ALIGNED(spa); + + sync = (sm->a.flags & PGA_EXECUTABLE) == 0; + /* Note: moea64_pvo_cleanup() also clears page prot. flags. */ + moea64_pvo_cleanup(&tofree); + pvo = pvos[0]; + + /* Set vm page flags */ + aflags = pvo_to_vmpage_flags(pvo); + if (aflags != 0) + for (m = sm; m < &sm[HPT_SP_PAGES]; m++) + vm_page_aflag_set(m, aflags); + + /* + * Flush the page from the instruction cache if this page is + * mapped executable and cacheable. 
+ */ + if (sync && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) + moea64_syncicache(pmap, sva, spa, HPT_SP_SIZE); + + atomic_add_long(&sp_mappings, 1); + CTR3(KTR_PMAP, "%s: SP success for va %#jx in pmap %p", + __func__, (uintmax_t)sva, pmap); + + free(pvos, M_TEMP); + return (KERN_SUCCESS); +} + +static void +moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + struct pvo_entry *first, *pvo; + vm_paddr_t pa, pa_end; + vm_offset_t sva, va_end; + int64_t sp_refchg; + + /* This CTR may generate a lot of output. */ + /* CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)va); */ + + va &= ~HPT_SP_MASK; + sva = va; + /* Get superpage */ + pa = VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK; + m = PHYS_TO_VM_PAGE(pa); + + PMAP_LOCK(pmap); + + /* + * Check if all pages meet promotion criteria. + * + * XXX In some cases the loop below may be executed for each or most + * of the entered pages of a superpage, which can be expensive + * (although it was not profiled) and need some optimization. + * + * Some cases where this seems to happen are: + * - When a superpage is first entered read-only and later becomes + * read-write. + * - When some of the superpage's virtual addresses map to previously + * wired/cached pages while others map to pages allocated from a + * different physical address range. A common scenario where this + * happens is when mmap'ing a file that is already present in FS + * block cache and doesn't fill a superpage. + */ + first = pvo = moea64_pvo_find_va(pmap, sva); + for (pa_end = pa + HPT_SP_SIZE; + pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) { + if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD) != 0) { + CTR3(KTR_PMAP, + "%s: NULL or dead PVO: pmap=%p, va=%#jx", + __func__, pmap, (uintmax_t)va); + goto error; + } + if (PVO_PADDR(pvo) != pa) { + CTR5(KTR_PMAP, "%s: PAs don't match: " + "pmap=%p, va=%#jx, pvo_pa=%#jx, exp_pa=%#jx", + __func__, pmap, (uintmax_t)va, + (uintmax_t)PVO_PADDR(pvo), (uintmax_t)pa); + atomic_add_long(&sp_p_fail_pa, 1); + goto error; + } + if ((first->pvo_vaddr & PVO_FLAGS_PROMOTE) != + (pvo->pvo_vaddr & PVO_FLAGS_PROMOTE)) { + CTR5(KTR_PMAP, "%s: PVO flags don't match: " + "pmap=%p, va=%#jx, pvo_flags=%#jx, exp_flags=%#jx", + __func__, pmap, (uintmax_t)va, + (uintmax_t)(pvo->pvo_vaddr & PVO_FLAGS_PROMOTE), + (uintmax_t)(first->pvo_vaddr & PVO_FLAGS_PROMOTE)); + atomic_add_long(&sp_p_fail_flags, 1); + goto error; + } + if (first->pvo_pte.prot != pvo->pvo_pte.prot) { + CTR5(KTR_PMAP, "%s: PVO protections don't match: " + "pmap=%p, va=%#jx, pvo_prot=%#x, exp_prot=%#x", + __func__, pmap, (uintmax_t)va, + pvo->pvo_pte.prot, first->pvo_pte.prot); + atomic_add_long(&sp_p_fail_prot, 1); + goto error; + } + if ((first->pvo_pte.pa & LPTE_WIMG) != + (pvo->pvo_pte.pa & LPTE_WIMG)) { + CTR5(KTR_PMAP, "%s: WIMG bits don't match: " + "pmap=%p, va=%#jx, pvo_wimg=%#jx, exp_wimg=%#jx", + __func__, pmap, (uintmax_t)va, + (uintmax_t)(pvo->pvo_pte.pa & LPTE_WIMG), + (uintmax_t)(first->pvo_pte.pa & LPTE_WIMG)); + atomic_add_long(&sp_p_fail_wimg, 1); + goto error; + } + + pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo); + } + + /* All OK, promote. */ + + /* + * Handle superpage REF/CHG bits. If REF or CHG is set in + * any page, then it must be set in the superpage. + * + * Instead of querying each page, we take advantage of two facts: + * 1- If a page is being promoted, it was referenced. + * 2- If promoted pages are writable, they were modified. + */ + sp_refchg = LPTE_REF | + ((first->pvo_pte.prot & VM_PROT_WRITE) != 0 ? 
LPTE_CHG : 0); + + /* Promote pages */ + + for (pvo = first, va_end = PVO_VADDR(pvo) + HPT_SP_SIZE; + pvo != NULL && PVO_VADDR(pvo) < va_end; + pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) { + pvo->pvo_pte.pa &= ~LPTE_LP_MASK; + pvo->pvo_pte.pa |= LPTE_LP_4K_16M; + pvo->pvo_vaddr |= PVO_LARGE; + } + moea64_pte_replace_sp(first); + + /* Send REF/CHG bits to VM */ + moea64_sp_refchg_process(first, m, sp_refchg, first->pvo_pte.prot); + + /* Use first page to cache REF/CHG bits */ + atomic_set_32(&m->md.mdpg_attrs, sp_refchg | MDPG_ATTR_SP); + + PMAP_UNLOCK(pmap); + + atomic_add_long(&sp_mappings, 1); + atomic_add_long(&sp_promotions, 1); + CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p", + __func__, (uintmax_t)sva, pmap); + return; + +error: + atomic_add_long(&sp_p_failures, 1); + PMAP_UNLOCK(pmap); +} + +static void +moea64_sp_demote_aligned(struct pvo_entry *sp) +{ + struct pvo_entry *pvo; + vm_offset_t va, va_end; + vm_paddr_t pa; + vm_page_t m; + pmap_t pmap; + int64_t refchg; + + CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp)); + + pmap = sp->pvo_pmap; + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + pvo = sp; + + /* Demote pages */ + + va = PVO_VADDR(pvo); + pa = PVO_PADDR(pvo); + m = PHYS_TO_VM_PAGE(pa); + + for (pvo = sp, va_end = va + HPT_SP_SIZE; + pvo != NULL && PVO_VADDR(pvo) < va_end; + pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo), + va += PAGE_SIZE, pa += PAGE_SIZE) { + KASSERT(pvo && PVO_VADDR(pvo) == va, + ("%s: missing PVO for va %#jx", __func__, (uintmax_t)va)); + + pvo->pvo_vaddr &= ~PVO_LARGE; + pvo->pvo_pte.pa &= ~LPTE_RPGN; + pvo->pvo_pte.pa |= pa; + + } + refchg = moea64_pte_replace_sp(sp); + + /* + * Clear SP flag + * + * XXX It is possible that another pmap has this page mapped as + * part of a superpage, but as the SP flag is used only for + * caching SP REF/CHG bits, that will be queried if not set + * in cache, it should be ok to clear it here. + */ + atomic_clear_32(&m->md.mdpg_attrs, MDPG_ATTR_SP); + + /* + * Handle superpage REF/CHG bits. A bit set in the superpage + * means all pages should consider it set. 
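+	 * moea64_sp_refchg_process() below propagates the bits returned by
+	 * moea64_pte_replace_sp() to the underlying 4KB vm_pages.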
+ */ + moea64_sp_refchg_process(sp, m, refchg, sp->pvo_pte.prot); + + atomic_add_long(&sp_demotions, 1); + CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p", + __func__, (uintmax_t)PVO_VADDR(sp), pmap); +} + +static void +moea64_sp_demote(struct pvo_entry *pvo) +{ + PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + + if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) { + pvo = moea64_pvo_find_va(pvo->pvo_pmap, + PVO_VADDR(pvo) & ~HPT_SP_MASK); + KASSERT(pvo != NULL, ("%s: missing PVO for va %#jx", + __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK))); + } + moea64_sp_demote_aligned(pvo); +} + +static struct pvo_entry * +moea64_sp_unwire(struct pvo_entry *sp) +{ + struct pvo_entry *pvo, *prev; + vm_offset_t eva; + pmap_t pm; + int64_t ret, refchg; + + CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp)); + + pm = sp->pvo_pmap; + PMAP_LOCK_ASSERT(pm, MA_OWNED); + + eva = PVO_VADDR(sp) + HPT_SP_SIZE; + refchg = 0; + for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; + prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + if ((pvo->pvo_vaddr & PVO_WIRED) == 0) + panic("%s: pvo %p is missing PVO_WIRED", + __func__, pvo); + pvo->pvo_vaddr &= ~PVO_WIRED; + + ret = moea64_pte_replace(pvo, 0 /* No invalidation */); + if (ret < 0) + refchg |= LPTE_CHG; + else + refchg |= ret; + + pm->pm_stats.wired_count--; + } + + /* Send REF/CHG bits to VM */ + moea64_sp_refchg_process(sp, PHYS_TO_VM_PAGE(PVO_PADDR(sp)), + refchg, sp->pvo_pte.prot); + + return (prev); +} + +static struct pvo_entry * +moea64_sp_protect(struct pvo_entry *sp, vm_prot_t prot) +{ + struct pvo_entry *pvo, *prev; + vm_offset_t eva; + pmap_t pm; + vm_page_t m, m_end; + int64_t ret, refchg; + vm_prot_t oldprot; + + CTR3(KTR_PMAP, "%s: va=%#jx, prot=%x", + __func__, (uintmax_t)PVO_VADDR(sp), prot); + + pm = sp->pvo_pmap; + PMAP_LOCK_ASSERT(pm, MA_OWNED); + + oldprot = sp->pvo_pte.prot; + m = PHYS_TO_VM_PAGE(PVO_PADDR(sp)); + KASSERT(m != NULL, ("%s: missing vm page for pa %#jx", + __func__, (uintmax_t)PVO_PADDR(sp))); + eva = PVO_VADDR(sp) + HPT_SP_SIZE; + refchg = 0; + + for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; + prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + pvo->pvo_pte.prot = prot; + /* + * If the PVO is in the page table, update mapping + */ + ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE); + if (ret < 0) + refchg |= LPTE_CHG; + else + refchg |= ret; + } + + /* Send REF/CHG bits to VM */ + moea64_sp_refchg_process(sp, m, refchg, oldprot); + + /* Handle pages that became executable */ + if ((m->a.flags & PGA_EXECUTABLE) == 0 && + (sp->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { + if ((m->oflags & VPO_UNMANAGED) == 0) + for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) + vm_page_aflag_set(m, PGA_EXECUTABLE); + moea64_syncicache(pm, PVO_VADDR(sp), PVO_PADDR(sp), + HPT_SP_SIZE); + } + + return (prev); +} + +static struct pvo_entry * +moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree) +{ + struct pvo_entry *pvo, *tpvo; + vm_offset_t eva; + pmap_t pm; + + CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp)); + + pm = sp->pvo_pmap; + PMAP_LOCK_ASSERT(pm, MA_OWNED); + + eva = PVO_VADDR(sp) + HPT_SP_SIZE; + for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { + tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); + + /* + * For locking reasons, remove this from the page table and + * pmap, but save delinking from the vm_page for a second + * pass + */ + moea64_pvo_remove_from_pmap(pvo); + SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink); + } + + /* + * Clear SP bit + * 
+ * XXX See comment in moea64_sp_demote_aligned() for why it's + * ok to always clear the SP bit on remove/demote. + */ + atomic_clear_32(&PHYS_TO_VM_PAGE(PVO_PADDR(sp))->md.mdpg_attrs, + MDPG_ATTR_SP); + + return (tpvo); +} + +static int64_t +moea64_sp_query_locked(struct pvo_entry *pvo, uint64_t ptebit) +{ + int64_t refchg, ret; + vm_offset_t eva; + vm_page_t m; + pmap_t pmap; + struct pvo_entry *sp; + + pmap = pvo->pvo_pmap; + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* Get first SP PVO */ + if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) { + sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK); + KASSERT(sp != NULL, ("%s: missing PVO for va %#jx", + __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK))); + } else + sp = pvo; + eva = PVO_VADDR(sp) + HPT_SP_SIZE; + + refchg = 0; + for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) { + ret = moea64_pte_synch(pvo); + if (ret > 0) { + refchg |= ret & (LPTE_CHG | LPTE_REF); + if ((refchg & ptebit) != 0) + break; + } + } + + /* Save results */ + if (refchg != 0) { + m = PHYS_TO_VM_PAGE(PVO_PADDR(sp)); + atomic_set_32(&m->md.mdpg_attrs, refchg | MDPG_ATTR_SP); + } + + return (refchg); +} + +static int64_t +moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit) +{ + int64_t refchg; + pmap_t pmap; + + pmap = pvo->pvo_pmap; + PMAP_LOCK(pmap); + + /* + * Check if SP was demoted/removed before pmap lock was acquired. + */ + if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) { + CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx", + __func__, (uintmax_t)PVO_PADDR(pvo)); + PMAP_UNLOCK(pmap); + return (-1); + } + + refchg = moea64_sp_query_locked(pvo, ptebit); + PMAP_UNLOCK(pmap); + + CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx", + __func__, (uintmax_t)PVO_VADDR(pvo), + (uintmax_t)PVO_PADDR(pvo), (uintmax_t)refchg); + + return (refchg); +} + +static int64_t +moea64_sp_pvo_clear(struct pvo_entry *pvo, uint64_t ptebit) +{ + int64_t refchg, ret; + pmap_t pmap; + struct pvo_entry *sp; + vm_offset_t eva; + vm_page_t m; + + pmap = pvo->pvo_pmap; + PMAP_LOCK(pmap); + + /* + * Check if SP was demoted/removed before pmap lock was acquired. + */ + if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) { + CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx", + __func__, (uintmax_t)PVO_PADDR(pvo)); + PMAP_UNLOCK(pmap); + return (-1); + } + + /* Get first SP PVO */ + if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) { + sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK); + KASSERT(sp != NULL, ("%s: missing PVO for va %#jx", + __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK))); + } else + sp = pvo; + eva = PVO_VADDR(sp) + HPT_SP_SIZE; + + refchg = 0; + for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) { + ret = moea64_pte_clear(pvo, ptebit); + if (ret > 0) + refchg |= ret & (LPTE_CHG | LPTE_REF); + } + + m = PHYS_TO_VM_PAGE(PVO_PADDR(sp)); + atomic_clear_32(&m->md.mdpg_attrs, ptebit); + PMAP_UNLOCK(pmap); + + CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx", + __func__, (uintmax_t)PVO_VADDR(sp), + (uintmax_t)PVO_PADDR(sp), (uintmax_t)refchg); + + return (refchg); +} + +static int64_t +moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, uint64_t ptebit) +{ + int64_t count, ret; + pmap_t pmap; + + count = 0; + pmap = pvo->pvo_pmap; + + /* + * Since this reference bit is shared by 4096 4KB pages, it + * should not be cleared every time it is tested. 
Apply a + * simple "hash" function on the physical page number, the + * virtual superpage number, and the pmap address to select + * one 4KB page out of the 4096 on which testing the + * reference bit will result in clearing that reference bit. + * This function is designed to avoid the selection of the + * same 4KB page for every 16MB page mapping. + * + * Always leave the reference bit of a wired mapping set, as + * the current state of its reference bit won't affect page + * replacement. + */ + if (ptebit == LPTE_REF && (((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) ^ + (PVO_VADDR(pvo) >> HPT_SP_SHIFT) ^ (uintptr_t)pmap) & + (HPT_SP_PAGES - 1)) == 0 && (pvo->pvo_vaddr & PVO_WIRED) == 0) { + if ((ret = moea64_sp_pvo_clear(pvo, ptebit)) == -1) + return (-1); + + if ((ret & ptebit) != 0) + count++; + + /* + * If this page was not selected by the hash function, then assume + * its REF bit was set. + */ + } else if (ptebit == LPTE_REF) { + count++; + + /* + * To clear the CHG bit of a single SP page, first it must be demoted. + * But if no CHG bit is set, no bit clear and thus no SP demotion is + * needed. + */ + } else { + CTR4(KTR_PMAP, "%s: ptebit=%#jx, va=%#jx, pa=%#jx", + __func__, (uintmax_t)ptebit, (uintmax_t)PVO_VADDR(pvo), + (uintmax_t)PVO_PADDR(pvo)); + + PMAP_LOCK(pmap); + + /* + * Make sure SP wasn't demoted/removed before pmap lock + * was acquired. + */ + if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) { + CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx", + __func__, (uintmax_t)PVO_PADDR(pvo)); + PMAP_UNLOCK(pmap); + return (-1); + } + + ret = moea64_sp_query_locked(pvo, ptebit); + if ((ret & ptebit) != 0) + count++; + else { + PMAP_UNLOCK(pmap); + return (0); + } + + moea64_sp_demote(pvo); + moea64_pte_clear(pvo, ptebit); + + /* + * Write protect the mapping to a single page so that a + * subsequent write access may repromote. + */ + if ((pvo->pvo_vaddr & PVO_WIRED) == 0) + moea64_pvo_protect(pmap, pvo, + pvo->pvo_pte.prot & ~VM_PROT_WRITE); + + PMAP_UNLOCK(pmap); + } + + return (count); +} Index: head/sys/powerpc/aim/moea64_native.c =================================================================== --- head/sys/powerpc/aim/moea64_native.c +++ head/sys/powerpc/aim/moea64_native.c @@ -132,11 +132,32 @@ /* POWER9 only permits a 64k partition table size. */ #define PART_SIZE 0x10000 +/* Actual page sizes (to be used with tlbie, when L=0) */ +#define AP_4K 0x00 +#define AP_16M 0x80 + +#define LPTE_KERNEL_VSID_BIT (KERNEL_VSID_BIT << \ + (16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT))) + +/* Abbreviated Virtual Address Page - high bits */ +#define LPTE_AVA_PGNHI_MASK 0x0000000000000F80ULL +#define LPTE_AVA_PGNHI_SHIFT 7 + +/* Effective Address Page - low bits */ +#define EA_PAGELO_MASK 0x7ffULL +#define EA_PAGELO_SHIFT 11 + static bool moea64_crop_tlbie; static bool moea64_need_lock; +/* + * The tlbie instruction has two forms: an old one used by PowerISA + * 2.03 and prior, and a newer one used by PowerISA 2.06 and later. + * We need to support both. 
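+ *
+ * The oldptehi argument carries the high word of the PTE being invalidated,
+ * so that 16MB superpage mappings (LPTE_BIG set and not using the kernel
+ * VSID) can be recognized and the 4KB-base/16MB-actual page size encoded
+ * into the invalidation.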
+ */ static __inline void -TLBIE(uint64_t vpn) { +TLBIE(uint64_t vpn, uint64_t oldptehi) +{ #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; @@ -153,12 +174,32 @@ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ - if (moea64_crop_tlbie) + if (moea64_crop_tlbie) { vpn &= ~(0xffffULL << 48); +#ifdef __powerpc64__ + if ((oldptehi & LPTE_BIG) != 0) + __asm __volatile("tlbie %0, 1" :: "r"(vpn) : + "memory"); + else + __asm __volatile("tlbie %0, 0" :: "r"(vpn) : + "memory"); + __asm __volatile("eieio; tlbsync; ptesync" ::: + "memory"); + goto done; +#endif + } } #ifdef __powerpc64__ /* + * If this page has LPTE_BIG set and is from userspace, then + * it must be a superpage with 4KB base/16MB actual page size. + */ + if ((oldptehi & LPTE_BIG) != 0 && + (oldptehi & LPTE_KERNEL_VSID_BIT) == 0) + vpn |= AP_16M; + + /* * Explicitly clobber r0. The tlbie instruction has two forms: an old * one used by PowerISA 2.03 and prior, and a newer one used by PowerISA * 2.06 (maybe 2.05?) and later. We need to support both, and it just @@ -168,7 +209,7 @@ * in the newer form is in the same position as the L(page size) bit of * the old form, so a slong as RS is 0, we're good on both sides. */ - __asm __volatile("li 0, 0 \n tlbie %0" :: "r"(vpn) : "r0", "memory"); + __asm __volatile("li 0, 0 \n tlbie %0, 0" :: "r"(vpn) : "r0", "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); @@ -194,6 +235,7 @@ intr_restore(intr); #endif +done: /* No barriers or special ops -- taken care of by ptesync above */ if (need_lock) tlbie_lock = 0; @@ -224,6 +266,9 @@ static int64_t moea64_pte_clear_native(struct pvo_entry *, uint64_t); static int64_t moea64_pte_replace_native(struct pvo_entry *, int); static int64_t moea64_pte_unset_native(struct pvo_entry *); +static int64_t moea64_pte_insert_sp_native(struct pvo_entry *); +static int64_t moea64_pte_unset_sp_native(struct pvo_entry *); +static int64_t moea64_pte_replace_sp_native(struct pvo_entry *); /* * Utility routines. 
@@ -245,10 +290,13 @@ static struct moea64_funcs moea64_native_funcs = { .pte_synch = moea64_pte_synch_native, - .pte_clear = moea64_pte_clear_native, - .pte_unset = moea64_pte_unset_native, - .pte_replace = moea64_pte_replace_native, - .pte_insert = moea64_pte_insert_native, + .pte_clear = moea64_pte_clear_native, + .pte_unset = moea64_pte_unset_native, + .pte_replace = moea64_pte_replace_native, + .pte_insert = moea64_pte_insert_native, + .pte_insert_sp = moea64_pte_insert_sp_native, + .pte_unset_sp = moea64_pte_unset_sp_native, + .pte_replace_sp = moea64_pte_replace_sp_native, }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, oea64_mmu); @@ -321,7 +369,7 @@ rw_runlock(&moea64_eviction_lock); critical_enter(); - TLBIE(pvo->pvo_vpn); + TLBIE(pvo->pvo_vpn, properpt.pte_hi); critical_exit(); } else { rw_runlock(&moea64_eviction_lock); @@ -332,22 +380,11 @@ return (ptelo & (LPTE_REF | LPTE_CHG)); } -static int64_t -moea64_pte_unset_native(struct pvo_entry *pvo) +static __always_inline int64_t +moea64_pte_unset_locked(volatile struct lpte *pt, uint64_t vpn) { - volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; - uint64_t ptelo, pvo_ptevpn; + uint64_t ptelo; - pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo); - - rw_rlock(&moea64_eviction_lock); - if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) { - /* Evicted */ - STAT_MOEA64(moea64_pte_overflow--); - rw_runlock(&moea64_eviction_lock); - return (-1); - } - /* * Invalidate the pte, briefly locking it to collect RC bits. No * atomics needed since this is protected against eviction by the lock. @@ -356,11 +393,10 @@ critical_enter(); pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED); PTESYNC(); - TLBIE(pvo->pvo_vpn); + TLBIE(vpn, pt->pte_hi); ptelo = be64toh(pt->pte_lo); *((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ critical_exit(); - rw_runlock(&moea64_eviction_lock); /* Keep statistics */ STAT_MOEA64(moea64_pte_valid--); @@ -369,6 +405,29 @@ } static int64_t +moea64_pte_unset_native(struct pvo_entry *pvo) +{ + volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot; + int64_t ret; + uint64_t pvo_ptevpn; + + pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo); + + rw_rlock(&moea64_eviction_lock); + + if ((be64toh(pt->pte_hi & LPTE_AVPN_MASK)) != pvo_ptevpn) { + /* Evicted */ + STAT_MOEA64(moea64_pte_overflow--); + ret = -1; + } else + ret = moea64_pte_unset_locked(pt, pvo->pvo_vpn); + + rw_runlock(&moea64_eviction_lock); + + return (ret); +} + +static int64_t moea64_pte_replace_inval_native(struct pvo_entry *pvo, volatile struct lpte *pt) { @@ -394,7 +453,7 @@ critical_enter(); pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED); PTESYNC(); - TLBIE(pvo->pvo_vpn); + TLBIE(pvo->pvo_vpn, pt->pte_hi); ptelo = be64toh(pt->pte_lo); EIEIO(); pt->pte_lo = htobe64(properpt.pte_lo); @@ -734,7 +793,7 @@ va |= (oldptehi & LPTE_AVPN_MASK) << (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); PTESYNC(); - TLBIE(va); + TLBIE(va, oldptehi); STAT_MOEA64(moea64_pte_valid--); STAT_MOEA64(moea64_pte_overflow++); } @@ -754,26 +813,18 @@ return (k); } -static int64_t -moea64_pte_insert_native(struct pvo_entry *pvo) +static __always_inline int64_t +moea64_pte_insert_locked(struct pvo_entry *pvo, struct lpte *insertpt, + uint64_t mask) { - struct lpte insertpt; uintptr_t slot; - /* Initialize PTE */ - moea64_pte_from_pvo(pvo, &insertpt); - - /* Make sure further insertion is locked out during evictions */ - rw_rlock(&moea64_eviction_lock); - /* * First try primary hash. 
*/ - pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ - slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, - LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); + slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot, + mask | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { - rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } @@ -782,50 +833,52 @@ * Now try secondary hash. */ pvo->pvo_vaddr ^= PVO_HID; - insertpt.pte_hi ^= LPTE_HID; + insertpt->pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); - slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, - LPTE_VALID | LPTE_WIRED | LPTE_LOCKED); + slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot, + mask | LPTE_WIRED | LPTE_LOCKED); if (slot != -1) { - rw_runlock(&moea64_eviction_lock); pvo->pvo_pte.slot = slot; return (0); } - /* - * Out of luck. Find a PTE to sacrifice. - */ + return (-1); +} - /* Lock out all insertions for a bit */ - if (!rw_try_upgrade(&moea64_eviction_lock)) { - rw_runlock(&moea64_eviction_lock); - rw_wlock(&moea64_eviction_lock); - } +static int64_t +moea64_pte_insert_native(struct pvo_entry *pvo) +{ + struct lpte insertpt; + int64_t ret; - slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, - LPTE_WIRED | LPTE_LOCKED); - if (slot != -1) { - rw_wunlock(&moea64_eviction_lock); - pvo->pvo_pte.slot = slot; - return (0); - } + /* Initialize PTE */ + moea64_pte_from_pvo(pvo, &insertpt); - /* Try other hash table. Now we're getting desperate... */ - pvo->pvo_vaddr ^= PVO_HID; - insertpt.pte_hi ^= LPTE_HID; - pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); - slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot, - LPTE_WIRED | LPTE_LOCKED); - if (slot != -1) { + /* Make sure further insertion is locked out during evictions */ + rw_rlock(&moea64_eviction_lock); + + pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ + ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID); + if (ret == -1) { + /* + * Out of luck. Find a PTE to sacrifice. + */ + + /* Lock out all insertions for a bit */ + if (!rw_try_upgrade(&moea64_eviction_lock)) { + rw_runlock(&moea64_eviction_lock); + rw_wlock(&moea64_eviction_lock); + } + /* Don't evict large pages */ + ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_BIG); rw_wunlock(&moea64_eviction_lock); - pvo->pvo_pte.slot = slot; - return (0); - } + /* No freeable slots in either PTEG? We're hosed. */ + if (ret == -1) + panic("moea64_pte_insert: overflow"); + } else + rw_runlock(&moea64_eviction_lock); - /* No freeable slots in either PTEG? We're hosed. 
*/ - rw_wunlock(&moea64_eviction_lock); - panic("moea64_pte_insert: overflow"); - return (-1); + return (0); } static void * @@ -845,4 +898,135 @@ dctx->ptex = ptex_end; return (__DEVOLATILE(struct lpte *, moea64_pteg_table) + ptex); +} + +static __always_inline uint64_t +moea64_vpn_from_pte(uint64_t ptehi, uintptr_t slot) +{ + uint64_t pgn, pgnlo, vsid; + + vsid = (ptehi & LPTE_AVA_MASK) >> LPTE_VSID_SHIFT; + if ((ptehi & LPTE_HID) != 0) + slot ^= (moea64_pteg_mask << 3); + pgnlo = ((vsid & VSID_HASH_MASK) ^ (slot >> 3)) & EA_PAGELO_MASK; + pgn = ((ptehi & LPTE_AVA_PGNHI_MASK) << (EA_PAGELO_SHIFT - + LPTE_AVA_PGNHI_SHIFT)) | pgnlo; + return ((vsid << 16) | pgn); +} + +static __always_inline int64_t +moea64_pte_unset_sp_locked(struct pvo_entry *pvo) +{ + volatile struct lpte *pt; + uint64_t ptehi, refchg, vpn; + vm_offset_t eva; + pmap_t pm; + + pm = pvo->pvo_pmap; + refchg = 0; + eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + + for (; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + pt = moea64_pteg_table + pvo->pvo_pte.slot; + ptehi = be64toh(pt->pte_hi); + if ((ptehi & LPTE_AVPN_MASK) != + moea64_pte_vpn_from_pvo_vpn(pvo)) { + /* Evicted: invalidate new entry */ + STAT_MOEA64(moea64_pte_overflow--); + vpn = moea64_vpn_from_pte(ptehi, pvo->pvo_pte.slot); + CTR1(KTR_PMAP, "Evicted page in pte_unset_sp: vpn=%jx", + (uintmax_t)vpn); + /* Assume evicted page was modified */ + refchg |= LPTE_CHG; + } else + vpn = pvo->pvo_vpn; + + refchg |= moea64_pte_unset_locked(pt, vpn); + } + + return (refchg); +} + +static int64_t +moea64_pte_unset_sp_native(struct pvo_entry *pvo) +{ + uint64_t refchg; + + PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, + ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + + rw_rlock(&moea64_eviction_lock); + refchg = moea64_pte_unset_sp_locked(pvo); + rw_runlock(&moea64_eviction_lock); + + return (refchg); +} + +static __always_inline int64_t +moea64_pte_insert_sp_locked(struct pvo_entry *pvo) +{ + struct lpte insertpt; + int64_t ret; + vm_offset_t eva; + pmap_t pm; + + pm = pvo->pvo_pmap; + eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + + for (; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + moea64_pte_from_pvo(pvo, &insertpt); + pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */ + + ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID); + if (ret == -1) { + /* Lock out all insertions for a bit */ + if (!rw_try_upgrade(&moea64_eviction_lock)) { + rw_runlock(&moea64_eviction_lock); + rw_wlock(&moea64_eviction_lock); + } + /* Don't evict large pages */ + ret = moea64_pte_insert_locked(pvo, &insertpt, + LPTE_BIG); + rw_downgrade(&moea64_eviction_lock); + /* No freeable slots in either PTEG? We're hosed. 
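+			 * Note that, unlike moea64_pte_insert_native(), the
+			 * eviction lock is downgraded rather than dropped,
+			 * since the remaining PVOs of this superpage must
+			 * still be inserted under the read lock.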
*/ + if (ret == -1) + panic("moea64_pte_insert_sp: overflow"); + } + } + + return (0); +} + +static int64_t +moea64_pte_insert_sp_native(struct pvo_entry *pvo) +{ + PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, + ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + + rw_rlock(&moea64_eviction_lock); + moea64_pte_insert_sp_locked(pvo); + rw_runlock(&moea64_eviction_lock); + + return (0); +} + +static int64_t +moea64_pte_replace_sp_native(struct pvo_entry *pvo) +{ + uint64_t refchg; + + PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, + ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + + rw_rlock(&moea64_eviction_lock); + refchg = moea64_pte_unset_sp_locked(pvo); + moea64_pte_insert_sp_locked(pvo); + rw_runlock(&moea64_eviction_lock); + + return (refchg); } Index: head/sys/powerpc/include/pmap.h =================================================================== --- head/sys/powerpc/include/pmap.h +++ head/sys/powerpc/include/pmap.h @@ -148,8 +148,8 @@ #define PVO_MANAGED 0x020UL /* PVO entry is managed */ #define PVO_BOOTSTRAP 0x080UL /* PVO entry allocated during bootstrap */ -#define PVO_DEAD 0x100UL /* waiting to be deleted */ -#define PVO_LARGE 0x200UL /* large page */ +#define PVO_DEAD 0x100UL /* waiting to be deleted */ +#define PVO_LARGE 0x200UL /* large page */ #define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF) #define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK) #define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID) Index: head/sys/powerpc/include/pte.h =================================================================== --- head/sys/powerpc/include/pte.h +++ head/sys/powerpc/include/pte.h @@ -111,6 +111,7 @@ /* High quadword: */ #define LPTE_VSID_SHIFT 12 #define LPTE_AVPN_MASK 0xFFFFFFFFFFFFFF80ULL +#define LPTE_AVA_MASK 0x3FFFFFFFFFFFFF80ULL #define LPTE_API 0x0000000000000F80ULL #define LPTE_SWBITS 0x0000000000000078ULL #define LPTE_WIRED 0x0000000000000010ULL @@ -120,8 +121,13 @@ #define LPTE_VALID 0x0000000000000001ULL /* Low quadword: */ +#define LP_4K_16M 0x38 /* 4KB base, 16MB actual page size */ + #define EXTEND_PTE(x) UINT64_C(x) /* make constants 64-bit */ #define LPTE_RPGN 0xfffffffffffff000ULL +#define LPTE_LP_MASK 0x00000000000ff000ULL +#define LPTE_LP_SHIFT 12 +#define LPTE_LP_4K_16M ((unsigned long long)(LP_4K_16M) << LPTE_LP_SHIFT) #define LPTE_REF EXTEND_PTE( PTE_REF ) #define LPTE_CHG EXTEND_PTE( PTE_CHG ) #define LPTE_WIMG EXTEND_PTE( PTE_WIMG ) @@ -138,6 +144,12 @@ #define LPTE_BR EXTEND_PTE( PTE_BR ) /* Both Read Only */ #define LPTE_RW LPTE_BW #define LPTE_RO LPTE_BR + +/* HPT superpage definitions */ +#define HPT_SP_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT) +#define HPT_SP_SIZE (1 << HPT_SP_SHIFT) +#define HPT_SP_MASK (HPT_SP_SIZE - 1) +#define HPT_SP_PAGES (1 << VM_LEVEL_0_ORDER) /* POWER ISA 3.0 Radix Table Definitions */ #define RPTE_VALID 0x8000000000000000ULL Index: head/sys/powerpc/include/slb.h =================================================================== --- head/sys/powerpc/include/slb.h +++ head/sys/powerpc/include/slb.h @@ -64,6 +64,14 @@ #define SLBE_ESID_MASK 0xfffffffff0000000UL /* Effective segment ID mask */ #define SLBE_ESID_SHIFT 28 +/* + * SLB page sizes encoding, as present in property ibm,segment-page-sizes + * of CPU device tree node. + * + * See LoPAPR: CPU Node Properties, section C.6.1.4. 
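+ *
+ * SLB_PGSZ_4K_4K below denotes a segment with a 4KB base page size;
+ * platform code matches it together with the LP_4K_16M PTE encoding
+ * (see pte.h) to detect mixed 4KB/16MB page size support
+ * (moea64_has_lp_4k_16m).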
+ */ +#define SLB_PGSZ_4K_4K 0 + /* Virtual real-mode VSID in LPARs */ #define VSID_VRMA 0x1ffffff Index: head/sys/powerpc/include/vmparam.h =================================================================== --- head/sys/powerpc/include/vmparam.h +++ head/sys/powerpc/include/vmparam.h @@ -185,31 +185,34 @@ #define VM_NFREELIST 1 #define VM_FREELIST_DEFAULT 0 -/* - * The largest allocation size is 4MB. - */ #ifdef __powerpc64__ +/* The largest allocation size is 16MB. */ #define VM_NFREEORDER 13 #else +/* The largest allocation size is 4MB. */ #define VM_NFREEORDER 11 #endif #ifndef VM_NRESERVLEVEL #ifdef __powerpc64__ +/* Enable superpage reservations: 1 level. */ #define VM_NRESERVLEVEL 1 #else -/* - * Disable superpage reservations. - */ +/* Disable superpage reservations. */ #define VM_NRESERVLEVEL 0 #endif #endif -/* - * Level 0 reservations consist of 512 pages. - */ #ifndef VM_LEVEL_0_ORDER -#define VM_LEVEL_0_ORDER 9 +/* Level 0 reservations consist of 512 (RPT) or 4096 (HPT) pages. */ +#define VM_LEVEL_0_ORDER vm_level_0_order +#ifndef __ASSEMBLER__ +extern int vm_level_0_order; +#endif +#endif + +#ifndef VM_LEVEL_0_ORDER_MAX +#define VM_LEVEL_0_ORDER_MAX 12 #endif #ifdef __powerpc64__ Index: head/sys/powerpc/powernv/platform_powernv.c =================================================================== --- head/sys/powerpc/powernv/platform_powernv.c +++ head/sys/powerpc/powernv/platform_powernv.c @@ -141,6 +141,7 @@ phandle_t opal; int res, len, idx; register_t msr; + bool has_lp; /* Ping OPAL again just to make sure */ opal_check(); @@ -228,6 +229,7 @@ sizeof(arr)); len /= 4; idx = 0; + has_lp = false; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; @@ -238,17 +240,21 @@ lp_size = arr[idx]; lp_encoding = arr[idx+1]; if (slb_encoding == SLBV_L && lp_encoding == 0) - break; + has_lp = true; + if (slb_encoding == SLB_PGSZ_4K_4K && + lp_encoding == LP_4K_16M) + moea64_has_lp_4k_16m = true; + idx += 2; len -= 2; nptlp--; } - if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) + if (has_lp && moea64_has_lp_4k_16m) break; } - if (len == 0) + if (!has_lp) panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) " "not supported by this system."); Index: head/sys/powerpc/powerpc/pmap_dispatch.c =================================================================== --- head/sys/powerpc/powerpc/pmap_dispatch.c +++ head/sys/powerpc/powerpc/pmap_dispatch.c @@ -77,6 +77,8 @@ caddr_t crashdumpmap; int pmap_bootstrapped; +/* Default level 0 reservations consist of 512 pages (2MB superpage). 
*/ +int vm_level_0_order = 9; #ifdef AIM int Index: head/sys/powerpc/pseries/mmu_phyp.c =================================================================== --- head/sys/powerpc/pseries/mmu_phyp.c +++ head/sys/powerpc/pseries/mmu_phyp.c @@ -82,6 +82,9 @@ static int64_t mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit); static int64_t mphyp_pte_unset(struct pvo_entry *pvo); static int64_t mphyp_pte_insert(struct pvo_entry *pvo); +static int64_t mphyp_pte_unset_sp(struct pvo_entry *pvo); +static int64_t mphyp_pte_insert_sp(struct pvo_entry *pvo); +static int64_t mphyp_pte_replace_sp(struct pvo_entry *pvo); static struct pmap_funcs mphyp_methods = { .install = mphyp_install, @@ -95,6 +98,9 @@ .pte_clear = mphyp_pte_clear, .pte_unset = mphyp_pte_unset, .pte_insert = mphyp_pte_insert, + .pte_unset_sp = mphyp_pte_unset_sp, + .pte_insert_sp = mphyp_pte_insert_sp, + .pte_replace_sp = mphyp_pte_replace_sp, }; MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu); @@ -135,6 +141,7 @@ uint64_t vsid; phandle_t dev, node, root; int idx, len, res; + bool has_lp; rm_init(&mphyp_eviction_lock, "pte eviction"); @@ -199,6 +206,7 @@ sizeof(arr)); len /= 4; idx = 0; + has_lp = false; while (len > 0) { shift = arr[idx]; slb_encoding = arr[idx + 1]; @@ -220,18 +228,22 @@ lp_encoding); if (slb_encoding == SLBV_L && lp_encoding == 0) - break; + has_lp = true; + if (slb_encoding == SLB_PGSZ_4K_4K && + lp_encoding == LP_4K_16M) + moea64_has_lp_4k_16m = true; + idx += 2; len -= 2; nptlp--; } dprintf("\n"); - if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0) + if (has_lp && moea64_has_lp_4k_16m) break; } - if (len > 0) { + if (has_lp) { moea64_large_page_shift = shift; moea64_large_page_size = 1ULL << lp_size; moea64_large_page_mask = moea64_large_page_size - 1; @@ -393,7 +405,7 @@ phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, &pt.pte_lo, &junk); - if (pt.pte_hi & LPTE_WIRED) + if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0) continue; /* This is a candidate, so remember it */ @@ -414,68 +426,61 @@ return (k); } -static int64_t -mphyp_pte_insert(struct pvo_entry *pvo) +static __inline int64_t +mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte) { - struct rm_priotracker track; + struct lpte evicted; + uint64_t index, junk; int64_t result; - struct lpte evicted, pte; - uint64_t index, junk, lastptelo; - PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); - - /* Initialize PTE */ - moea64_pte_from_pvo(pvo, &pte); - evicted.pte_hi = 0; - - /* Make sure further insertion is locked out during evictions */ - rm_rlock(&mphyp_eviction_lock, &track); - /* * First try primary hash. */ pvo->pvo_pte.slot &= ~7UL; /* Base slot address */ - result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi, - pte.pte_lo, &index, &evicted.pte_lo, &junk); + result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi, + pte->pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { - rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld " "(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot, - moea64_pteg_count, pte.pte_hi, pte.pte_lo)); + moea64_pteg_count, pte->pte_hi, pte->pte_lo)); /* * Next try secondary hash. 
*/ pvo->pvo_vaddr ^= PVO_HID; - pte.pte_hi ^= LPTE_HID; + pte->pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, - pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk); + pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk); if (result == H_SUCCESS) { - rm_runlock(&mphyp_eviction_lock, &track); pvo->pvo_pte.slot = index; return (0); } KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld", result)); - /* - * Out of luck. Find a PTE to sacrifice. - */ + return (-1); +} - /* Lock out all insertions for a bit */ - rm_runlock(&mphyp_eviction_lock, &track); - rm_wlock(&mphyp_eviction_lock); +static __inline int64_t +mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte) +{ + struct lpte evicted; + uint64_t index, junk, lastptelo; + int64_t result; + + evicted.pte_hi = 0; + index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); if (index == -1L) { /* Try other hash table? */ pvo->pvo_vaddr ^= PVO_HID; - pte.pte_hi ^= LPTE_HID; + pte->pte_hi ^= LPTE_HID; pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3); index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted); } @@ -500,18 +505,50 @@ /* * Set the new PTE. */ - result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi, - pte.pte_lo, &index, &evicted.pte_lo, &junk); - rm_wunlock(&mphyp_eviction_lock); /* All clear */ + result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi, + pte->pte_lo, &index, &evicted.pte_lo, &junk); pvo->pvo_pte.slot = index; if (result == H_SUCCESS) return (0); + rm_wunlock(&mphyp_eviction_lock); panic("Page replacement error: %ld", result); return (result); } +static int64_t +mphyp_pte_insert(struct pvo_entry *pvo) +{ + struct rm_priotracker track; + int64_t ret; + struct lpte pte; + + PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); + + /* Initialize PTE */ + moea64_pte_from_pvo(pvo, &pte); + + /* Make sure further insertion is locked out during evictions */ + rm_rlock(&mphyp_eviction_lock, &track); + + ret = mphyp_pte_insert_locked(pvo, &pte); + rm_runlock(&mphyp_eviction_lock, &track); + + if (ret == -1) { + /* + * Out of luck. Find a PTE to sacrifice. 
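+		 * mphyp_pte_evict_and_insert_locked() is called below with
+		 * the eviction lock held for writing, so concurrent
+		 * inserters (which take it for reading) are held off while
+		 * a victim PTE is chosen.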
+ */ + + /* Lock out all insertions for a bit */ + rm_wlock(&mphyp_eviction_lock); + ret = mphyp_pte_evict_and_insert_locked(pvo, &pte); + rm_wunlock(&mphyp_eviction_lock); /* All clear */ + } + + return (ret); +} + static void * mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes) { @@ -540,4 +577,92 @@ dctx->ptex = ptex; return (buf); +} + +static int64_t +mphyp_pte_unset_sp(struct pvo_entry *pvo) +{ + struct lpte pte; + uint64_t junk, refchg; + int err; + vm_offset_t eva; + pmap_t pm; + + pm = pvo->pvo_pmap; + PMAP_LOCK_ASSERT(pm, MA_OWNED); + KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, + ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + + refchg = 0; + eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + + for (; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + moea64_pte_from_pvo(pvo, &pte); + + err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot, + pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo, + &junk); + KASSERT(err == H_SUCCESS || err == H_NOT_FOUND, + ("Error removing page: %d", err)); + + if (err == H_NOT_FOUND) + STAT_MOEA64(moea64_pte_overflow--); + refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG); + } + + return (refchg); +} + +static int64_t +mphyp_pte_insert_sp(struct pvo_entry *pvo) +{ + struct rm_priotracker track; + int64_t ret; + struct lpte pte; + vm_offset_t eva; + pmap_t pm; + + pm = pvo->pvo_pmap; + PMAP_LOCK_ASSERT(pm, MA_OWNED); + KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0, + ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo))); + + eva = PVO_VADDR(pvo) + HPT_SP_SIZE; + + /* Make sure further insertion is locked out during evictions */ + rm_rlock(&mphyp_eviction_lock, &track); + + for (; pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + /* Initialize PTE */ + moea64_pte_from_pvo(pvo, &pte); + + ret = mphyp_pte_insert_locked(pvo, &pte); + if (ret == -1) { + /* + * Out of luck. Find a PTE to sacrifice. + */ + + /* Lock out all insertions for a bit */ + rm_runlock(&mphyp_eviction_lock, &track); + rm_wlock(&mphyp_eviction_lock); + mphyp_pte_evict_and_insert_locked(pvo, &pte); + rm_wunlock(&mphyp_eviction_lock); /* All clear */ + rm_rlock(&mphyp_eviction_lock, &track); + } + } + + rm_runlock(&mphyp_eviction_lock, &track); + return (0); +} + +static int64_t +mphyp_pte_replace_sp(struct pvo_entry *pvo) +{ + int64_t refchg; + + refchg = mphyp_pte_unset_sp(pvo); + mphyp_pte_insert_sp(pvo); + return (refchg); } Index: head/sys/vm/vm_fault.c =================================================================== --- head/sys/vm/vm_fault.c +++ head/sys/vm/vm_fault.c @@ -542,7 +542,8 @@ pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; #if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ - __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) + __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) || \ + defined(__powerpc64__) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||