Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -188,6 +188,64 @@
 #define pmap_l1_pindex(v) (NUL2E + ((v) >> L1_SHIFT))
 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
 
+#ifdef NUMA
+struct pmap_large_md_page {
+        struct rwlock pv_lock;
+        struct md_page pv_page;
+        /* Pad to a power of 2 */
+        int pv_pad[2];
+};
+
+static struct pmap_large_md_page *
+_pa_to_pmdp(vm_paddr_t pa)
+{
+        struct vm_phys_seg *seg;
+        int segind;
+
+        for (segind = 0; segind < vm_phys_nsegs; segind++) {
+                seg = &vm_phys_segs[segind];
+                if (pa >= seg->start && pa < seg->end)
+                        return ((struct pmap_large_md_page *)seg->md_first +
+                            pmap_l2_pindex(pa) - pmap_l2_pindex(seg->start));
+        }
+        return (NULL);
+}
+
+static struct pmap_large_md_page *
+pa_to_pmdp(vm_paddr_t pa)
+{
+        struct pmap_large_md_page *pvd;
+
+        pvd = _pa_to_pmdp(pa);
+        if (pvd == NULL)
+                panic("pa 0x%jx not within vm_phys_segs", (uintmax_t)pa);
+        return (pvd);
+}
+
+static struct pmap_large_md_page *
+page_to_pmdp(vm_page_t m)
+{
+        struct vm_phys_seg *seg;
+
+        seg = &vm_phys_segs[m->segind];
+        return ((struct pmap_large_md_page *)seg->md_first +
+            pmap_l2_pindex(VM_PAGE_TO_PHYS(m)) - pmap_l2_pindex(seg->start));
+}
+
+#define pa_to_pvh(pa)   (&(pa_to_pmdp(pa)->pv_page))
+#define page_to_pvh(m)  (&(page_to_pmdp(m)->pv_page))
+
+#define PHYS_TO_PV_LIST_LOCK(pa)        ({              \
+        struct pmap_large_md_page *_pvd;                \
+        struct rwlock *_lock;                           \
+        _pvd = _pa_to_pmdp(pa);                         \
+        if (__predict_false(_pvd == NULL))              \
+                _lock = &pv_dummy_large.pv_lock;        \
+        else                                            \
+                _lock = &(_pvd->pv_lock);               \
+        _lock;                                          \
+})
+#else
 static struct md_page *
 pa_to_pvh(vm_paddr_t pa)
 {
@@ -217,6 +275,7 @@
 
 #define PHYS_TO_PV_LIST_LOCK(pa)        \
         (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+#endif
 
 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
         struct rwlock **_lockp = (lockp);               \
@@ -304,9 +363,16 @@
 
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
+#ifdef NUMA
+__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
+#define pv_dummy pv_dummy_large.pv_page
+__read_mostly static struct pmap_large_md_page *pv_table;
+__read_mostly vm_paddr_t pmap_last_pa;
+#else
 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
+#endif
 
 vm_paddr_t dmap_phys_base;      /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;       /* The limit of the dmap region */
@@ -1284,59 +1350,111 @@
         mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
 }
 
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- */
-void
-pmap_init(void)
+#ifdef NUMA
+static void
+pmap_init_pv_table(void)
 {
         struct vm_phys_seg *seg, *next_seg;
-        struct md_page *pvh;
+        struct pmap_large_md_page *pvd;
         vm_size_t s;
-        uint64_t mmfr1;
-        int i, pv_npg, vmid_bits;
+        long start, end, highest, pv_npg;
+        int domain, i, j, pages;
 
         /*
-         * Are large page mappings enabled?
+         * We strongly depend on the size being a power of two, so the assert
+         * is overzealous. However, should the struct be resized to a
+         * different power of two, the code below needs to be revisited.
          */
-        TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
-        if (superpages_enabled) {
-                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
-                    ("pmap_init: can't assign to pagesizes[1]"));
-                pagesizes[1] = L2_SIZE;
-                if (L1_BLOCKS_SUPPORTED) {
-                        KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
-                            ("pmap_init: can't assign to pagesizes[2]"));
-                        pagesizes[2] = L1_SIZE;
-                }
+        CTASSERT((sizeof(*pvd) == 64));
+
+        /*
+         * Calculate the size of the array.
+         */
+        pv_npg = 0;
+        for (i = 0; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                pv_npg += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
         }
+        s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
+        s = round_page(s);
+        pv_table = (struct pmap_large_md_page *)kva_alloc(s);
+        if (pv_table == NULL)
+                panic("%s: kva_alloc failed\n", __func__);
 
         /*
-         * Initialize the ASID allocator.
+         * Iterate physical segments to allocate space for respective pages.
          */
-        pmap_init_asids(&asids,
-            (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+        highest = -1;
+        s = 0;
+        for (i = 0; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                start = highest + 1;
+                end = start + pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
+                domain = seg->domain;
 
-        if (has_hyp()) {
-                mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
-                vmid_bits = 8;
+                if (highest >= end)
+                        continue;
 
-                if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
-                    ID_AA64MMFR1_VMIDBits_16)
-                        vmid_bits = 16;
-                pmap_init_asids(&vmids, vmid_bits);
+                pvd = &pv_table[start];
+
+                pages = end - start + 1;
+                s = round_page(pages * sizeof(*pvd));
+                highest = start + (s / sizeof(*pvd)) - 1;
+
+                for (j = 0; j < s; j += PAGE_SIZE) {
+                        vm_page_t m = vm_page_alloc_noobj_domain(domain, 0);
+                        if (m == NULL)
+                                panic("failed to allocate PV table page");
+                        pmap_zero_page(m);
+                        pmap_qenter((vm_offset_t)pvd + j, &m, 1);
+                }
+
+                for (j = 0; j < s / sizeof(*pvd); j++) {
+                        rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+                        TAILQ_INIT(&pvd->pv_page.pv_list);
+                        pvd++;
+                }
         }
+        pvd = &pv_dummy_large;
+        memset(pvd, 0, sizeof(*pvd));
+        rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+        TAILQ_INIT(&pvd->pv_page.pv_list);
 
         /*
-         * Initialize pv chunk lists.
+         * Set pointers from vm_phys_segs to pv_table.
          */
-        for (i = 0; i < PMAP_MEMDOM; i++) {
-                mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
-                    MTX_DEF);
-                TAILQ_INIT(&pv_chunks[i].pvc_list);
+        for (i = 0, pvd = pv_table; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                seg->md_first = pvd;
+                pvd += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
+
+                /*
+                 * If there is a following segment, and the final
+                 * superpage of this segment and the initial superpage
+                 * of the next segment are the same then adjust the
+                 * pv_table entry for that next segment down by one so
+                 * that the pv_table entries will be shared.
+                 */
+                if (i + 1 < vm_phys_nsegs) {
+                        next_seg = &vm_phys_segs[i + 1];
+                        if (pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) - 1 ==
+                            pmap_l2_pindex(next_seg->start)) {
+                                pvd--;
+                        }
+                }
         }
+}
+#else
+static void
+pmap_init_pv_table(void)
+{
+        struct vm_phys_seg *seg, *next_seg;
+        struct md_page *pvh;
+        vm_size_t s;
+        long i, pv_npg;
 
         /*
          * Initialize the pool of pv list locks.
@@ -1357,7 +1475,7 @@
         /*
          * Allocate memory for the pv head table for superpages.
          */
-        s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+        s = (vm_size_t)pv_npg * sizeof(struct md_page);
         s = round_page(s);
         pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
         for (i = 0; i < pv_npg; i++)
@@ -1388,6 +1506,61 @@
                         }
                 }
         }
+}
+#endif
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void
+pmap_init(void)
+{
+        uint64_t mmfr1;
+        int i, vmid_bits;
+
+        /*
+         * Are large page mappings enabled?
+         */
+        TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
+        if (superpages_enabled) {
+                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+                    ("pmap_init: can't assign to pagesizes[1]"));
+                pagesizes[1] = L2_SIZE;
+                if (L1_BLOCKS_SUPPORTED) {
+                        KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+                            ("pmap_init: can't assign to pagesizes[2]"));
+                        pagesizes[2] = L1_SIZE;
+                }
+        }
+
+        /*
+         * Initialize the ASID allocator.
+         */
+        pmap_init_asids(&asids,
+            (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+
+        if (has_hyp()) {
+                mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+                vmid_bits = 8;
+
+                if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
+                    ID_AA64MMFR1_VMIDBits_16)
+                        vmid_bits = 16;
+                pmap_init_asids(&vmids, vmid_bits);
+        }
+
+        /*
+         * Initialize pv chunk lists.
+         */
+        for (i = 0; i < PMAP_MEMDOM; i++) {
+                mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
+                    MTX_DEF);
+                TAILQ_INIT(&pv_chunks[i].pvc_list);
+        }
+        pmap_init_pv_table();
+
         vm_initialized = 1;
 }
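
Reviewer note (not part of the patch): the NUMA path above replaces the hashed pv_list_locks[] array with one lock and one pv list head per 2 MB (L2) superpage, allocated from that superpage's own NUMA domain and found by address arithmetic rather than hashing. The sketch below is a standalone userspace model of that lookup, assuming 2 MB superpages; seg_model, md_base, l2_pindex() and pmdp_index() are illustrative names that merely mirror vm_phys_seg, md_first, pmap_l2_pindex() and _pa_to_pmdp() from the patch, not kernel interfaces.

/*
 * Standalone model of the per-superpage pv_table indexing used by
 * _pa_to_pmdp(): one entry per 2 MB (L2) superpage, located at
 * md_base + l2_pindex(pa) - l2_pindex(seg->start).
 * Review aid only; names are illustrative, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define L2_SHIFT        21                      /* 2 MB superpages */
#define L2_SIZE         (1UL << L2_SHIFT)

/* Equivalent of pmap_l2_pindex(): superpage index of a physical address. */
static uint64_t
l2_pindex(uint64_t pa)
{
        return (pa >> L2_SHIFT);
}

/* Hypothetical stand-in for a vm_phys_seg and its md_first base index. */
struct seg_model {
        uint64_t start;         /* seg->start */
        uint64_t end;           /* seg->end */
        uint64_t md_base;       /* index of seg->md_first within pv_table */
};

/*
 * Mirror of _pa_to_pmdp(): return the pv_table index for pa, or -1 if the
 * address falls outside every segment (the kernel falls back to
 * pv_dummy_large in that case).
 */
static int64_t
pmdp_index(const struct seg_model *segs, int nsegs, uint64_t pa)
{
        int i;

        for (i = 0; i < nsegs; i++) {
                if (pa >= segs[i].start && pa < segs[i].end)
                        return (segs[i].md_base +
                            l2_pindex(pa) - l2_pindex(segs[i].start));
        }
        return (-1);
}

int
main(void)
{
        /* Two made-up segments; the first spans eight superpages. */
        struct seg_model segs[2] = {
                { .start = 0x80000000, .end = 0x80000000 + 8 * L2_SIZE,
                  .md_base = 0 },
                { .start = 0x100000000, .end = 0x100000000 + 4 * L2_SIZE,
                  .md_base = 8 },
        };

        /* Addresses within the same 2 MB superpage share one entry/lock. */
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x80000000));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x80000000 + L2_SIZE - 1));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x100000000 + L2_SIZE));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x200000000)); /* -1 */
        return (0);
}

Built with a stock C compiler, this prints 0, 0, 9 and -1: the first two addresses fall in the same superpage and therefore share one pv_table entry (and lock), while an address outside every segment has no entry, the case the kernel handles by falling back to pv_dummy_large.pv_lock in PHYS_TO_PV_LIST_LOCK().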