Index: head/sys/arm/arm/physmem.c
===================================================================
--- head/sys/arm/arm/physmem.c	(revision 360081)
+++ head/sys/arm/arm/physmem.c	(nonexistent)
@@ -1,406 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "opt_acpi.h"
-#include "opt_ddb.h"
-
-/*
- * Routines for describing and initializing anything related to physical memory.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_page.h>
-#include <vm/vm_phys.h>
-#include <machine/md_var.h>
-#include <arm/include/physmem.h>
-
-/*
- * These structures are used internally to keep track of regions of physical
- * ram, and regions within the physical ram that need to be excluded.  An
- * exclusion region can be excluded from crash dumps, from the vm pool of pages
- * that can be allocated, or both, depending on the exclusion flags associated
- * with the region.
- */
-#ifdef DEV_ACPI
-#define	MAX_HWCNT	32	/* ACPI needs more regions */
-#define	MAX_EXCNT	32
-#else
-#define	MAX_HWCNT	16
-#define	MAX_EXCNT	16
-#endif
-
-#if defined(__arm__)
-#define	MAX_PHYS_ADDR	0xFFFFFFFFull
-#define	pm_btop(x)	arm32_btop(x)
-#elif defined(__aarch64__)
-#define	MAX_PHYS_ADDR	0xFFFFFFFFFFFFFFFFull
-#define	pm_btop(x)	arm64_btop(x)
-#endif
-
-struct region {
-	vm_paddr_t	addr;
-	vm_size_t	size;
-	uint32_t	flags;
-};
-
-static struct region hwregions[MAX_HWCNT];
-static struct region exregions[MAX_EXCNT];
-
-static size_t hwcnt;
-static size_t excnt;
-
-/*
- * realmem is the total number of hardware pages, excluded or not.
- * Maxmem is one greater than the last physical page number.
- */
-long realmem;
-long Maxmem;
-
-/* The address at which the kernel was loaded.  Set early in initarm(). */
-vm_paddr_t arm_physmem_kernaddr;
-
-/*
- * Print the contents of the physical and excluded region tables using the
- * provided printf-like output function (which will be either printf or
- * db_printf).
- */
-static void
-physmem_dump_tables(int (*prfunc)(const char *, ...))
-{
-	int flags, i;
-	uintmax_t addr, size;
-	const unsigned int mbyte = 1024 * 1024;
-
-	prfunc("Physical memory chunk(s):\n");
-	for (i = 0; i < hwcnt; ++i) {
-		addr = hwregions[i].addr;
-		size = hwregions[i].size;
-		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages)\n", addr,
-		    addr + size - 1, size / mbyte, size / PAGE_SIZE);
-	}
-
-	prfunc("Excluded memory regions:\n");
-	for (i = 0; i < excnt; ++i) {
-		addr  = exregions[i].addr;
-		size  = exregions[i].size;
-		flags = exregions[i].flags;
-		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages) %s %s\n",
-		    addr, addr + size - 1, size / mbyte, size / PAGE_SIZE,
-		    (flags & EXFLAG_NOALLOC) ? "NoAlloc" : "",
-		    (flags & EXFLAG_NODUMP)  ? "NoDump" : "");
-	}
-
-#ifdef DEBUG
-	prfunc("Avail lists:\n");
-	for (i = 0; phys_avail[i] != 0; ++i) {
-		prfunc("  phys_avail[%d] 0x%08x\n", i, phys_avail[i]);
-	}
-	for (i = 0; dump_avail[i] != 0; ++i) {
-		prfunc("  dump_avail[%d] 0x%08x\n", i, dump_avail[i]);
-	}
-#endif
-}
-
-/*
- * Print the contents of the static mapping table.  Used for bootverbose.
- */
-void
-arm_physmem_print_tables(void)
-{
-
-	physmem_dump_tables(printf);
-}
-
-/*
- * Walk the list of hardware regions, processing it against the list of
- * exclusions that contain the given exflags, and generating an "avail list".
- *
- * Updates the value at *pavail with the sum of all pages in all hw regions.
- *
- * Returns the number of pages of non-excluded memory added to the avail list.
- */
-static size_t
-regions_to_avail(vm_paddr_t *avail, uint32_t exflags, size_t maxavail,
-    long *pavail, long *prealmem)
-{
-	size_t acnt, exi, hwi;
-	uint64_t end, start, xend, xstart;
-	long availmem, totalmem;
-	const struct region *exp, *hwp;
-
-	totalmem = 0;
-	availmem = 0;
-	acnt = 0;
-	for (hwi = 0, hwp = hwregions; hwi < hwcnt; ++hwi, ++hwp) {
-		start = hwp->addr;
-		end   = hwp->size + start;
-		totalmem += pm_btop((vm_offset_t)(end - start));
-		for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) {
-			/*
-			 * If the excluded region does not match given flags,
-			 * continue checking with the next excluded region.
-			 */
-			if ((exp->flags & exflags) == 0)
-				continue;
-			xstart = exp->addr;
-			xend   = exp->size + xstart;
-			/*
-			 * If the excluded region ends before this hw region,
-			 * continue checking with the next excluded region.
-			 */
-			if (xend <= start)
-				continue;
-			/*
-			 * If the excluded region begins after this hw region
-			 * we're done because both lists are sorted.
-			 */
-			if (xstart >= end)
-				break;
-			/*
-			 * If the excluded region completely covers this hw
-			 * region, shrink this hw region to zero size.
-			 */
-			if ((start >= xstart) && (end <= xend)) {
-				start = xend;
-				end = xend;
-				break;
-			}
-			/*
-			 * If the excluded region falls wholly within this hw
-			 * region without abutting or overlapping the beginning
-			 * or end, create an available entry from the leading
-			 * fragment, then adjust the start of this hw region to
-			 * the end of the excluded region, and continue checking
-			 * the next excluded region because another exclusion
-			 * could affect the remainder of this hw region.
-			 */
-			if ((xstart > start) && (xend < end)) {
-				if (acnt > 0 &&
-				    avail[acnt - 1] == (vm_paddr_t)start) {
-					avail[acnt - 1] = (vm_paddr_t)xstart;
-				} else {
-					avail[acnt++] = (vm_paddr_t)start;
-					avail[acnt++] = (vm_paddr_t)xstart;
-				}
-				availmem +=
-				    pm_btop((vm_offset_t)(xstart - start));
-				start = xend;
-				continue;
-			}
-			/*
-			 * We know the excluded region overlaps either the start
-			 * or end of this hardware region (but not both), trim
-			 * the excluded portion off the appropriate end.
-			 */
-			if (xstart <= start)
-				start = xend;
-			else
-				end = xstart;
-		}
-		/*
-		 * If the trimming actions above left a non-zero size, create an
-		 * available entry for it.
-		 */
-		if (end > start) {
-			if (acnt > 0 && avail[acnt - 1] == (vm_paddr_t)start) {
-				avail[acnt - 1] = (vm_paddr_t)end;
-			} else {
-				avail[acnt++] = (vm_paddr_t)start;
-				avail[acnt++] = (vm_paddr_t)end;
-			}
-			availmem += pm_btop((vm_offset_t)(end - start));
-		}
-		if (acnt >= maxavail)
-			panic("Not enough space in the dump/phys_avail arrays");
-	}
-
-	if (pavail != NULL)
-		*pavail = availmem;
-	if (prealmem != NULL)
-		*prealmem = totalmem;
-	return (acnt);
-}
-
-/*
- * Insertion-sort a new entry into a regions list; sorted by start address.
- */
-static size_t
-insert_region(struct region *regions, size_t rcnt, vm_paddr_t addr,
-    vm_size_t size, uint32_t flags)
-{
-	size_t i;
-	struct region *ep, *rp;
-
-	ep = regions + rcnt;
-	for (i = 0, rp = regions; i < rcnt; ++i, ++rp) {
-		if (rp->addr == addr && rp->size == size) /* Pure dup. */
-			return (rcnt);
-		if (flags == rp->flags) {
-			if (addr + size == rp->addr) {
-				rp->addr = addr;
-				rp->size += size;
-				return (rcnt);
-			} else if (rp->addr + rp->size == addr) {
-				rp->size += size;
-				return (rcnt);
-			}
-		}
-		if (addr < rp->addr) {
-			bcopy(rp, rp + 1, (ep - rp) * sizeof(*rp));
-			break;
-		}
-	}
-	rp->addr  = addr;
-	rp->size  = size;
-	rp->flags = flags;
-	rcnt++;
-
-	return (rcnt);
-}
-
-/*
- * Add a hardware memory region.
- */
-void
-arm_physmem_hardware_region(uint64_t pa, uint64_t sz)
-{
-	vm_offset_t adj;
-
-	/*
-	 * Filter out the page at PA 0x00000000.  The VM can't handle it, as
-	 * pmap_extract() == 0 means failure.
-	 */
-	if (pa == 0) {
-		if (sz <= PAGE_SIZE)
-			return;
-		pa  = PAGE_SIZE;
-		sz -= PAGE_SIZE;
-	} else if (pa > MAX_PHYS_ADDR) {
-		/* This range is past usable memory, ignore it */
-		return;
-	}
-
-	/*
-	 * Also filter out the page at the end of the physical address space --
-	 * if addr is non-zero and addr+size is zero we wrapped to the next byte
-	 * beyond what vm_paddr_t can express.  That leads to a NULL pointer
-	 * deref early in startup; work around it by leaving the last page out.
-	 *
-	 * XXX This just in:  subtract out a whole megabyte, not just 1 page.
-	 * Reducing the size by anything less than 1MB results in the NULL
-	 * pointer deref in _vm_map_lock_read().  Better to give up a megabyte
-	 * than leave some folks with an unusable system while we investigate.
-	 */
-	if ((pa + sz) > (MAX_PHYS_ADDR - 1024 * 1024)) {
-		sz = MAX_PHYS_ADDR - pa + 1;
-		if (sz <= 1024 * 1024)
-			return;
-		sz -= 1024 * 1024;
-	}
-
-	/*
-	 * Round the starting address up to a page boundary, and truncate the
-	 * ending page down to a page boundary.
-	 */
-	adj = round_page(pa) - pa;
-	pa  = round_page(pa);
-	sz  = trunc_page(sz - adj);
-
-	if (sz > 0 && hwcnt < nitems(hwregions))
-		hwcnt = insert_region(hwregions, hwcnt, pa, sz, 0);
-}
-
-/*
- * Add an exclusion region.
- */
-void
-arm_physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t exflags)
-{
-	vm_offset_t adj;
-
-	/*
-	 * Truncate the starting address down to a page boundary, and round the
-	 * ending page up to a page boundary.
-	 */
-	adj = pa - trunc_page(pa);
-	pa  = trunc_page(pa);
-	sz  = round_page(sz + adj);
-
-	if (excnt >= nitems(exregions))
-		panic("failed to exclude region %#jx-%#jx", (uintmax_t)pa,
-		    (uintmax_t)(pa + sz));
-	excnt = insert_region(exregions, excnt, pa, sz, exflags);
-}
-
-size_t
-arm_physmem_avail(vm_paddr_t *avail, size_t maxavail)
-{
-
-	return (regions_to_avail(avail, EXFLAG_NOALLOC, maxavail, NULL, NULL));
-}
-
-/*
- * Process all the regions added earlier into the global avail lists.
- *
- * Updates the kernel global 'physmem' with the number of physical pages
- * available for use (all pages not in any exclusion region).
- *
- * Updates the kernel global 'Maxmem' with the page number one greater then the
- * last page of physical memory in the system.
- */
-void
-arm_physmem_init_kernel_globals(void)
-{
-	size_t nextidx;
-
-	regions_to_avail(dump_avail, EXFLAG_NODUMP, PHYS_AVAIL_ENTRIES, NULL,
-	    NULL);
-	nextidx = regions_to_avail(phys_avail, EXFLAG_NOALLOC,
-	    PHYS_AVAIL_ENTRIES, &physmem, &realmem);
-	if (nextidx == 0)
-		panic("No memory entries in phys_avail");
-	Maxmem = atop(phys_avail[nextidx - 1]);
-}
-
-#ifdef DDB
-#include <ddb/ddb.h>
-
-DB_SHOW_COMMAND(physmem, db_show_physmem)
-{
-
-	physmem_dump_tables(db_printf);
-}
-
-#endif /* DDB */
-

Property changes on: head/sys/arm/arm/physmem.c
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: head/sys/arm/arm/machdep.c
===================================================================
--- head/sys/arm/arm/machdep.c	(revision 360081)
+++ head/sys/arm/arm/machdep.c	(revision 360082)
@@ -1,1307 +1,1309 @@
 /*	$NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Mark Brinicombe
  *	for the NetBSD Project.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Machine dependent functions for kernel setup
  *
  * Created      : 17/09/94
  * Updated	: 18/04/01 updated for new wscons
  */
 
 #include "opt_ddb.h"
 #include "opt_kstack_pages.h"
 #include "opt_platform.h"
 #include "opt_sched.h"
 #include "opt_timer.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/devmap.h>
 #include <sys/efi.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/msgbuf.h>
+#include <sys/physmem.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 #include <machine/asm.h>
 #include <machine/debug_monitor.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/pcb.h>
-#include <machine/physmem.h>
 #include <machine/platform.h>
 #include <machine/sysarch.h>
 #include <machine/undefined.h>
 #include <machine/vfp.h>
 #include <machine/vmparam.h>
 
 #ifdef FDT
 #include <dev/fdt/fdt_common.h>
 #include <machine/ofw_machdep.h>
 #endif
 
 #ifdef DEBUG
 #define	debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define	debugf(fmt, args...)
 #endif
 
 #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
     defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) || \
     defined(COMPAT_FREEBSD9)
 #error FreeBSD/arm doesn't provide compatibility with releases prior to 10
 #endif
 
 #if __ARM_ARCH >= 6 && !defined(INTRNG)
 #error armv6 requires INTRNG
 #endif
 
 #ifndef _ARM_ARCH_5E
 #error FreeBSD requires ARMv5 or later
 #endif
 
 struct pcpu __pcpu[MAXCPU];
 struct pcpu *pcpup = &__pcpu[0];
 
 static struct trapframe proc0_tf;
 uint32_t cpu_reset_address = 0;
 int cold = 1;
 vm_offset_t vector_page;
 
+/* The address at which the kernel was loaded.  Set early in initarm(). */
+vm_paddr_t arm_physmem_kernaddr;
+
 int (*_arm_memcpy)(void *, void *, int, int) = NULL;
 int (*_arm_bzero)(void *, int, int) = NULL;
 int _min_memcpy_size = 0;
 int _min_bzero_size = 0;
 
 extern int *end;
 
 #ifdef FDT
 vm_paddr_t pmap_pa;
 #if __ARM_ARCH >= 6
 vm_offset_t systempage;
 vm_offset_t irqstack;
 vm_offset_t undstack;
 vm_offset_t abtstack;
 #else
 /*
  * This is the number of L2 page tables required for covering max
  * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf,
  * stacks etc.), uprounded to be divisible by 4.
  */
 #define KERNEL_PT_MAX	78
 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX];
 struct pv_addr systempage;
 static struct pv_addr msgbufpv;
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 static struct pv_addr kernelstack;
 #endif /* __ARM_ARCH >= 6 */
 #endif /* FDT */
 
 #ifdef PLATFORM
 static delay_func *delay_impl;
 static void *delay_arg;
 #endif
 
 struct kva_md_info kmi;
-
 /*
  * arm32_vector_init:
  *
  *	Initialize the vector page, and select whether or not to
  *	relocate the vectors.
  *
  *	NOTE: We expect the vector page to be mapped at its expected
  *	destination.
  */
 
 extern unsigned int page0[], page0_data[];
 void
 arm_vector_init(vm_offset_t va, int which)
 {
 	unsigned int *vectors = (int *) va;
 	unsigned int *vectors_data = vectors + (page0_data - page0);
 	int vec;
 
 	/*
 	 * Loop through the vectors we're taking over, and copy the
 	 * vector's insn and data word.
 	 */
 	for (vec = 0; vec < ARM_NVEC; vec++) {
 		if ((which & (1 << vec)) == 0) {
 			/* Don't want to take over this vector. */
 			continue;
 		}
 		vectors[vec] = page0[vec];
 		vectors_data[vec] = page0_data[vec];
 	}
 
 	/* Now sync the vectors. */
 	icache_sync(va, (ARM_NVEC * 2) * sizeof(u_int));
 
 	vector_page = va;
 #if __ARM_ARCH < 6
 	if (va == ARM_VECTORS_HIGH) {
 		/*
 		 * Enable high vectors in the system control reg (SCTLR).
 		 *
 		 * Assume the MD caller knows what it's doing here, and really
 		 * does want the vector page relocated.
 		 *
 		 * Note: This has to be done here (and not just in
 		 * cpu_setup()) because the vector page needs to be
 		 * accessible *before* cpu_startup() is called.
 		 * Think ddb(9) ...
 		 */
 		cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC);
 	}
 #endif
 }
 
 static void
 cpu_startup(void *dummy)
 {
 	struct pcb *pcb = thread0.td_pcb;
 	const unsigned int mbyte = 1024 * 1024;
 #if __ARM_ARCH < 6 && !defined(ARM_CACHE_LOCK_ENABLE)
 	vm_page_t m;
 #endif
 
 	identify_arm_cpu();
 
 	vm_ksubmap_init(&kmi);
 
 	/*
 	 * Display the RAM layout.
 	 */
 	printf("real memory  = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(realmem),
 	    (uintmax_t)arm32_ptob(realmem) / mbyte);
 	printf("avail memory = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(vm_free_count()),
 	    (uintmax_t)arm32_ptob(vm_free_count()) / mbyte);
 	if (bootverbose) {
-		arm_physmem_print_tables();
+		physmem_print_tables();
 		devmap_print_table();
 	}
 
 	bufinit();
 	vm_pager_bufferinit();
 	pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack +
 	    USPACE_SVC_STACK_TOP;
 	pmap_set_pcb_pagedir(kernel_pmap, pcb);
 #if __ARM_ARCH < 6
 	vector_page_setprot(VM_PROT_READ);
 	pmap_postinit();
 #ifdef ARM_CACHE_LOCK_ENABLE
 	pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS);
 	arm_lock_cache_line(ARM_TP_ADDRESS);
 #else
 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO);
 	pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m));
 #endif
 	*(uint32_t *)ARM_RAS_START = 0;
 	*(uint32_t *)ARM_RAS_END = 0xffffffff;
 #endif
 }
 
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 /*
  * Flush the D-cache for non-DMA I/O so that the I-cache can
  * be made coherent later.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 
 	dcache_wb_poc((vm_offset_t)ptr, (vm_paddr_t)vtophys(ptr), len);
 }
 
 /* Get current clock frequency for the given cpu id. */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 #if __ARM_ARCH >= 6
 	struct pcpu *pc;
 
 	pc = pcpu_find(cpu_id);
 	if (pc == NULL || rate == NULL)
 		return (EINVAL);
 
 	if (pc->pc_clock == 0)
 		return (EOPNOTSUPP);
 
 	*rate = pc->pc_clock;
 
 	return (0);
 #else
 	return (ENXIO);
 #endif
 }
 
 void
 cpu_idle(int busy)
 {
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu);
 	spinlock_enter();
 #ifndef NO_EVENTTIMERS
 	if (!busy)
 		cpu_idleclock();
 #endif
 	if (!sched_runnable())
 		cpu_sleep(0);
 #ifndef NO_EVENTTIMERS
 	if (!busy)
 		cpu_activeclock();
 #endif
 	spinlock_exit();
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu);
 }
 
 int
 cpu_idle_wakeup(int cpu)
 {
 
 	return (0);
 }
 
 #ifdef NO_EVENTTIMERS
 /*
  * Most ARM platforms don't need to do anything special to init their clocks
  * (they get intialized during normal device attachment), and by not defining a
  * cpu_initclocks() function they get this generic one.  Any platform that needs
  * to do something special can just provide their own implementation, which will
  * override this one due to the weak linkage.
  */
 void
 arm_generic_initclocks(void)
 {
 }
 __weak_reference(arm_generic_initclocks, cpu_initclocks);
 
 #else
 void
 cpu_initclocks(void)
 {
 
 #ifdef SMP
 	if (PCPU_GET(cpuid) == 0)
 		cpu_initclocks_bsp();
 	else
 		cpu_initclocks_ap();
 #else
 	cpu_initclocks_bsp();
 #endif
 }
 #endif
 
 #ifdef PLATFORM
 void
 arm_set_delay(delay_func *impl, void *arg)
 {
 
 	KASSERT(impl != NULL, ("No DELAY implementation"));
 	delay_impl = impl;
 	delay_arg = arg;
 }
 
 void
 DELAY(int usec)
 {
 
 	TSENTER();
 	delay_impl(usec, delay_arg);
 	TSEXIT();
 }
 #endif
 
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 }
 
 void
 spinlock_enter(void)
 {
 	struct thread *td;
 	register_t cspr;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count == 0) {
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		td->td_md.md_spinlock_count = 1;
 		td->td_md.md_saved_cspr = cspr;
 		critical_enter();
 	} else
 		td->td_md.md_spinlock_count++;
 }
 
 void
 spinlock_exit(void)
 {
 	struct thread *td;
 	register_t cspr;
 
 	td = curthread;
 	cspr = td->td_md.md_saved_cspr;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count == 0) {
 		critical_exit();
 		restore_interrupts(cspr);
 	}
 }
 
 /*
  * Clear registers on exec
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *tf = td->td_frame;
 
 	memset(tf, 0, sizeof(*tf));
 	tf->tf_usr_sp = stack;
 	tf->tf_usr_lr = imgp->entry_addr;
 	tf->tf_svc_lr = 0x77777777;
 	tf->tf_pc = imgp->entry_addr;
 	tf->tf_spsr = PSR_USR32_MODE;
 }
 
 
 #ifdef VFP
 /*
  * Get machine VFP context.
  */
 void
 get_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if (td == curthread) {
 		critical_enter();
 		vfp_store(&pcb->pcb_vfpstate, false);
 		critical_exit();
 	} else
 		MPASS(TD_IS_SUSPENDED(td));
 	memcpy(vfp->mcv_reg, pcb->pcb_vfpstate.reg,
 	    sizeof(vfp->mcv_reg));
 	vfp->mcv_fpscr = pcb->pcb_vfpstate.fpscr;
 }
 
 /*
  * Set machine VFP context.
  */
 void
 set_vfpcontext(struct thread *td, mcontext_vfp_t *vfp)
 {
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if (td == curthread) {
 		critical_enter();
 		vfp_discard(td);
 		critical_exit();
 	} else
 		MPASS(TD_IS_SUSPENDED(td));
 	memcpy(pcb->pcb_vfpstate.reg, vfp->mcv_reg,
 	    sizeof(pcb->pcb_vfpstate.reg));
 	pcb->pcb_vfpstate.fpscr = vfp->mcv_fpscr;
 }
 #endif
 
 int
 arm_get_vfpstate(struct thread *td, void *args)
 {
 	int rv;
 	struct arm_get_vfpstate_args ua;
 	mcontext_vfp_t	mcontext_vfp;
 
 	rv = copyin(args, &ua, sizeof(ua));
 	if (rv != 0)
 		return (rv);
 	if (ua.mc_vfp_size != sizeof(mcontext_vfp_t))
 		return (EINVAL);
 #ifdef VFP
 	get_vfpcontext(td, &mcontext_vfp);
 #else
 	bzero(&mcontext_vfp, sizeof(mcontext_vfp));
 #endif
 
 	rv = copyout(&mcontext_vfp, ua.mc_vfp,  sizeof(mcontext_vfp));
 	if (rv != 0)
 		return (rv);
 	return (0);
 }
 
 /*
  * Get machine context.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *tf = td->td_frame;
 	__greg_t *gr = mcp->__gregs;
 
 	if (clear_ret & GET_MC_CLEAR_RET) {
 		gr[_REG_R0] = 0;
 		gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C;
 	} else {
 		gr[_REG_R0]   = tf->tf_r0;
 		gr[_REG_CPSR] = tf->tf_spsr;
 	}
 	gr[_REG_R1]   = tf->tf_r1;
 	gr[_REG_R2]   = tf->tf_r2;
 	gr[_REG_R3]   = tf->tf_r3;
 	gr[_REG_R4]   = tf->tf_r4;
 	gr[_REG_R5]   = tf->tf_r5;
 	gr[_REG_R6]   = tf->tf_r6;
 	gr[_REG_R7]   = tf->tf_r7;
 	gr[_REG_R8]   = tf->tf_r8;
 	gr[_REG_R9]   = tf->tf_r9;
 	gr[_REG_R10]  = tf->tf_r10;
 	gr[_REG_R11]  = tf->tf_r11;
 	gr[_REG_R12]  = tf->tf_r12;
 	gr[_REG_SP]   = tf->tf_usr_sp;
 	gr[_REG_LR]   = tf->tf_usr_lr;
 	gr[_REG_PC]   = tf->tf_pc;
 
 	mcp->mc_vfp_size = 0;
 	mcp->mc_vfp_ptr = NULL;
 	memset(&mcp->mc_spare, 0, sizeof(mcp->mc_spare));
 
 	return (0);
 }
 
 /*
  * Set machine context.
  *
  * However, we don't set any but the user modifiable flags, and we won't
  * touch the cs selector.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	mcontext_vfp_t mc_vfp, *vfp;
 	struct trapframe *tf = td->td_frame;
 	const __greg_t *gr = mcp->__gregs;
 	int spsr;
 
 	/*
 	 * Make sure the processor mode has not been tampered with and
 	 * interrupts have not been disabled.
 	 */
 	spsr = gr[_REG_CPSR];
 	if ((spsr & PSR_MODE) != PSR_USR32_MODE ||
 	    (spsr & (PSR_I | PSR_F)) != 0)
 		return (EINVAL);
 
 #ifdef WITNESS
 	if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_size != sizeof(mc_vfp)) {
 		printf("%s: %s: Malformed mc_vfp_size: %d (0x%08X)\n",
 		    td->td_proc->p_comm, __func__,
 		    mcp->mc_vfp_size, mcp->mc_vfp_size);
 	} else if (mcp->mc_vfp_size != 0 && mcp->mc_vfp_ptr == NULL) {
 		printf("%s: %s: c_vfp_size != 0 but mc_vfp_ptr == NULL\n",
 		    td->td_proc->p_comm, __func__);
 	}
 #endif
 
 	if (mcp->mc_vfp_size == sizeof(mc_vfp) && mcp->mc_vfp_ptr != NULL) {
 		if (copyin(mcp->mc_vfp_ptr, &mc_vfp, sizeof(mc_vfp)) != 0)
 			return (EFAULT);
 		vfp = &mc_vfp;
 	} else {
 		vfp = NULL;
 	}
 
 	tf->tf_r0 = gr[_REG_R0];
 	tf->tf_r1 = gr[_REG_R1];
 	tf->tf_r2 = gr[_REG_R2];
 	tf->tf_r3 = gr[_REG_R3];
 	tf->tf_r4 = gr[_REG_R4];
 	tf->tf_r5 = gr[_REG_R5];
 	tf->tf_r6 = gr[_REG_R6];
 	tf->tf_r7 = gr[_REG_R7];
 	tf->tf_r8 = gr[_REG_R8];
 	tf->tf_r9 = gr[_REG_R9];
 	tf->tf_r10 = gr[_REG_R10];
 	tf->tf_r11 = gr[_REG_R11];
 	tf->tf_r12 = gr[_REG_R12];
 	tf->tf_usr_sp = gr[_REG_SP];
 	tf->tf_usr_lr = gr[_REG_LR];
 	tf->tf_pc = gr[_REG_PC];
 	tf->tf_spsr = gr[_REG_CPSR];
 #ifdef VFP
 	if (vfp != NULL)
 		set_vfpcontext(td, vfp);
 #endif
 	return (0);
 }
 
 void
 sendsig(catcher, ksi, mask)
 	sig_t catcher;
 	ksiginfo_t *ksi;
 	sigset_t *mask;
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	struct sysentvec *sysent;
 	int onstack;
 	int sig;
 	int code;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_usr_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct sigframe *)td->td_frame->tf_usr_sp;
 
 	/* make room on the stack */
 	fp--;
 
 	/* make the stack aligned */
 	fp = (struct sigframe *)STACKALIGN(fp);
 	/* Populate the siginfo frame. */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 #ifdef VFP
 	get_vfpcontext(td, &frame.sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_size = sizeof(fp->sf_vfp);
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = &fp->sf_vfp;
 #else
 	frame.sf_uc.uc_mcontext.mc_vfp_size = 0;
 	frame.sf_uc.uc_mcontext.mc_vfp_ptr = NULL;
 #endif
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.  We invoke the handler
 	 * directly, only returning via the trampoline.  Note the
 	 * trampoline version numbers are coordinated with machine-
 	 * dependent code in libc.
 	 */
 
 	tf->tf_r0 = sig;
 	tf->tf_r1 = (register_t)&fp->sf_si;
 	tf->tf_r2 = (register_t)&fp->sf_uc;
 
 	/* the trampoline uses r5 as the uc address */
 	tf->tf_r5 = (register_t)&fp->sf_uc;
 	tf->tf_pc = (register_t)catcher;
 	tf->tf_usr_sp = (register_t)fp;
 	sysent = p->p_sysent;
 	if (sysent->sv_sigcode_base != 0)
 		tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base;
 	else
 		tf->tf_usr_lr = (register_t)(sysent->sv_psstrings -
 		    *(sysent->sv_szsigcode));
 	/* Set the mode to enter in the signal handler */
 #if __ARM_ARCH >= 7
 	if ((register_t)catcher & 1)
 		tf->tf_spsr |= PSR_T;
 	else
 		tf->tf_spsr &= ~PSR_T;
 #endif
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr,
 	    tf->tf_usr_sp);
 
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 int
 sys_sigreturn(td, uap)
 	struct thread *td;
 	struct sigreturn_args /* {
 		const struct __ucontext *sigcntxp;
 	} */ *uap;
 {
 	ucontext_t uc;
 	int error;
 
 	if (uap == NULL)
 		return (EFAULT);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 	/* Restore register context. */
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Construct a PCB from a trapframe. This is called from kdb_trap() where
  * we want to start a backtrace from the function that caused us to enter
  * the debugger. We have the context in the trapframe, but base the trace
  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
  * enough for a backtrace.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 	pcb->pcb_regs.sf_r4 = tf->tf_r4;
 	pcb->pcb_regs.sf_r5 = tf->tf_r5;
 	pcb->pcb_regs.sf_r6 = tf->tf_r6;
 	pcb->pcb_regs.sf_r7 = tf->tf_r7;
 	pcb->pcb_regs.sf_r8 = tf->tf_r8;
 	pcb->pcb_regs.sf_r9 = tf->tf_r9;
 	pcb->pcb_regs.sf_r10 = tf->tf_r10;
 	pcb->pcb_regs.sf_r11 = tf->tf_r11;
 	pcb->pcb_regs.sf_r12 = tf->tf_r12;
 	pcb->pcb_regs.sf_pc = tf->tf_pc;
 	pcb->pcb_regs.sf_lr = tf->tf_usr_lr;
 	pcb->pcb_regs.sf_sp = tf->tf_usr_sp;
 }
 
 void
 pcpu0_init(void)
 {
 #if __ARM_ARCH >= 6
 	set_curthread(&thread0);
 #endif
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 }
 
 /*
  * Initialize proc0
  */
 void
 init_proc0(vm_offset_t kstack)
 {
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
 	thread0.td_kstack_pages = kstack_pages;
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_flags = 0;
 	thread0.td_pcb->pcb_vfpcpu = -1;
 	thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
 }
 
 #if __ARM_ARCH >= 6
 void
 set_stackptrs(int cpu)
 {
 
 	set_stackptr(PSR_IRQ32_MODE,
 	    irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 	set_stackptr(PSR_ABT32_MODE,
 	    abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 	set_stackptr(PSR_UND32_MODE,
 	    undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 }
 #else
 void
 set_stackptrs(int cpu)
 {
 
 	set_stackptr(PSR_IRQ32_MODE,
 	    irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 	set_stackptr(PSR_ABT32_MODE,
 	    abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 	set_stackptr(PSR_UND32_MODE,
 	    undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1)));
 }
 #endif
 
 static void
 arm_kdb_init(void)
 {
 
 	kdb_init();
 #ifdef KDB
 	if (boothowto & RB_KDB)
 		kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger");
 #endif
 }
 
 #ifdef FDT
 #if __ARM_ARCH < 6
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	struct pv_addr kernel_l1pt;
 	struct pv_addr dpcpu;
 	vm_offset_t dtbp, freemempos, l2_start, lastaddr;
 	uint64_t memsize;
 	uint32_t l2size;
 	char *env;
 	void *kmdp;
 	u_int l1pagetable;
 	int i, j, err_devmap, mem_regions_sz;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 
 	memsize = 0;
 
 	cpuinfo_init();
 	set_cpufuncs();
 
 	/*
 	 * Find the dtb passed in by the boot loader.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp != NULL)
 		dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 	else
 		dtbp = (vm_offset_t)NULL;
 
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == (vm_offset_t)NULL)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 	/* Grab physical memory regions information from device tree. */
 	if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0)
 		panic("Cannot get physical memory regions");
-	arm_physmem_hardware_regions(mem_regions, mem_regions_sz);
+	physmem_hardware_regions(mem_regions, mem_regions_sz);
 
 	/* Grab reserved memory regions information from device tree. */
 	if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0)
-		arm_physmem_exclude_regions(mem_regions, mem_regions_sz,
+		physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 
 	/* Platform-specific initialisation */
 	platform_probe_and_attach();
 
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/* Calculate number of L2 tables needed for mapping vm_page_array */
 	l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page);
 	l2size = (l2size >> L1_S_SHIFT) + 1;
 
 	/*
 	 * Add one table for end of kernel map, one for stacks, msgbuf and
 	 * L1 and L2 tables map,  one for vectors map and two for
 	 * l2 structures from pmap_bootstrap.
 	 */
 	l2size += 5;
 
 	/* Make it divisible by 4 */
 	l2size = (l2size + 3) & ~3;
 
 	freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK;
 
 	/* Define a macro to simplify memory allocation */
 #define valloc_pages(var, np)						\
 	alloc_pages((var).pv_va, (np));					\
 	(var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR);
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos += PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 
 	for (i = 0, j = 0; i < l2size; ++i) {
 		if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[i],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 			j = i;
 		} else {
 			kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va +
 			    L2_TABLE_SIZE_REAL * (i - j);
 			kernel_pt_table[i].pv_pa =
 			    kernel_pt_table[i].pv_va - KERNVIRTADDR +
 			    abp->abp_physaddr;
 
 		}
 	}
 	/*
 	 * Allocate a page for the system page mapped to 0x00000000
 	 * or 0xffff0000. This page will just contain the system vectors
 	 * and can be shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU);
 	valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU);
 	valloc_pages(undstack, UND_STACK_SIZE * MAXCPU);
 	valloc_pages(kernelstack, kstack_pages);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/*
 	 * Try to map as much as possible of kernel text and data using
 	 * 1MB section mapping and for the rest of initial kernel address
 	 * space use L2 coarse tables.
 	 *
 	 * Link L2 tables for mapping remainder of kernel (modulo 1MB)
 	 * and kernel structures
 	 */
 	l2_start = lastaddr & ~(L1_S_OFFSET);
 	for (i = 0 ; i < l2size - 1; i++)
 		pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE,
 		    &kernel_pt_table[i]);
 
 	pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE;
 
 	/* Map kernel code and data */
 	pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr,
 	   (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map L1 directory and allocated L2 page tables */
 	pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
 	    L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 
 	pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va,
 	    kernel_pt_table[0].pv_pa,
 	    L2_TABLE_SIZE_REAL * l2size,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 
 	/* Map allocated DPCPU, stacks and msgbuf */
 	pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa,
 	    freemempos - dpcpu.pv_va,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Link and map the vector page */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH,
 	    &kernel_pt_table[l2size - 1]);
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE);
 
 	/* Establish static device mappings. */
 	err_devmap = platform_devmap_init();
 	devmap_bootstrap(l1pagetable, NULL);
 	vm_max_kernel_address = platform_lastaddr();
 
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT);
 	pmap_pa = kernel_l1pt.pv_pa;
 	cpu_setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2));
 
 	/*
 	 * Now that proper page tables are installed, call cpu_setup() to enable
 	 * instruction and data caches and other chip-specific features.
 	 */
 	cpu_setup();
 
 	/*
 	 * Only after the SOC registers block is mapped we can perform device
 	 * tree fixups, as they may attempt to read parameters from hardware.
 	 */
 	OF_interpret("perform-fixup", 0);
 
 	platform_gpio_init();
 
 	cninit();
 
 	debugf("initarm: console initialized\n");
 	debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp);
 	debugf(" boothowto = 0x%08x\n", boothowto);
 	debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
 	arm_print_kenv();
 
 	env = kern_getenv("kernelname");
 	if (env != NULL) {
 		strlcpy(kernelname, env, sizeof(kernelname));
 		freeenv(env);
 	}
 
 	if (err_devmap != 0)
 		printf("WARNING: could not fully configure devmap, error=%d\n",
 		    err_devmap);
 
 	platform_late_init();
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE);
 
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in cpu_setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	pmap_bootstrap(freemempos, &kernel_l1pt);
 	msgbufp = (void *)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
-	arm_physmem_exclude_region(abp->abp_physaddr,
+	physmem_exclude_region(abp->abp_physaddr,
 	    (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC);
-	arm_physmem_init_kernel_globals();
+	physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	dbg_monitor_init();
 	arm_kdb_init();
 
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 #else /* __ARM_ARCH < 6 */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	vm_paddr_t lastaddr;
 	vm_offset_t dtbp, kernelstack, dpcpu;
 	char *env;
 	void *kmdp;
 	int err_devmap, mem_regions_sz;
 #ifdef EFI
 	struct efi_map_header *efihdr;
 #endif
 
 	/* get last allocated physical address */
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr;
 
 	set_cpufuncs();
 	cpuinfo_init();
 
 	/*
 	 * Find the dtb passed in by the boot loader.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == (vm_offset_t)NULL)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 #if defined(LINUX_BOOT_ABI)
 	arm_parse_fdt_bootargs();
 #endif
 
 #ifdef EFI
 	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
 	if (efihdr != NULL) {
 		arm_add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz);
 	} else
 #endif
 	{
 		/* Grab physical memory regions information from device tree. */
 		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,NULL) != 0)
 			panic("Cannot get physical memory regions");
 	}
-	arm_physmem_hardware_regions(mem_regions, mem_regions_sz);
+	physmem_hardware_regions(mem_regions, mem_regions_sz);
 
 	/* Grab reserved memory regions information from device tree. */
 	if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0)
-		arm_physmem_exclude_regions(mem_regions, mem_regions_sz,
+		physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 
 	/*
 	 * Set TEX remapping registers.
 	 * Setup kernel page tables and switch to kernel L1 page table.
 	 */
 	pmap_set_tex();
 	pmap_bootstrap_prepare(lastaddr);
 
 	/*
 	 * If EARLY_PRINTF support is enabled, we need to re-establish the
 	 * mapping after pmap_bootstrap_prepare() switches to new page tables.
 	 * Note that we can only do the remapping if the VA is outside the
 	 * kernel, now that we have real virtual (not VA=PA) mappings in effect.
 	 * Early printf does not work between the time pmap_set_tex() does
 	 * cp15_prrr_set() and this code remaps the VA.
 	 */
 #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE
 	pmap_preboot_map_attr(SOCDEV_PA, SOCDEV_VA, 1024 * 1024, 
 	    VM_PROT_READ | VM_PROT_WRITE, VM_MEMATTR_DEVICE);
 #endif
 
 	/*
 	 * Now that proper page tables are installed, call cpu_setup() to enable
 	 * instruction and data caches and other chip-specific features.
 	 */
 	cpu_setup();
 
 	/* Platform-specific initialisation */
 	platform_probe_and_attach();
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/*
 	 * Allocate a page for the system page mapped to 0xffff0000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	systempage = pmap_preboot_get_pages(1);
 
 	/* Map the vector page. */
 	pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH,  1);
 	if (virtual_end >= ARM_VECTORS_HIGH)
 		virtual_end = ARM_VECTORS_HIGH - 1;
 
 	/* Allocate dynamic per-cpu area. */
 	dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
 
 	/* Allocate stacks for all modes */
 	irqstack    = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU);
 	abtstack    = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU);
 	undstack    = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU );
 	kernelstack = pmap_preboot_get_vpages(kstack_pages);
 
 	/* Allocate message buffer. */
 	msgbufp = (void *)pmap_preboot_get_vpages(
 	    round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 	mutex_init();
 
 	/* Establish static device mappings. */
 	err_devmap = platform_devmap_init();
 	devmap_bootstrap(0, NULL);
 	vm_max_kernel_address = platform_lastaddr();
 
 	/*
 	 * Only after the SOC registers block is mapped we can perform device
 	 * tree fixups, as they may attempt to read parameters from hardware.
 	 */
 	OF_interpret("perform-fixup", 0);
 	platform_gpio_init();
 	cninit();
 
 	/*
 	 * If we made a mapping for EARLY_PRINTF after pmap_bootstrap_prepare(),
 	 * undo it now that the normal console printf works.
 	 */
 #if defined(EARLY_PRINTF) && defined(SOCDEV_PA) && defined(SOCDEV_VA) && SOCDEV_VA < KERNBASE
 	pmap_kremove(SOCDEV_VA);
 #endif
 
 	debugf("initarm: console initialized\n");
 	debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp);
 	debugf(" boothowto = 0x%08x\n", boothowto);
 	debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
 	debugf(" lastaddr1: 0x%08x\n", lastaddr);
 	arm_print_kenv();
 
 	env = kern_getenv("kernelname");
 	if (env != NULL)
 		strlcpy(kernelname, env, sizeof(kernelname));
 
 	if (err_devmap != 0)
 		printf("WARNING: could not fully configure devmap, error=%d\n",
 		    err_devmap);
 
 	platform_late_init();
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in cpu_setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	/* Set stack for exception handlers */
 	undefined_init();
 	init_proc0(kernelstack);
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	enable_interrupts(PSR_A);
 	pmap_bootstrap(0);
 
 	/* Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
-	arm_physmem_exclude_region(abp->abp_physaddr,
+	physmem_exclude_region(abp->abp_physaddr,
 		pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC);
-	arm_physmem_init_kernel_globals();
+	physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	/* Init message buffer. */
 	msgbufinit(msgbufp, msgbufsize);
 	dbg_monitor_init();
 	arm_kdb_init();
 	/* Apply possible BP hardening. */
 	cpuinfo_init_bp_hardening();
 	return ((void *)STACKALIGN(thread0.td_pcb));
 
 }
 
 #endif /* __ARM_ARCH < 6 */
 #endif /* FDT */
Index: head/sys/arm/arm/machdep_boot.c
===================================================================
--- head/sys/arm/arm/machdep_boot.c	(revision 360081)
+++ head/sys/arm/arm/machdep_boot.c	(revision 360082)
@@ -1,517 +1,517 @@
 /*-
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_platform.h"
 #include "opt_ddb.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/ctype.h>
 #include <sys/linker.h>
+#include <sys/physmem.h>
 #include <sys/reboot.h>
 #include <sys/sysctl.h>
 #if defined(LINUX_BOOT_ABI)
 #include <sys/boot.h>
 #endif
 
 #include <machine/atags.h>
 #include <machine/cpu.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
-#include <machine/physmem.h>
 #include <machine/vmparam.h>	/* For KERNVIRTADDR */
 
 #ifdef FDT
 #include <contrib/libfdt/libfdt.h>
 #include <dev/fdt/fdt_common.h>
 #endif
 
 #ifdef EFI
 #include <sys/efi.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #ifdef DEBUG
 #define	debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define	debugf(fmt, args...)
 #endif
 
 #ifdef LINUX_BOOT_ABI
 static char static_kenv[4096];
 #endif
 
 extern int *end;
 
 static uint32_t board_revision;
 /* hex representation of uint64_t */
 static char board_serial[32];
 static char *loader_envp;
 
 #if defined(LINUX_BOOT_ABI)
 #define LBABI_MAX_BANKS	10
 #define CMDLINE_GUARD "FreeBSD:"
 static uint32_t board_id;
 static struct arm_lbabi_tag *atag_list;
 static char linux_command_line[LBABI_MAX_COMMAND_LINE + 1];
 static char atags[LBABI_MAX_COMMAND_LINE * 2];
 #endif /* defined(LINUX_BOOT_ABI) */
 
 SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "Board attributes");
 SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD,
     &board_revision, 0, "Board revision");
 SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD,
     board_serial, 0, "Board serial");
 
 int vfp_exists;
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     &vfp_exists, 0, "Floating point support enabled");
 
 void
 board_set_serial(uint64_t serial)
 {
 
 	snprintf(board_serial, sizeof(board_serial)-1,
 		    "%016jx", serial);
 }
 
 void
 board_set_revision(uint32_t revision)
 {
 
 	board_revision = revision;
 }
 
 static char *
 kenv_next(char *cp)
 {
 
 	if (cp != NULL) {
 		while (*cp != 0)
 			cp++;
 		cp++;
 		if (*cp == 0)
 			cp = NULL;
 	}
 	return (cp);
 }
 
 void
 arm_print_kenv(void)
 {
 	char *cp;
 
 	debugf("loader passed (static) kenv:\n");
 	if (loader_envp == NULL) {
 		debugf(" no env, null ptr\n");
 		return;
 	}
 	debugf(" loader_envp = 0x%08x\n", (uint32_t)loader_envp);
 
 	for (cp = loader_envp; cp != NULL; cp = kenv_next(cp))
 		debugf(" %x %s\n", (uint32_t)cp, cp);
 }
 
 
 #if defined(LINUX_BOOT_ABI)
 
 /* Convert the U-Boot command line into FreeBSD kenv and boot options. */
 static void
 cmdline_set_env(char *cmdline, const char *guard)
 {
 	size_t guard_len;
 
 	/* Skip leading spaces. */
 	while (isspace(*cmdline))
 		cmdline++;
 
 	/* Test and remove guard. */
 	if (guard != NULL && guard[0] != '\0') {
 		guard_len  =  strlen(guard);
 		if (strncasecmp(cmdline, guard, guard_len) != 0)
 			return;
 		cmdline += guard_len;
 	}
 
 	boothowto |= boot_parse_cmdline(cmdline);
 }
 
 /*
  * Called for armv6 and newer.
  */
 void arm_parse_fdt_bootargs(void)
 {
 
 #ifdef FDT
 	if (loader_envp == NULL && fdt_get_chosen_bootargs(linux_command_line,
 	    LBABI_MAX_COMMAND_LINE) == 0) {
 		init_static_kenv(static_kenv, sizeof(static_kenv));
 		cmdline_set_env(linux_command_line, CMDLINE_GUARD);
 	}
 #endif
 }
 
 /*
  * Called for armv[45].
  */
 static vm_offset_t
 linux_parse_boot_param(struct arm_boot_params *abp)
 {
 	struct arm_lbabi_tag *walker;
 	uint32_t revision;
 	uint64_t serial;
 	int size;
 	vm_offset_t lastaddr;
 #ifdef FDT
 	struct fdt_header *dtb_ptr;
 	uint32_t dtb_size;
 #endif
 
 	/*
 	 * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2
 	 * is atags or dtb pointer.  If all of these aren't satisfied,
 	 * then punt. Unfortunately, it looks like DT enabled kernels
 	 * doesn't uses board type and U-Boot delivers 0 in r1 for them.
 	 */
 	if (abp->abp_r0 != 0 || abp->abp_r2 == 0)
 		return (0);
 #ifdef FDT
 	/* Test if r2 point to valid DTB. */
 	dtb_ptr = (struct fdt_header *)abp->abp_r2;
 	if (fdt_check_header(dtb_ptr) == 0) {
 		dtb_size = fdt_totalsize(dtb_ptr);
 		return (fake_preload_metadata(abp, dtb_ptr, dtb_size));
 	}
 #endif
 
 	board_id = abp->abp_r1;
 	walker = (struct arm_lbabi_tag *)abp->abp_r2;
 
 	if (ATAG_TAG(walker) != ATAG_CORE)
 		return 0;
 
 	atag_list = walker;
 	while (ATAG_TAG(walker) != ATAG_NONE) {
 		switch (ATAG_TAG(walker)) {
 		case ATAG_CORE:
 			break;
 		case ATAG_MEM:
-			arm_physmem_hardware_region(walker->u.tag_mem.start,
+			physmem_hardware_region(walker->u.tag_mem.start,
 			    walker->u.tag_mem.size);
 			break;
 		case ATAG_INITRD2:
 			break;
 		case ATAG_SERIAL:
 			serial = walker->u.tag_sn.high;
 			serial <<= 32;
 			serial |= walker->u.tag_sn.low;
 			board_set_serial(serial);
 			break;
 		case ATAG_REVISION:
 			revision = walker->u.tag_rev.rev;
 			board_set_revision(revision);
 			break;
 		case ATAG_CMDLINE:
 			size = ATAG_SIZE(walker) -
 			    sizeof(struct arm_lbabi_header);
 			size = min(size, LBABI_MAX_COMMAND_LINE);
 			strncpy(linux_command_line, walker->u.tag_cmd.command,
 			    size);
 			linux_command_line[size] = '\0';
 			break;
 		default:
 			break;
 		}
 		walker = ATAG_NEXT(walker);
 	}
 
 	/* Save a copy for later */
 	bcopy(atag_list, atags,
 	    (char *)walker - (char *)atag_list + ATAG_SIZE(walker));
 
 	lastaddr = fake_preload_metadata(abp, NULL, 0);
 	init_static_kenv(static_kenv, sizeof(static_kenv));
 	cmdline_set_env(linux_command_line, CMDLINE_GUARD);
 	return lastaddr;
 }
 #endif
 
 #if defined(FREEBSD_BOOT_LOADER)
 static vm_offset_t
 freebsd_parse_boot_param(struct arm_boot_params *abp)
 {
 	vm_offset_t lastaddr = 0;
 	void *mdp;
 	void *kmdp;
 #ifdef DDB
 	vm_offset_t ksym_start;
 	vm_offset_t ksym_end;
 #endif
 
 	/*
 	 * Mask metadata pointer: it is supposed to be on page boundary. If
 	 * the first argument (mdp) doesn't point to a valid address the
 	 * bootloader must have passed us something else than the metadata
 	 * ptr, so we give up.  Also give up if we cannot find metadta section
 	 * the loader creates that we get all this data out of.
 	 */
 
 	if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL)
 		return 0;
 	preload_metadata = mdp;
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		return 0;
 
 	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
 	loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
 	init_static_kenv(loader_envp, 0);
 	lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t);
 #ifdef DDB
 	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
 	db_fetch_ksymtab(ksym_start, ksym_end);
 #endif
 	return lastaddr;
 }
 #endif
 
 vm_offset_t
 default_parse_boot_param(struct arm_boot_params *abp)
 {
 	vm_offset_t lastaddr;
 
 #if defined(LINUX_BOOT_ABI)
 	if ((lastaddr = linux_parse_boot_param(abp)) != 0)
 		return lastaddr;
 #endif
 #if defined(FREEBSD_BOOT_LOADER)
 	if ((lastaddr = freebsd_parse_boot_param(abp)) != 0)
 		return lastaddr;
 #endif
 	/* Fall back to hardcoded metadata. */
 	lastaddr = fake_preload_metadata(abp, NULL, 0);
 
 	return lastaddr;
 }
 
 /*
  * Stub version of the boot parameter parsing routine.  We are
  * called early in initarm, before even VM has been initialized.
  * This routine needs to preserve any data that the boot loader
  * has passed in before the kernel starts to grow past the end
  * of the BSS, traditionally the place boot-loaders put this data.
  *
  * Since this is called so early, things that depend on the vm system
  * being setup (including access to some SoC's serial ports), about
  * all that can be done in this routine is to copy the arguments.
  *
  * This is the default boot parameter parsing routine.  Individual
  * kernels/boards can override this weak function with one of their
  * own.  We just fake metadata...
  */
 __weak_reference(default_parse_boot_param, parse_boot_param);
 
 
 /*
  * Fake up a boot descriptor table
  */
 vm_offset_t
 fake_preload_metadata(struct arm_boot_params *abp __unused, void *dtb_ptr,
     size_t dtb_size)
 {
 #ifdef DDB
 	vm_offset_t zstart = 0, zend = 0;
 #endif
 	vm_offset_t lastaddr;
 	int i = 0;
 	static uint32_t fake_preload[35];
 
 	fake_preload[i++] = MODINFO_NAME;
 	fake_preload[i++] = strlen("kernel") + 1;
 	strcpy((char*)&fake_preload[i++], "kernel");
 	i += 1;
 	fake_preload[i++] = MODINFO_TYPE;
 	fake_preload[i++] = strlen("elf kernel") + 1;
 	strcpy((char*)&fake_preload[i++], "elf kernel");
 	i += 2;
 	fake_preload[i++] = MODINFO_ADDR;
 	fake_preload[i++] = sizeof(vm_offset_t);
 	fake_preload[i++] = KERNVIRTADDR;
 	fake_preload[i++] = MODINFO_SIZE;
 	fake_preload[i++] = sizeof(uint32_t);
 	fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR;
 #ifdef DDB
 	if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) {
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4);
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8);
 		lastaddr = *(uint32_t *)(KERNVIRTADDR + 8);
 		zend = lastaddr;
 		zstart = *(uint32_t *)(KERNVIRTADDR + 4);
 		db_fetch_ksymtab(zstart, zend);
 	} else
 #endif
 		lastaddr = (vm_offset_t)&end;
 	if (dtb_ptr != NULL) {
 		/* Copy DTB to KVA space and insert it into module chain. */
 		lastaddr = roundup(lastaddr, sizeof(int));
 		fake_preload[i++] = MODINFO_METADATA | MODINFOMD_DTBP;
 		fake_preload[i++] = sizeof(uint32_t);
 		fake_preload[i++] = (uint32_t)lastaddr;
 		memmove((void *)lastaddr, dtb_ptr, dtb_size);
 		lastaddr += dtb_size;
 		lastaddr = roundup(lastaddr, sizeof(int));
 	}
 	fake_preload[i++] = 0;
 	fake_preload[i] = 0;
 	preload_metadata = (void *)fake_preload;
 
 	init_static_kenv(NULL, 0);
 
 	return (lastaddr);
 }
 
 #ifdef EFI
 void
 arm_add_efi_map_entries(struct efi_map_header *efihdr, struct mem_region *mr,
     int *mrcnt)
 {
 	struct efi_md *map, *p;
 	const char *type;
 	size_t efisz, memory_size;
 	int ndesc, i, j;
 
 	static const char *types[] = {
 		"Reserved",
 		"LoaderCode",
 		"LoaderData",
 		"BootServicesCode",
 		"BootServicesData",
 		"RuntimeServicesCode",
 		"RuntimeServicesData",
 		"ConventionalMemory",
 		"UnusableMemory",
 		"ACPIReclaimMemory",
 		"ACPIMemoryNVS",
 		"MemoryMappedIO",
 		"MemoryMappedIOPortSpace",
 		"PalCode",
 		"PersistentMemory"
 	};
 
 	*mrcnt = 0;
 
 	/*
 	 * Memory map data provided by UEFI via the GetMemoryMap
 	 * Boot Services API.
 	 */
 	efisz = roundup2(sizeof(struct efi_map_header), 0x10);
 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
 
 	if (efihdr->descriptor_size == 0)
 		return;
 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
 
 	if (boothowto & RB_VERBOSE)
 		printf("%23s %12s %12s %8s %4s\n",
 		    "Type", "Physical", "Virtual", "#Pages", "Attr");
 
 	memory_size = 0;
 	for (i = 0, j = 0, p = map; i < ndesc; i++,
 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
 		if (boothowto & RB_VERBOSE) {
 			if (p->md_type < nitems(types))
 				type = types[p->md_type];
 			else
 				type = "<INVALID>";
 			printf("%23s %012llx %12p %08llx ", type, p->md_phys,
 			    p->md_virt, p->md_pages);
 			if (p->md_attr & EFI_MD_ATTR_UC)
 				printf("UC ");
 			if (p->md_attr & EFI_MD_ATTR_WC)
 				printf("WC ");
 			if (p->md_attr & EFI_MD_ATTR_WT)
 				printf("WT ");
 			if (p->md_attr & EFI_MD_ATTR_WB)
 				printf("WB ");
 			if (p->md_attr & EFI_MD_ATTR_UCE)
 				printf("UCE ");
 			if (p->md_attr & EFI_MD_ATTR_WP)
 				printf("WP ");
 			if (p->md_attr & EFI_MD_ATTR_RP)
 				printf("RP ");
 			if (p->md_attr & EFI_MD_ATTR_XP)
 				printf("XP ");
 			if (p->md_attr & EFI_MD_ATTR_NV)
 				printf("NV ");
 			if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
 				printf("MORE_RELIABLE ");
 			if (p->md_attr & EFI_MD_ATTR_RO)
 				printf("RO ");
 			if (p->md_attr & EFI_MD_ATTR_RT)
 				printf("RUNTIME");
 			printf("\n");
 		}
 
 		switch (p->md_type) {
 		case EFI_MD_TYPE_CODE:
 		case EFI_MD_TYPE_DATA:
 		case EFI_MD_TYPE_BS_CODE:
 		case EFI_MD_TYPE_BS_DATA:
 		case EFI_MD_TYPE_FREE:
 			/*
 			 * We're allowed to use any entry with these types.
 			 */
 			break;
 		default:
 			continue;
 		}
 
 		j++;
 		if (j >= FDT_MEM_REGIONS)
 			break;
 
 		mr[j].mr_start = p->md_phys;
 		mr[j].mr_size = p->md_pages * PAGE_SIZE;
 		memory_size += mr[j].mr_size;
 	}
 
 	*mrcnt = j;
 }
 #endif /* EFI */
Index: head/sys/arm/arm/mp_machdep.c
===================================================================
--- head/sys/arm/arm/mp_machdep.c	(revision 360081)
+++ head/sys/arm/arm/mp_machdep.c	(revision 360082)
@@ -1,399 +1,398 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 Semihalf.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 #include "opt_ddb.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/pmap.h>
 
 #include <machine/armreg.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/debug_monitor.h>
 #include <machine/smp.h>
 #include <machine/pcb.h>
-#include <machine/physmem.h>
 #include <machine/intr.h>
 #include <machine/vmparam.h>
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 #ifdef CPU_MV_PJ4B
 #include <arm/mv/mvwin.h>
 #endif
 
 /* used to hold the AP's until we are ready to release them */
 struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
 
 /* # of Applications processors */
 volatile int mp_naps;
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
 
 void set_stackptrs(int cpu);
 
 /* Temporary variables for init_secondary()  */
 void *dpcpu[MAXCPU - 1];
 
 /* Determine if we running MP machine */
 int
 cpu_mp_probe(void)
 {
 
 	KASSERT(mp_ncpus != 0, ("cpu_mp_probe: mp_ncpus is unset"));
 
 	CPU_SETOF(0, &all_cpus);
 
 	return (mp_ncpus > 1);
 }
 
 /* Start Application Processor via platform specific function */
 static int
 check_ap(void)
 {
 	uint32_t ms;
 
 	for (ms = 0; ms < 2000; ++ms) {
 		if ((mp_naps + 1) == mp_ncpus)
 			return (0);		/* success */
 		else
 			DELAY(1000);
 	}
 
 	return (-2);
 }
 
 /* Initialize and fire up non-boot processors */
 void
 cpu_mp_start(void)
 {
 	int error, i;
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	/* Reserve memory for application processors */
 	for(i = 0; i < (mp_ncpus - 1); i++)
 		dpcpu[i] = (void *)kmem_malloc(DPCPU_SIZE, M_WAITOK | M_ZERO);
 
 	dcache_wbinv_poc_all();
 
 	/* Initialize boot code and start up processors */
 	platform_mp_start_ap();
 
 	/*  Check if ap's started properly */
 	error = check_ap();
 	if (error)
 		printf("WARNING: Some AP's failed to start\n");
 	else
 		for (i = 1; i < mp_ncpus; i++)
 			CPU_SET(i, &all_cpus);
 }
 
 /* Introduce rest of cores to the world */
 void
 cpu_mp_announce(void)
 {
 
 }
 
 extern vm_paddr_t pmap_pa;
 void
 init_secondary(int cpu)
 {
 	struct pcpu *pc;
 	uint32_t loop_counter;
 
 	pmap_set_tex();
 	cpuinfo_reinit_mmu(pmap_kern_ttb);
 	cpu_setup();
 
 	/* Provide stack pointers for other processor modes. */
 	set_stackptrs(cpu);
 
 	enable_interrupts(PSR_A);
 	pc = &__pcpu[cpu];
 
 	/*
 	 * pcpu_init() updates queue, so it should not be executed in parallel
 	 * on several cores
 	 */
 	while(mp_naps < (cpu - 1))
 		;
 
 	pcpu_init(pc, cpu, sizeof(struct pcpu));
 	dpcpu_init(dpcpu[cpu - 1], cpu);
 #if __ARM_ARCH >= 6 && defined(DDB)
 	dbg_monitor_init_secondary();
 #endif
 	/* Signal our startup to BSP */
 	atomic_add_rel_32(&mp_naps, 1);
 
 	/* Spin until the BSP releases the APs */
 	while (!atomic_load_acq_int(&aps_ready)) {
 #if __ARM_ARCH >= 7
 		__asm __volatile("wfe");
 #endif
 	}
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pc->pc_curthread = pc->pc_idlethread;
 	pc->pc_curpcb = pc->pc_idlethread->td_pcb;
 	set_curthread(pc->pc_idlethread);
 #ifdef VFP
 	vfp_init();
 #endif
 
 	/* Configure the interrupt controller */
 	intr_pic_init_secondary();
 
 	/* Apply possible BP hardening */
 	cpuinfo_init_bp_hardening();
 
 	mtx_lock_spin(&ap_boot_mtx);
 
 	atomic_add_rel_32(&smp_cpus, 1);
 
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
 	}
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	enable_interrupts(PSR_I);
 
 	loop_counter = 0;
 	while (smp_started == 0) {
 		DELAY(100);
 		loop_counter++;
 		if (loop_counter == 1000)
 			CTR0(KTR_SMP, "AP still wait for smp_started");
 	}
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 	CTR0(KTR_SMP, "go into scheduler");
 
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
 	panic("scheduler returned us to %s", __func__);
 	/* NOTREACHED */
 }
 
 static void
 ipi_rendezvous(void *dummy __unused)
 {
 
 	CTR0(KTR_SMP, "IPI_RENDEZVOUS");
 	smp_rendezvous_action();
 }
 
 static void
 ipi_ast(void *dummy __unused)
 {
 
 	CTR0(KTR_SMP, "IPI_AST");
 }
 
 static void
 ipi_stop(void *dummy __unused)
 {
 	u_int cpu;
 
 	/*
 	 * IPI_STOP_HARD is mapped to IPI_STOP.
 	 */
 	CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD");
 
 	cpu = PCPU_GET(cpuid);
 	savectx(&stoppcbs[cpu]);
 
 	/*
 	 * CPUs are stopped when entering the debugger and at
 	 * system shutdown, both events which can precede a
 	 * panic dump.  For the dump to be correct, all caches
 	 * must be flushed and invalidated, but on ARM there's
 	 * no way to broadcast a wbinv_all to other cores.
 	 * Instead, we have each core do the local wbinv_all as
 	 * part of stopping the core.  The core requesting the
 	 * stop will do the l2 cache flush after all other cores
 	 * have done their l1 flushes and stopped.
 	 */
 	dcache_wbinv_poc_all();
 
 	/* Indicate we are stopped */
 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 	/* Wait for restart */
 	while (!CPU_ISSET(cpu, &started_cpus))
 		cpu_spinwait();
 
 	CPU_CLR_ATOMIC(cpu, &started_cpus);
 	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 #ifdef DDB
 	dbg_resume_dbreg();
 #endif
 	CTR0(KTR_SMP, "IPI_STOP (restart)");
 }
 
 static void
 ipi_preempt(void *arg)
 {
 	struct trapframe *oldframe;
 	struct thread *td;
 
 	critical_enter();
 	td = curthread;
 	td->td_intr_nesting_level++;
 	oldframe = td->td_intr_frame;
 	td->td_intr_frame = (struct trapframe *)arg;
 
 	CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
 	sched_preempt(td);
 
 	td->td_intr_frame = oldframe;
 	td->td_intr_nesting_level--;
 	critical_exit();
 }
 
 static void
 ipi_hardclock(void *arg)
 {
 	struct trapframe *oldframe;
 	struct thread *td;
 
 	critical_enter();
 	td = curthread;
 	td->td_intr_nesting_level++;
 	oldframe = td->td_intr_frame;
 	td->td_intr_frame = (struct trapframe *)arg;
 
 	CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
 	hardclockintr();
 
 	td->td_intr_frame = oldframe;
 	td->td_intr_nesting_level--;
 	critical_exit();
 }
 
 static void
 release_aps(void *dummy __unused)
 {
 	uint32_t loop_counter;
 
 	if (mp_ncpus == 1)
 		return;
 
 	intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL);
 	intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL);
 	intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL);
 	intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL);
 	intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL);
 
 	atomic_store_rel_int(&aps_ready, 1);
 	/* Wake the other threads up */
 	dsb();
 	sev();
 
 	printf("Release APs\n");
 
 	for (loop_counter = 0; loop_counter < 2000; loop_counter++) {
 		if (smp_started)
 			return;
 		DELAY(1000);
 	}
 	printf("AP's not started\n");
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 struct cpu_group *
 cpu_topo(void)
 {
 
 	return (smp_topo_1level(CG_SHARE_L2, mp_ncpus, 0));
 }
 
 void
 cpu_mp_setmaxid(void)
 {
 
 	platform_mp_setmaxid();
 }
 
 /* Sending IPI */
 void
 ipi_all_but_self(u_int ipi)
 {
 	cpuset_t other_cpus;
 
 	other_cpus = all_cpus;
 	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	intr_ipi_send(other_cpus, ipi);
 }
 
 void
 ipi_cpu(int cpu, u_int ipi)
 {
 	cpuset_t cpus;
 
 	CPU_ZERO(&cpus);
 	CPU_SET(cpu, &cpus);
 
 	CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
 	intr_ipi_send(cpus, ipi);
 }
 
 void
 ipi_selected(cpuset_t cpus, u_int ipi)
 {
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	intr_ipi_send(cpus, ipi);
 }
Index: head/sys/arm/arm/pmap-v6.c
===================================================================
--- head/sys/arm/arm/pmap-v6.c	(revision 360081)
+++ head/sys/arm/arm/pmap-v6.c	(revision 360082)
@@ -1,6961 +1,6959 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1991 Regents of the University of California.
  * Copyright (c) 1994 John S. Dyson
  * Copyright (c) 1994 David Greenman
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * Copyright (c) 2014-2016 Svatopluk Kraus <skra@FreeBSD.org>
  * Copyright (c) 2014-2016 Michal Meloun <mmel@FreeBSD.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_vm.h"
 #include "opt_pmap.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/malloc.h>
 #include <sys/vmmeter.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/sf_buf.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
-#include <machine/physmem.h>
-
 #include <vm/vm.h>
 #include <vm/uma.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_reserv.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <machine/md_var.h>
 #include <machine/pmap_var.h>
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/sf_buf.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 #ifndef DIAGNOSTIC
 #define PMAP_INLINE	__inline
 #else
 #define PMAP_INLINE
 #endif
 
 #ifdef PMAP_DEBUG
 static void pmap_zero_page_check(vm_page_t m);
 void pmap_debug(int level);
 int pmap_pid_dump(int pid);
 
 #define PDEBUG(_lev_,_stat_) \
 	if (pmap_debug_level >= (_lev_)) \
 		((_stat_))
 #define dprintf printf
 int pmap_debug_level = 1;
 #else   /* PMAP_DEBUG */
 #define PDEBUG(_lev_,_stat_) /* Nothing */
 #define dprintf(x, arg...)
 #endif  /* PMAP_DEBUG */
 
 /*
  *  Level 2 page tables map definion ('max' is excluded).
  */
 
 #define PT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
 #define PT2V_MAX_ADDRESS	((vm_offset_t)PT2MAP + PT2MAP_SIZE)
 
 #define UPT2V_MIN_ADDRESS	((vm_offset_t)PT2MAP)
 #define UPT2V_MAX_ADDRESS \
     ((vm_offset_t)(PT2MAP + (KERNBASE >> PT2MAP_SHIFT)))
 
 /*
  *  Promotion to a 1MB (PTE1) page mapping requires that the corresponding
  *  4KB (PTE2) page mappings have identical settings for the following fields:
  */
 #define PTE2_PROMOTE	(PTE2_V | PTE2_A | PTE2_NM | PTE2_S | PTE2_NG |	\
 			 PTE2_NX | PTE2_RO | PTE2_U | PTE2_W |		\
 			 PTE2_ATTR_MASK)
 
 #define PTE1_PROMOTE	(PTE1_V | PTE1_A | PTE1_NM | PTE1_S | PTE1_NG |	\
 			 PTE1_NX | PTE1_RO | PTE1_U | PTE1_W |		\
 			 PTE1_ATTR_MASK)
 
 #define ATTR_TO_L1(l2_attr)	((((l2_attr) & L2_TEX0) ? L1_S_TEX0 : 0) | \
 				 (((l2_attr) & L2_C)    ? L1_S_C    : 0) | \
 				 (((l2_attr) & L2_B)    ? L1_S_B    : 0) | \
 				 (((l2_attr) & PTE2_A)  ? PTE1_A    : 0) | \
 				 (((l2_attr) & PTE2_NM) ? PTE1_NM   : 0) | \
 				 (((l2_attr) & PTE2_S)  ? PTE1_S    : 0) | \
 				 (((l2_attr) & PTE2_NG) ? PTE1_NG   : 0) | \
 				 (((l2_attr) & PTE2_NX) ? PTE1_NX   : 0) | \
 				 (((l2_attr) & PTE2_RO) ? PTE1_RO   : 0) | \
 				 (((l2_attr) & PTE2_U)  ? PTE1_U    : 0) | \
 				 (((l2_attr) & PTE2_W)  ? PTE1_W    : 0))
 
 #define ATTR_TO_L2(l1_attr)	((((l1_attr) & L1_S_TEX0) ? L2_TEX0 : 0) | \
 				 (((l1_attr) & L1_S_C)    ? L2_C    : 0) | \
 				 (((l1_attr) & L1_S_B)    ? L2_B    : 0) | \
 				 (((l1_attr) & PTE1_A)    ? PTE2_A  : 0) | \
 				 (((l1_attr) & PTE1_NM)   ? PTE2_NM : 0) | \
 				 (((l1_attr) & PTE1_S)    ? PTE2_S  : 0) | \
 				 (((l1_attr) & PTE1_NG)   ? PTE2_NG : 0) | \
 				 (((l1_attr) & PTE1_NX)   ? PTE2_NX : 0) | \
 				 (((l1_attr) & PTE1_RO)   ? PTE2_RO : 0) | \
 				 (((l1_attr) & PTE1_U)    ? PTE2_U  : 0) | \
 				 (((l1_attr) & PTE1_W)    ? PTE2_W  : 0))
 
 /*
  *  PTE2 descriptors creation macros.
  */
 #define PTE2_ATTR_DEFAULT	vm_memattr_to_pte2(VM_MEMATTR_DEFAULT)
 #define PTE2_ATTR_PT		vm_memattr_to_pte2(pt_memattr)
 
 #define PTE2_KPT(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
 #define PTE2_KPT_NG(pa)	PTE2_KERN_NG(pa, PTE2_AP_KRW, PTE2_ATTR_PT)
 
 #define PTE2_KRW(pa)	PTE2_KERN(pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT)
 #define PTE2_KRO(pa)	PTE2_KERN(pa, PTE2_AP_KR, PTE2_ATTR_DEFAULT)
 
 #define PV_STATS
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 /*
  *  The boot_pt1 is used temporary in very early boot stage as L1 page table.
  *  We can init many things with no memory allocation thanks to its static
  *  allocation and this brings two main advantages:
  *  (1) other cores can be started very simply,
  *  (2) various boot loaders can be supported as its arguments can be processed
  *      in virtual address space and can be moved to safe location before
  *      first allocation happened.
  *  Only disadvantage is that boot_pt1 is used only in very early boot stage.
  *  However, the table is uninitialized and so lays in bss. Therefore kernel
  *  image size is not influenced.
  *
  *  QQQ: In the future, maybe, boot_pt1 can be used for soft reset and
  *       CPU suspend/resume game.
  */
 extern pt1_entry_t boot_pt1[];
 
 vm_paddr_t base_pt1;
 pt1_entry_t *kern_pt1;
 pt2_entry_t *kern_pt2tab;
 pt2_entry_t *PT2MAP;
 
 static uint32_t ttb_flags;
 static vm_memattr_t pt_memattr;
 ttb_entry_t pmap_kern_ttb;
 
 struct pmap kernel_pmap_store;
 LIST_HEAD(pmaplist, pmap);
 static struct pmaplist allpmaps;
 static struct mtx allpmaps_lock;
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 
 static vm_offset_t kernel_vm_end_new;
 vm_offset_t kernel_vm_end = KERNBASE + NKPT2PG * NPT2_IN_PG * PTE1_SIZE;
 vm_offset_t vm_max_kernel_address;
 vm_paddr_t kernel_l1pa;
 
 static struct rwlock __aligned(CACHE_LINE_SIZE) pvh_global_lock;
 
 /*
  *  Data for the pv entry allocation mechanism
  */
 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static struct md_page *pv_table; /* XXX: Is it used only the list in md_page? */
 static int shpgperproc = PMAP_SHPGPERPROC;
 
 struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
 int pv_maxchunks;			/* How many chunks we have KVA for */
 vm_offset_t pv_vafree;			/* freelist stored in the PTE */
 
 vm_paddr_t first_managed_pa;
 #define	pa_to_pvh(pa)	(&pv_table[pte1_index(pa - first_managed_pa)])
 
 /*
  *  All those kernel PT submaps that BSD is so fond of
  */
 caddr_t _tmppt = 0;
 
 /*
  *  Crashdump maps.
  */
 static caddr_t crashdumpmap;
 
 static pt2_entry_t *PMAP1 = NULL, *PMAP2;
 static pt2_entry_t *PADDR1 = NULL, *PADDR2;
 #ifdef DDB
 static pt2_entry_t *PMAP3;
 static pt2_entry_t *PADDR3;
 static int PMAP3cpu __unused; /* for SMP only */
 #endif
 #ifdef SMP
 static int PMAP1cpu;
 static int PMAP1changedcpu;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
     &PMAP1changedcpu, 0,
     "Number of times pmap_pte2_quick changed CPU with same PMAP1");
 #endif
 static int PMAP1changed;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
     &PMAP1changed, 0,
     "Number of times pmap_pte2_quick changed PMAP1");
 static int PMAP1unchanged;
 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
     &PMAP1unchanged, 0,
     "Number of times pmap_pte2_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
 #define	PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
 #define	PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
 
 static __inline void pt2_wirecount_init(vm_page_t m);
 static boolean_t pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p,
     vm_offset_t va);
 static int pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1,
     u_int flags, vm_page_t m);
 void cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size);
 
 /*
  *  Function to set the debug level of the pmap code.
  */
 #ifdef PMAP_DEBUG
 void
 pmap_debug(int level)
 {
 
 	pmap_debug_level = level;
 	dprintf("pmap_debug: level=%d\n", pmap_debug_level);
 }
 #endif /* PMAP_DEBUG */
 
 /*
  *  This table must corespond with memory attribute configuration in vm.h.
  *  First entry is used for normal system mapping.
  *
  *  Device memory is always marked as shared.
  *  Normal memory is shared only in SMP .
  *  Not outer shareable bits are not used yet.
  *  Class 6 cannot be used on ARM11.
  */
 #define TEXDEF_TYPE_SHIFT	0
 #define TEXDEF_TYPE_MASK	0x3
 #define TEXDEF_INNER_SHIFT	2
 #define TEXDEF_INNER_MASK	0x3
 #define TEXDEF_OUTER_SHIFT	4
 #define TEXDEF_OUTER_MASK	0x3
 #define TEXDEF_NOS_SHIFT	6
 #define TEXDEF_NOS_MASK		0x1
 
 #define TEX(t, i, o, s) 			\
 		((t) << TEXDEF_TYPE_SHIFT) |	\
 		((i) << TEXDEF_INNER_SHIFT) |	\
 		((o) << TEXDEF_OUTER_SHIFT | 	\
 		((s) << TEXDEF_NOS_SHIFT))
 
 static uint32_t tex_class[8] = {
 /*	    type      inner cache outer cache */
 	TEX(PRRR_MEM, NMRR_WB_WA, NMRR_WB_WA, 0),  /* 0 - ATTR_WB_WA	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 1 - ATTR_NOCACHE	*/
 	TEX(PRRR_DEV, NMRR_NC,	  NMRR_NC,    0),  /* 2 - ATTR_DEVICE	*/
 	TEX(PRRR_SO,  NMRR_NC,	  NMRR_NC,    0),  /* 3 - ATTR_SO	*/
 	TEX(PRRR_MEM, NMRR_WT,	  NMRR_WT,    0),  /* 4 - ATTR_WT	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 5 - NOT USED YET	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 6 - NOT USED YET	*/
 	TEX(PRRR_MEM, NMRR_NC,	  NMRR_NC,    0),  /* 7 - NOT USED YET	*/
 };
 #undef TEX
 
 static uint32_t pte2_attr_tab[8] = {
 	PTE2_ATTR_WB_WA,	/* 0 - VM_MEMATTR_WB_WA */
 	PTE2_ATTR_NOCACHE,	/* 1 - VM_MEMATTR_NOCACHE */
 	PTE2_ATTR_DEVICE,	/* 2 - VM_MEMATTR_DEVICE */
 	PTE2_ATTR_SO,		/* 3 - VM_MEMATTR_SO */
 	PTE2_ATTR_WT,		/* 4 - VM_MEMATTR_WRITE_THROUGH */
 	0,			/* 5 - NOT USED YET */
 	0,			/* 6 - NOT USED YET */
 	0			/* 7 - NOT USED YET */
 };
 CTASSERT(VM_MEMATTR_WB_WA == 0);
 CTASSERT(VM_MEMATTR_NOCACHE == 1);
 CTASSERT(VM_MEMATTR_DEVICE == 2);
 CTASSERT(VM_MEMATTR_SO == 3);
 CTASSERT(VM_MEMATTR_WRITE_THROUGH == 4);
 #define	VM_MEMATTR_END	(VM_MEMATTR_WRITE_THROUGH + 1)
 
 boolean_t
 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 {
 
 	return (mode >= 0 && mode < VM_MEMATTR_END);
 }
 
 static inline uint32_t
 vm_memattr_to_pte2(vm_memattr_t ma)
 {
 
 	KASSERT((u_int)ma < VM_MEMATTR_END,
 	    ("%s: bad vm_memattr_t %d", __func__, ma));
 	return (pte2_attr_tab[(u_int)ma]);
 }
 
 static inline uint32_t
 vm_page_pte2_attr(vm_page_t m)
 {
 
 	return (vm_memattr_to_pte2(m->md.pat_mode));
 }
 
 /*
  * Convert TEX definition entry to TTB flags.
  */
 static uint32_t
 encode_ttb_flags(int idx)
 {
 	uint32_t inner, outer, nos, reg;
 
 	inner = (tex_class[idx] >> TEXDEF_INNER_SHIFT) &
 		TEXDEF_INNER_MASK;
 	outer = (tex_class[idx] >> TEXDEF_OUTER_SHIFT) &
 		TEXDEF_OUTER_MASK;
 	nos = (tex_class[idx] >> TEXDEF_NOS_SHIFT) &
 		TEXDEF_NOS_MASK;
 
 	reg = nos << 5;
 	reg |= outer << 3;
 	if (cpuinfo.coherent_walk)
 		reg |= (inner & 0x1) << 6;
 	reg |= (inner & 0x2) >> 1;
 #ifdef SMP
 	ARM_SMP_UP(
 		reg |= 1 << 1,
 	);
 #endif
 	return reg;
 }
 
 /*
  *  Set TEX remapping registers in current CPU.
  */
 void
 pmap_set_tex(void)
 {
 	uint32_t prrr, nmrr;
 	uint32_t type, inner, outer, nos;
 	int i;
 
 #ifdef PMAP_PTE_NOCACHE
 	/* XXX fixme */
 	if (cpuinfo.coherent_walk) {
 		pt_memattr = VM_MEMATTR_WB_WA;
 		ttb_flags = encode_ttb_flags(0);
 	}
 	else {
 		pt_memattr = VM_MEMATTR_NOCACHE;
 		ttb_flags = encode_ttb_flags(1);
 	}
 #else
 	pt_memattr = VM_MEMATTR_WB_WA;
 	ttb_flags = encode_ttb_flags(0);
 #endif
 
 	prrr = 0;
 	nmrr = 0;
 
 	/* Build remapping register from TEX classes. */
 	for (i = 0; i < 8; i++) {
 		type = (tex_class[i] >> TEXDEF_TYPE_SHIFT) &
 			TEXDEF_TYPE_MASK;
 		inner = (tex_class[i] >> TEXDEF_INNER_SHIFT) &
 			TEXDEF_INNER_MASK;
 		outer = (tex_class[i] >> TEXDEF_OUTER_SHIFT) &
 			TEXDEF_OUTER_MASK;
 		nos = (tex_class[i] >> TEXDEF_NOS_SHIFT) &
 			TEXDEF_NOS_MASK;
 
 		prrr |= type  << (i * 2);
 		prrr |= nos   << (i + 24);
 		nmrr |= inner << (i * 2);
 		nmrr |= outer << (i * 2 + 16);
 	}
 	/* Add shareable bits for device memory. */
 	prrr |= PRRR_DS0 | PRRR_DS1;
 
 	/* Add shareable bits for normal memory in SMP case. */
 #ifdef SMP
 	ARM_SMP_UP(
 		prrr |= PRRR_NS1,
 	);
 #endif
 	cp15_prrr_set(prrr);
 	cp15_nmrr_set(nmrr);
 
 	/* Caches are disabled, so full TLB flush should be enough. */
 	tlb_flush_all_local();
 }
 
 /*
  * Remap one vm_meattr class to another one. This can be useful as
  * workaround for SOC errata, e.g. if devices must be accessed using
  * SO memory class.
  *
  * !!! Please note that this function is absolutely last resort thing.
  * It should not be used under normal circumstances. !!!
  *
  * Usage rules:
  * - it shall be called after pmap_bootstrap_prepare() and before
  *   cpu_mp_start() (thus only on boot CPU). In practice, it's expected
  *   to be called from platform_attach() or platform_late_init().
  *
  * - if remapping doesn't change caching mode, or until uncached class
  *   is remapped to any kind of cached one, then no other restriction exists.
  *
  * - if pmap_remap_vm_attr() changes caching mode, but both (original and
  *   remapped) remain cached, then caller is resposible for calling
  *   of dcache_wbinv_poc_all().
  *
  * - remapping of any kind of cached class to uncached is not permitted.
  */
 void
 pmap_remap_vm_attr(vm_memattr_t old_attr, vm_memattr_t new_attr)
 {
 	int old_idx, new_idx;
 
 	/* Map VM memattrs to indexes to tex_class table. */
 	old_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)old_attr]);
 	new_idx = PTE2_ATTR2IDX(pte2_attr_tab[(int)new_attr]);
 
 	/* Replace TEX attribute and apply it. */
 	tex_class[old_idx] = tex_class[new_idx];
 	pmap_set_tex();
 }
 
 /*
  * KERNBASE must be multiple of NPT2_IN_PG * PTE1_SIZE. In other words,
  * KERNBASE is mapped by first L2 page table in L2 page table page. It
  * meets same constrain due to PT2MAP being placed just under KERNBASE.
  */
 CTASSERT((KERNBASE & (NPT2_IN_PG * PTE1_SIZE - 1)) == 0);
 CTASSERT((KERNBASE - VM_MAXUSER_ADDRESS) >= PT2MAP_SIZE);
 
 /*
  *  In crazy dreams, PAGE_SIZE could be a multiple of PTE2_SIZE in general.
  *  For now, anyhow, the following check must be fulfilled.
  */
 CTASSERT(PAGE_SIZE == PTE2_SIZE);
 /*
  *  We don't want to mess up MI code with all MMU and PMAP definitions,
  *  so some things, which depend on other ones, are defined independently.
  *  Now, it is time to check that we don't screw up something.
  */
 CTASSERT(PDRSHIFT == PTE1_SHIFT);
 /*
  *  Check L1 and L2 page table entries definitions consistency.
  */
 CTASSERT(NB_IN_PT1 == (sizeof(pt1_entry_t) * NPTE1_IN_PT1));
 CTASSERT(NB_IN_PT2 == (sizeof(pt2_entry_t) * NPTE2_IN_PT2));
 /*
  *  Check L2 page tables page consistency.
  */
 CTASSERT(PAGE_SIZE == (NPT2_IN_PG * NB_IN_PT2));
 CTASSERT((1 << PT2PG_SHIFT) == NPT2_IN_PG);
 /*
  *  Check PT2TAB consistency.
  *  PT2TAB_ENTRIES is defined as a division of NPTE1_IN_PT1 by NPT2_IN_PG.
  *  This should be done without remainder.
  */
 CTASSERT(NPTE1_IN_PT1 == (PT2TAB_ENTRIES * NPT2_IN_PG));
 
 /*
  *	A PT2MAP magic.
  *
  *  All level 2 page tables (PT2s) are mapped continuously and accordingly
  *  into PT2MAP address space. As PT2 size is less than PAGE_SIZE, this can
  *  be done only if PAGE_SIZE is a multiple of PT2 size. All PT2s in one page
  *  must be used together, but not necessary at once. The first PT2 in a page
  *  must map things on correctly aligned address and the others must follow
  *  in right order.
  */
 #define NB_IN_PT2TAB	(PT2TAB_ENTRIES * sizeof(pt2_entry_t))
 #define NPT2_IN_PT2TAB	(NB_IN_PT2TAB / NB_IN_PT2)
 #define NPG_IN_PT2TAB	(NB_IN_PT2TAB / PAGE_SIZE)
 
 /*
  *  Check PT2TAB consistency.
  *  NPT2_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by NB_IN_PT2.
  *  NPG_IN_PT2TAB is defined as a division of NB_IN_PT2TAB by PAGE_SIZE.
  *  The both should be done without remainder.
  */
 CTASSERT(NB_IN_PT2TAB == (NPT2_IN_PT2TAB * NB_IN_PT2));
 CTASSERT(NB_IN_PT2TAB == (NPG_IN_PT2TAB * PAGE_SIZE));
 /*
  *  The implementation was made general, however, with the assumption
  *  bellow in mind. In case of another value of NPG_IN_PT2TAB,
  *  the code should be once more rechecked.
  */
 CTASSERT(NPG_IN_PT2TAB == 1);
 
 /*
  *  Get offset of PT2 in a page
  *  associated with given PT1 index.
  */
 static __inline u_int
 page_pt2off(u_int pt1_idx)
 {
 
 	return ((pt1_idx & PT2PG_MASK) * NB_IN_PT2);
 }
 
 /*
  *  Get physical address of PT2
  *  associated with given PT2s page and PT1 index.
  */
 static __inline vm_paddr_t
 page_pt2pa(vm_paddr_t pgpa, u_int pt1_idx)
 {
 
 	return (pgpa + page_pt2off(pt1_idx));
 }
 
 /*
  *  Get first entry of PT2
  *  associated with given PT2s page and PT1 index.
  */
 static __inline pt2_entry_t *
 page_pt2(vm_offset_t pgva, u_int pt1_idx)
 {
 
 	return ((pt2_entry_t *)(pgva + page_pt2off(pt1_idx)));
 }
 
 /*
  *  Get virtual address of PT2s page (mapped in PT2MAP)
  *  which holds PT2 which holds entry which maps given virtual address.
  */
 static __inline vm_offset_t
 pt2map_pt2pg(vm_offset_t va)
 {
 
 	va &= ~(NPT2_IN_PG * PTE1_SIZE - 1);
 	return ((vm_offset_t)pt2map_entry(va));
 }
 
 /*****************************************************************************
  *
  *     THREE pmap initialization milestones exist:
  *
  *  locore.S
  *    -> fundamental init (including MMU) in ASM
  *
  *  initarm()
  *    -> fundamental init continues in C
  *    -> first available physical address is known
  *
  *    pmap_bootstrap_prepare() -> FIRST PMAP MILESTONE (first epoch begins)
  *      -> basic (safe) interface for physical address allocation is made
  *      -> basic (safe) interface for virtual mapping is made
  *      -> limited not SMP coherent work is possible
  *
  *    -> more fundamental init continues in C
  *    -> locks and some more things are available
  *    -> all fundamental allocations and mappings are done
  *
  *    pmap_bootstrap() -> SECOND PMAP MILESTONE (second epoch begins)
  *      -> phys_avail[] and virtual_avail is set
  *      -> control is passed to vm subsystem
  *      -> physical and virtual address allocation are off limit
  *      -> low level mapping functions, some SMP coherent,
  *         are available, which cannot be used before vm subsystem
  *         is being inited
  *
  *  mi_startup()
  *    -> vm subsystem is being inited
  *
  *      pmap_init() -> THIRD PMAP MILESTONE (third epoch begins)
  *        -> pmap is fully inited
  *
  *****************************************************************************/
 
 /*****************************************************************************
  *
  *	PMAP first stage initialization and utility functions
  *	for pre-bootstrap epoch.
  *
  *  After pmap_bootstrap_prepare() is called, the following functions
  *  can be used:
  *
  *  (1) strictly only for this stage functions for physical page allocations,
  *      virtual space allocations, and mappings:
  *
  *  vm_paddr_t pmap_preboot_get_pages(u_int num);
  *  void pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num);
  *  vm_offset_t pmap_preboot_reserve_pages(u_int num);
  *  vm_offset_t pmap_preboot_get_vpages(u_int num);
  *  void pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
  *      vm_prot_t prot, vm_memattr_t attr);
  *
  *  (2) for all stages:
  *
  *  vm_paddr_t pmap_kextract(vm_offset_t va);
  *
  *  NOTE: This is not SMP coherent stage.
  *
  *****************************************************************************/
 
 #define KERNEL_P2V(pa) \
     ((vm_offset_t)((pa) - arm_physmem_kernaddr + KERNVIRTADDR))
 #define KERNEL_V2P(va) \
     ((vm_paddr_t)((va) - KERNVIRTADDR + arm_physmem_kernaddr))
 
 static vm_paddr_t last_paddr;
 
 /*
  *  Pre-bootstrap epoch page allocator.
  */
 vm_paddr_t
 pmap_preboot_get_pages(u_int num)
 {
 	vm_paddr_t ret;
 
 	ret = last_paddr;
 	last_paddr += num * PAGE_SIZE;
 
 	return (ret);
 }
 
 /*
  *	The fundamental initialization of PMAP stuff.
  *
  *  Some things already happened in locore.S and some things could happen
  *  before pmap_bootstrap_prepare() is called, so let's recall what is done:
  *  1. Caches are disabled.
  *  2. We are running on virtual addresses already with 'boot_pt1'
  *     as L1 page table.
  *  3. So far, all virtual addresses can be converted to physical ones and
  *     vice versa by the following macros:
  *       KERNEL_P2V(pa) .... physical to virtual ones,
  *       KERNEL_V2P(va) .... virtual to physical ones.
  *
  *  What is done herein:
  *  1. The 'boot_pt1' is replaced by real kernel L1 page table 'kern_pt1'.
  *  2. PT2MAP magic is brought to live.
  *  3. Basic preboot functions for page allocations and mappings can be used.
  *  4. Everything is prepared for L1 cache enabling.
  *
  *  Variations:
  *  1. To use second TTB register, so kernel and users page tables will be
  *     separated. This way process forking - pmap_pinit() - could be faster,
  *     it saves physical pages and KVA per a process, and it's simple change.
  *     However, it will lead, due to hardware matter, to the following:
  *     (a) 2G space for kernel and 2G space for users.
  *     (b) 1G space for kernel in low addresses and 3G for users above it.
  *     A question is: Is the case (b) really an option? Note that case (b)
  *     does save neither physical memory and KVA.
  */
 void
 pmap_bootstrap_prepare(vm_paddr_t last)
 {
 	vm_paddr_t pt2pg_pa, pt2tab_pa, pa, size;
 	vm_offset_t pt2pg_va;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	u_int i;
 	uint32_t l1_attr;
 
 	/*
 	 * Now, we are going to make real kernel mapping. Note that we are
 	 * already running on some mapping made in locore.S and we expect
 	 * that it's large enough to ensure nofault access to physical memory
 	 * allocated herein before switch.
 	 *
 	 * As kernel image and everything needed before are and will be mapped
 	 * by section mappings, we align last physical address to PTE1_SIZE.
 	 */
 	last_paddr = pte1_roundup(last);
 
 	/*
 	 * Allocate and zero page(s) for kernel L1 page table.
 	 *
 	 * Note that it's first allocation on space which was PTE1_SIZE
 	 * aligned and as such base_pt1 is aligned to NB_IN_PT1 too.
 	 */
 	base_pt1 = pmap_preboot_get_pages(NPG_IN_PT1);
 	kern_pt1 = (pt1_entry_t *)KERNEL_P2V(base_pt1);
 	bzero((void*)kern_pt1, NB_IN_PT1);
 	pte1_sync_range(kern_pt1, NB_IN_PT1);
 
 	/* Allocate and zero page(s) for kernel PT2TAB. */
 	pt2tab_pa = pmap_preboot_get_pages(NPG_IN_PT2TAB);
 	kern_pt2tab = (pt2_entry_t *)KERNEL_P2V(pt2tab_pa);
 	bzero(kern_pt2tab, NB_IN_PT2TAB);
 	pte2_sync_range(kern_pt2tab, NB_IN_PT2TAB);
 
 	/* Allocate and zero page(s) for kernel L2 page tables. */
 	pt2pg_pa = pmap_preboot_get_pages(NKPT2PG);
 	pt2pg_va = KERNEL_P2V(pt2pg_pa);
 	size = NKPT2PG * PAGE_SIZE;
 	bzero((void*)pt2pg_va, size);
 	pte2_sync_range((pt2_entry_t *)pt2pg_va, size);
 
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
 	 * preallocated pages for kernel L2 page tables so that vm_page
 	 * structures representing these pages will be created. The vm_page
 	 * structures are required for promotion of the corresponding kernel
 	 * virtual addresses to section mappings.
 	 */
 	vm_phys_add_seg(pt2tab_pa, pmap_preboot_get_pages(0));
 
 	/*
 	 * Insert allocated L2 page table pages to PT2TAB and make
 	 * link to all PT2s in L1 page table. See how kernel_vm_end
 	 * is initialized.
 	 *
 	 * We play simple and safe. So every KVA will have underlaying
 	 * L2 page table, even kernel image mapped by sections.
 	 */
 	pte2p = kern_pt2tab_entry(KERNBASE);
 	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += PTE2_SIZE)
 		pt2tab_store(pte2p++, PTE2_KPT(pa));
 
 	pte1p = kern_pte1(KERNBASE);
 	for (pa = pt2pg_pa; pa < pt2pg_pa + size; pa += NB_IN_PT2)
 		pte1_store(pte1p++, PTE1_LINK(pa));
 
 	/* Make section mappings for kernel. */
 	l1_attr = ATTR_TO_L1(PTE2_ATTR_DEFAULT);
 	pte1p = kern_pte1(KERNBASE);
 	for (pa = KERNEL_V2P(KERNBASE); pa < last; pa += PTE1_SIZE)
 		pte1_store(pte1p++, PTE1_KERN(pa, PTE1_AP_KRW, l1_attr));
 
 	/*
 	 * Get free and aligned space for PT2MAP and make L1 page table links
 	 * to L2 page tables held in PT2TAB.
 	 *
 	 * Note that pages holding PT2s are stored in PT2TAB as pt2_entry_t
 	 * descriptors and PT2TAB page(s) itself is(are) used as PT2s. Thus
 	 * each entry in PT2TAB maps all PT2s in a page. This implies that
 	 * virtual address of PT2MAP must be aligned to NPT2_IN_PG * PTE1_SIZE.
 	 */
 	PT2MAP = (pt2_entry_t *)(KERNBASE - PT2MAP_SIZE);
 	pte1p = kern_pte1((vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
 		pte1_store(pte1p++, PTE1_LINK(pa));
 	}
 
 	/*
 	 * Store PT2TAB in PT2TAB itself, i.e. self reference mapping.
 	 * Each pmap will hold own PT2TAB, so the mapping should be not global.
 	 */
 	pte2p = kern_pt2tab_entry((vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
 		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
 	}
 
 	/*
 	 * Choose correct L2 page table and make mappings for allocations
 	 * made herein which replaces temporary locore.S mappings after a while.
 	 * Note that PT2MAP cannot be used until we switch to kern_pt1.
 	 *
 	 * Note, that these allocations started aligned on 1M section and
 	 * kernel PT1 was allocated first. Making of mappings must follow
 	 * order of physical allocations as we've used KERNEL_P2V() macro
 	 * for virtual addresses resolution.
 	 */
 	pte2p = kern_pt2tab_entry((vm_offset_t)kern_pt1);
 	pt2pg_va = KERNEL_P2V(pte2_pa(pte2_load(pte2p)));
 
 	pte2p = page_pt2(pt2pg_va, pte1_index((vm_offset_t)kern_pt1));
 
 	/* Make mapping for kernel L1 page table. */
 	for (pa = base_pt1, i = 0; i < NPG_IN_PT1; i++, pa += PTE2_SIZE)
 		pte2_store(pte2p++, PTE2_KPT(pa));
 
 	/* Make mapping for kernel PT2TAB. */
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE)
 		pte2_store(pte2p++, PTE2_KPT(pa));
 
 	/* Finally, switch from 'boot_pt1' to 'kern_pt1'. */
 	pmap_kern_ttb = base_pt1 | ttb_flags;
 	cpuinfo_reinit_mmu(pmap_kern_ttb);
 	/*
 	 * Initialize the first available KVA. As kernel image is mapped by
 	 * sections, we are leaving some gap behind.
 	 */
 	virtual_avail = (vm_offset_t)kern_pt2tab + NPG_IN_PT2TAB * PAGE_SIZE;
 }
 
 /*
  *  Setup L2 page table page for given KVA.
  *  Used in pre-bootstrap epoch.
  *
  *  Note that we have allocated NKPT2PG pages for L2 page tables in advance
  *  and used them for mapping KVA starting from KERNBASE. However, this is not
  *  enough. Vectors and devices need L2 page tables too. Note that they are
  *  even above VM_MAX_KERNEL_ADDRESS.
  */
 static __inline vm_paddr_t
 pmap_preboot_pt2pg_setup(vm_offset_t va)
 {
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pt2pg_pa;
 
 	/* Get associated entry in PT2TAB. */
 	pte2p = kern_pt2tab_entry(va);
 
 	/* Just return, if PT2s page exists already. */
 	pte2 = pt2tab_load(pte2p);
 	if (pte2_is_valid(pte2))
 		return (pte2_pa(pte2));
 
 	KASSERT(va >= VM_MAX_KERNEL_ADDRESS,
 	    ("%s: NKPT2PG too small", __func__));
 
 	/*
 	 * Allocate page for PT2s and insert it to PT2TAB.
 	 * In other words, map it into PT2MAP space.
 	 */
 	pt2pg_pa = pmap_preboot_get_pages(1);
 	pt2tab_store(pte2p, PTE2_KPT(pt2pg_pa));
 
 	/* Zero all PT2s in allocated page. */
 	bzero((void*)pt2map_pt2pg(va), PAGE_SIZE);
 	pte2_sync_range((pt2_entry_t *)pt2map_pt2pg(va), PAGE_SIZE);
 
 	return (pt2pg_pa);
 }
 
 /*
  *  Setup L2 page table for given KVA.
  *  Used in pre-bootstrap epoch.
  */
 static void
 pmap_preboot_pt2_setup(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 
 	/* Setup PT2's page. */
 	pt2pg_pa = pmap_preboot_pt2pg_setup(va);
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(va));
 
 	/* Insert PT2 to PT1. */
 	pte1p = kern_pte1(va);
 	pte1_store(pte1p, PTE1_LINK(pt2_pa));
 }
 
 /*
  *  Get L2 page entry associated with given KVA.
  *  Used in pre-bootstrap epoch.
  */
 static __inline pt2_entry_t*
 pmap_preboot_vtopte2(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 
 	/* Setup PT2 if needed. */
 	pte1p = kern_pte1(va);
 	if (!pte1_is_valid(pte1_load(pte1p))) /* XXX - sections ?! */
 		pmap_preboot_pt2_setup(va);
 
 	return (pt2map_entry(va));
 }
 
 /*
  *  Pre-bootstrap epoch page(s) mapping(s).
  */
 void
 pmap_preboot_map_pages(vm_paddr_t pa, vm_offset_t va, u_int num)
 {
 	u_int i;
 	pt2_entry_t *pte2p;
 
 	/* Map all the pages. */
 	for (i = 0; i < num; i++) {
 		pte2p = pmap_preboot_vtopte2(va);
 		pte2_store(pte2p, PTE2_KRW(pa));
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 }
 
 /*
  *  Pre-bootstrap epoch virtual space alocator.
  */
 vm_offset_t
 pmap_preboot_reserve_pages(u_int num)
 {
 	u_int i;
 	vm_offset_t start, va;
 	pt2_entry_t *pte2p;
 
 	/* Allocate virtual space. */
 	start = va = virtual_avail;
 	virtual_avail += num * PAGE_SIZE;
 
 	/* Zero the mapping. */
 	for (i = 0; i < num; i++) {
 		pte2p = pmap_preboot_vtopte2(va);
 		pte2_store(pte2p, 0);
 		va += PAGE_SIZE;
 	}
 
 	return (start);
 }
 
 /*
  *  Pre-bootstrap epoch page(s) allocation and mapping(s).
  */
 vm_offset_t
 pmap_preboot_get_vpages(u_int num)
 {
 	vm_paddr_t  pa;
 	vm_offset_t va;
 
 	/* Allocate physical page(s). */
 	pa = pmap_preboot_get_pages(num);
 
 	/* Allocate virtual space. */
 	va = virtual_avail;
 	virtual_avail += num * PAGE_SIZE;
 
 	/* Map and zero all. */
 	pmap_preboot_map_pages(pa, va, num);
 	bzero((void *)va, num * PAGE_SIZE);
 
 	return (va);
 }
 
 /*
  *  Pre-bootstrap epoch page mapping(s) with attributes.
  */
 void
 pmap_preboot_map_attr(vm_paddr_t pa, vm_offset_t va, vm_size_t size,
     vm_prot_t prot, vm_memattr_t attr)
 {
 	u_int num;
 	u_int l1_attr, l1_prot, l2_prot, l2_attr;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	l2_prot = prot & VM_PROT_WRITE ? PTE2_AP_KRW : PTE2_AP_KR;
 	l2_prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
 	l2_attr = vm_memattr_to_pte2(attr);
 	l1_prot = ATTR_TO_L1(l2_prot);
 	l1_attr = ATTR_TO_L1(l2_attr);
 
 	/* Map all the pages. */
 	num = round_page(size);
 	while (num > 0) {
 		if ((((va | pa) & PTE1_OFFSET) == 0) && (num >= PTE1_SIZE)) {
 			pte1p = kern_pte1(va);
 			pte1_store(pte1p, PTE1_KERN(pa, l1_prot, l1_attr));
 			va += PTE1_SIZE;
 			pa += PTE1_SIZE;
 			num -= PTE1_SIZE;
 		} else {
 			pte2p = pmap_preboot_vtopte2(va);
 			pte2_store(pte2p, PTE2_KERN(pa, l2_prot, l2_attr));
 			va += PAGE_SIZE;
 			pa += PAGE_SIZE;
 			num -= PAGE_SIZE;
 		}
 	}
 }
 
 /*
  *  Extract from the kernel page table the physical address
  *  that is mapped by the given virtual address "va".
  */
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	pte1 = pte1_load(kern_pte1(va));
 	if (pte1_is_section(pte1)) {
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 	} else if (pte1_is_link(pte1)) {
 		/*
 		 * We should beware of concurrent promotion that changes
 		 * pte1 at this point. However, it's not a problem as PT2
 		 * page is preserved by promotion in PT2TAB. So even if
 		 * it happens, using of PT2MAP is still safe.
 		 *
 		 * QQQ: However, concurrent removing is a problem which
 		 *      ends in abort on PT2MAP space. Locking must be used
 		 *      to deal with this.
 		 */
 		pte2 = pte2_load(pt2map_entry(va));
 		pa = pte2_pa(pte2) | (va & PTE2_OFFSET);
 	}
 	else {
 		panic("%s: va %#x pte1 %#x", __func__, va, pte1);
 	}
 	return (pa);
 }
 
 /*
  *  Extract from the kernel page table the physical address
  *  that is mapped by the given virtual address "va". Also
  *  return L2 page table entry which maps the address.
  *
  *  This is only intended to be used for panic dumps.
  */
 vm_paddr_t
 pmap_dump_kextract(vm_offset_t va, pt2_entry_t *pte2p)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	pte1 = pte1_load(kern_pte1(va));
 	if (pte1_is_section(pte1)) {
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 		pte2 = pa | ATTR_TO_L2(pte1) | PTE2_V;
 	} else if (pte1_is_link(pte1)) {
 		pte2 = pte2_load(pt2map_entry(va));
 		pa = pte2_pa(pte2);
 	} else {
 		pte2 = 0;
 		pa = 0;
 	}
 	if (pte2p != NULL)
 		*pte2p = pte2;
 	return (pa);
 }
 
 /*****************************************************************************
  *
  *	PMAP second stage initialization and utility functions
  *	for bootstrap epoch.
  *
  *  After pmap_bootstrap() is called, the following functions for
  *  mappings can be used:
  *
  *  void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
  *  void pmap_kremove(vm_offset_t va);
  *  vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end,
  *      int prot);
  *
  *  NOTE: This is not SMP coherent stage. And physical page allocation is not
  *        allowed during this stage.
  *
  *****************************************************************************/
 
 /*
  *  Initialize kernel PMAP locks and lists, kernel_pmap itself, and
  *  reserve various virtual spaces for temporary mappings.
  */
 void
 pmap_bootstrap(vm_offset_t firstaddr)
 {
 	pt2_entry_t *unused __unused;
 	struct pcpu *pc;
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_l1pa = (vm_paddr_t)kern_pt1;  /* for libkvm */
 	kernel_pmap->pm_pt1 = kern_pt1;
 	kernel_pmap->pm_pt2tab = kern_pt2tab;
 	CPU_FILL(&kernel_pmap->pm_active);  /* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
 	 * Initialize the global pv list lock.
 	 */
 	rw_init(&pvh_global_lock, "pmap pv global");
 
 	LIST_INIT(&allpmaps);
 
 	/*
 	 * Request a spin mutex so that changes to allpmaps cannot be
 	 * preempted by smp_rendezvous_cpus().
 	 */
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 */
 #define	SYSMAP(c, p, v, n)  do {		\
 	v = (c)pmap_preboot_reserve_pages(n);	\
 	p = pt2map_entry((vm_offset_t)v);	\
 	} while (0)
 
 	/*
 	 * Local CMAP1/CMAP2 are used for zeroing and copying pages.
 	 * Local CMAP2 is also used for data cache cleaning.
 	 */
 	pc = get_pcpu();
 	mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
 	SYSMAP(caddr_t, pc->pc_cmap1_pte2p, pc->pc_cmap1_addr, 1);
 	SYSMAP(caddr_t, pc->pc_cmap2_pte2p, pc->pc_cmap2_addr, 1);
 	SYSMAP(vm_offset_t, pc->pc_qmap_pte2p, pc->pc_qmap_addr, 1);
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS);
 
 	/*
 	 * _tmppt is used for reading arbitrary physical pages via /dev/mem.
 	 */
 	SYSMAP(caddr_t, unused, _tmppt, 1);
 
 	/*
 	 * PADDR1 and PADDR2 are used by pmap_pte2_quick() and pmap_pte2(),
 	 * respectively. PADDR3 is used by pmap_pte2_ddb().
 	 */
 	SYSMAP(pt2_entry_t *, PMAP1, PADDR1, 1);
 	SYSMAP(pt2_entry_t *, PMAP2, PADDR2, 1);
 #ifdef DDB
 	SYSMAP(pt2_entry_t *, PMAP3, PADDR3, 1);
 #endif
 	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
 	/*
 	 * Note that in very short time in initarm(), we are going to
 	 * initialize phys_avail[] array and no further page allocation
 	 * can happen after that until vm subsystem will be initialized.
 	 */
 	kernel_vm_end_new = kernel_vm_end;
 	virtual_end = vm_max_kernel_address;
 }
 
 static void
 pmap_init_reserved_pages(void)
 {
 	struct pcpu *pc;
 	vm_offset_t pages;
 	int i;
 
 	CPU_FOREACH(i) {
 		pc = pcpu_find(i);
 		/*
 		 * Skip if the mapping has already been initialized,
 		 * i.e. this is the BSP.
 		 */
 		if (pc->pc_cmap1_addr != 0)
 			continue;
 		mtx_init(&pc->pc_cmap_lock, "SYSMAPS", NULL, MTX_DEF);
 		pages = kva_alloc(PAGE_SIZE * 3);
 		if (pages == 0)
 			panic("%s: unable to allocate KVA", __func__);
 		pc->pc_cmap1_pte2p = pt2map_entry(pages);
 		pc->pc_cmap2_pte2p = pt2map_entry(pages + PAGE_SIZE);
 		pc->pc_qmap_pte2p = pt2map_entry(pages + (PAGE_SIZE * 2));
 		pc->pc_cmap1_addr = (caddr_t)pages;
 		pc->pc_cmap2_addr = (caddr_t)(pages + PAGE_SIZE);
 		pc->pc_qmap_addr = pages + (PAGE_SIZE * 2);
 	}
 }
 SYSINIT(rpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_reserved_pages, NULL);
 
 /*
  *  The function can already be use in second initialization stage.
  *  As such, the function DOES NOT call pmap_growkernel() where PT2
  *  allocation can happen. So if used, be sure that PT2 for given
  *  virtual address is allocated already!
  *
  *  Add a wired page to the kva.
  *  Note: not SMP coherent.
  */
 static __inline void
 pmap_kenter_prot_attr(vm_offset_t va, vm_paddr_t pa, uint32_t prot,
     uint32_t attr)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	pte1p = kern_pte1(va);
 	if (!pte1_is_valid(pte1_load(pte1p))) { /* XXX - sections ?! */
 		/*
 		 * This is a very low level function, so PT2 and particularly
 		 * PT2PG associated with given virtual address must be already
 		 * allocated. It's a pain mainly during pmap initialization
 		 * stage. However, called after pmap initialization with
 		 * virtual address not under kernel_vm_end will lead to
 		 * the same misery.
 		 */
 		if (!pte2_is_valid(pte2_load(kern_pt2tab_entry(va))))
 			panic("%s: kernel PT2 not allocated!", __func__);
 	}
 
 	pte2p = pt2map_entry(va);
 	pte2_store(pte2p, PTE2_KERN(pa, prot, attr));
 }
 
 PMAP_INLINE void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 
 	pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, PTE2_ATTR_DEFAULT);
 }
 
 /*
  *  Remove a page from the kernel pagetables.
  *  Note: not SMP coherent.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 
 	pte1p = kern_pte1(va);
 	if (pte1_is_section(pte1_load(pte1p))) {
 		pte1_clear(pte1p);
 	} else {
 		pte2p = pt2map_entry(va);
 		pte2_clear(pte2p);
 	}
 }
 
 /*
  *  Share new kernel PT2PG with all pmaps.
  *  The caller is responsible for maintaining TLB consistency.
  */
 static void
 pmap_kenter_pt2tab(vm_offset_t va, pt2_entry_t npte2)
 {
 	pmap_t pmap;
 	pt2_entry_t *pte2p;
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte2p = pmap_pt2tab_entry(pmap, va);
 		pt2tab_store(pte2p, npte2);
 	}
 	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
  *  Share new kernel PTE1 with all pmaps.
  *  The caller is responsible for maintaining TLB consistency.
  */
 static void
 pmap_kenter_pte1(vm_offset_t va, pt1_entry_t npte1)
 {
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte1p = pmap_pte1(pmap, va);
 		pte1_store(pte1p, npte1);
 	}
 	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
  *  Used to map a range of physical addresses into kernel
  *  virtual address space.
  *
  *  The value passed in '*virt' is a suggested virtual address for
  *  the mapping. Architectures which can support a direct-mapped
  *  physical to virtual region can return the appropriate address
  *  within that region, leaving '*virt' unchanged. Other
  *  architectures should map the pages starting at '*virt' and
  *  update '*virt' with the first usable address after the mapped
  *  region.
  *
  *  NOTE: Read the comments above pmap_kenter_prot_attr() as
  *        the function is used herein!
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t va, sva;
 	vm_paddr_t pte1_offset;
 	pt1_entry_t npte1;
 	uint32_t l1prot, l2prot;
 	uint32_t l1attr, l2attr;
 
 	PDEBUG(1, printf("%s: virt = %#x, start = %#x, end = %#x (size = %#x),"
 	    " prot = %d\n", __func__, *virt, start, end, end - start,  prot));
 
 	l2prot = (prot & VM_PROT_WRITE) ? PTE2_AP_KRW : PTE2_AP_KR;
 	l2prot |= (prot & VM_PROT_EXECUTE) ? PTE2_X : PTE2_NX;
 	l1prot = ATTR_TO_L1(l2prot);
 
 	l2attr = PTE2_ATTR_DEFAULT;
 	l1attr = ATTR_TO_L1(l2attr);
 
 	va = *virt;
 	/*
 	 * Does the physical address range's size and alignment permit at
 	 * least one section mapping to be created?
 	 */
 	pte1_offset = start & PTE1_OFFSET;
 	if ((end - start) - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) >=
 	    PTE1_SIZE) {
 		/*
 		 * Increase the starting virtual address so that its alignment
 		 * does not preclude the use of section mappings.
 		 */
 		if ((va & PTE1_OFFSET) < pte1_offset)
 			va = pte1_trunc(va) + pte1_offset;
 		else if ((va & PTE1_OFFSET) > pte1_offset)
 			va = pte1_roundup(va) + pte1_offset;
 	}
 	sva = va;
 	while (start < end) {
 		if ((start & PTE1_OFFSET) == 0 && end - start >= PTE1_SIZE) {
 			KASSERT((va & PTE1_OFFSET) == 0,
 			    ("%s: misaligned va %#x", __func__, va));
 			npte1 = PTE1_KERN(start, l1prot, l1attr);
 			pmap_kenter_pte1(va, npte1);
 			va += PTE1_SIZE;
 			start += PTE1_SIZE;
 		} else {
 			pmap_kenter_prot_attr(va, start, l2prot, l2attr);
 			va += PAGE_SIZE;
 			start += PAGE_SIZE;
 		}
 	}
 	tlb_flush_range(sva, va - sva);
 	*virt = va;
 	return (sva);
 }
 
 /*
  *  Make a temporary mapping for a physical address.
  *  This is only intended to be used for panic dumps.
  */
 void *
 pmap_kenter_temporary(vm_paddr_t pa, int i)
 {
 	vm_offset_t va;
 
 	/* QQQ: 'i' should be less or equal to MAXDUMPPGS. */
 
 	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	pmap_kenter(va, pa);
 	tlb_flush_local(va);
 	return ((void *)crashdumpmap);
 }
 
 
 /*************************************
  *
  *  TLB & cache maintenance routines.
  *
  *************************************/
 
 /*
  *  We inline these within pmap.c for speed.
  */
 PMAP_INLINE void
 pmap_tlb_flush(pmap_t pmap, vm_offset_t va)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		tlb_flush(va);
 }
 
 PMAP_INLINE void
 pmap_tlb_flush_range(pmap_t pmap, vm_offset_t sva, vm_size_t size)
 {
 
 	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		tlb_flush_range(sva, size);
 }
 
 /*
  *  Abuse the pte2 nodes for unmapped kva to thread a kva freelist through.
  *  Requirements:
  *   - Must deal with pages in order to ensure that none of the PTE2_* bits
  *     are ever set, PTE2_V in particular.
  *   - Assumes we can write to pte2s without pte2_store() atomic ops.
  *   - Assumes nothing will ever test these addresses for 0 to indicate
  *     no mapping instead of correctly checking PTE2_V.
  *   - Assumes a vm_offset_t will fit in a pte2 (true for arm).
  *  Because PTE2_V is never set, there can be no mappings to invalidate.
  */
 static vm_offset_t
 pmap_pte2list_alloc(vm_offset_t *head)
 {
 	pt2_entry_t *pte2p;
 	vm_offset_t va;
 
 	va = *head;
 	if (va == 0)
 		panic("pmap_ptelist_alloc: exhausted ptelist KVA");
 	pte2p = pt2map_entry(va);
 	*head = *pte2p;
 	if (*head & PTE2_V)
 		panic("%s: va with PTE2_V set!", __func__);
 	*pte2p = 0;
 	return (va);
 }
 
 static void
 pmap_pte2list_free(vm_offset_t *head, vm_offset_t va)
 {
 	pt2_entry_t *pte2p;
 
 	if (va & PTE2_V)
 		panic("%s: freeing va with PTE2_V set!", __func__);
 	pte2p = pt2map_entry(va);
 	*pte2p = *head;		/* virtual! PTE2_V is 0 though */
 	*head = va;
 }
 
 static void
 pmap_pte2list_init(vm_offset_t *head, void *base, int npages)
 {
 	int i;
 	vm_offset_t va;
 
 	*head = 0;
 	for (i = npages - 1; i >= 0; i--) {
 		va = (vm_offset_t)base + i * PAGE_SIZE;
 		pmap_pte2list_free(head, va);
 	}
 }
 
 /*****************************************************************************
  *
  *	PMAP third and final stage initialization.
  *
  *  After pmap_init() is called, PMAP subsystem is fully initialized.
  *
  *****************************************************************************/
 
 SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "VM/pmap parameters");
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
     "Max number of PV entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
     "Page share factor per proc");
 
 static u_long nkpt2pg = NKPT2PG;
 SYSCTL_ULONG(_vm_pmap, OID_AUTO, nkpt2pg, CTLFLAG_RD,
     &nkpt2pg, 0, "Pre-allocated pages for kernel PT2s");
 
 static int sp_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &sp_enabled, 0, "Are large page mappings enabled?");
 
 bool
 pmap_ps_enabled(pmap_t pmap __unused)
 {
 
 	return (sp_enabled != 0);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pte1, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "1MB page mapping counters");
 
 static u_long pmap_pte1_demotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_pte1_demotions, 0, "1MB page demotions");
 
 static u_long pmap_pte1_mappings;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_pte1_mappings, 0, "1MB page mappings");
 
 static u_long pmap_pte1_p_failures;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_pte1_p_failures, 0, "1MB page promotion failures");
 
 static u_long pmap_pte1_promotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_pte1_promotions, 0, "1MB page promotions");
 
 static u_long pmap_pte1_kern_demotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_demotions, CTLFLAG_RD,
     &pmap_pte1_kern_demotions, 0, "1MB page kernel demotions");
 
 static u_long pmap_pte1_kern_promotions;
 SYSCTL_ULONG(_vm_pmap_pte1, OID_AUTO, kern_promotions, CTLFLAG_RD,
     &pmap_pte1_kern_promotions, 0, "1MB page kernel promotions");
 
 static __inline ttb_entry_t
 pmap_ttb_get(pmap_t pmap)
 {
 
 	return (vtophys(pmap->pm_pt1) | ttb_flags);
 }
 
 /*
  *  Initialize a vm_page's machine-dependent fields.
  *
  *  Variations:
  *  1. Pages for L2 page tables are always not managed. So, pv_list and
  *     pt2_wirecount can share same physical space. However, proper
  *     initialization on a page alloc for page tables and reinitialization
  *     on the page free must be ensured.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	TAILQ_INIT(&m->md.pv_list);
 	pt2_wirecount_init(m);
 	m->md.pat_mode = VM_MEMATTR_DEFAULT;
 }
 
 /*
  *  Virtualization for faster way how to zero whole page.
  */
 static __inline void
 pagezero(void *page)
 {
 
 	bzero(page, PAGE_SIZE);
 }
 
 /*
  *  Zero L2 page table page.
  *  Use same KVA as in pmap_zero_page().
  */
 static __inline vm_paddr_t
 pmap_pt2pg_zero(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	vm_paddr_t pa;
 	struct pcpu *pc;
 
 	pa = VM_PAGE_TO_PHYS(m);
 
 	/*
 	 * XXX: For now, we map whole page even if it's already zero,
 	 *      to sync it even if the sync is only DSB.
 	 */
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	/*  Even VM_ALLOC_ZERO request is only advisory. */
 	if ((m->flags & PG_ZERO) == 0)
 		pagezero(pc->pc_cmap2_addr);
 	pte2_sync_range((pt2_entry_t *)pc->pc_cmap2_addr, PAGE_SIZE);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 
 	/*
 	 * Unpin the thread before releasing the lock.  Otherwise the thread
 	 * could be rescheduled while still bound to the current CPU, only
 	 * to unpin itself immediately upon resuming execution.
 	 */
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 
 	return (pa);
 }
 
 /*
  *  Init just allocated page as L2 page table(s) holder
  *  and return its physical address.
  */
 static __inline vm_paddr_t
 pmap_pt2pg_init(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	vm_paddr_t pa;
 	pt2_entry_t *pte2p;
 
 	/* Check page attributes. */
 	if (m->md.pat_mode != pt_memattr)
 		pmap_page_set_memattr(m, pt_memattr);
 
 	/* Zero page and init wire counts. */
 	pa = pmap_pt2pg_zero(m);
 	pt2_wirecount_init(m);
 
 	/*
 	 * Map page to PT2MAP address space for given pmap.
 	 * Note that PT2MAP space is shared with all pmaps.
 	 */
 	if (pmap == kernel_pmap)
 		pmap_kenter_pt2tab(va, PTE2_KPT(pa));
 	else {
 		pte2p = pmap_pt2tab_entry(pmap, va);
 		pt2tab_store(pte2p, PTE2_KPT_NG(pa));
 	}
 
 	return (pa);
 }
 
 /*
  *  Initialize the pmap module.
  *  Called by vm_init, to initialize any structures that the pmap
  *  system needs to map virtual memory.
  */
 void
 pmap_init(void)
 {
 	vm_size_t s;
 	pt2_entry_t *pte2p, pte2;
 	u_int i, pte1_idx, pv_npg;
 
 	PDEBUG(1, printf("%s: phys_start = %#x\n", __func__, PHYSADDR));
 
 	/*
 	 * Initialize the vm page array entries for kernel pmap's
 	 * L2 page table pages allocated in advance.
 	 */
 	pte1_idx = pte1_index(KERNBASE - PT2MAP_SIZE);
 	pte2p = kern_pt2tab_entry(KERNBASE - PT2MAP_SIZE);
 	for (i = 0; i < nkpt2pg + NPG_IN_PT2TAB; i++, pte2p++) {
 		vm_paddr_t pa;
 		vm_page_t m;
 
 		pte2 = pte2_load(pte2p);
 		KASSERT(pte2_is_valid(pte2), ("%s: no valid entry", __func__));
 
 		pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pa);
 		KASSERT(m >= vm_page_array &&
 		    m < &vm_page_array[vm_page_array_size],
 		    ("%s: L2 page table page is out of range", __func__));
 
 		m->pindex = pte1_idx;
 		m->phys_addr = pa;
 		pte1_idx += NPT2_IN_PG;
 	}
 
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 	pv_entry_max = roundup(pv_entry_max, _NPCPV);
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled);
 	if (sp_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("%s: can't assign to pagesizes[1]", __func__));
 		pagesizes[1] = PTE1_SIZE;
 	}
 
 	/*
 	 * Calculate the size of the pv head table for sections.
 	 * Handle the possibility that "vm_phys_segs[...].end" is zero.
 	 * Note that the table is only for sections which could be promoted.
 	 */
 	first_managed_pa = pte1_trunc(vm_phys_segs[0].start);
 	pv_npg = (pte1_trunc(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE)
 	    - first_managed_pa) / PTE1_SIZE + 1;
 
 	/*
 	 * Allocate memory for the pv head table for sections.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 
 	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
 	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
 	if (pv_chunkbase == NULL)
 		panic("%s: not enough kvm for pv chunks", __func__);
 	pmap_pte2list_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 }
 
 /*
  *  Add a list of wired pages to the kva
  *  this routine is only used for temporary
  *  kernel mappings that do not need to have
  *  page modification or references recorded.
  *  Note that old mappings are simply written
  *  over.  The page *must* be wired.
  *  Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	u_int anychanged;
 	pt2_entry_t *epte2p, *pte2p, pte2;
 	vm_page_t m;
 	vm_paddr_t pa;
 
 	anychanged = 0;
 	pte2p = pt2map_entry(sva);
 	epte2p = pte2p + count;
 	while (pte2p < epte2p) {
 		m = *ma++;
 		pa = VM_PAGE_TO_PHYS(m);
 		pte2 = pte2_load(pte2p);
 		if ((pte2_pa(pte2) != pa) ||
 		    (pte2_attr(pte2) != vm_page_pte2_attr(m))) {
 			anychanged++;
 			pte2_store(pte2p, PTE2_KERN(pa, PTE2_AP_KRW,
 			    vm_page_pte2_attr(m)));
 		}
 		pte2p++;
 	}
 	if (__predict_false(anychanged))
 		tlb_flush_range(sva, count * PAGE_SIZE);
 }
 
 /*
  *  This routine tears out page mappings from the
  *  kernel -- it is meant only for temporary mappings.
  *  Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t va;
 
 	va = sva;
 	while (count-- > 0) {
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 /*
  *  Are we current address space or kernel?
  */
 static __inline int
 pmap_is_current(pmap_t pmap)
 {
 
 	return (pmap == kernel_pmap ||
 		(pmap == vmspace_pmap(curthread->td_proc->p_vmspace)));
 }
 
 /*
  *  If the given pmap is not the current or kernel pmap, the returned
  *  pte2 must be released by passing it to pmap_pte2_release().
  */
 static pt2_entry_t *
 pmap_pte2(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		panic("%s: attempt to map PTE1", __func__);
 	if (pte1_is_link(pte1)) {
 		/* Are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (pt2map_entry(va));
 		/* Note that L2 page table size is not equal to PAGE_SIZE. */
 		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 		mtx_lock(&PMAP2mutex);
 		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
 			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
 			tlb_flush((vm_offset_t)PADDR2);
 		}
 		return (PADDR2 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 	}
 	return (NULL);
 }
 
 /*
  *  Releases a pte2 that was obtained from pmap_pte2().
  *  Be prepared for the pte2p being NULL.
  */
 static __inline void
 pmap_pte2_release(pt2_entry_t *pte2p)
 {
 
 	if ((pt2_entry_t *)(trunc_page((vm_offset_t)pte2p)) == PADDR2) {
 		mtx_unlock(&PMAP2mutex);
 	}
 }
 
 /*
  *  Super fast pmap_pte2 routine best used when scanning
  *  the pv lists.  This eliminates many coarse-grained
  *  invltlb calls.  Note that many of the pv list
  *  scans are across different pmaps.  It is very wasteful
  *  to do an entire tlb flush for checking a single mapping.
  *
  *  If the given pmap is not the current pmap, pvh_global_lock
  *  must be held and curthread pinned to a CPU.
  */
 static pt2_entry_t *
 pmap_pte2_quick(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		panic("%s: attempt to map PTE1", __func__);
 	if (pte1_is_link(pte1)) {
 		/* Are we current address space or kernel? */
 		if (pmap_is_current(pmap))
 			return (pt2map_entry(va));
 		rw_assert(&pvh_global_lock, RA_WLOCKED);
 		KASSERT(curthread->td_pinned > 0,
 		    ("%s: curthread not pinned", __func__));
 		/* Note that L2 page table size is not equal to PAGE_SIZE. */
 		pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
 			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		return (PADDR1 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 	}
 	return (NULL);
 }
 
 /*
  *  Routine: pmap_extract
  *  Function:
  * 	Extract the physical page address associated
  *	with the given map/virtual_address pair.
  */
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t *pte2p;
 
 	PMAP_LOCK(pmap);
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1))
 		pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 	else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, va);
 		pa = pte2_pa(pte2_load(pte2p)) | (va & PTE2_OFFSET);
 		pmap_pte2_release(pte2p);
 	} else
 		pa = 0;
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *  Routine: pmap_extract_and_hold
  *  Function:
  *	Atomically extract and hold the physical page
  *	with the given pmap and virtual address pair
  *	if that mapping permits the given protection.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	vm_paddr_t pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2, *pte2p;
 	vm_page_t m;
 
 	m = NULL;
 	PMAP_LOCK(pmap);
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (pte1_is_section(pte1)) {
 		if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) {
 			pa = pte1_pa(pte1) | (va & PTE1_OFFSET);
 			m = PHYS_TO_VM_PAGE(pa);
 			if (!vm_page_wire_mapped(m))
 				m = NULL;
 		}
 	} else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, va);
 		pte2 = pte2_load(pte2p);
 		pmap_pte2_release(pte2p);
 		if (pte2_is_valid(pte2) &&
 		    (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) {
 			pa = pte2_pa(pte2);
 			m = PHYS_TO_VM_PAGE(pa);
 			if (!vm_page_wire_mapped(m))
 				m = NULL;
 		}
 	}
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 /*
  *  Grow the number of kernel L2 page table entries, if needed.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_page_t m;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 
 	PDEBUG(1, printf("%s: addr = %#x\n", __func__, addr));
 	/*
 	 * All the time kernel_vm_end is first KVA for which underlying
 	 * L2 page table is either not allocated or linked from L1 page table
 	 * (not considering sections). Except for two possible cases:
 	 *
 	 *   (1) in the very beginning as long as pmap_growkernel() was
 	 *       not called, it could be first unused KVA (which is not
 	 *       rounded up to PTE1_SIZE),
 	 *
 	 *   (2) when all KVA space is mapped and vm_map_max(kernel_map)
 	 *       address is not rounded up to PTE1_SIZE. (For example,
 	 *       it could be 0xFFFFFFFF.)
 	 */
 	kernel_vm_end = pte1_roundup(kernel_vm_end);
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	addr = roundup2(addr, PTE1_SIZE);
 	if (addr - 1 >= vm_map_max(kernel_map))
 		addr = vm_map_max(kernel_map);
 	while (kernel_vm_end < addr) {
 		pte1 = pte1_load(kern_pte1(kernel_vm_end));
 		if (pte1_is_valid(pte1)) {
 			kernel_vm_end += PTE1_SIZE;
 			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 				kernel_vm_end = vm_map_max(kernel_map);
 				break;
 			}
 			continue;
 		}
 
 		/*
 		 * kernel_vm_end_new is used in pmap_pinit() when kernel
 		 * mappings are entered to new pmap all at once to avoid race
 		 * between pmap_kenter_pte1() and kernel_vm_end increase.
 		 * The same aplies to pmap_kenter_pt2tab().
 		 */
 		kernel_vm_end_new = kernel_vm_end + PTE1_SIZE;
 
 		pte2 = pt2tab_load(kern_pt2tab_entry(kernel_vm_end));
 		if (!pte2_is_valid(pte2)) {
 			/*
 			 * Install new PT2s page into kernel PT2TAB.
 			 */
 			m = vm_page_alloc(NULL,
 			    pte1_index(kernel_vm_end) & ~PT2PG_MASK,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (m == NULL)
 				panic("%s: no memory to grow kernel", __func__);
 			/*
 			 * QQQ: To link all new L2 page tables from L1 page
 			 *      table now and so pmap_kenter_pte1() them
 			 *      at once together with pmap_kenter_pt2tab()
 			 *      could be nice speed up. However,
 			 *      pmap_growkernel() does not happen so often...
 			 * QQQ: The other TTBR is another option.
 			 */
 			pt2pg_pa = pmap_pt2pg_init(kernel_pmap, kernel_vm_end,
 			    m);
 		} else
 			pt2pg_pa = pte2_pa(pte2);
 
 		pt2_pa = page_pt2pa(pt2pg_pa, pte1_index(kernel_vm_end));
 		pmap_kenter_pte1(kernel_vm_end, PTE1_LINK(pt2_pa));
 
 		kernel_vm_end = kernel_vm_end_new;
 		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 			kernel_vm_end = vm_map_max(kernel_map);
 			break;
 		}
 	}
 }
 
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long ksize = vm_max_kernel_address - KERNBASE;
 
 	return (sysctl_handle_long(oidp, &ksize, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size,
     CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 0, 0, kvm_size, "IU",
     "Size of KVM");
 
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = vm_max_kernel_address - kernel_vm_end;
 
 	return (sysctl_handle_long(oidp, &kfree, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free,
     CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 0, 0, kvm_free, "IU",
     "Amount of KVM free");
 
 /***********************************************
  *
  *  Pmap allocation/deallocation routines.
  *
  ***********************************************/
 
 /*
  *  Initialize the pmap for the swapper process.
  */
 void
 pmap_pinit0(pmap_t pmap)
 {
 	PDEBUG(1, printf("%s: pmap = %p\n", __func__, pmap));
 
 	PMAP_LOCK_INIT(pmap);
 
 	/*
 	 * Kernel page table directory and pmap stuff around is already
 	 * initialized, we are using it right now and here. So, finish
 	 * only PMAP structures initialization for process0 ...
 	 *
 	 * Since the L1 page table and PT2TAB is shared with the kernel pmap,
 	 * which is already included in the list "allpmaps", this pmap does
 	 * not need to be inserted into that list.
 	 */
 	pmap->pm_pt1 = kern_pt1;
 	pmap->pm_pt2tab = kern_pt2tab;
 	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	CPU_SET(0, &pmap->pm_active);
 }
 
 static __inline void
 pte1_copy_nosync(pt1_entry_t *spte1p, pt1_entry_t *dpte1p, vm_offset_t sva,
     vm_offset_t eva)
 {
 	u_int idx, count;
 
 	idx = pte1_index(sva);
 	count = (pte1_index(eva) - idx + 1) * sizeof(pt1_entry_t);
 	bcopy(spte1p + idx, dpte1p + idx, count);
 }
 
 static __inline void
 pt2tab_copy_nosync(pt2_entry_t *spte2p, pt2_entry_t *dpte2p, vm_offset_t sva,
     vm_offset_t eva)
 {
 	u_int idx, count;
 
 	idx = pt2tab_index(sva);
 	count = (pt2tab_index(eva) - idx + 1) * sizeof(pt2_entry_t);
 	bcopy(spte2p + idx, dpte2p + idx, count);
 }
 
 /*
  *  Initialize a preallocated and zeroed pmap structure,
  *  such as one in a vmspace structure.
  */
 int
 pmap_pinit(pmap_t pmap)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	vm_paddr_t pa, pt2tab_pa;
 	u_int i;
 
 	PDEBUG(6, printf("%s: pmap = %p, pm_pt1 = %p\n", __func__, pmap,
 	    pmap->pm_pt1));
 
 	/*
 	 * No need to allocate L2 page table space yet but we do need
 	 * a valid L1 page table and PT2TAB table.
 	 *
 	 * Install shared kernel mappings to these tables. It's a little
 	 * tricky as some parts of KVA are reserved for vectors, devices,
 	 * and whatever else. These parts are supposed to be above
 	 * vm_max_kernel_address. Thus two regions should be installed:
 	 *
 	 *   (1) <KERNBASE, kernel_vm_end),
 	 *   (2) <vm_max_kernel_address, 0xFFFFFFFF>.
 	 *
 	 * QQQ: The second region should be stable enough to be installed
 	 *      only once in time when the tables are allocated.
 	 * QQQ: Maybe copy of both regions at once could be faster ...
 	 * QQQ: Maybe the other TTBR is an option.
 	 *
 	 * Finally, install own PT2TAB table to these tables.
 	 */
 
 	if (pmap->pm_pt1 == NULL) {
 		pmap->pm_pt1 = (pt1_entry_t *)kmem_alloc_contig(NB_IN_PT1,
 		    M_NOWAIT | M_ZERO, 0, -1UL, NB_IN_PT1, 0, pt_memattr);
 		if (pmap->pm_pt1 == NULL)
 			return (0);
 	}
 	if (pmap->pm_pt2tab == NULL) {
 		/*
 		 * QQQ: (1) PT2TAB must be contiguous. If PT2TAB is one page
 		 *      only, what should be the only size for 32 bit systems,
 		 *      then we could allocate it with vm_page_alloc() and all
 		 *      the stuff needed as other L2 page table pages.
 		 *      (2) Note that a process PT2TAB is special L2 page table
 		 *      page. Its mapping in kernel_arena is permanent and can
 		 *      be used no matter which process is current. Its mapping
 		 *      in PT2MAP can be used only for current process.
 		 */
 		pmap->pm_pt2tab = (pt2_entry_t *)kmem_alloc_attr(NB_IN_PT2TAB,
 		    M_NOWAIT | M_ZERO, 0, -1UL, pt_memattr);
 		if (pmap->pm_pt2tab == NULL) {
 			/*
 			 * QQQ: As struct pmap is allocated from UMA with
 			 *      UMA_ZONE_NOFREE flag, it's important to leave
 			 *      no allocation in pmap if initialization failed.
 			 */
 			kmem_free((vm_offset_t)pmap->pm_pt1, NB_IN_PT1);
 			pmap->pm_pt1 = NULL;
 			return (0);
 		}
 		/*
 		 * QQQ: Each L2 page table page vm_page_t has pindex set to
 		 *      pte1 index of virtual address mapped by this page.
 		 *      It's not valid for non kernel PT2TABs themselves.
 		 *      The pindex of these pages can not be altered because
 		 *      of the way how they are allocated now. However, it
 		 *      should not be a problem.
 		 */
 	}
 
 	mtx_lock_spin(&allpmaps_lock);
 	/*
 	 * To avoid race with pmap_kenter_pte1() and pmap_kenter_pt2tab(),
 	 * kernel_vm_end_new is used here instead of kernel_vm_end.
 	 */
 	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, KERNBASE,
 	    kernel_vm_end_new - 1);
 	pte1_copy_nosync(kern_pt1, pmap->pm_pt1, vm_max_kernel_address,
 	    0xFFFFFFFF);
 	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, KERNBASE,
 	    kernel_vm_end_new - 1);
 	pt2tab_copy_nosync(kern_pt2tab, pmap->pm_pt2tab, vm_max_kernel_address,
 	    0xFFFFFFFF);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 	/*
 	 * Store PT2MAP PT2 pages (a.k.a. PT2TAB) in PT2TAB itself.
 	 * I.e. self reference mapping.  The PT2TAB is private, however mapped
 	 * into shared PT2MAP space, so the mapping should be not global.
 	 */
 	pt2tab_pa = vtophys(pmap->pm_pt2tab);
 	pte2p = pmap_pt2tab_entry(pmap, (vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPG_IN_PT2TAB; i++, pa += PTE2_SIZE) {
 		pt2tab_store(pte2p++, PTE2_KPT_NG(pa));
 	}
 
 	/* Insert PT2MAP PT2s into pmap PT1. */
 	pte1p = pmap_pte1(pmap, (vm_offset_t)PT2MAP);
 	for (pa = pt2tab_pa, i = 0; i < NPT2_IN_PT2TAB; i++, pa += NB_IN_PT2) {
 		pte1_store(pte1p++, PTE1_LINK(pa));
 	}
 
 	/*
 	 * Now synchronize new mapping which was made above.
 	 */
 	pte1_sync_range(pmap->pm_pt1, NB_IN_PT1);
 	pte2_sync_range(pmap->pm_pt2tab, NB_IN_PT2TAB);
 
 	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
 	return (1);
 }
 
 #ifdef INVARIANTS
 static boolean_t
 pt2tab_user_is_empty(pt2_entry_t *tab)
 {
 	u_int i, end;
 
 	end = pt2tab_index(VM_MAXUSER_ADDRESS);
 	for (i = 0; i < end; i++)
 		if (tab[i] != 0) return (FALSE);
 	return (TRUE);
 }
 #endif
 /*
  *  Release any resources held by the given physical map.
  *  Called when a pmap initialized by pmap_pinit is being released.
  *  Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap_t pmap)
 {
 #ifdef INVARIANTS
 	vm_offset_t start, end;
 #endif
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("%s: pmap resident count %ld != 0", __func__,
 	    pmap->pm_stats.resident_count));
 	KASSERT(pt2tab_user_is_empty(pmap->pm_pt2tab),
 	    ("%s: has allocated user PT2(s)", __func__));
 	KASSERT(CPU_EMPTY(&pmap->pm_active),
 	    ("%s: pmap %p is active on some CPU(s)", __func__, pmap));
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_REMOVE(pmap, pm_list);
 	mtx_unlock_spin(&allpmaps_lock);
 
 #ifdef INVARIANTS
 	start = pte1_index(KERNBASE) * sizeof(pt1_entry_t);
 	end = (pte1_index(0xFFFFFFFF) + 1) * sizeof(pt1_entry_t);
 	bzero((char *)pmap->pm_pt1 + start, end - start);
 
 	start = pt2tab_index(KERNBASE) * sizeof(pt2_entry_t);
 	end = (pt2tab_index(0xFFFFFFFF) + 1) * sizeof(pt2_entry_t);
 	bzero((char *)pmap->pm_pt2tab + start, end - start);
 #endif
 	/*
 	 * We are leaving PT1 and PT2TAB allocated on released pmap,
 	 * so hopefully UMA vmspace_zone will always be inited with
 	 * UMA_ZONE_NOFREE flag.
 	 */
 }
 
 /*********************************************************
  *
  *  L2 table pages and their pages management routines.
  *
  *********************************************************/
 
 /*
  *  Virtual interface for L2 page table wire counting.
  *
  *  Each L2 page table in a page has own counter which counts a number of
  *  valid mappings in a table. Global page counter counts mappings in all
  *  tables in a page plus a single itself mapping in PT2TAB.
  *
  *  During a promotion we leave the associated L2 page table counter
  *  untouched, so the table (strictly speaking a page which holds it)
  *  is never freed if promoted.
  *
  *  If a page m->ref_count == 1 then no valid mappings exist in any L2 page
  *  table in the page and the page itself is only mapped in PT2TAB.
  */
 
 static __inline void
 pt2_wirecount_init(vm_page_t m)
 {
 	u_int i;
 
 	/*
 	 * Note: A page m is allocated with VM_ALLOC_WIRED flag and
 	 *       m->ref_count should be already set correctly.
 	 *       So, there is no need to set it again herein.
 	 */
 	for (i = 0; i < NPT2_IN_PG; i++)
 		m->md.pt2_wirecount[i] = 0;
 }
 
 static __inline void
 pt2_wirecount_inc(vm_page_t m, uint32_t pte1_idx)
 {
 
 	/*
 	 * Note: A just modificated pte2 (i.e. already allocated)
 	 *       is acquiring one extra reference which must be
 	 *       explicitly cleared. It influences the KASSERTs herein.
 	 *       All L2 page tables in a page always belong to the same
 	 *       pmap, so we allow only one extra reference for the page.
 	 */
 	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] < (NPTE2_IN_PT2 + 1),
 	    ("%s: PT2 is overflowing ...", __func__));
 	KASSERT(m->ref_count <= (NPTE2_IN_PG + 1),
 	    ("%s: PT2PG is overflowing ...", __func__));
 
 	m->ref_count++;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]++;
 }
 
 static __inline void
 pt2_wirecount_dec(vm_page_t m, uint32_t pte1_idx)
 {
 
 	KASSERT(m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] != 0,
 	    ("%s: PT2 is underflowing ...", __func__));
 	KASSERT(m->ref_count > 1,
 	    ("%s: PT2PG is underflowing ...", __func__));
 
 	m->ref_count--;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]--;
 }
 
 static __inline void
 pt2_wirecount_set(vm_page_t m, uint32_t pte1_idx, uint16_t count)
 {
 
 	KASSERT(count <= NPTE2_IN_PT2,
 	    ("%s: invalid count %u", __func__, count));
 	KASSERT(m->ref_count >  m->md.pt2_wirecount[pte1_idx & PT2PG_MASK],
 	    ("%s: PT2PG corrupting (%u, %u) ...", __func__, m->ref_count,
 	    m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]));
 
 	m->ref_count -= m->md.pt2_wirecount[pte1_idx & PT2PG_MASK];
 	m->ref_count += count;
 	m->md.pt2_wirecount[pte1_idx & PT2PG_MASK] = count;
 
 	KASSERT(m->ref_count <= (NPTE2_IN_PG + 1),
 	    ("%s: PT2PG is overflowed (%u) ...", __func__, m->ref_count));
 }
 
 static __inline uint32_t
 pt2_wirecount_get(vm_page_t m, uint32_t pte1_idx)
 {
 
 	return (m->md.pt2_wirecount[pte1_idx & PT2PG_MASK]);
 }
 
 static __inline boolean_t
 pt2_is_empty(vm_page_t m, vm_offset_t va)
 {
 
 	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] == 0);
 }
 
 static __inline boolean_t
 pt2_is_full(vm_page_t m, vm_offset_t va)
 {
 
 	return (m->md.pt2_wirecount[pte1_index(va) & PT2PG_MASK] ==
 	    NPTE2_IN_PT2);
 }
 
 static __inline boolean_t
 pt2pg_is_empty(vm_page_t m)
 {
 
 	return (m->ref_count == 1);
 }
 
 /*
  *  This routine is called if the L2 page table
  *  is not mapped correctly.
  */
 static vm_page_t
 _pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
 {
 	uint32_t pte1_idx;
 	pt1_entry_t *pte1p;
 	pt2_entry_t pte2;
 	vm_page_t  m;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 
 	pte1_idx = pte1_index(va);
 	pte1p = pmap->pm_pt1 + pte1_idx;
 
 	KASSERT(pte1_load(pte1p) == 0,
 	    ("%s: pm_pt1[%#x] is not zero: %#x", __func__, pte1_idx,
 	    pte1_load(pte1p)));
 
 	pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, va));
 	if (!pte2_is_valid(pte2)) {
 		/*
 		 * Install new PT2s page into pmap PT2TAB.
 		 */
 		m = vm_page_alloc(NULL, pte1_idx & ~PT2PG_MASK,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		if (m == NULL) {
 			if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(&pvh_global_lock);
 				vm_wait(NULL);
 				rw_wlock(&pvh_global_lock);
 				PMAP_LOCK(pmap);
 			}
 
 			/*
 			 * Indicate the need to retry.  While waiting,
 			 * the L2 page table page may have been allocated.
 			 */
 			return (NULL);
 		}
 		pmap->pm_stats.resident_count++;
 		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
 	} else {
 		pt2pg_pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pt2pg_pa);
 	}
 
 	pt2_wirecount_inc(m, pte1_idx);
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
 	pte1_store(pte1p, PTE1_LINK(pt2_pa));
 
 	return (m);
 }
 
 static vm_page_t
 pmap_allocpte2(pmap_t pmap, vm_offset_t va, u_int flags)
 {
 	u_int pte1_idx;
 	pt1_entry_t *pte1p, pte1;
 	vm_page_t m;
 
 	pte1_idx = pte1_index(va);
 retry:
 	pte1p = pmap->pm_pt1 + pte1_idx;
 	pte1 = pte1_load(pte1p);
 
 	/*
 	 * This supports switching from a 1MB page to a
 	 * normal 4K page.
 	 */
 	if (pte1_is_section(pte1)) {
 		(void)pmap_demote_pte1(pmap, pte1p, va);
 		/*
 		 * Reload pte1 after demotion.
 		 *
 		 * Note: Demotion can even fail as either PT2 is not find for
 		 *       the virtual address or PT2PG can not be allocated.
 		 */
 		pte1 = pte1_load(pte1p);
 	}
 
 	/*
 	 * If the L2 page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (pte1_is_link(pte1)) {
 		m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 		pt2_wirecount_inc(m, pte1_idx);
 	} else  {
 		/*
 		 * Here if the PT2 isn't mapped, or if it has
 		 * been deallocated.
 		 */
 		m = _pmap_allocpte2(pmap, va, flags);
 		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
 			goto retry;
 	}
 
 	return (m);
 }
 
 /*
  *  Schedule the specified unused L2 page table page to be freed. Specifically,
  *  add the page to the specified list of pages that will be released to the
  *  physical memory manager after the TLB has been updated.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free)
 {
 
 	/*
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 #ifdef PMAP_DEBUG
 	pmap_zero_page_check(m);
 #endif
 	m->flags |= PG_ZERO;
 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 }
 
 /*
  *  Unwire L2 page tables page.
  */
 static void
 pmap_unwire_pt2pg(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pt1_entry_t *pte1p, opte1 __unused;
 	pt2_entry_t *pte2p;
 	uint32_t i;
 
 	KASSERT(pt2pg_is_empty(m),
 	    ("%s: pmap %p PT2PG %p wired", __func__, pmap, m));
 
 	/*
 	 * Unmap all L2 page tables in the page from L1 page table.
 	 *
 	 * QQQ: Individual L2 page tables (except the last one) can be unmapped
 	 * earlier. However, we are doing that this way.
 	 */
 	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
 	    ("%s: pmap %p va %#x PT2PG %p bad index", __func__, pmap, va, m));
 	pte1p = pmap->pm_pt1 + m->pindex;
 	for (i = 0; i < NPT2_IN_PG; i++, pte1p++) {
 		KASSERT(m->md.pt2_wirecount[i] == 0,
 		    ("%s: pmap %p PT2 %u (PG %p) wired", __func__, pmap, i, m));
 		opte1 = pte1_load(pte1p);
 		if (pte1_is_link(opte1)) {
 			pte1_clear(pte1p);
 			/*
 			 * Flush intermediate TLB cache.
 			 */
 			pmap_tlb_flush(pmap, (m->pindex + i) << PTE1_SHIFT);
 		}
 #ifdef INVARIANTS
 		else
 			KASSERT((opte1 == 0) || pte1_is_section(opte1),
 			    ("%s: pmap %p va %#x bad pte1 %x at %u", __func__,
 			    pmap, va, opte1, i));
 #endif
 	}
 
 	/*
 	 * Unmap the page from PT2TAB.
 	 */
 	pte2p = pmap_pt2tab_entry(pmap, va);
 	(void)pt2tab_load_clear(pte2p);
 	pmap_tlb_flush(pmap, pt2map_pt2pg(va));
 
 	m->ref_count = 0;
 	pmap->pm_stats.resident_count--;
 
 	/*
 	 * This barrier is so that the ordinary store unmapping
 	 * the L2 page table page is globally performed before TLB shoot-
 	 * down is begun.
 	 */
 	wmb();
 	vm_wire_sub(1);
 }
 
 /*
  *  Decrements a L2 page table page's wire count, which is used to record the
  *  number of valid page table entries within the page.  If the wire count
  *  drops to zero, then the page table page is unmapped.  Returns TRUE if the
  *  page table page was unmapped and FALSE otherwise.
  */
 static __inline boolean_t
 pmap_unwire_pt2(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 	pt2_wirecount_dec(m, pte1_index(va));
 	if (pt2pg_is_empty(m)) {
 		/*
 		 * QQQ: Wire count is zero, so whole page should be zero and
 		 *      we can set PG_ZERO flag to it.
 		 *      Note that when promotion is enabled, it takes some
 		 *      more efforts. See pmap_unwire_pt2_all() below.
 		 */
 		pmap_unwire_pt2pg(pmap, va, m);
 		pmap_add_delayed_free_list(m, free);
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  *  Drop a L2 page table page's wire count at once, which is used to record
  *  the number of valid L2 page table entries within the page. If the wire
  *  count drops to zero, then the L2 page table page is unmapped.
  */
 static __inline void
 pmap_unwire_pt2_all(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct spglist *free)
 {
 	u_int pte1_idx = pte1_index(va);
 
 	KASSERT(m->pindex == (pte1_idx & ~PT2PG_MASK),
 		("%s: PT2 page's pindex is wrong", __func__));
 	KASSERT(m->ref_count > pt2_wirecount_get(m, pte1_idx),
 	    ("%s: bad pt2 wire count %u > %u", __func__, m->ref_count,
 	    pt2_wirecount_get(m, pte1_idx)));
 
 	/*
 	 * It's possible that the L2 page table was never used.
 	 * It happened in case that a section was created without promotion.
 	 */
 	if (pt2_is_full(m, va)) {
 		pt2_wirecount_set(m, pte1_idx, 0);
 
 		/*
 		 * QQQ: We clear L2 page table now, so when L2 page table page
 		 *      is going to be freed, we can set it PG_ZERO flag ...
 		 *      This function is called only on section mappings, so
 		 *      hopefully it's not to big overload.
 		 *
 		 * XXX: If pmap is current, existing PT2MAP mapping could be
 		 *      used for zeroing.
 		 */
 		pmap_zero_page_area(m, page_pt2off(pte1_idx), NB_IN_PT2);
 	}
 #ifdef INVARIANTS
 	else
 		KASSERT(pt2_is_empty(m, va), ("%s: PT2 is not empty (%u)",
 		    __func__, pt2_wirecount_get(m, pte1_idx)));
 #endif
 	if (pt2pg_is_empty(m)) {
 		pmap_unwire_pt2pg(pmap, va, m);
 		pmap_add_delayed_free_list(m, free);
 	}
 }
 
 /*
  *  After removing a L2 page table entry, this routine is used to
  *  conditionally free the page, and manage the hold/wire counts.
  */
 static boolean_t
 pmap_unuse_pt2(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pt1_entry_t pte1;
 	vm_page_t mpte;
 
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (FALSE);
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	mpte = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 	return (pmap_unwire_pt2(pmap, va, mpte, free));
 }
 
 /*************************************
  *
  *  Page management routines.
  *
  *************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 11);
 CTASSERT(_NPCPV == 336);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 #define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
 #define	PC_FREE10	0x0000fffful	/* Free values for index 10 */
 
 static const uint32_t pc_freemask[_NPCM] = {
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
 	PC_FREE0_9, PC_FREE10
 };
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
     "Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
     "Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
     "Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail,
     0, "Number of times tried to get a chunk page but failed.");
 
 static long pv_entry_frees, pv_entry_allocs;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
     "Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs,
     0, "Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
     "Current number of spare pv entries");
 #endif
 
 /*
  *  Is given page managed?
  */
 static __inline bool
 is_managed(vm_paddr_t pa)
 {
 	vm_page_t m;
 
 	m = PHYS_TO_VM_PAGE(pa);
 	if (m == NULL)
 		return (false);
 	return ((m->oflags & VPO_UNMANAGED) == 0);
 }
 
 static __inline bool
 pte1_is_managed(pt1_entry_t pte1)
 {
 
 	return (is_managed(pte1_pa(pte1)));
 }
 
 static __inline bool
 pte2_is_managed(pt2_entry_t pte2)
 {
 
 	return (is_managed(pte2_pa(pte2)));
 }
 
 /*
  *  We are in a serious low memory condition.  Resort to
  *  drastic measures to free some pages so we can allocate
  *  another pv entry chunk.
  */
 static vm_page_t
 pmap_pv_reclaim(pmap_t locked_pmap)
 {
 	struct pch newtail;
 	struct pv_chunk *pc;
 	struct md_page *pvh;
 	pt1_entry_t *pte1p;
 	pmap_t pmap;
 	pt2_entry_t *pte2p, tpte2;
 	pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m, m_pc;
 	struct spglist free;
 	uint32_t inuse;
 	int bit, field, freed;
 
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	pmap = NULL;
 	m_pc = NULL;
 	SLIST_INIT(&free);
 	TAILQ_INIT(&newtail);
 	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
 	    SLIST_EMPTY(&free))) {
 		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 		if (pmap != pc->pc_pmap) {
 			if (pmap != NULL) {
 				if (pmap != locked_pmap)
 					PMAP_UNLOCK(pmap);
 			}
 			pmap = pc->pc_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap)
 				PMAP_LOCK(pmap);
 			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
 				pmap = NULL;
 				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 				continue;
 			}
 		}
 
 		/*
 		 * Destroy every non-wired, 4 KB page mapping in the chunk.
 		 */
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
 			    inuse != 0; inuse &= ~(1UL << bit)) {
 				bit = ffs(inuse) - 1;
 				pv = &pc->pc_pventry[field * 32 + bit];
 				va = pv->pv_va;
 				pte1p = pmap_pte1(pmap, va);
 				if (pte1_is_section(pte1_load(pte1p)))
 					continue;
 				pte2p = pmap_pte2(pmap, va);
 				tpte2 = pte2_load(pte2p);
 				if ((tpte2 & PTE2_W) == 0)
 					tpte2 = pte2_load_clear(pte2p);
 				pmap_pte2_release(pte2p);
 				if ((tpte2 & PTE2_W) != 0)
 					continue;
 				KASSERT(tpte2 != 0,
 				    ("pmap_pv_reclaim: pmap %p va %#x zero pte",
 				    pmap, va));
 				pmap_tlb_flush(pmap, va);
 				m = PHYS_TO_VM_PAGE(pte2_pa(tpte2));
 				if (pte2_is_dirty(tpte2))
 					vm_page_dirty(m);
 				if ((tpte2 & PTE2_A) != 0)
 					vm_page_aflag_set(m, PGA_REFERENCED);
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 				if (TAILQ_EMPTY(&m->md.pv_list) &&
 				    (m->flags & PG_FICTITIOUS) == 0) {
 					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						vm_page_aflag_clear(m,
 						    PGA_WRITEABLE);
 					}
 				}
 				pc->pc_map[field] |= 1UL << bit;
 				pmap_unuse_pt2(pmap, va, &free);
 				freed++;
 			}
 		}
 		if (freed == 0) {
 			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 			continue;
 		}
 		/* Every freed mapping is for a 4 KB page. */
 		pmap->pm_stats.resident_count -= freed;
 		PV_STAT(pv_entry_frees += freed);
 		PV_STAT(pv_entry_spare += freed);
 		pv_entry_count -= freed;
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		for (field = 0; field < _NPCM; field++)
 			if (pc->pc_map[field] != pc_freemask[field]) {
 				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
 				    pc_list);
 				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
 
 				/*
 				 * One freed pv entry in locked_pmap is
 				 * sufficient.
 				 */
 				if (pmap == locked_pmap)
 					goto out;
 				break;
 			}
 		if (field == _NPCM) {
 			PV_STAT(pv_entry_spare -= _NPCPV);
 			PV_STAT(pc_chunk_count--);
 			PV_STAT(pc_chunk_frees++);
 			/* Entire chunk is free; return it. */
 			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 			pmap_qremove((vm_offset_t)pc, 1);
 			pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
 			break;
 		}
 	}
 out:
 	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
 	if (pmap != NULL) {
 		if (pmap != locked_pmap)
 			PMAP_UNLOCK(pmap);
 	}
 	if (m_pc == NULL && pv_vafree != 0 && SLIST_EMPTY(&free)) {
 		m_pc = SLIST_FIRST(&free);
 		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->ref_count = 1;
 		vm_wire_add(1);
 	}
 	vm_page_free_pages_toq(&free, false);
 	return (m_pc);
 }
 
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
 	vm_page_t m;
 
 	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 	pmap_qremove((vm_offset_t)pc, 1);
 	vm_page_unwire_noq(m);
 	vm_page_free(m);
 	pmap_pte2list_free(&pv_vafree, (vm_offset_t)pc);
 }
 
 /*
  *  Free the pv_entry back to the free list.
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_frees++);
 	PV_STAT(pv_entry_spare++);
 	pv_entry_count--;
 	pc = pv_to_chunk(pv);
 	idx = pv - &pc->pc_pventry[0];
 	field = idx / 32;
 	bit = idx % 32;
 	pc->pc_map[field] |= 1ul << bit;
 	for (idx = 0; idx < _NPCM; idx++)
 		if (pc->pc_map[idx] != pc_freemask[idx]) {
 			/*
 			 * 98% of the time, pc is already at the head of the
 			 * list.  If it isn't already, move it to the head.
 			 */
 			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
 			    pc)) {
 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
 				    pc_list);
 			}
 			return;
 		}
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	free_pv_chunk(pc);
 }
 
 /*
  *  Get a new pv_entry, allocating a block from the system
  *  when needed.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, boolean_t try)
 {
 	static const struct timeval printinterval = { 60, 0 };
 	static struct timeval lastprint;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(pv_entry_allocs++);
 	pv_entry_count++;
 	if (pv_entry_count > pv_entry_high_water)
 		if (ratecheck(&lastprint, &printinterval))
 			printf("Approaching the limit on PV entries, consider "
 			    "increasing either the vm.pmap.shpgperproc or the "
 			    "vm.pmap.pv_entries tunable.\n");
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = ffs(pc->pc_map[field]) - 1;
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 32 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			for (field = 0; field < _NPCM; field++)
 				if (pc->pc_map[field] != 0) {
 					PV_STAT(pv_entry_spare--);
 					return (pv);	/* not full, return */
 				}
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 			PV_STAT(pv_entry_spare--);
 			return (pv);
 		}
 	}
 	/*
 	 * Access to the pte2list "pv_vafree" is synchronized by the pvh
 	 * global lock.  If "pv_vafree" is currently non-empty, it will
 	 * remain non-empty until pmap_pte2list_alloc() completes.
 	 */
 	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		if (try) {
 			pv_entry_count--;
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		m = pmap_pv_reclaim(pmap);
 		if (m == NULL)
 			goto retry;
 	}
 	PV_STAT(pc_chunk_count++);
 	PV_STAT(pc_chunk_allocs++);
 	pc = (struct pv_chunk *)pmap_pte2list_alloc(&pv_vafree);
 	pmap_qenter((vm_offset_t)pc, &m, 1);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
 	for (field = 1; field < _NPCM; field++)
 		pc->pc_map[field] = pc_freemask[field];
 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(pv_entry_spare += _NPCPV - 1);
 	return (pv);
 }
 
 /*
  *  Create a pv entry for page at pa for
  *  (pmap, va).
  */
 static void
 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pv = get_pv_entry(pmap, FALSE);
 	pv->pv_va = va;
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 }
 
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			break;
 		}
 	}
 	return (pv);
 }
 
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, pv);
 }
 
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 {
 	struct md_page *pvh;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	pmap_pvh_free(&m->md, pmap, va);
 	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		if (TAILQ_EMPTY(&pvh->pv_list))
 			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
 }
 
 static void
 pmap_pv_demote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pa & PTE1_OFFSET) == 0,
 	    ("pmap_pv_demote_pte1: pa is not 1mpage aligned"));
 
 	/*
 	 * Transfer the 1mpage's pv entry for this mapping to the first
 	 * page's pv list.
 	 */
 	pvh = pa_to_pvh(pa);
 	va = pte1_trunc(va);
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_pte1: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	/* Instantiate the remaining NPTE2_IN_PT2 - 1 pv entries. */
 	va_last = va + PTE1_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("pmap_pv_demote_pte1: page %p is not managed", m));
 		va += PAGE_SIZE;
 		pmap_insert_entry(pmap, va, m);
 	} while (va < va_last);
 }
 
 #if VM_NRESERVLEVEL > 0
 static void
 pmap_pv_promote_pte1(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pa & PTE1_OFFSET) == 0,
 	    ("pmap_pv_promote_pte1: pa is not 1mpage aligned"));
 
 	/*
 	 * Transfer the first page's pv entry for this mapping to the
 	 * 1mpage's pv list.  Aside from avoiding the cost of a call
 	 * to get_pv_entry(), a transfer avoids the possibility that
 	 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim()
 	 * removes one of the mappings that is being promoted.
 	 */
 	m = PHYS_TO_VM_PAGE(pa);
 	va = pte1_trunc(va);
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_pte1: pv not found"));
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	/* Free the remaining NPTE2_IN_PT2 - 1 pv entries. */
 	va_last = va + PTE1_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&m->md, pmap, va);
 	} while (va < va_last);
 }
 #endif
 
 /*
  *  Conditionally create a pv entry.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if (pv_entry_count < pv_entry_high_water &&
 	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
 		pv->pv_va = va;
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  *  Create the pv entries for each of the pages within a section.
  */
 static bool
 pmap_pv_insert_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	bool noreclaim;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	noreclaim = (flags & PMAP_ENTER_NORECLAIM) != 0;
 	if ((noreclaim && pv_entry_count >= pv_entry_high_water) ||
 	    (pv = get_pv_entry(pmap, noreclaim)) == NULL)
 		return (false);
 	pv->pv_va = va;
 	pvh = pa_to_pvh(pte1_pa(pte1));
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	return (true);
 }
 
 static inline void
 pmap_tlb_flush_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t npte1)
 {
 
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1))
 		pmap_tlb_flush_range(pmap, pte1_trunc(va), PTE1_SIZE);
 	else
 		pmap_tlb_flush(pmap, pte1_trunc(va));
 }
 
 /*
  *  Update kernel pte1 on all pmaps.
  *
  *  The following function is called only on one cpu with disabled interrupts.
  *  In SMP case, smp_rendezvous_cpus() is used to stop other cpus. This way
  *  nobody can invoke explicit hardware table walk during the update of pte1.
  *  Unsolicited hardware table walk can still happen, invoked by speculative
  *  data or instruction prefetch or even by speculative hardware table walk.
  *
  *  The break-before-make approach should be implemented here. However, it's
  *  not so easy to do that for kernel mappings as it would be unhappy to unmap
  *  itself unexpectedly but voluntarily.
  */
 static void
 pmap_update_pte1_kernel(vm_offset_t va, pt1_entry_t npte1)
 {
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 
 	/*
 	 * Get current pmap. Interrupts should be disabled here
 	 * so PCPU_GET() is done atomically.
 	 */
 	pmap = PCPU_GET(curpmap);
 	if (pmap == NULL)
 		pmap = kernel_pmap;
 
 	/*
 	 * (1) Change pte1 on current pmap.
 	 * (2) Flush all obsolete TLB entries on current CPU.
 	 * (3) Change pte1 on all pmaps.
 	 * (4) Flush all obsolete TLB entries on all CPUs in SMP case.
 	 */
 
 	pte1p = pmap_pte1(pmap, va);
 	pte1_store(pte1p, npte1);
 
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1)) {
 		pmap_pte1_kern_promotions++;
 		tlb_flush_range_local(pte1_trunc(va), PTE1_SIZE);
 	} else {
 		pmap_pte1_kern_demotions++;
 		tlb_flush_local(pte1_trunc(va));
 	}
 
 	/*
 	 * In SMP case, this function is called when all cpus are at smp
 	 * rendezvous, so there is no need to use 'allpmaps_lock' lock here.
 	 * In UP case, the function is called with this lock locked.
 	 */
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		pte1p = pmap_pte1(pmap, va);
 		pte1_store(pte1p, npte1);
 	}
 
 #ifdef SMP
 	/* Kill all the small mappings or the big one only. */
 	if (pte1_is_section(npte1))
 		tlb_flush_range(pte1_trunc(va), PTE1_SIZE);
 	else
 		tlb_flush(pte1_trunc(va));
 #endif
 }
 
 #ifdef SMP
 struct pte1_action {
 	vm_offset_t va;
 	pt1_entry_t npte1;
 	u_int update;		/* CPU that updates the PTE1 */
 };
 
 static void
 pmap_update_pte1_action(void *arg)
 {
 	struct pte1_action *act = arg;
 
 	if (act->update == PCPU_GET(cpuid))
 		pmap_update_pte1_kernel(act->va, act->npte1);
 }
 
 /*
  *  Change pte1 on current pmap.
  *  Note that kernel pte1 must be changed on all pmaps.
  *
  *  According to the architecture reference manual published by ARM,
  *  the behaviour is UNPREDICTABLE when two or more TLB entries map the same VA.
  *  According to this manual, UNPREDICTABLE behaviours must never happen in
  *  a viable system. In contrast, on x86 processors, it is not specified which
  *  TLB entry mapping the virtual address will be used, but the MMU doesn't
  *  generate a bogus translation the way it does on Cortex-A8 rev 2 (Beaglebone
  *  Black).
  *
  *  It's a problem when either promotion or demotion is being done. The pte1
  *  update and appropriate TLB flush must be done atomically in general.
  */
 static void
 pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
     pt1_entry_t npte1)
 {
 
 	if (pmap == kernel_pmap) {
 		struct pte1_action act;
 
 		sched_pin();
 		act.va = va;
 		act.npte1 = npte1;
 		act.update = PCPU_GET(cpuid);
 		smp_rendezvous_cpus(all_cpus, smp_no_rendezvous_barrier,
 		    pmap_update_pte1_action, NULL, &act);
 		sched_unpin();
 	} else {
 		register_t cspr;
 
 		/*
 		 * Use break-before-make approach for changing userland
 		 * mappings. It can cause L1 translation aborts on other
 		 * cores in SMP case. So, special treatment is implemented
 		 * in pmap_fault(). To reduce the likelihood that another core
 		 * will be affected by the broken mapping, disable interrupts
 		 * until the mapping change is completed.
 		 */
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		pte1_clear(pte1p);
 		pmap_tlb_flush_pte1(pmap, va, npte1);
 		pte1_store(pte1p, npte1);
 		restore_interrupts(cspr);
 	}
 }
 #else
 static void
 pmap_change_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va,
     pt1_entry_t npte1)
 {
 
 	if (pmap == kernel_pmap) {
 		mtx_lock_spin(&allpmaps_lock);
 		pmap_update_pte1_kernel(va, npte1);
 		mtx_unlock_spin(&allpmaps_lock);
 	} else {
 		register_t cspr;
 
 		/*
 		 * Use break-before-make approach for changing userland
 		 * mappings. It's absolutely safe in UP case when interrupts
 		 * are disabled.
 		 */
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		pte1_clear(pte1p);
 		pmap_tlb_flush_pte1(pmap, va, npte1);
 		pte1_store(pte1p, npte1);
 		restore_interrupts(cspr);
 	}
 }
 #endif
 
 #if VM_NRESERVLEVEL > 0
 /*
  *  Tries to promote the NPTE2_IN_PT2, contiguous 4KB page mappings that are
  *  within a single page table page (PT2) to a single 1MB page mapping.
  *  For promotion to occur, two conditions must be met: (1) the 4KB page
  *  mappings must map aligned, contiguous physical memory and (2) the 4KB page
  *  mappings must have identical characteristics.
  *
  *  Managed (PG_MANAGED) mappings within the kernel address space are not
  *  promoted.  The reason is that kernel PTE1s are replicated in each pmap but
  *  pmap_remove_write(), pmap_clear_modify(), and pmap_clear_reference() only
  *  read the PTE1 from the kernel pmap.
  */
 static void
 pmap_promote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	pt1_entry_t npte1;
 	pt2_entry_t *fpte2p, fpte2, fpte2_fav;
 	pt2_entry_t *pte2p, pte2;
 	vm_offset_t pteva __unused;
 	vm_page_t m __unused;
 
 	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
 	    pmap, va, pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Examine the first PTE2 in the specified PT2. Abort if this PTE2 is
 	 * either invalid, unused, or does not map the first 4KB physical page
 	 * within a 1MB page.
 	 */
 	fpte2p = pmap_pte2_quick(pmap, pte1_trunc(va));
 	fpte2 = pte2_load(fpte2p);
 	if ((fpte2 & ((PTE2_FRAME & PTE1_OFFSET) | PTE2_A | PTE2_V)) !=
 	    (PTE2_A | PTE2_V)) {
 		pmap_pte1_p_failures++;
 		CTR3(KTR_PMAP, "%s: failure(1) for va %#x in pmap %p",
 		    __func__, va, pmap);
 		return;
 	}
 	if (pte2_is_managed(fpte2) && pmap == kernel_pmap) {
 		pmap_pte1_p_failures++;
 		CTR3(KTR_PMAP, "%s: failure(2) for va %#x in pmap %p",
 		    __func__, va, pmap);
 		return;
 	}
 	if ((fpte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
 		/*
 		 * When page is not modified, PTE2_RO can be set without
 		 * a TLB invalidation.
 		 */
 		fpte2 |= PTE2_RO;
 		pte2_store(fpte2p, fpte2);
 	}
 
 	/*
 	 * Examine each of the other PTE2s in the specified PT2. Abort if this
 	 * PTE2 maps an unexpected 4KB physical page or does not have identical
 	 * characteristics to the first PTE2.
 	 */
 	fpte2_fav = (fpte2 & (PTE2_FRAME | PTE2_A | PTE2_V));
 	fpte2_fav += PTE1_SIZE - PTE2_SIZE; /* examine from the end */
 	for (pte2p = fpte2p + NPTE2_IN_PT2 - 1; pte2p > fpte2p; pte2p--) {
 		pte2 = pte2_load(pte2p);
 		if ((pte2 & (PTE2_FRAME | PTE2_A | PTE2_V)) != fpte2_fav) {
 			pmap_pte1_p_failures++;
 			CTR3(KTR_PMAP, "%s: failure(3) for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return;
 		}
 		if ((pte2 & (PTE2_NM | PTE2_RO)) == PTE2_NM) {
 			/*
 			 * When page is not modified, PTE2_RO can be set
 			 * without a TLB invalidation. See note above.
 			 */
 			pte2 |= PTE2_RO;
 			pte2_store(pte2p, pte2);
 			pteva = pte1_trunc(va) | (pte2 & PTE1_OFFSET &
 			    PTE2_FRAME);
 			CTR3(KTR_PMAP, "%s: protect for va %#x in pmap %p",
 			    __func__, pteva, pmap);
 		}
 		if ((pte2 & PTE2_PROMOTE) != (fpte2 & PTE2_PROMOTE)) {
 			pmap_pte1_p_failures++;
 			CTR3(KTR_PMAP, "%s: failure(4) for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return;
 		}
 
 		fpte2_fav -= PTE2_SIZE;
 	}
 	/*
 	 * The page table page in its current state will stay in PT2TAB
 	 * until the PTE1 mapping the section is demoted by pmap_demote_pte1()
 	 * or destroyed by pmap_remove_pte1().
 	 *
 	 * Note that L2 page table size is not equal to PAGE_SIZE.
 	 */
 	m = PHYS_TO_VM_PAGE(trunc_page(pte1_link_pa(pte1_load(pte1p))));
 	KASSERT(m >= vm_page_array && m < &vm_page_array[vm_page_array_size],
 	    ("%s: PT2 page is out of range", __func__));
 	KASSERT(m->pindex == (pte1_index(va) & ~PT2PG_MASK),
 	    ("%s: PT2 page's pindex is wrong", __func__));
 
 	/*
 	 * Get pte1 from pte2 format.
 	 */
 	npte1 = (fpte2 & PTE1_FRAME) | ATTR_TO_L1(fpte2) | PTE1_V;
 
 	/*
 	 * Promote the pv entries.
 	 */
 	if (pte2_is_managed(fpte2))
 		pmap_pv_promote_pte1(pmap, va, pte1_pa(npte1));
 
 	/*
 	 * Promote the mappings.
 	 */
 	pmap_change_pte1(pmap, pte1p, va, npte1);
 
 	pmap_pte1_promotions++;
 	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
 	    __func__, va, pmap);
 
 	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
 	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
 /*
  *  Zero L2 page table page.
  */
 static __inline void
 pmap_clear_pt2(pt2_entry_t *fpte2p)
 {
 	pt2_entry_t *pte2p;
 
 	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++)
 		pte2_clear(pte2p);
 
 }
 
 /*
  *  Removes a 1MB page mapping from the kernel pmap.
  */
 static void
 pmap_remove_kernel_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	vm_page_t m;
 	uint32_t pte1_idx;
 	pt2_entry_t *fpte2p;
 	vm_paddr_t pt2_pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	m = pmap_pt2_page(pmap, va);
 	if (m == NULL)
 		/*
 		 * QQQ: Is this function called only on promoted pte1?
 		 *      We certainly do section mappings directly
 		 *      (without promotion) in kernel !!!
 		 */
 		panic("%s: missing pt2 page", __func__);
 
 	pte1_idx = pte1_index(va);
 
 	/*
 	 * Initialize the L2 page table.
 	 */
 	fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
 	pmap_clear_pt2(fpte2p);
 
 	/*
 	 * Remove the mapping.
 	 */
 	pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(m), pte1_idx);
 	pmap_kenter_pte1(va, PTE1_LINK(pt2_pa));
 
 	/*
 	 * QQQ: We do not need to invalidate PT2MAP mapping
 	 * as we did not change it. I.e. the L2 page table page
 	 * was and still is mapped the same way.
 	 */
 }
 
 /*
  *  Do the things to unmap a section in a process
  */
 static void
 pmap_remove_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
     struct spglist *free)
 {
 	pt1_entry_t opte1;
 	struct md_page *pvh;
 	vm_offset_t eva, va;
 	vm_page_t m;
 
 	PDEBUG(6, printf("%s(%p): va %#x pte1 %#x at %p\n", __func__, pmap, sva,
 	    pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PTE1_OFFSET) == 0,
 	    ("%s: sva is not 1mpage aligned", __func__));
 
 	/*
 	 * Clear and invalidate the mapping. It should occupy one and only TLB
 	 * entry. So, pmap_tlb_flush() called with aligned address should be
 	 * sufficient.
 	 */
 	opte1 = pte1_load_clear(pte1p);
 	pmap_tlb_flush(pmap, sva);
 
 	if (pte1_is_wired(opte1))
 		pmap->pm_stats.wired_count -= PTE1_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
 	if (pte1_is_managed(opte1)) {
 		pvh = pa_to_pvh(pte1_pa(opte1));
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + PTE1_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
 		    va < eva; va += PAGE_SIZE, m++) {
 			if (pte1_is_dirty(opte1))
 				vm_page_dirty(m);
 			if (opte1 & PTE1_A)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		/*
 		 * L2 page table(s) can't be removed from kernel map as
 		 * kernel counts on it (stuff around pmap_growkernel()).
 		 */
 		 pmap_remove_kernel_pte1(pmap, pte1p, sva);
 	} else {
 		/*
 		 * Get associated L2 page table page.
 		 * It's possible that the page was never allocated.
 		 */
 		m = pmap_pt2_page(pmap, sva);
 		if (m != NULL)
 			pmap_unwire_pt2_all(pmap, sva, m, free);
 	}
 }
 
 /*
  *  Fills L2 page table page with mappings to consecutive physical pages.
  */
 static __inline void
 pmap_fill_pt2(pt2_entry_t *fpte2p, pt2_entry_t npte2)
 {
 	pt2_entry_t *pte2p;
 
 	for (pte2p = fpte2p; pte2p < fpte2p + NPTE2_IN_PT2; pte2p++) {
 		pte2_store(pte2p, npte2);
 		npte2 += PTE2_SIZE;
 	}
 }
 
 /*
  *  Tries to demote a 1MB page mapping. If demotion fails, the
  *  1MB page mapping is invalidated.
  */
 static boolean_t
 pmap_demote_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t va)
 {
 	pt1_entry_t opte1, npte1;
 	pt2_entry_t *fpte2p, npte2;
 	vm_paddr_t pt2pg_pa, pt2_pa;
 	vm_page_t m;
 	struct spglist free;
 	uint32_t pte1_idx, isnew = 0;
 
 	PDEBUG(6, printf("%s(%p): try for va %#x pte1 %#x at %p\n", __func__,
 	    pmap, va, pte1_load(pte1p), pte1p));
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	opte1 = pte1_load(pte1p);
 	KASSERT(pte1_is_section(opte1), ("%s: opte1 not a section", __func__));
 
 	if ((opte1 & PTE1_A) == 0 || (m = pmap_pt2_page(pmap, va)) == NULL) {
 		KASSERT(!pte1_is_wired(opte1),
 		    ("%s: PT2 page for a wired mapping is missing", __func__));
 
 		/*
 		 * Invalidate the 1MB page mapping and return
 		 * "failure" if the mapping was never accessed or the
 		 * allocation of the new page table page fails.
 		 */
 		if ((opte1 & PTE1_A) == 0 || (m = vm_page_alloc(NULL,
 		    pte1_index(va) & ~PT2PG_MASK, VM_ALLOC_NOOBJ |
 		    VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) {
 			SLIST_INIT(&free);
 			pmap_remove_pte1(pmap, pte1p, pte1_trunc(va), &free);
 			vm_page_free_pages_toq(&free, false);
 			CTR3(KTR_PMAP, "%s: failure for va %#x in pmap %p",
 			    __func__, va, pmap);
 			return (FALSE);
 		}
 		if (va < VM_MAXUSER_ADDRESS)
 			pmap->pm_stats.resident_count++;
 
 		isnew = 1;
 
 		/*
 		 * We init all L2 page tables in the page even if
 		 * we are going to change everything for one L2 page
 		 * table in a while.
 		 */
 		pt2pg_pa = pmap_pt2pg_init(pmap, va, m);
 	} else {
 		if (va < VM_MAXUSER_ADDRESS) {
 			if (pt2_is_empty(m, va))
 				isnew = 1; /* Demoting section w/o promotion. */
 #ifdef INVARIANTS
 			else
 				KASSERT(pt2_is_full(m, va), ("%s: bad PT2 wire"
 				    " count %u", __func__,
 				    pt2_wirecount_get(m, pte1_index(va))));
 #endif
 		}
 	}
 
 	pt2pg_pa = VM_PAGE_TO_PHYS(m);
 	pte1_idx = pte1_index(va);
 	/*
 	 * If the pmap is current, then the PT2MAP can provide access to
 	 * the page table page (promoted L2 page tables are not unmapped).
 	 * Otherwise, temporarily map the L2 page table page (m) into
 	 * the kernel's address space at either PADDR1 or PADDR2.
 	 *
 	 * Note that L2 page table size is not equal to PAGE_SIZE.
 	 */
 	if (pmap_is_current(pmap))
 		fpte2p = page_pt2(pt2map_pt2pg(va), pte1_idx);
 	else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) {
 		if (pte2_pa(pte2_load(PMAP1)) != pt2pg_pa) {
 			pte2_store(PMAP1, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 			PMAP1cpu = PCPU_GET(cpuid);
 #endif
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changed++;
 		} else
 #ifdef SMP
 		if (PMAP1cpu != PCPU_GET(cpuid)) {
 			PMAP1cpu = PCPU_GET(cpuid);
 			tlb_flush_local((vm_offset_t)PADDR1);
 			PMAP1changedcpu++;
 		} else
 #endif
 			PMAP1unchanged++;
 		fpte2p = page_pt2((vm_offset_t)PADDR1, pte1_idx);
 	} else {
 		mtx_lock(&PMAP2mutex);
 		if (pte2_pa(pte2_load(PMAP2)) != pt2pg_pa) {
 			pte2_store(PMAP2, PTE2_KPT(pt2pg_pa));
 			tlb_flush((vm_offset_t)PADDR2);
 		}
 		fpte2p = page_pt2((vm_offset_t)PADDR2, pte1_idx);
 	}
 	pt2_pa = page_pt2pa(pt2pg_pa, pte1_idx);
 	npte1 = PTE1_LINK(pt2_pa);
 
 	KASSERT((opte1 & PTE1_A) != 0,
 	    ("%s: opte1 is missing PTE1_A", __func__));
 	KASSERT((opte1 & (PTE1_NM | PTE1_RO)) != PTE1_NM,
 	    ("%s: opte1 has PTE1_NM", __func__));
 
 	/*
 	 *  Get pte2 from pte1 format.
 	*/
 	npte2 = pte1_pa(opte1) | ATTR_TO_L2(opte1) | PTE2_V;
 
 	/*
 	 * If the L2 page table page is new, initialize it. If the mapping
 	 * has changed attributes, update the page table entries.
 	 */
 	if (isnew != 0) {
 		pt2_wirecount_set(m, pte1_idx, NPTE2_IN_PT2);
 		pmap_fill_pt2(fpte2p, npte2);
 	} else if ((pte2_load(fpte2p) & PTE2_PROMOTE) !=
 		    (npte2 & PTE2_PROMOTE))
 		pmap_fill_pt2(fpte2p, npte2);
 
 	KASSERT(pte2_pa(pte2_load(fpte2p)) == pte2_pa(npte2),
 	    ("%s: fpte2p and npte2 map different physical addresses",
 	    __func__));
 
 	if (fpte2p == PADDR2)
 		mtx_unlock(&PMAP2mutex);
 
 	/*
 	 * Demote the mapping. This pmap is locked. The old PTE1 has
 	 * PTE1_A set. If the old PTE1 has not PTE1_RO set, it also
 	 * has not PTE1_NM set. Thus, there is no danger of a race with
 	 * another processor changing the setting of PTE1_A and/or PTE1_NM
 	 * between the read above and the store below.
 	 */
 	pmap_change_pte1(pmap, pte1p, va, npte1);
 
 	/*
 	 * Demote the pv entry. This depends on the earlier demotion
 	 * of the mapping. Specifically, the (re)creation of a per-
 	 * page pv entry might trigger the execution of pmap_pv_reclaim(),
 	 * which might reclaim a newly (re)created per-page pv entry
 	 * and destroy the associated mapping. In order to destroy
 	 * the mapping, the PTE1 must have already changed from mapping
 	 * the 1mpage to referencing the page table page.
 	 */
 	if (pte1_is_managed(opte1))
 		pmap_pv_demote_pte1(pmap, va, pte1_pa(opte1));
 
 	pmap_pte1_demotions++;
 	CTR3(KTR_PMAP, "%s: success for va %#x in pmap %p",
 	    __func__, va, pmap);
 
 	PDEBUG(6, printf("%s(%p): success for va %#x pte1 %#x(%#x) at %p\n",
 	    __func__, pmap, va, npte1, pte1_load(pte1p), pte1p));
 	return (TRUE);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind)
 {
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p;
 	pt2_entry_t npte2, opte2;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte2, om;
 	int rv;
 
 	va = trunc_page(va);
 	KASSERT(va <= vm_max_kernel_address, ("%s: toobig", __func__));
 	KASSERT(va < UPT2V_MIN_ADDRESS || va >= UPT2V_MAX_ADDRESS,
 	    ("%s: invalid to pmap_enter page table pages (va: 0x%x)", __func__,
 	    va));
 	KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
 	    va >= kmi.clean_eva,
 	    ("%s: managed mapping within the clean submap", __func__));
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	KASSERT((flags & PMAP_ENTER_RESERVED) == 0,
 	    ("%s: flags %u has reserved bits set", __func__, flags));
 	pa = VM_PAGE_TO_PHYS(m);
 	npte2 = PTE2(pa, PTE2_A, vm_page_pte2_attr(m));
 	if ((flags & VM_PROT_WRITE) == 0)
 		npte2 |= PTE2_NM;
 	if ((prot & VM_PROT_WRITE) == 0)
 		npte2 |= PTE2_RO;
 	KASSERT((npte2 & (PTE2_NM | PTE2_RO)) != PTE2_RO,
 	    ("%s: flags includes VM_PROT_WRITE but prot doesn't", __func__));
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		npte2 |= PTE2_NX;
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		npte2 |= PTE2_W;
 	if (va < VM_MAXUSER_ADDRESS)
 		npte2 |= PTE2_U;
 	if (pmap != kernel_pmap)
 		npte2 |= PTE2_NG;
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & PTE1_OFFSET) == 0,
 		    ("%s: va unaligned", __func__));
 		KASSERT(m->psind > 0, ("%s: m->psind < psind", __func__));
 		rv = pmap_enter_pte1(pmap, va, PTE1_PA(pa) | ATTR_TO_L1(npte2) |
 		    PTE1_V, flags, m);
 		goto out;
 	}
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		mpte2 = pmap_allocpte2(pmap, va, flags);
 		if (mpte2 == NULL) {
 			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
 			    ("pmap_allocpte2 failed with sleep allowed"));
 			rv = KERN_RESOURCE_SHORTAGE;
 			goto out;
 		}
 	} else
 		mpte2 = NULL;
 	pte1p = pmap_pte1(pmap, va);
 	if (pte1_is_section(pte1_load(pte1p)))
 		panic("%s: attempted on 1MB page", __func__);
 	pte2p = pmap_pte2_quick(pmap, va);
 	if (pte2p == NULL)
 		panic("%s: invalid L1 page table entry va=%#x", __func__, va);
 
 	om = NULL;
 	opte2 = pte2_load(pte2p);
 	opa = pte2_pa(opte2);
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (pte2_is_valid(opte2) && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT2 pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT2 page will be also.
 		 */
 		if (pte2_is_wired(npte2) && !pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count++;
 		else if (!pte2_is_wired(npte2) && pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove extra pte2 reference
 		 */
 		if (mpte2)
 			pt2_wirecount_dec(mpte2, pte1_index(va));
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			om = m;
 		goto validate;
 	}
 
 	/*
 	 * QQQ: We think that changing physical address on writeable mapping
 	 *      is not safe. Well, maybe on kernel address space with correct
 	 *      locking, it can make a sense. However, we have no idea why
 	 *      anyone should do that on user address space. Are we wrong?
 	 */
 	KASSERT((opa == 0) || (opa == pa) ||
 	    !pte2_is_valid(opte2) || ((opte2 & PTE2_RO) != 0),
 	    ("%s: pmap %p va %#x(%#x) opa %#x pa %#x - gotcha %#x %#x!",
 	    __func__, pmap, va, opte2, opa, pa, flags, prot));
 
 	pv = NULL;
 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		if (pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 		om = PHYS_TO_VM_PAGE(opa);
 		if (om != NULL && (om->oflags & VPO_UNMANAGED) != 0)
 			om = NULL;
 		if (om != NULL)
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 
 		/*
 		 * Remove extra pte2 reference
 		 */
 		if (mpte2 != NULL)
 			pt2_wirecount_dec(mpte2, va >> PTE1_SHIFT);
 	} else
 		pmap->pm_stats.resident_count++;
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		if (pv == NULL) {
 			pv = get_pv_entry(pmap, FALSE);
 			pv->pv_va = va;
 		}
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	} else if (pv != NULL)
 		free_pv_entry(pmap, pv);
 
 	/*
 	 * Increment counters
 	 */
 	if (pte2_is_wired(npte2))
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	if (prot & VM_PROT_WRITE) {
 		if ((m->oflags & VPO_UNMANAGED) == 0)
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
 	/*
 	 * If the mapping or permission bits are different, we need
 	 * to update the pte2.
 	 *
 	 * QQQ: Think again and again what to do
 	 *      if the mapping is going to be changed!
 	 */
 	if ((opte2 & ~(PTE2_NM | PTE2_A)) != (npte2 & ~(PTE2_NM | PTE2_A))) {
 		/*
 		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
 		 * is set. Do it now, before the mapping is stored and made
 		 * valid for hardware table walk. If done later, there is a race
 		 * for other threads of current process in lazy loading case.
 		 * Don't do it for kernel memory which is mapped with exec
 		 * permission even if the memory isn't going to hold executable
 		 * code. The only time when icache sync is needed is after
 		 * kernel module is loaded and the relocation info is processed.
 		 * And it's done in elf_cpu_load_file().
 		 *
 		 * QQQ: (1) Does it exist any better way where
 		 *          or how to sync icache?
 		 *      (2) Now, we do it on a page basis.
 		 */
 		if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
 		    m->md.pat_mode == VM_MEMATTR_WB_WA &&
 		    (opa != pa || (opte2 & PTE2_NX)))
 			cache_icache_sync_fresh(va, pa, PAGE_SIZE);
 
 		if (opte2 & PTE2_V) {
 			/* Change mapping with break-before-make approach. */
 			opte2 = pte2_load_clear(pte2p);
 			pmap_tlb_flush(pmap, va);
 			pte2_store(pte2p, npte2);
 			if (om != NULL) {
 				KASSERT((om->oflags & VPO_UNMANAGED) == 0,
 				    ("%s: om %p unmanaged", __func__, om));
 				if ((opte2 & PTE2_A) != 0)
 					vm_page_aflag_set(om, PGA_REFERENCED);
 				if (pte2_is_dirty(opte2))
 					vm_page_dirty(om);
 				if (TAILQ_EMPTY(&om->md.pv_list) &&
 				    ((om->flags & PG_FICTITIOUS) != 0 ||
 				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
 					vm_page_aflag_clear(om, PGA_WRITEABLE);
 			}
 		} else
 			pte2_store(pte2p, npte2);
 	}
 #if 0
 	else {
 		/*
 		 * QQQ: In time when both access and not mofified bits are
 		 *      emulated by software, this should not happen. Some
 		 *      analysis is need, if this really happen. Missing
 		 *      tlb flush somewhere could be the reason.
 		 */
 		panic("%s: pmap %p va %#x opte2 %x npte2 %x !!", __func__, pmap,
 		    va, opte2, npte2);
 	}
 #endif
 
 #if VM_NRESERVLEVEL > 0
 	/*
 	 * If both the L2 page table page and the reservation are fully
 	 * populated, then attempt promotion.
 	 */
 	if ((mpte2 == NULL || pt2_is_full(mpte2, va)) &&
 	    sp_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0)
 		pmap_promote_pte1(pmap, pte1p, va);
 #endif
 
 	rv = KERN_SUCCESS;
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *  Do the things to unmap a page in a process.
  */
 static int
 pmap_remove_pte2(pmap_t pmap, pt2_entry_t *pte2p, vm_offset_t va,
     struct spglist *free)
 {
 	pt2_entry_t opte2;
 	vm_page_t m;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/* Clear and invalidate the mapping. */
 	opte2 = pte2_load_clear(pte2p);
 	pmap_tlb_flush(pmap, va);
 
 	KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %#x not link pte2 %#x",
 	    __func__, pmap, va, opte2));
 
 	if (opte2 & PTE2_W)
 		pmap->pm_stats.wired_count -= 1;
 	pmap->pm_stats.resident_count -= 1;
 	if (pte2_is_managed(opte2)) {
 		m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		if (opte2 & PTE2_A)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		pmap_remove_entry(pmap, m, va);
 	}
 	return (pmap_unuse_pt2(pmap, va, free));
 }
 
 /*
  *  Remove a single page from a process address space.
  */
 static void
 pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free)
 {
 	pt2_entry_t *pte2p;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT(curthread->td_pinned > 0,
 	    ("%s: curthread not pinned", __func__));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	if ((pte2p = pmap_pte2_quick(pmap, va)) == NULL ||
 	    !pte2_is_valid(pte2_load(pte2p)))
 		return;
 	pmap_remove_pte2(pmap, pte2p, va, free);
 }
 
 /*
  *  Remove the given range of addresses from the specified map.
  *
  *  It is assumed that the start and end are properly
  *  rounded to the page size.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	struct spglist free;
 
 	/*
 	 * Perform an unsynchronized read. This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	SLIST_INIT(&free);
 
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	PMAP_LOCK(pmap);
 
 	/*
 	 * Special handling of removing one page. A very common
 	 * operation and easy to short circuit some code.
 	 */
 	if (sva + PAGE_SIZE == eva) {
 		pte1 = pte1_load(pmap_pte1(pmap, sva));
 		if (pte1_is_link(pte1)) {
 			pmap_remove_page(pmap, sva, &free);
 			goto out;
 		}
 	}
 
 	for (; sva < eva; sva = nextva) {
 		/*
 		 * Calculate address for next L2 page table.
 		 */
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the L1 page
 		 * table is always allocated, and in kernel virtual.
 		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			/*
 			 * Are we removing the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pmap_remove_pte1(pmap, pte1p, sva, &free);
 				continue;
 			} else if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 				/* The large page mapping was destroyed. */
 				continue;
 			}
 #ifdef INVARIANTS
 			else {
 				/* Update pte1 after demotion. */
 				pte1 = pte1_load(pte1p);
 			}
 #endif
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva;
 		    pte2p++, sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2))
 				continue;
 			if (pmap_remove_pte2(pmap, pte2p, sva, &free))
 				break;
 		}
 	}
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	pmap_t pmap;
 	pt2_entry_t *pte2p, opte2;
 	pt1_entry_t *pte1p;
 	vm_offset_t va;
 	struct spglist free;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		(void)pmap_demote_pte1(pmap, pte1p, va);
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pmap->pm_stats.resident_count--;
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found "
 		    "a 1mpage in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load_clear(pte2p);
 		pmap_tlb_flush(pmap, pv->pv_va);
 		KASSERT(pte2_is_valid(opte2), ("%s: pmap %p va %x zero pte2",
 		    __func__, pmap, pv->pv_va));
 		if (pte2_is_wired(opte2))
 			pmap->pm_stats.wired_count--;
 		if (opte2 & PTE2_A)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		pmap_unuse_pt2(pmap, pv->pv_va, &free);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *  Just subroutine for pmap_remove_pages() to reasonably satisfy
  *  good coding style, a.k.a. 80 character line width limit hell.
  */
 static __inline void
 pmap_remove_pte1_quick(pmap_t pmap, pt1_entry_t pte1, pv_entry_t pv,
     struct spglist *free)
 {
 	vm_paddr_t pa;
 	vm_page_t m, mt, mpt2pg;
 	struct md_page *pvh;
 
 	pa = pte1_pa(pte1);
 	m = PHYS_TO_VM_PAGE(pa);
 
 	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
 	    __func__, m, m->phys_addr, pa));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 	    m < &vm_page_array[vm_page_array_size],
 	    ("%s: bad pte1 %#x", __func__, pte1));
 
 	if (pte1_is_dirty(pte1)) {
 		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 			vm_page_dirty(mt);
 	}
 
 	pmap->pm_stats.resident_count -= PTE1_SIZE / PAGE_SIZE;
 	pvh = pa_to_pvh(pa);
 	TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 	if (TAILQ_EMPTY(&pvh->pv_list)) {
 		for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 			if (TAILQ_EMPTY(&mt->md.pv_list))
 				vm_page_aflag_clear(mt, PGA_WRITEABLE);
 	}
 	mpt2pg = pmap_pt2_page(pmap, pv->pv_va);
 	if (mpt2pg != NULL)
 		pmap_unwire_pt2_all(pmap, pv->pv_va, mpt2pg, free);
 }
 
 /*
  *  Just subroutine for pmap_remove_pages() to reasonably satisfy
  *  good coding style, a.k.a. 80 character line width limit hell.
  */
 static __inline void
 pmap_remove_pte2_quick(pmap_t pmap, pt2_entry_t pte2, pv_entry_t pv,
     struct spglist *free)
 {
 	vm_paddr_t pa;
 	vm_page_t m;
 	struct md_page *pvh;
 
 	pa = pte2_pa(pte2);
 	m = PHYS_TO_VM_PAGE(pa);
 
 	KASSERT(m->phys_addr == pa, ("%s: vm_page_t %p addr mismatch %#x %#x",
 	    __func__, m, m->phys_addr, pa));
 	KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 	    m < &vm_page_array[vm_page_array_size],
 	    ("%s: bad pte2 %#x", __func__, pte2));
 
 	if (pte2_is_dirty(pte2))
 		vm_page_dirty(m);
 
 	pmap->pm_stats.resident_count--;
 	TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 	if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(pa);
 		if (TAILQ_EMPTY(&pvh->pv_list))
 			vm_page_aflag_clear(m, PGA_WRITEABLE);
 	}
 	pmap_unuse_pt2(pmap, pv->pv_va, free);
 }
 
 /*
  *  Remove all pages from specified address space this aids process
  *  exit speeds. Also, this code is special cased for current process
  *  only, but can have the more generic (and slightly slower) mode enabled.
  *  This is much faster than pmap_remove in the case of running down
  *  an entire address space.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	pv_entry_t pv;
 	struct pv_chunk *pc, *npc;
 	struct spglist free;
 	int field, idx;
 	int32_t bit;
 	uint32_t inuse, bitmask;
 	boolean_t allfree;
 
 	/*
 	 * Assert that the given pmap is only active on the current
 	 * CPU.  Unfortunately, we cannot block another CPU from
 	 * activating the pmap while this function is executing.
 	 */
 	KASSERT(pmap == vmspace_pmap(curthread->td_proc->p_vmspace),
 	    ("%s: non-current pmap %p", __func__, pmap));
 #if defined(SMP) && defined(INVARIANTS)
 	{
 		cpuset_t other_cpus;
 
 		sched_pin();
 		other_cpus = pmap->pm_active;
 		CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 		sched_unpin();
 		KASSERT(CPU_EMPTY(&other_cpus),
 		    ("%s: pmap %p active on other cpus", __func__, pmap));
 	}
 #endif
 	SLIST_INIT(&free);
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	sched_pin();
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		KASSERT(pc->pc_pmap == pmap, ("%s: wrong pmap %p %p",
 		    __func__, pmap, pc->pc_pmap));
 		allfree = TRUE;
 		for (field = 0; field < _NPCM; field++) {
 			inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 			while (inuse != 0) {
 				bit = ffs(inuse) - 1;
 				bitmask = 1UL << bit;
 				idx = field * 32 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				/*
 				 * Note that we cannot remove wired pages
 				 * from a process' mapping at this time
 				 */
 				pte1p = pmap_pte1(pmap, pv->pv_va);
 				pte1 = pte1_load(pte1p);
 				if (pte1_is_section(pte1)) {
 					if (pte1_is_wired(pte1))  {
 						allfree = FALSE;
 						continue;
 					}
 					pte1_clear(pte1p);
 					pmap_remove_pte1_quick(pmap, pte1, pv,
 					    &free);
 				}
 				else if (pte1_is_link(pte1)) {
 					pte2p = pt2map_entry(pv->pv_va);
 					pte2 = pte2_load(pte2p);
 
 					if (!pte2_is_valid(pte2)) {
 						printf("%s: pmap %p va %#x "
 						    "pte2 %#x\n", __func__,
 						    pmap, pv->pv_va, pte2);
 						panic("bad pte2");
 					}
 
 					if (pte2_is_wired(pte2))   {
 						allfree = FALSE;
 						continue;
 					}
 					pte2_clear(pte2p);
 					pmap_remove_pte2_quick(pmap, pte2, pv,
 					    &free);
 				} else {
 					printf("%s: pmap %p va %#x pte1 %#x\n",
 					    __func__, pmap, pv->pv_va, pte1);
 					panic("bad pte1");
 				}
 
 				/* Mark free */
 				PV_STAT(pv_entry_frees++);
 				PV_STAT(pv_entry_spare++);
 				pv_entry_count--;
 				pc->pc_map[field] |= bitmask;
 			}
 		}
 		if (allfree) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			free_pv_chunk(pc);
 		}
 	}
 	tlb_flush_all_ng_local();
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, false);
 }
 
 /*
  *  This code makes some *MAJOR* assumptions:
  *  1. Current pmap & pmap exists.
  *  2. Not wired.
  *  3. Read access.
  *  4. No L2 page table pages.
  *  but is *MUCH* faster than pmap_enter...
  */
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpt2pg)
 {
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pa;
 	struct spglist free;
 	uint32_t l2prot;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("%s: managed mapping within the clean submap", __func__));
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * In the case that a L2 page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		u_int pte1_idx;
 		pt1_entry_t pte1, *pte1p;
 		vm_paddr_t pt2_pa;
 
 		/*
 		 * Get L1 page table things.
 		 */
 		pte1_idx = pte1_index(va);
 		pte1p = pmap_pte1(pmap, va);
 		pte1 = pte1_load(pte1p);
 
 		if (mpt2pg && (mpt2pg->pindex == (pte1_idx & ~PT2PG_MASK))) {
 			/*
 			 * Each of NPT2_IN_PG L2 page tables on the page can
 			 * come here. Make sure that associated L1 page table
 			 * link is established.
 			 *
 			 * QQQ: It comes that we don't establish all links to
 			 *      L2 page tables for newly allocated L2 page
 			 *      tables page.
 			 */
 			KASSERT(!pte1_is_section(pte1),
 			    ("%s: pte1 %#x is section", __func__, pte1));
 			if (!pte1_is_link(pte1)) {
 				pt2_pa = page_pt2pa(VM_PAGE_TO_PHYS(mpt2pg),
 				    pte1_idx);
 				pte1_store(pte1p, PTE1_LINK(pt2_pa));
 			}
 			pt2_wirecount_inc(mpt2pg, pte1_idx);
 		} else {
 			/*
 			 * If the L2 page table page is mapped, we just
 			 * increment the hold count, and activate it.
 			 */
 			if (pte1_is_section(pte1)) {
 				return (NULL);
 			} else if (pte1_is_link(pte1)) {
 				mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 				pt2_wirecount_inc(mpt2pg, pte1_idx);
 			} else {
 				mpt2pg = _pmap_allocpte2(pmap, va,
 				    PMAP_ENTER_NOSLEEP);
 				if (mpt2pg == NULL)
 					return (NULL);
 			}
 		}
 	} else {
 		mpt2pg = NULL;
 	}
 
 	/*
 	 * This call to pt2map_entry() makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte2_quick().
 	 * But that isn't as quick as pt2map_entry().
 	 */
 	pte2p = pt2map_entry(va);
 	pte2 = pte2_load(pte2p);
 	if (pte2_is_valid(pte2)) {
 		if (mpt2pg != NULL) {
 			/*
 			 * Remove extra pte2 reference
 			 */
 			pt2_wirecount_dec(mpt2pg, pte1_index(va));
 			mpt2pg = NULL;
 		}
 		return (NULL);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m)) {
 		if (mpt2pg != NULL) {
 			SLIST_INIT(&free);
 			if (pmap_unwire_pt2(pmap, va, mpt2pg, &free)) {
 				pmap_tlb_flush(pmap, va);
 				vm_page_free_pages_toq(&free, false);
 			}
 
 			mpt2pg = NULL;
 		}
 		return (NULL);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	pa = VM_PAGE_TO_PHYS(m);
 	l2prot = PTE2_RO | PTE2_NM;
 	if (va < VM_MAXUSER_ADDRESS)
 		l2prot |= PTE2_U | PTE2_NG;
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		l2prot |= PTE2_NX;
 	else if (m->md.pat_mode == VM_MEMATTR_WB_WA && pmap != kernel_pmap) {
 		/*
 		 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
 		 * is set. QQQ: For more info, see comments in pmap_enter().
 		 */
 		cache_icache_sync_fresh(va, pa, PAGE_SIZE);
 	}
 	pte2_store(pte2p, PTE2(pa, l2prot, vm_page_pte2_attr(m)));
 
 	return (mpt2pg);
 }
 
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  Tries to create a read- and/or execute-only 1 MB page mapping.  Returns
  *  true if successful.  Returns false if (1) a mapping already exists at the
  *  specified virtual address or (2) a PV entry cannot be allocated without
  *  reclaiming another PV entry.
  */
 static bool
 pmap_enter_1mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pa = VM_PAGE_TO_PHYS(m);
 	pte1 = PTE1(pa, PTE1_NM | PTE1_RO, ATTR_TO_L1(vm_page_pte2_attr(m)));
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		pte1 |= PTE1_NX;
 	if (va < VM_MAXUSER_ADDRESS)
 		pte1 |= PTE1_U;
 	if (pmap != kernel_pmap)
 		pte1 |= PTE1_NG;
 	return (pmap_enter_pte1(pmap, va, pte1, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, m) == KERN_SUCCESS);
 }
 
 /*
  *  Tries to create the specified 1 MB page mapping.  Returns KERN_SUCCESS if
  *  the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
  *  otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
  *  a mapping already exists at the specified virtual address.  Returns
  *  KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NORECLAIM was specified and PV entry
  *  allocation failed.
  */
 static int
 pmap_enter_pte1(pmap_t pmap, vm_offset_t va, pt1_entry_t pte1, u_int flags,
     vm_page_t m)
 {
 	struct spglist free;
 	pt1_entry_t opte1, *pte1p;
 	pt2_entry_t pte2, *pte2p;
 	vm_offset_t cur, end;
 	vm_page_t mt;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	KASSERT((pte1 & (PTE1_NM | PTE1_RO)) == 0 ||
 	    (pte1 & (PTE1_NM | PTE1_RO)) == (PTE1_NM | PTE1_RO),
 	    ("%s: pte1 has inconsistent NM and RO attributes", __func__));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pte1p = pmap_pte1(pmap, va);
 	opte1 = pte1_load(pte1p);
 	if (pte1_is_valid(opte1)) {
 		if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
 			CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p",
 			    __func__, va, pmap);
 			return (KERN_FAILURE);
 		}
 		/* Break the existing mapping(s). */
 		SLIST_INIT(&free);
 		if (pte1_is_section(opte1)) {
 			/*
 			 * If the section resulted from a promotion, then a
 			 * reserved PT page could be freed.
 			 */
 			pmap_remove_pte1(pmap, pte1p, va, &free);
 		} else {
 			sched_pin();
 			end = va + PTE1_SIZE;
 			for (cur = va, pte2p = pmap_pte2_quick(pmap, va);
 			    cur != end; cur += PAGE_SIZE, pte2p++) {
 				pte2 = pte2_load(pte2p);
 				if (!pte2_is_valid(pte2))
 					continue;
 				if (pmap_remove_pte2(pmap, pte2p, cur, &free))
 					break;
 			}
 			sched_unpin();
 		}
 		vm_page_free_pages_toq(&free, false);
 	}
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_pte1(pmap, va, pte1, flags)) {
 			CTR3(KTR_PMAP, "%s: failure for va %#lx in pmap %p",
 			    __func__, va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
 		if ((pte1 & PTE1_RO) == 0) {
 			for (mt = m; mt < &m[PTE1_SIZE / PAGE_SIZE]; mt++)
 				vm_page_aflag_set(mt, PGA_WRITEABLE);
 		}
 	}
 
 	/*
 	 * Increment counters.
 	 */
 	if (pte1_is_wired(pte1))
 		pmap->pm_stats.wired_count += PTE1_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count += PTE1_SIZE / PAGE_SIZE;
 
 	/*
 	 * Sync icache if exec permission and attribute VM_MEMATTR_WB_WA
 	 * is set.  QQQ: For more info, see comments in pmap_enter().
 	 */
 	if ((pte1 & PTE1_NX) == 0 && m->md.pat_mode == VM_MEMATTR_WB_WA &&
 	    pmap != kernel_pmap && (!pte1_is_section(opte1) ||
 	    pte1_pa(opte1) != VM_PAGE_TO_PHYS(m) || (opte1 & PTE2_NX) != 0))
 		cache_icache_sync_fresh(va, VM_PAGE_TO_PHYS(m), PTE1_SIZE);
 
 	/*
 	 * Map the section.
 	 */
 	pte1_store(pte1p, pte1);
 
 	pmap_pte1_mappings++;
 	CTR3(KTR_PMAP, "%s: success for va %#lx in pmap %p", __func__, va,
 	    pmap);
 	return (KERN_SUCCESS);
 }
 
 /*
  *  Maps a sequence of resident pages belonging to the same object.
  *  The sequence begins with the given page m_start.  This page is
  *  mapped at the given virtual address start.  Each subsequent page is
  *  mapped at a virtual address that is offset from start by the same
  *  amount as the page is offset from m_start within the object.  The
  *  last page in the sequence is the page with the largest offset from
  *  m_start that can be mapped at a virtual address less than the given
  *  virtual address end.  Not every virtual page between start and end
  *  is mapped; only those for which a resident page exists with the
  *  corresponding offset from m_start are mapped.
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_offset_t va;
 	vm_page_t m, mpt2pg;
 	vm_pindex_t diff, psize;
 
 	PDEBUG(6, printf("%s: pmap %p start %#x end  %#x m %p prot %#x\n",
 	    __func__, pmap, start, end, m_start, prot));
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 	psize = atop(end - start);
 	mpt2pg = NULL;
 	m = m_start;
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
 		if ((va & PTE1_OFFSET) == 0 && va + PTE1_SIZE <= end &&
 		    m->psind == 1 && sp_enabled &&
 		    pmap_enter_1mpage(pmap, va, m, prot))
 			m = &m[PTE1_SIZE / PAGE_SIZE - 1];
 		else
 			mpt2pg = pmap_enter_quick_locked(pmap, va, m, prot,
 			    mpt2pg);
 		m = TAILQ_NEXT(m, listq);
 	}
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  This code maps large physical mmap regions into the
  *  processor address space.  Note that some shortcuts
  *  are taken, but the code works.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 	pt1_entry_t *pte1p;
 	vm_paddr_t pa, pte2_pa;
 	vm_page_t p;
 	vm_memattr_t pat_mode;
 	u_int l1attr, l1prot;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("%s: non-device object", __func__));
 	if ((addr & PTE1_OFFSET) == 0 && (size & PTE1_OFFSET) == 0) {
 		if (!vm_object_populate(object, pindex, pindex + atop(size)))
 			return;
 		p = vm_page_lookup(object, pindex);
 		KASSERT(p->valid == VM_PAGE_BITS_ALL,
 		    ("%s: invalid page %p", __func__, p));
 		pat_mode = p->md.pat_mode;
 
 		/*
 		 * Abort the mapping if the first page is not physically
 		 * aligned to a 1MB page boundary.
 		 */
 		pte2_pa = VM_PAGE_TO_PHYS(p);
 		if (pte2_pa & PTE1_OFFSET)
 			return;
 
 		/*
 		 * Skip the first page. Abort the mapping if the rest of
 		 * the pages are not physically contiguous or have differing
 		 * memory attributes.
 		 */
 		p = TAILQ_NEXT(p, listq);
 		for (pa = pte2_pa + PAGE_SIZE; pa < pte2_pa + size;
 		    pa += PAGE_SIZE) {
 			KASSERT(p->valid == VM_PAGE_BITS_ALL,
 			    ("%s: invalid page %p", __func__, p));
 			if (pa != VM_PAGE_TO_PHYS(p) ||
 			    pat_mode != p->md.pat_mode)
 				return;
 			p = TAILQ_NEXT(p, listq);
 		}
 
 		/*
 		 * Map using 1MB pages.
 		 *
 		 * QQQ: Well, we are mapping a section, so same condition must
 		 * be hold like during promotion. It looks that only RW mapping
 		 * is done here, so readonly mapping must be done elsewhere.
 		 */
 		l1prot = PTE1_U | PTE1_NG | PTE1_RW | PTE1_M | PTE1_A;
 		l1attr = ATTR_TO_L1(vm_memattr_to_pte2(pat_mode));
 		PMAP_LOCK(pmap);
 		for (pa = pte2_pa; pa < pte2_pa + size; pa += PTE1_SIZE) {
 			pte1p = pmap_pte1(pmap, addr);
 			if (!pte1_is_valid(pte1_load(pte1p))) {
 				pte1_store(pte1p, PTE1(pa, l1prot, l1attr));
 				pmap->pm_stats.resident_count += PTE1_SIZE /
 				    PAGE_SIZE;
 				pmap_pte1_mappings++;
 			}
 			/* Else continue on if the PTE1 is already valid. */
 			addr += PTE1_SIZE;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 }
 
 /*
  *  Do the things to protect a 1mpage in a process.
  */
 static void
 pmap_protect_pte1(pmap_t pmap, pt1_entry_t *pte1p, vm_offset_t sva,
     vm_prot_t prot)
 {
 	pt1_entry_t npte1, opte1;
 	vm_offset_t eva, va;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & PTE1_OFFSET) == 0,
 	    ("%s: sva is not 1mpage aligned", __func__));
 
 	opte1 = npte1 = pte1_load(pte1p);
 	if (pte1_is_managed(opte1) && pte1_is_dirty(opte1)) {
 		eva = sva + PTE1_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(pte1_pa(opte1));
 		    va < eva; va += PAGE_SIZE, m++)
 			vm_page_dirty(m);
 	}
 	if ((prot & VM_PROT_WRITE) == 0)
 		npte1 |= PTE1_RO | PTE1_NM;
 	if ((prot & VM_PROT_EXECUTE) == 0)
 		npte1 |= PTE1_NX;
 
 	/*
 	 * QQQ: Herein, execute permission is never set.
 	 *      It only can be cleared. So, no icache
 	 *      syncing is needed.
 	 */
 
 	if (npte1 != opte1) {
 		pte1_store(pte1p, npte1);
 		pmap_tlb_flush(pmap, sva);
 	}
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	boolean_t pv_lists_locked;
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, opte2, npte2;
 
 	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
 	if (prot == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
 	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
 		return;
 
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = nextva) {
 		/*
 		 * Calculate address for next L2 page table.
 		 */
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that L1 page
 		 * page table is always allocated, and in kernel virtual.
 		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			/*
 			 * Are we protecting the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pmap_protect_pte1(pmap, pte1p, sva, prot);
 				continue;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = TRUE;
 					if (!rw_try_wlock(&pvh_global_lock)) {
 						PMAP_UNLOCK(pmap);
 						goto resume;
 					}
 					sched_pin();
 				}
 				if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 					/*
 					 * The large page mapping
 					 * was destroyed.
 					 */
 					continue;
 				}
 #ifdef INVARIANTS
 				else {
 					/* Update pte1 after demotion */
 					pte1 = pte1_load(pte1p);
 				}
 #endif
 			}
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
 		 * range being protected.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
 		    sva += PAGE_SIZE) {
 			vm_page_t m;
 
 			opte2 = npte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(opte2))
 				continue;
 
 			if ((prot & VM_PROT_WRITE) == 0) {
 				if (pte2_is_managed(opte2) &&
 				    pte2_is_dirty(opte2)) {
 					m = PHYS_TO_VM_PAGE(pte2_pa(opte2));
 					vm_page_dirty(m);
 				}
 				npte2 |= PTE2_RO | PTE2_NM;
 			}
 
 			if ((prot & VM_PROT_EXECUTE) == 0)
 				npte2 |= PTE2_NX;
 
 			/*
 			 * QQQ: Herein, execute permission is never set.
 			 *      It only can be cleared. So, no icache
 			 *      syncing is needed.
 			 */
 
 			if (npte2 != opte2) {
 				pte2_store(pte2p, npte2);
 				pmap_tlb_flush(pmap, sva);
 			}
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	pmap_pvh_wired_mappings:
  *
  *	Return the updated number "count" of managed mappings that are wired.
  */
 static int
 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
 {
 	pmap_t pmap;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pv_entry_t pv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			if (pte1_is_wired(pte1))
 				count++;
 		} else {
 			KASSERT(pte1_is_link(pte1),
 			    ("%s: pte1 %#x is not link", __func__, pte1));
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			if (pte2_is_wired(pte2))
 				count++;
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	return (count);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	int count;
 
 	count = 0;
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (count);
 	rw_wlock(&pvh_global_lock);
 	count = pmap_pvh_wired_mappings(&m->md, count);
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
 		    count);
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (count);
 }
 
 /*
  *  Returns TRUE if any of the given mappings were used to modify
  *  physical memory.  Otherwise, returns FALSE.  Both page and 1mpage
  *  mappings are supported.
  */
 static boolean_t
 pmap_is_modified_pvh(struct md_page *pvh)
 {
 	pv_entry_t pv;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pmap_t pmap;
 	boolean_t rv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			rv = pte1_is_dirty(pte1);
 		} else {
 			KASSERT(pte1_is_link(pte1),
 			    ("%s: pte1 %#x is not link", __func__, pte1));
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			rv = pte2_is_dirty(pte2);
 		}
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
 	sched_unpin();
 	return (rv);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 
 	/*
 	 * If the page is not busied then this check is racy.
 	 */
 	if (!pmap_page_is_write_mapped(m))
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_modified_pvh(&m->md) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	boolean_t rv;
 
 	rv = FALSE;
 	PMAP_LOCK(pmap);
 	pte1 = pte1_load(pmap_pte1(pmap, addr));
 	if (pte1_is_link(pte1)) {
 		pte2 = pte2_load(pt2map_entry(addr));
 		rv = !pte2_is_valid(pte2) ;
 	}
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *  Returns TRUE if any of the given mappings were referenced and FALSE
  *  otherwise. Both page and 1mpage mappings are supported.
  */
 static boolean_t
 pmap_is_referenced_pvh(struct md_page *pvh)
 {
 
 	pv_entry_t pv;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	pmap_t pmap;
 	boolean_t rv;
 
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 	rv = FALSE;
 	sched_pin();
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1 = pte1_load(pmap_pte1(pmap, pv->pv_va));
 		if (pte1_is_section(pte1)) {
 			rv = (pte1 & (PTE1_A | PTE1_V)) == (PTE1_A | PTE1_V);
 		} else {
 			pte2 = pte2_load(pmap_pte2_quick(pmap, pv->pv_va));
 			rv = (pte2 & (PTE2_A | PTE2_V)) == (PTE2_A | PTE2_V);
 		}
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
 	sched_unpin();
 	return (rv);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	rw_wlock(&pvh_global_lock);
 	rv = pmap_is_referenced_pvh(&m->md) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	As an optimization, update the page's dirty field if a modified bit is
  *	found while counting reference bits.  This opportunistic update can be
  *	performed at low cost and can eliminate the need for some future calls
  *	to pmap_is_modified().  However, since this function stops after
  *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
  *	dirty pages.  Those dirty pages will only be detected by a future call
  *	to pmap_is_modified().
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv, pvf;
 	pmap_t pmap;
 	pt1_entry_t  *pte1p, opte1;
 	pt2_entry_t *pte2p, opte2;
 	vm_paddr_t pa;
 	int rtval = 0;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	pa = VM_PAGE_TO_PHYS(m);
 	pvh = pa_to_pvh(pa);
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0 ||
 	    (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 		goto small_mappings;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		opte1 = pte1_load(pte1p);
 		if (pte1_is_dirty(opte1)) {
 			/*
 			 * Although "opte1" is mapping a 1MB page, because
 			 * this function is called at a 4KB page granularity,
 			 * we only update the 4KB page under test.
 			 */
 			vm_page_dirty(m);
 		}
 		if ((opte1 & PTE1_A) != 0) {
 			/*
 			 * Since this reference bit is shared by 256 4KB pages,
 			 * it should not be cleared every time it is tested.
 			 * Apply a simple "hash" function on the physical page
 			 * number, the virtual section number, and the pmap
 			 * address to select one 4KB page out of the 256
 			 * on which testing the reference bit will result
 			 * in clearing that bit. This function is designed
 			 * to avoid the selection of the same 4KB page
 			 * for every 1MB page mapping.
 			 *
 			 * On demotion, a mapping that hasn't been referenced
 			 * is simply destroyed.  To avoid the possibility of a
 			 * subsequent page fault on a demoted wired mapping,
 			 * always leave its reference bit set.  Moreover,
 			 * since the section is wired, the current state of
 			 * its reference bit won't affect page replacement.
 			 */
 			 if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PTE1_SHIFT) ^
 			    (uintptr_t)pmap) & (NPTE2_IN_PG - 1)) == 0 &&
 			    !pte1_is_wired(opte1)) {
 				pte1_clear_bit(pte1p, PTE1_A);
 				pmap_tlb_flush(pmap, pv->pv_va);
 			}
 			rtval++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 		}
 		if (rtval >= PMAP_TS_REFERENCED_MAX)
 			goto out;
 	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 small_mappings:
 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 		goto out;
 	pv = pvf;
 	do {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(pte1_is_link(pte1_load(pte1p)),
 		    ("%s: not found a link in page %p's pv list", __func__, m));
 
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load(pte2p);
 		if (pte2_is_dirty(opte2))
 			vm_page_dirty(m);
 		if ((opte2 & PTE2_A) != 0) {
 			pte2_clear_bit(pte2p, PTE2_A);
 			pmap_tlb_flush(pmap, pv->pv_va);
 			rtval++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		}
 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval <
 	    PMAP_TS_REFERENCED_MAX);
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	return (rtval);
 }
 
 /*
  *	Clear the wired attribute from the mappings for the specified range of
  *	addresses in the given pmap.  Every valid mapping within that range
  *	must have the wired attribute set.  In contrast, invalid mappings
  *	cannot have the wired attribute set, so they are ignored.
  *
  *	The wired attribute of the page table entry is not a hardware feature,
  *	so there is no need to invalidate any TLB entries.
  */
 void
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t nextva;
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	boolean_t pv_lists_locked;
 
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = nextva) {
 		nextva = pte1_trunc(sva + PTE1_SIZE);
 		if (nextva < sva)
 			nextva = eva;
 
 		pte1p = pmap_pte1(pmap, sva);
 		pte1 = pte1_load(pte1p);
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that L1 page
 		 * page table is always allocated, and in kernel virtual.
 		 */
 		if (pte1 == 0)
 			continue;
 
 		if (pte1_is_section(pte1)) {
 			if (!pte1_is_wired(pte1))
 				panic("%s: pte1 %#x not wired", __func__, pte1);
 
 			/*
 			 * Are we unwiring the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + PTE1_SIZE == nextva && eva >= nextva) {
 				pte1_clear_bit(pte1p, PTE1_W);
 				pmap->pm_stats.wired_count -= PTE1_SIZE /
 				    PAGE_SIZE;
 				continue;
 			} else {
 				if (!pv_lists_locked) {
 					pv_lists_locked = TRUE;
 					if (!rw_try_wlock(&pvh_global_lock)) {
 						PMAP_UNLOCK(pmap);
 						/* Repeat sva. */
 						goto resume;
 					}
 					sched_pin();
 				}
 				if (!pmap_demote_pte1(pmap, pte1p, sva))
 					panic("%s: demotion failed", __func__);
 #ifdef INVARIANTS
 				else {
 					/* Update pte1 after demotion */
 					pte1 = pte1_load(pte1p);
 				}
 #endif
 			}
 		}
 
 		KASSERT(pte1_is_link(pte1), ("%s: pmap %p va %#x pte1 %#x at %p"
 		    " is not link", __func__, pmap, sva, pte1, pte1p));
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current L2 page table page, or to the end of the
 		 * range being protected.
 		 */
 		if (nextva > eva)
 			nextva = eva;
 
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != nextva; pte2p++,
 		    sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2))
 				continue;
 			if (!pte2_is_wired(pte2))
 				panic("%s: pte2 %#x is missing PTE2_W",
 				    __func__, pte2);
 
 			/*
 			 * PTE2_W must be cleared atomically. Although the pmap
 			 * lock synchronizes access to PTE2_W, another processor
 			 * could be changing PTE2_NM and/or PTE2_A concurrently.
 			 */
 			pte2_clear_bit(pte2p, PTE2_W);
 			pmap->pm_stats.wired_count--;
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *  Clear the write and modified bits in each of the given page's mappings.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pt1_entry_t *pte1p;
 	pt2_entry_t *pte2p, opte2;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	vm_page_assert_busied(m);
 
 	if (!pmap_page_is_write_mapped(m))
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		if (!(pte1_load(pte1p) & PTE1_RO))
 			(void)pmap_demote_pte1(pmap, pte1p, va);
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found"
 		    " a section in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		opte2 = pte2_load(pte2p);
 		if (!(opte2 & PTE2_RO)) {
 			pte2_store(pte2p, opte2 | PTE2_RO | PTE2_NM);
 			if (pte2_is_dirty(opte2))
 				vm_page_dirty(m);
 			pmap_tlb_flush(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  *	Apply the given advice to the specified range of addresses within the
  *	given pmap.  Depending on the advice, clear the referenced and/or
  *	modified flags in each mapping and set the mapped page's dirty field.
  */
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
 	pt1_entry_t *pte1p, opte1;
 	pt2_entry_t *pte2p, pte2;
 	vm_offset_t pdnxt;
 	vm_page_t m;
 	boolean_t pv_lists_locked;
 
 	if (advice != MADV_DONTNEED && advice != MADV_FREE)
 		return;
 	if (pmap_is_current(pmap))
 		pv_lists_locked = FALSE;
 	else {
 		pv_lists_locked = TRUE;
 resume:
 		rw_wlock(&pvh_global_lock);
 		sched_pin();
 	}
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = pdnxt) {
 		pdnxt = pte1_trunc(sva + PTE1_SIZE);
 		if (pdnxt < sva)
 			pdnxt = eva;
 		pte1p = pmap_pte1(pmap, sva);
 		opte1 = pte1_load(pte1p);
 		if (!pte1_is_valid(opte1)) /* XXX */
 			continue;
 		else if (pte1_is_section(opte1)) {
 			if (!pte1_is_managed(opte1))
 				continue;
 			if (!pv_lists_locked) {
 				pv_lists_locked = TRUE;
 				if (!rw_try_wlock(&pvh_global_lock)) {
 					PMAP_UNLOCK(pmap);
 					goto resume;
 				}
 				sched_pin();
 			}
 			if (!pmap_demote_pte1(pmap, pte1p, sva)) {
 				/*
 				 * The large page mapping was destroyed.
 				 */
 				continue;
 			}
 
 			/*
 			 * Unless the page mappings are wired, remove the
 			 * mapping to a single page so that a subsequent
 			 * access may repromote.  Since the underlying L2 page
 			 * table is fully populated, this removal never
 			 * frees a L2 page table page.
 			 */
 			if (!pte1_is_wired(opte1)) {
 				pte2p = pmap_pte2_quick(pmap, sva);
 				KASSERT(pte2_is_valid(pte2_load(pte2p)),
 				    ("%s: invalid PTE2", __func__));
 				pmap_remove_pte2(pmap, pte2p, sva, NULL);
 			}
 		}
 		if (pdnxt > eva)
 			pdnxt = eva;
 		for (pte2p = pmap_pte2_quick(pmap, sva); sva != pdnxt; pte2p++,
 		    sva += PAGE_SIZE) {
 			pte2 = pte2_load(pte2p);
 			if (!pte2_is_valid(pte2) || !pte2_is_managed(pte2))
 				continue;
 			else if (pte2_is_dirty(pte2)) {
 				if (advice == MADV_DONTNEED) {
 					/*
 					 * Future calls to pmap_is_modified()
 					 * can be avoided by making the page
 					 * dirty now.
 					 */
 					m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
 					vm_page_dirty(m);
 				}
 				pte2_set_bit(pte2p, PTE2_NM);
 				pte2_clear_bit(pte2p, PTE2_A);
 			} else if ((pte2 & PTE2_A) != 0)
 				pte2_clear_bit(pte2p, PTE2_A);
 			else
 				continue;
 			pmap_tlb_flush(pmap, sva);
 		}
 	}
 	if (pv_lists_locked) {
 		sched_unpin();
 		rw_wunlock(&pvh_global_lock);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t next_pv, pv;
 	pmap_t pmap;
 	pt1_entry_t *pte1p, opte1;
 	pt2_entry_t *pte2p, opte2;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	vm_page_assert_busied(m);
 
 	if (!pmap_page_is_write_mapped(m))
 		return;
 	rw_wlock(&pvh_global_lock);
 	sched_pin();
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		goto small_mappings;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		va = pv->pv_va;
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, va);
 		opte1 = pte1_load(pte1p);
 		if (!(opte1 & PTE1_RO)) {
 			if (pmap_demote_pte1(pmap, pte1p, va) &&
 			    !pte1_is_wired(opte1)) {
 				/*
 				 * Write protect the mapping to a
 				 * single page so that a subsequent
 				 * write access may repromote.
 				 */
 				va += VM_PAGE_TO_PHYS(m) - pte1_pa(opte1);
 				pte2p = pmap_pte2_quick(pmap, va);
 				opte2 = pte2_load(pte2p);
 				if ((opte2 & PTE2_V)) {
 					pte2_set_bit(pte2p, PTE2_NM | PTE2_RO);
 					vm_page_dirty(m);
 					pmap_tlb_flush(pmap, va);
 				}
 			}
 		}
 		PMAP_UNLOCK(pmap);
 	}
 small_mappings:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte1p = pmap_pte1(pmap, pv->pv_va);
 		KASSERT(!pte1_is_section(pte1_load(pte1p)), ("%s: found"
 		    " a section in page %p's pv list", __func__, m));
 		pte2p = pmap_pte2_quick(pmap, pv->pv_va);
 		if (pte2_is_dirty(pte2_load(pte2p))) {
 			pte2_set_bit(pte2p, PTE2_NM);
 			pmap_tlb_flush(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 }
 
 
 /*
  *  Sets the memory attribute for the specified page.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 	pt2_entry_t *cmap2_pte2p;
 	vm_memattr_t oma;
 	vm_paddr_t pa;
 	struct pcpu *pc;
 
 	oma = m->md.pat_mode;
 	m->md.pat_mode = ma;
 
 	CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m,
 	    VM_PAGE_TO_PHYS(m), oma, ma);
 	if ((m->flags & PG_FICTITIOUS) != 0)
 		return;
 #if 0
 	/*
 	 * If "m" is a normal page, flush it from the cache.
 	 *
 	 * First, try to find an existing mapping of the page by sf
 	 * buffer. sf_buf_invalidate_cache() modifies mapping and
 	 * flushes the cache.
 	 */
 	if (sf_buf_invalidate_cache(m, oma))
 		return;
 #endif
 	/*
 	 * If page is not mapped by sf buffer, map the page
 	 * transient and do invalidation.
 	 */
 	if (ma != oma) {
 		pa = VM_PAGE_TO_PHYS(m);
 		sched_pin();
 		pc = get_pcpu();
 		cmap2_pte2p = pc->pc_cmap2_pte2p;
 		mtx_lock(&pc->pc_cmap_lock);
 		if (pte2_load(cmap2_pte2p) != 0)
 			panic("%s: CMAP2 busy", __func__);
 		pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW,
 		    vm_memattr_to_pte2(ma)));
 		dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE);
 		pte2_clear(cmap2_pte2p);
 		tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 		sched_unpin();
 		mtx_unlock(&pc->pc_cmap_lock);
 	}
 }
 
 /*
  *  Miscellaneous support routines follow
  */
 
 /*
  *  Returns TRUE if the given page is mapped individually or as part of
  *  a 1mpage.  Otherwise, returns FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	boolean_t rv;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (FALSE);
 	rw_wlock(&pvh_global_lock);
 	rv = !TAILQ_EMPTY(&m->md.pv_list) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *  Returns true if the pmap's pv is one of the first
  *  16 pvs linked to from this page.  This count may
  *  be changed upwards or downwards in the future; it
  *  is only necessary that true be returned for a small
  *  subset of pmaps for proper page aging.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	int loops = 0;
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("%s: page %p is not managed", __func__, m));
 	rv = FALSE;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		if (PV_PMAP(pv) == pmap) {
 			rv = TRUE;
 			break;
 		}
 		loops++;
 		if (loops >= 16)
 			break;
 	}
 	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			if (PV_PMAP(pv) == pmap) {
 				rv = TRUE;
 				break;
 			}
 			loops++;
 			if (loops >= 16)
 				break;
 		}
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (rv);
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	pagezero(pc->pc_cmap2_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *	pmap_zero_page_area zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	if (off == 0 && size == PAGE_SIZE)
 		pagezero(pc->pc_cmap2_addr);
 	else
 		bzero(pc->pc_cmap2_addr + off, size);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  */
 void
 pmap_copy_page(vm_page_t src, vm_page_t dst)
 {
 	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap1_pte2p = pc->pc_cmap1_pte2p;
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap1_pte2p) != 0)
 		panic("%s: CMAP1 busy", __func__);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(src),
 	    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(src)));
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(dst),
 	    PTE2_AP_KRW, vm_page_pte2_attr(dst)));
 	bcopy(pc->pc_cmap1_addr, pc->pc_cmap2_addr, PAGE_SIZE);
 	pte2_clear(cmap1_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 int unmapped_buf_allowed = 1;
 
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
 	pt2_entry_t *cmap1_pte2p, *cmap2_pte2p;
 	vm_page_t a_pg, b_pg;
 	char *a_cp, *b_cp;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	struct pcpu *pc;
 	int cnt;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap1_pte2p = pc->pc_cmap1_pte2p;
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap1_pte2p) != 0)
 		panic("pmap_copy_pages: CMAP1 busy");
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("pmap_copy_pages: CMAP2 busy");
 	while (xfersize > 0) {
 		a_pg = ma[a_offset >> PAGE_SHIFT];
 		a_pg_offset = a_offset & PAGE_MASK;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		b_pg = mb[b_offset >> PAGE_SHIFT];
 		b_pg_offset = b_offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		pte2_store(cmap1_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(a_pg),
 		    PTE2_AP_KR | PTE2_NM, vm_page_pte2_attr(a_pg)));
 		tlb_flush_local((vm_offset_t)pc->pc_cmap1_addr);
 		pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(b_pg),
 		    PTE2_AP_KRW, vm_page_pte2_attr(b_pg)));
 		tlb_flush_local((vm_offset_t)pc->pc_cmap2_addr);
 		a_cp = pc->pc_cmap1_addr + a_pg_offset;
 		b_cp = pc->pc_cmap2_addr + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
 	}
 	pte2_clear(cmap1_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap1_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 vm_offset_t
 pmap_quick_enter_page(vm_page_t m)
 {
 	struct pcpu *pc;
 	pt2_entry_t *pte2p;
 
 	critical_enter();
 	pc = get_pcpu();
 	pte2p = pc->pc_qmap_pte2p;
 
 	KASSERT(pte2_load(pte2p) == 0, ("%s: PTE2 busy", __func__));
 
 	pte2_store(pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	return (pc->pc_qmap_addr);
 }
 
 void
 pmap_quick_remove_page(vm_offset_t addr)
 {
 	struct pcpu *pc;
 	pt2_entry_t *pte2p;
 
 	pc = get_pcpu();
 	pte2p = pc->pc_qmap_pte2p;
 
 	KASSERT(addr == pc->pc_qmap_addr, ("%s: invalid address", __func__));
 	KASSERT(pte2_load(pte2p) != 0, ("%s: PTE2 not in use", __func__));
 
 	pte2_clear(pte2p);
 	tlb_flush(pc->pc_qmap_addr);
 	critical_exit();
 }
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  */
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 	struct spglist free;
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t nextva;
 
 	if (dst_addr != src_addr)
 		return;
 
 	if (!pmap_is_current(src_pmap))
 		return;
 
 	rw_wlock(&pvh_global_lock);
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	sched_pin();
 	for (addr = src_addr; addr < end_addr; addr = nextva) {
 		pt2_entry_t *src_pte2p, *dst_pte2p;
 		vm_page_t dst_mpt2pg, src_mpt2pg;
 		pt1_entry_t src_pte1;
 		u_int pte1_idx;
 
 		KASSERT(addr < VM_MAXUSER_ADDRESS,
 		    ("%s: invalid to pmap_copy page tables", __func__));
 
 		nextva = pte1_trunc(addr + PTE1_SIZE);
 		if (nextva < addr)
 			nextva = end_addr;
 
 		pte1_idx = pte1_index(addr);
 		src_pte1 = src_pmap->pm_pt1[pte1_idx];
 		if (pte1_is_section(src_pte1)) {
 			if ((addr & PTE1_OFFSET) != 0 ||
 			    (addr + PTE1_SIZE) > end_addr)
 				continue;
 			if (dst_pmap->pm_pt1[pte1_idx] == 0 &&
 			    (!pte1_is_managed(src_pte1) ||
 			    pmap_pv_insert_pte1(dst_pmap, addr, src_pte1,
 			    PMAP_ENTER_NORECLAIM))) {
 				dst_pmap->pm_pt1[pte1_idx] = src_pte1 &
 				    ~PTE1_W;
 				dst_pmap->pm_stats.resident_count +=
 				    PTE1_SIZE / PAGE_SIZE;
 				pmap_pte1_mappings++;
 			}
 			continue;
 		} else if (!pte1_is_link(src_pte1))
 			continue;
 
 		src_mpt2pg = PHYS_TO_VM_PAGE(pte1_link_pa(src_pte1));
 
 		/*
 		 * We leave PT2s to be linked from PT1 even if they are not
 		 * referenced until all PT2s in a page are without reference.
 		 *
 		 * QQQ: It could be changed ...
 		 */
 #if 0 /* single_pt2_link_is_cleared */
 		KASSERT(pt2_wirecount_get(src_mpt2pg, pte1_idx) > 0,
 		    ("%s: source page table page is unused", __func__));
 #else
 		if (pt2_wirecount_get(src_mpt2pg, pte1_idx) == 0)
 			continue;
 #endif
 		if (nextva > end_addr)
 			nextva = end_addr;
 
 		src_pte2p = pt2map_entry(addr);
 		while (addr < nextva) {
 			pt2_entry_t temp_pte2;
 			temp_pte2 = pte2_load(src_pte2p);
 			/*
 			 * we only virtual copy managed pages
 			 */
 			if (pte2_is_managed(temp_pte2)) {
 				dst_mpt2pg = pmap_allocpte2(dst_pmap, addr,
 				    PMAP_ENTER_NOSLEEP);
 				if (dst_mpt2pg == NULL)
 					goto out;
 				dst_pte2p = pmap_pte2_quick(dst_pmap, addr);
 				if (!pte2_is_valid(pte2_load(dst_pte2p)) &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
 				    PHYS_TO_VM_PAGE(pte2_pa(temp_pte2)))) {
 					/*
 					 * Clear the wired, modified, and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
 					temp_pte2 &=  ~(PTE2_W | PTE2_A);
 					temp_pte2 |= PTE2_NM;
 					pte2_store(dst_pte2p, temp_pte2);
 					dst_pmap->pm_stats.resident_count++;
 				} else {
 					SLIST_INIT(&free);
 					if (pmap_unwire_pt2(dst_pmap, addr,
 					    dst_mpt2pg, &free)) {
 						pmap_tlb_flush(dst_pmap, addr);
 						vm_page_free_pages_toq(&free,
 						    false);
 					}
 					goto out;
 				}
 				if (pt2_wirecount_get(dst_mpt2pg, pte1_idx) >=
 				    pt2_wirecount_get(src_mpt2pg, pte1_idx))
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte2p++;
 		}
 	}
 out:
 	sched_unpin();
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
  *	Increase the starting virtual address of the given mapping if a
  *	different alignment might result in more section mappings.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t pte1_offset;
 
 	if (size < PTE1_SIZE)
 		return;
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	pte1_offset = offset & PTE1_OFFSET;
 	if (size - ((PTE1_SIZE - pte1_offset) & PTE1_OFFSET) < PTE1_SIZE ||
 	    (*addr & PTE1_OFFSET) == pte1_offset)
 		return;
 	if ((*addr & PTE1_OFFSET) < pte1_offset)
 		*addr = pte1_trunc(*addr) + pte1_offset;
 	else
 		*addr = pte1_roundup(*addr) + pte1_offset;
 }
 
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t pmap, oldpmap;
 	u_int cpuid, ttb;
 
 	PDEBUG(9, printf("%s: td = %08x\n", __func__, (uint32_t)td));
 
 	critical_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 	cpuid = PCPU_GET(cpuid);
 
 #if defined(SMP)
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
 #else
 	CPU_CLR(cpuid, &oldpmap->pm_active);
 	CPU_SET(cpuid, &pmap->pm_active);
 #endif
 
 	ttb = pmap_ttb_get(pmap);
 
 	/*
 	 * pmap_activate is for the current thread on the current cpu
 	 */
 	td->td_pcb->pcb_pagedir = ttb;
 	cp15_ttbr_set(ttb);
 	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
 
 /*
  * Perform the pmap work for mincore(2).  If the page is not both referenced and
  * modified by this pmap, returns its physical address so that the caller can
  * find other mappings.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 	vm_paddr_t pa;
 	bool managed;
 	int val;
 
 	PMAP_LOCK(pmap);
 	pte1p = pmap_pte1(pmap, addr);
 	pte1 = pte1_load(pte1p);
 	if (pte1_is_section(pte1)) {
 		pa = trunc_page(pte1_pa(pte1) | (addr & PTE1_OFFSET));
 		managed = pte1_is_managed(pte1);
 		val = MINCORE_SUPER | MINCORE_INCORE;
 		if (pte1_is_dirty(pte1))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if (pte1 & PTE1_A)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	} else if (pte1_is_link(pte1)) {
 		pte2p = pmap_pte2(pmap, addr);
 		pte2 = pte2_load(pte2p);
 		pmap_pte2_release(pte2p);
 		pa = pte2_pa(pte2);
 		managed = pte2_is_managed(pte2);
 		val = MINCORE_INCORE;
 		if (pte2_is_dirty(pte2))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if (pte2 & PTE2_A)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 	} else {
 		managed = false;
 		val = 0;
 	}
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 		*pap = pa;
 	}
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 void
 pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
 {
 	vm_offset_t sva;
 	uint32_t l2attr;
 
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("%s: device mapping not page-sized", __func__));
 
 	sva = va;
 	l2attr = vm_memattr_to_pte2(VM_MEMATTR_DEVICE);
 	while (size != 0) {
 		pmap_kenter_prot_attr(va, pa, PTE2_AP_KRW, l2attr);
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 void
 pmap_kremove_device(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t sva;
 
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("%s: device mapping not page-sized", __func__));
 
 	sva = va;
 	while (size != 0) {
 		pmap_kremove(va);
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	tlb_flush_range(sva, va - sva);
 }
 
 void
 pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
 {
 
 	pcb->pcb_pagedir = pmap_ttb_get(pmap);
 }
 
 
 /*
  *  Clean L1 data cache range by physical address.
  *  The range must be within a single page.
  */
 static void
 pmap_dcache_wb_pou(vm_paddr_t pa, vm_size_t size, uint32_t attr)
 {
 	pt2_entry_t *cmap2_pte2p;
 	struct pcpu *pc;
 
 	KASSERT(((pa & PAGE_MASK) + size) <= PAGE_SIZE,
 	    ("%s: not on single page", __func__));
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, attr));
 	dcache_wb_pou((vm_offset_t)pc->pc_cmap2_addr + (pa & PAGE_MASK), size);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 /*
  *  Sync instruction cache range which is not mapped yet.
  */
 void
 cache_icache_sync_fresh(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 {
 	uint32_t len, offset;
 	vm_page_t m;
 
 	/* Write back d-cache on given address range. */
 	offset = pa & PAGE_MASK;
 	for ( ; size != 0; size -= len, pa += len, offset = 0) {
 		len = min(PAGE_SIZE - offset, size);
 		m = PHYS_TO_VM_PAGE(pa);
 		KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
 		  __func__, pa));
 		pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
 	}
 	/*
 	 * I-cache is VIPT. Only way how to flush all virtual mappings
 	 * on given physical address is to invalidate all i-cache.
 	 */
 	icache_inv_all();
 }
 
 void
 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t size)
 {
 
 	/* Write back d-cache on given address range. */
 	if (va >= VM_MIN_KERNEL_ADDRESS) {
 		dcache_wb_pou(va, size);
 	} else {
 		uint32_t len, offset;
 		vm_paddr_t pa;
 		vm_page_t m;
 
 		offset = va & PAGE_MASK;
 		for ( ; size != 0; size -= len, va += len, offset = 0) {
 			pa = pmap_extract(pmap, va); /* offset is preserved */
 			len = min(PAGE_SIZE - offset, size);
 			m = PHYS_TO_VM_PAGE(pa);
 			KASSERT(m != NULL, ("%s: vm_page_t is null for %#x",
 				__func__, pa));
 			pmap_dcache_wb_pou(pa, len, vm_page_pte2_attr(m));
 		}
 	}
 	/*
 	 * I-cache is VIPT. Only way how to flush all virtual mappings
 	 * on given physical address is to invalidate all i-cache.
 	 */
 	icache_inv_all();
 }
 
 /*
  *  The implementation of pmap_fault() uses IN_RANGE2() macro which
  *  depends on the fact that given range size is a power of 2.
  */
 CTASSERT(powerof2(NB_IN_PT1));
 CTASSERT(powerof2(PT2MAP_SIZE));
 
 #define IN_RANGE2(addr, start, size)	\
     ((vm_offset_t)(start) == ((vm_offset_t)(addr) & ~((size) - 1)))
 
 /*
  *  Handle access and R/W emulation faults.
  */
 int
 pmap_fault(pmap_t pmap, vm_offset_t far, uint32_t fsr, int idx, bool usermode)
 {
 	pt1_entry_t *pte1p, pte1;
 	pt2_entry_t *pte2p, pte2;
 
 	if (pmap == NULL)
 		pmap = kernel_pmap;
 
 	/*
 	 * In kernel, we should never get abort with FAR which is in range of
 	 * pmap->pm_pt1 or PT2MAP address spaces. If it happens, stop here
 	 * and print out a useful abort message and even get to the debugger
 	 * otherwise it likely ends with never ending loop of aborts.
 	 */
 	if (__predict_false(IN_RANGE2(far, pmap->pm_pt1, NB_IN_PT1))) {
 		/*
 		 * All L1 tables should always be mapped and present.
 		 * However, we check only current one herein. For user mode,
 		 * only permission abort from malicious user is not fatal.
 		 * And alignment abort as it may have higher priority.
 		 */
 		if (!usermode || (idx != FAULT_ALIGN && idx != FAULT_PERM_L2)) {
 			CTR4(KTR_PMAP, "%s: pmap %#x pm_pt1 %#x far %#x",
 			    __func__, pmap, pmap->pm_pt1, far);
 			panic("%s: pm_pt1 abort", __func__);
 		}
 		return (KERN_INVALID_ADDRESS);
 	}
 	if (__predict_false(IN_RANGE2(far, PT2MAP, PT2MAP_SIZE))) {
 		/*
 		 * PT2MAP should be always mapped and present in current
 		 * L1 table. However, only existing L2 tables are mapped
 		 * in PT2MAP. For user mode, only L2 translation abort and
 		 * permission abort from malicious user is not fatal.
 		 * And alignment abort as it may have higher priority.
 		 */
 		if (!usermode || (idx != FAULT_ALIGN &&
 		    idx != FAULT_TRAN_L2 && idx != FAULT_PERM_L2)) {
 			CTR4(KTR_PMAP, "%s: pmap %#x PT2MAP %#x far %#x",
 			    __func__, pmap, PT2MAP, far);
 			panic("%s: PT2MAP abort", __func__);
 		}
 		return (KERN_INVALID_ADDRESS);
 	}
 
 	/*
 	 * A pmap lock is used below for handling of access and R/W emulation
 	 * aborts. They were handled by atomic operations before so some
 	 * analysis of new situation is needed to answer the following question:
 	 * Is it safe to use the lock even for these aborts?
 	 *
 	 * There may happen two cases in general:
 	 *
 	 * (1) Aborts while the pmap lock is locked already - this should not
 	 * happen as pmap lock is not recursive. However, under pmap lock only
 	 * internal kernel data should be accessed and such data should be
 	 * mapped with A bit set and NM bit cleared. If double abort happens,
 	 * then a mapping of data which has caused it must be fixed. Further,
 	 * all new mappings are always made with A bit set and the bit can be
 	 * cleared only on managed mappings.
 	 *
 	 * (2) Aborts while another lock(s) is/are locked - this already can
 	 * happen. However, there is no difference here if it's either access or
 	 * R/W emulation abort, or if it's some other abort.
 	 */
 
 	PMAP_LOCK(pmap);
 #ifdef INVARIANTS
 	pte1 = pte1_load(pmap_pte1(pmap, far));
 	if (pte1_is_link(pte1)) {
 		/*
 		 * Check in advance that associated L2 page table is mapped into
 		 * PT2MAP space. Note that faulty access to not mapped L2 page
 		 * table is caught in more general check above where "far" is
 		 * checked that it does not lay in PT2MAP space. Note also that
 		 * L1 page table and PT2TAB always exist and are mapped.
 		 */
 		pte2 = pt2tab_load(pmap_pt2tab_entry(pmap, far));
 		if (!pte2_is_valid(pte2))
 			panic("%s: missing L2 page table (%p, %#x)",
 			    __func__, pmap, far);
 	}
 #endif
 #ifdef SMP
 	/*
 	 * Special treatment is due to break-before-make approach done when
 	 * pte1 is updated for userland mapping during section promotion or
 	 * demotion. If not caught here, pmap_enter() can find a section
 	 * mapping on faulting address. That is not allowed.
 	 */
 	if (idx == FAULT_TRAN_L1 && usermode && cp15_ats1cur_check(far) == 0) {
 		PMAP_UNLOCK(pmap);
 		return (KERN_SUCCESS);
 	}
 #endif
 	/*
 	 * Accesss bits for page and section. Note that the entry
 	 * is not in TLB yet, so TLB flush is not necessary.
 	 *
 	 * QQQ: This is hardware emulation, we do not call userret()
 	 *      for aborts from user mode.
 	 */
 	if (idx == FAULT_ACCESS_L2) {
 		pte1 = pte1_load(pmap_pte1(pmap, far));
 		if (pte1_is_link(pte1)) {
 			/* L2 page table should exist and be mapped. */
 			pte2p = pt2map_entry(far);
 			pte2 = pte2_load(pte2p);
 			if (pte2_is_valid(pte2)) {
 				pte2_store(pte2p, pte2 | PTE2_A);
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got L2 access fault but PTE1 is not a link.
 			 * Probably some race happened, do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L2 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 	if (idx == FAULT_ACCESS_L1) {
 		pte1p = pmap_pte1(pmap, far);
 		pte1 = pte1_load(pte1p);
 		if (pte1_is_section(pte1)) {
 			pte1_store(pte1p, pte1 | PTE1_A);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		} else {
 			/*
 			 * We got L1 access fault but PTE1 is not section
 			 * mapping. Probably some race happened, do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_ACCESS_L1 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 
 	/*
 	 * Handle modify bits for page and section. Note that the modify
 	 * bit is emulated by software. So PTEx_RO is software read only
 	 * bit and PTEx_NM flag is real hardware read only bit.
 	 *
 	 * QQQ: This is hardware emulation, we do not call userret()
 	 *      for aborts from user mode.
 	 */
 	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L2)) {
 		pte1 = pte1_load(pmap_pte1(pmap, far));
 		if (pte1_is_link(pte1)) {
 			/* L2 page table should exist and be mapped. */
 			pte2p = pt2map_entry(far);
 			pte2 = pte2_load(pte2p);
 			if (pte2_is_valid(pte2) && !(pte2 & PTE2_RO) &&
 			    (pte2 & PTE2_NM)) {
 				pte2_store(pte2p, pte2 & ~PTE2_NM);
 				tlb_flush(trunc_page(far));
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got L2 permission fault but PTE1 is not a link.
 			 * Probably some race happened, do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_PERM_L2 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 	if ((fsr & FSR_WNR) && (idx == FAULT_PERM_L1)) {
 		pte1p = pmap_pte1(pmap, far);
 		pte1 = pte1_load(pte1p);
 		if (pte1_is_section(pte1)) {
 			if (!(pte1 & PTE1_RO) && (pte1 & PTE1_NM)) {
 				pte1_store(pte1p, pte1 & ~PTE1_NM);
 				tlb_flush(pte1_trunc(far));
 				PMAP_UNLOCK(pmap);
 				return (KERN_SUCCESS);
 			}
 		} else {
 			/*
 			 * We got L1 permission fault but PTE1 is not section
 			 * mapping. Probably some race happened, do nothing.
 			 */
 			CTR3(KTR_PMAP, "%s: FAULT_PERM_L1 - pmap %#x far %#x",
 			    __func__, pmap, far);
 			PMAP_UNLOCK(pmap);
 			return (KERN_SUCCESS);
 		}
 	}
 
 	/*
 	 * QQQ: The previous code, mainly fast handling of access and
 	 *      modify bits aborts, could be moved to ASM. Now we are
 	 *      starting to deal with not fast aborts.
 	 */
 	PMAP_UNLOCK(pmap);
 	return (KERN_FAILURE);
 }
 
 #if defined(PMAP_DEBUG)
 /*
  *  Reusing of KVA used in pmap_zero_page function !!!
  */
 static void
 pmap_zero_page_check(vm_page_t m)
 {
 	pt2_entry_t *cmap2_pte2p;
 	uint32_t *p, *end;
 	struct pcpu *pc;
 
 	sched_pin();
 	pc = get_pcpu();
 	cmap2_pte2p = pc->pc_cmap2_pte2p;
 	mtx_lock(&pc->pc_cmap_lock);
 	if (pte2_load(cmap2_pte2p) != 0)
 		panic("%s: CMAP2 busy", __func__);
 	pte2_store(cmap2_pte2p, PTE2_KERN_NG(VM_PAGE_TO_PHYS(m), PTE2_AP_KRW,
 	    vm_page_pte2_attr(m)));
 	end = (uint32_t*)(pc->pc_cmap2_addr + PAGE_SIZE);
 	for (p = (uint32_t*)pc->pc_cmap2_addr; p < end; p++)
 		if (*p != 0)
 			panic("%s: page %p not zero, va: %p", __func__, m,
 			    pc->pc_cmap2_addr);
 	pte2_clear(cmap2_pte2p);
 	tlb_flush((vm_offset_t)pc->pc_cmap2_addr);
 	sched_unpin();
 	mtx_unlock(&pc->pc_cmap_lock);
 }
 
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte2 = 0;
 	int i, j, index;
 
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_pid != pid || p->p_vmspace == NULL)
 			continue;
 		index = 0;
 		pmap = vmspace_pmap(p->p_vmspace);
 		for (i = 0; i < NPTE1_IN_PT1; i++) {
 			pt1_entry_t pte1;
 			pt2_entry_t *pte2p, pte2;
 			vm_offset_t base, va;
 			vm_paddr_t pa;
 			vm_page_t m;
 
 			base = i << PTE1_SHIFT;
 			pte1 = pte1_load(&pmap->pm_pt1[i]);
 
 			if (pte1_is_section(pte1)) {
 				/*
 				 * QQQ: Do something here!
 				 */
 			} else if (pte1_is_link(pte1)) {
 				for (j = 0; j < NPTE2_IN_PT2; j++) {
 					va = base + (j << PAGE_SHIFT);
 					if (va >= VM_MIN_KERNEL_ADDRESS) {
 						if (index) {
 							index = 0;
 							printf("\n");
 						}
 						sx_sunlock(&allproc_lock);
 						return (npte2);
 					}
 					pte2p = pmap_pte2(pmap, va);
 					pte2 = pte2_load(pte2p);
 					pmap_pte2_release(pte2p);
 					if (!pte2_is_valid(pte2))
 						continue;
 
 					pa = pte2_pa(pte2);
 					m = PHYS_TO_VM_PAGE(pa);
 					printf("va: 0x%x, pa: 0x%x, w: %d, "
 					    "f: 0x%x", va, pa,
 					    m->ref_count, m->flags);
 					npte2++;
 					index++;
 					if (index >= 2) {
 						index = 0;
 						printf("\n");
 					} else {
 						printf(" ");
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return (npte2);
 }
 
 #endif
 
 #ifdef DDB
 static pt2_entry_t *
 pmap_pte2_ddb(pmap_t pmap, vm_offset_t va)
 {
 	pt1_entry_t pte1;
 	vm_paddr_t pt2pg_pa;
 
 	pte1 = pte1_load(pmap_pte1(pmap, va));
 	if (!pte1_is_link(pte1))
 		return (NULL);
 
 	if (pmap_is_current(pmap))
 		return (pt2map_entry(va));
 
 	/* Note that L2 page table size is not equal to PAGE_SIZE. */
 	pt2pg_pa = trunc_page(pte1_link_pa(pte1));
 	if (pte2_pa(pte2_load(PMAP3)) != pt2pg_pa) {
 		pte2_store(PMAP3, PTE2_KPT(pt2pg_pa));
 #ifdef SMP
 		PMAP3cpu = PCPU_GET(cpuid);
 #endif
 		tlb_flush_local((vm_offset_t)PADDR3);
 	}
 #ifdef SMP
 	else if (PMAP3cpu != PCPU_GET(cpuid)) {
 		PMAP3cpu = PCPU_GET(cpuid);
 		tlb_flush_local((vm_offset_t)PADDR3);
 	}
 #endif
 	return (PADDR3 + (arm32_btop(va) & (NPTE2_IN_PG - 1)));
 }
 
 static void
 dump_pmap(pmap_t pmap)
 {
 
 	printf("pmap %p\n", pmap);
 	printf("  pm_pt1: %p\n", pmap->pm_pt1);
 	printf("  pm_pt2tab: %p\n", pmap->pm_pt2tab);
 	printf("  pm_active: 0x%08lX\n", pmap->pm_active.__bits[0]);
 }
 
 DB_SHOW_COMMAND(pmaps, pmap_list_pmaps)
 {
 
 	pmap_t pmap;
 	LIST_FOREACH(pmap, &allpmaps, pm_list) {
 		dump_pmap(pmap);
 	}
 }
 
 static int
 pte2_class(pt2_entry_t pte2)
 {
 	int cls;
 
 	cls = (pte2 >> 2) & 0x03;
 	cls |= (pte2 >> 4) & 0x04;
 	return (cls);
 }
 
 static void
 dump_section(pmap_t pmap, uint32_t pte1_idx)
 {
 }
 
 static void
 dump_link(pmap_t pmap, uint32_t pte1_idx, boolean_t invalid_ok)
 {
 	uint32_t i;
 	vm_offset_t va;
 	pt2_entry_t *pte2p, pte2;
 	vm_page_t m;
 
 	va = pte1_idx << PTE1_SHIFT;
 	pte2p = pmap_pte2_ddb(pmap, va);
 	for (i = 0; i < NPTE2_IN_PT2; i++, pte2p++, va += PAGE_SIZE) {
 		pte2 = pte2_load(pte2p);
 		if (pte2 == 0)
 			continue;
 		if (!pte2_is_valid(pte2)) {
 			printf(" 0x%08X: 0x%08X", va, pte2);
 			if (!invalid_ok)
 				printf(" - not valid !!!");
 			printf("\n");
 			continue;
 		}
 		m = PHYS_TO_VM_PAGE(pte2_pa(pte2));
 		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, g:%d, m:%p", va , pte2,
 		    pte2_class(pte2), !!(pte2 & PTE2_S), !(pte2 & PTE2_NG), m);
 		if (m != NULL) {
 			printf(" v:%d w:%d f:0x%04X\n", m->valid,
 			    m->ref_count, m->flags);
 		} else {
 			printf("\n");
 		}
 	}
 }
 
 static __inline boolean_t
 is_pv_chunk_space(vm_offset_t va)
 {
 
 	if ((((vm_offset_t)pv_chunkbase) <= va) &&
 	    (va < ((vm_offset_t)pv_chunkbase + PAGE_SIZE * pv_maxchunks)))
 		return (TRUE);
 	return (FALSE);
 }
 
 DB_SHOW_COMMAND(pmap, pmap_pmap_print)
 {
 	/* XXX convert args. */
 	pmap_t pmap = (pmap_t)addr;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	vm_offset_t va, eva;
 	vm_page_t m;
 	uint32_t i;
 	boolean_t invalid_ok, dump_link_ok, dump_pv_chunk;
 
 	if (have_addr) {
 		pmap_t pm;
 
 		LIST_FOREACH(pm, &allpmaps, pm_list)
 			if (pm == pmap) break;
 		if (pm == NULL) {
 			printf("given pmap %p is not in allpmaps list\n", pmap);
 			return;
 		}
 	} else
 		pmap = PCPU_GET(curpmap);
 
 	eva = (modif[0] == 'u') ? VM_MAXUSER_ADDRESS : 0xFFFFFFFF;
 	dump_pv_chunk = FALSE; /* XXX evaluate from modif[] */
 
 	printf("pmap: 0x%08X\n", (uint32_t)pmap);
 	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
 	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);
 
 	for(i = 0; i < NPTE1_IN_PT1; i++) {
 		pte1 = pte1_load(&pmap->pm_pt1[i]);
 		if (pte1 == 0)
 			continue;
 		va = i << PTE1_SHIFT;
 		if (va >= eva)
 			break;
 
 		if (pte1_is_section(pte1)) {
 			printf("0x%08X: Section 0x%08X, s:%d g:%d\n", va, pte1,
 			    !!(pte1 & PTE1_S), !(pte1 & PTE1_NG));
 			dump_section(pmap, i);
 		} else if (pte1_is_link(pte1)) {
 			dump_link_ok = TRUE;
 			invalid_ok = FALSE;
 			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
 			m = PHYS_TO_VM_PAGE(pte1_link_pa(pte1));
 			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X m: %p",
 			    va, pte1, pte2, m);
 			if (is_pv_chunk_space(va)) {
 				printf(" - pv_chunk space");
 				if (dump_pv_chunk)
 					invalid_ok = TRUE;
 				else
 					dump_link_ok = FALSE;
 			}
 			else if (m != NULL)
 				printf(" w:%d w2:%u", m->ref_count,
 				    pt2_wirecount_get(m, pte1_index(va)));
 			if (pte2 == 0)
 				printf(" !!! pt2tab entry is ZERO");
 			else if (pte2_pa(pte1) != pte2_pa(pte2))
 				printf(" !!! pt2tab entry is DIFFERENT - m: %p",
 				    PHYS_TO_VM_PAGE(pte2_pa(pte2)));
 			printf("\n");
 			if (dump_link_ok)
 				dump_link(pmap, i, invalid_ok);
 		} else
 			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
 	}
 }
 
 static void
 dump_pt2tab(pmap_t pmap)
 {
 	uint32_t i;
 	pt2_entry_t pte2;
 	vm_offset_t va;
 	vm_paddr_t pa;
 	vm_page_t m;
 
 	printf("PT2TAB:\n");
 	for (i = 0; i < PT2TAB_ENTRIES; i++) {
 		pte2 = pte2_load(&pmap->pm_pt2tab[i]);
 		if (!pte2_is_valid(pte2))
 			continue;
 		va = i << PT2TAB_SHIFT;
 		pa = pte2_pa(pte2);
 		m = PHYS_TO_VM_PAGE(pa);
 		printf(" 0x%08X: 0x%08X, TEX%d, s:%d, m:%p", va, pte2,
 		    pte2_class(pte2), !!(pte2 & PTE2_S), m);
 		if (m != NULL)
 			printf(" , w: %d, f: 0x%04X pidx: %lld",
 			    m->ref_count, m->flags, m->pindex);
 		printf("\n");
 	}
 }
 
 DB_SHOW_COMMAND(pmap_pt2tab, pmap_pt2tab_print)
 {
 	/* XXX convert args. */
 	pmap_t pmap = (pmap_t)addr;
 	pt1_entry_t pte1;
 	pt2_entry_t pte2;
 	vm_offset_t va;
 	uint32_t i, start;
 
 	if (have_addr) {
 		printf("supported only on current pmap\n");
 		return;
 	}
 
 	pmap = PCPU_GET(curpmap);
 	printf("curpmap: 0x%08X\n", (uint32_t)pmap);
 	printf("PT2MAP: 0x%08X\n", (uint32_t)PT2MAP);
 	printf("pt2tab: 0x%08X\n", (uint32_t)pmap->pm_pt2tab);
 
 	start = pte1_index((vm_offset_t)PT2MAP);
 	for (i = start; i < (start + NPT2_IN_PT2TAB); i++) {
 		pte1 = pte1_load(&pmap->pm_pt1[i]);
 		if (pte1 == 0)
 			continue;
 		va = i << PTE1_SHIFT;
 		if (pte1_is_section(pte1)) {
 			printf("0x%08X: Section 0x%08X, s:%d\n", va, pte1,
 			    !!(pte1 & PTE1_S));
 			dump_section(pmap, i);
 		} else if (pte1_is_link(pte1)) {
 			pte2 = pte2_load(pmap_pt2tab_entry(pmap, va));
 			printf("0x%08X: Link 0x%08X, pt2tab: 0x%08X\n", va,
 			    pte1, pte2);
 			if (pte2 == 0)
 				printf("  !!! pt2tab entry is ZERO\n");
 		} else
 			printf("0x%08X: Invalid entry 0x%08X\n", va, pte1);
 	}
 	dump_pt2tab(pmap);
 }
 #endif
Index: head/sys/arm/include/physmem.h
===================================================================
--- head/sys/arm/include/physmem.h	(revision 360081)
+++ head/sys/arm/include/physmem.h	(nonexistent)
@@ -1,94 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef	_MACHINE_PHYSMEM_H_
-#define	_MACHINE_PHYSMEM_H_
-
-/*
- * The physical address at which the kernel was loaded.
- */
-extern vm_paddr_t arm_physmem_kernaddr;
-
-/*
- * Routines to help configure physical ram.
- *
- * Multiple regions of contiguous physical ram can be added (in any order).
- *
- * Multiple regions of physical ram that should be excluded from crash dumps, or
- * memory allocation, or both, can be added (in any order).
- *
- * After all early kernel init is done and it's time to configure all
- * remainining non-excluded physical ram for use by other parts of the kernel,
- * arm_physmem_init_kernel_globals() processes the hardware regions and
- * exclusion regions to generate the global dump_avail and phys_avail arrays
- * that communicate physical ram configuration to other parts of the kernel.
- */
-
-#define	EXFLAG_NODUMP	0x01
-#define	EXFLAG_NOALLOC	0x02
-
-void arm_physmem_hardware_region(uint64_t pa, uint64_t sz);
-void arm_physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t flags);
-size_t arm_physmem_avail(vm_paddr_t *avail, size_t maxavail);
-void arm_physmem_init_kernel_globals(void);
-void arm_physmem_print_tables(void);
-
-/*
- * Convenience routines for FDT.
- */
-
-#ifdef FDT
-
-#include <machine/ofw_machdep.h>
-
-static inline void
-arm_physmem_hardware_regions(struct mem_region * mrptr, int mrcount)
-{
-	while (mrcount--) {
-		arm_physmem_hardware_region(mrptr->mr_start, mrptr->mr_size);
-		++mrptr;
-	}
-}
-
-static inline void
-arm_physmem_exclude_regions(struct mem_region * mrptr, int mrcount,
-    uint32_t exflags)
-{
-	while (mrcount--) {
-		arm_physmem_exclude_region(mrptr->mr_start, mrptr->mr_size,
-		    exflags);
-		++mrptr;
-	}
-}
-
-#endif /* FDT */
-
-#endif
-

Property changes on: head/sys/arm/include/physmem.h
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property
Index: head/sys/arm/include/md_var.h
===================================================================
--- head/sys/arm/include/md_var.h	(revision 360081)
+++ head/sys/arm/include/md_var.h	(revision 360082)
@@ -1,80 +1,81 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1995 Bruce D. Evans.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the author nor the names of contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12
  * $FreeBSD$
  */
 
 #ifndef	_MACHINE_MD_VAR_H_
 #define	_MACHINE_MD_VAR_H_
 
 extern long Maxmem;
 extern char sigcode[];
 extern int szsigcode;
 extern uint32_t *vm_page_dump;
 extern int vm_page_dump_size;
 extern u_long elf_hwcap;
 extern u_long elf_hwcap2;
+extern vm_paddr_t arm_physmem_kernaddr;
 
 extern int (*_arm_memcpy)(void *, void *, int, int);
 extern int (*_arm_bzero)(void *, int, int);
 
 extern int _min_memcpy_size;
 extern int _min_bzero_size;
 
 #define DST_IS_USER	0x1
 #define SRC_IS_USER	0x2
 #define IS_PHYSICAL	0x4
 
 enum cpu_class {
 	CPU_CLASS_NONE,
 	CPU_CLASS_ARM9TDMI,
 	CPU_CLASS_ARM9ES,
 	CPU_CLASS_ARM9EJS,
 	CPU_CLASS_ARM10E,
 	CPU_CLASS_ARM10EJ,
 	CPU_CLASS_CORTEXA,
 	CPU_CLASS_KRAIT,
 	CPU_CLASS_XSCALE,
 	CPU_CLASS_ARM11J,
 	CPU_CLASS_MARVELL
 };
 extern enum cpu_class cpu_class;
 
 struct dumperinfo;
 extern int busdma_swi_pending;
 void busdma_swi(void);
 void dump_add_page(vm_paddr_t);
 void dump_drop_page(vm_paddr_t);
 int minidumpsys(struct dumperinfo *);
 
 extern uint32_t initial_fpscr;
 
 #endif /* !_MACHINE_MD_VAR_H_ */
Index: head/sys/arm64/arm64/machdep.c
===================================================================
--- head/sys/arm64/arm64/machdep.c	(revision 360081)
+++ head/sys/arm64/arm64/machdep.c	(revision 360082)
@@ -1,1298 +1,1297 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include "opt_acpi.h"
 #include "opt_platform.h"
 #include "opt_ddb.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/csan.h>
 #include <sys/devmap.h>
 #include <sys/efi.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/msgbuf.h>
 #include <sys/pcpu.h>
+#include <sys/physmem.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/ucontext.h>
 #include <sys/vdso.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 
 #include <machine/armreg.h>
 #include <machine/cpu.h>
 #include <machine/debug_monitor.h>
 #include <machine/kdb.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/reg.h>
 #include <machine/undefined.h>
 #include <machine/vmparam.h>
 
-#include <arm/include/physmem.h>
-
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 #ifdef DEV_ACPI
 #include <contrib/dev/acpica/include/acpi.h>
 #include <machine/acpica_machdep.h>
 #endif
 
 #ifdef FDT
 #include <dev/fdt/fdt_common.h>
 #include <dev/ofw/openfirm.h>
 #endif
 
 static void get_fpcontext(struct thread *td, mcontext_t *mcp);
 static void set_fpcontext(struct thread *td, mcontext_t *mcp);
 
 enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
 
 struct pcpu __pcpu[MAXCPU];
 
 static struct trapframe proc0_tf;
 
 int early_boot = 1;
 int cold = 1;
 static int boot_el;
 
 struct kva_md_info kmi;
 
 int64_t dczva_line_size;	/* The size of cache line the dc zva zeroes */
 int has_pan;
 
 /*
  * Physical address of the EFI System Table. Stashed from the metadata hints
  * passed into the kernel and used by the EFI code to call runtime services.
  */
 vm_paddr_t efi_systbl_phys;
 
 /* pagezero_* implementations are provided in support.S */
 void pagezero_simple(void *);
 void pagezero_cache(void *);
 
 /* pagezero_simple is default pagezero */
 void (*pagezero)(void *p) = pagezero_simple;
 
 static void
 pan_setup(void)
 {
 	uint64_t id_aa64mfr1;
 
 	id_aa64mfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
 	if (ID_AA64MMFR1_PAN_VAL(id_aa64mfr1) != ID_AA64MMFR1_PAN_NONE)
 		has_pan = 1;
 }
 
 void
 pan_enable(void)
 {
 
 	/*
 	 * The LLVM integrated assembler doesn't understand the PAN
 	 * PSTATE field. Because of this we need to manually create
 	 * the instruction in an asm block. This is equivalent to:
 	 * msr pan, #1
 	 *
 	 * This sets the PAN bit, stopping the kernel from accessing
 	 * memory when userspace can also access it unless the kernel
 	 * uses the userspace load/store instructions.
 	 */
 	if (has_pan) {
 		WRITE_SPECIALREG(sctlr_el1,
 		    READ_SPECIALREG(sctlr_el1) & ~SCTLR_SPAN);
 		__asm __volatile(".inst 0xd500409f | (0x1 << 8)");
 	}
 }
 
 bool
 has_hyp(void)
 {
 
 	return (boot_el == 2);
 }
 
 static void
 cpu_startup(void *dummy)
 {
 
 	undef_init();
 	identify_cpu();
 	install_cpu_errata();
 
 	vm_ksubmap_init(&kmi);
 	bufinit();
 	vm_pager_bufferinit();
 }
 
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 int
 cpu_idle_wakeup(int cpu)
 {
 
 	return (0);
 }
 
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	regs->sp = frame->tf_sp;
 	regs->lr = frame->tf_lr;
 	regs->elr = frame->tf_elr;
 	regs->spsr = frame->tf_spsr;
 
 	memcpy(regs->x, frame->tf_x, sizeof(regs->x));
 
 #ifdef COMPAT_FREEBSD32
 	/*
 	 * We may be called here for a 32bits process, if we're using a
 	 * 64bits debugger. If so, put PC and SPSR where it expects it.
 	 */
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		regs->x[15] = frame->tf_elr;
 		regs->x[16] = frame->tf_spsr;
 	}
 #endif
 	return (0);
 }
 
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	frame->tf_sp = regs->sp;
 	frame->tf_lr = regs->lr;
 	frame->tf_elr = regs->elr;
 	frame->tf_spsr &= ~PSR_FLAGS;
 	frame->tf_spsr |= regs->spsr & PSR_FLAGS;
 
 	memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x));
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
 		/*
 		 * We may be called for a 32bits process if we're using
 		 * a 64bits debugger. If so, get PC and SPSR from where
 		 * it put it.
 		 */
 		frame->tf_elr = regs->x[15];
 		frame->tf_spsr = regs->x[16] & PSR_FLAGS;
 	}
 #endif
 	return (0);
 }
 
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef VFP
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running VFP instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		if (td == curthread)
 			vfp_save_state(td, pcb);
 
 		KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
 		    ("Called fill_fpregs while the kernel is using the VFP"));
 		memcpy(regs->fp_q, pcb->pcb_fpustate.vfp_regs,
 		    sizeof(regs->fp_q));
 		regs->fp_cr = pcb->pcb_fpustate.vfp_fpcr;
 		regs->fp_sr = pcb->pcb_fpustate.vfp_fpsr;
 	} else
 #endif
 		memset(regs, 0, sizeof(*regs));
 	return (0);
 }
 
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
 #ifdef VFP
 	struct pcb *pcb;
 
 	pcb = td->td_pcb;
 	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
 	    ("Called set_fpregs while the kernel is using the VFP"));
 	memcpy(pcb->pcb_fpustate.vfp_regs, regs->fp_q, sizeof(regs->fp_q));
 	pcb->pcb_fpustate.vfp_fpcr = regs->fp_cr;
 	pcb->pcb_fpustate.vfp_fpsr = regs->fp_sr;
 #endif
 	return (0);
 }
 
 int
 fill_dbregs(struct thread *td, struct dbreg *regs)
 {
 	struct debug_monitor_state *monitor;
 	int count, i;
 	uint8_t debug_ver, nbkpts;
 
 	memset(regs, 0, sizeof(*regs));
 
 	extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_DebugVer_SHIFT,
 	    &debug_ver);
 	extract_user_id_field(ID_AA64DFR0_EL1, ID_AA64DFR0_BRPs_SHIFT,
 	    &nbkpts);
 
 	/*
 	 * The BRPs field contains the number of breakpoints - 1. Armv8-A
 	 * allows the hardware to provide 2-16 breakpoints so this won't
 	 * overflow an 8 bit value.
 	 */
 	count = nbkpts + 1;
 
 	regs->db_info = debug_ver;
 	regs->db_info <<= 8;
 	regs->db_info |= count;
 
 	monitor = &td->td_pcb->pcb_dbg_regs;
 	if ((monitor->dbg_flags & DBGMON_ENABLED) != 0) {
 		for (i = 0; i < count; i++) {
 			regs->db_regs[i].dbr_addr = monitor->dbg_bvr[i];
 			regs->db_regs[i].dbr_ctrl = monitor->dbg_bcr[i];
 		}
 	}
 
 	return (0);
 }
 
 int
 set_dbregs(struct thread *td, struct dbreg *regs)
 {
 	struct debug_monitor_state *monitor;
 	int count;
 	int i;
 
 	monitor = &td->td_pcb->pcb_dbg_regs;
 	count = 0;
 	monitor->dbg_enable_count = 0;
 	for (i = 0; i < DBG_BRP_MAX; i++) {
 		/* TODO: Check these values */
 		monitor->dbg_bvr[i] = regs->db_regs[i].dbr_addr;
 		monitor->dbg_bcr[i] = regs->db_regs[i].dbr_ctrl;
 		if ((monitor->dbg_bcr[i] & 1) != 0)
 			monitor->dbg_enable_count++;
 	}
 	if (monitor->dbg_enable_count > 0)
 		monitor->dbg_flags |= DBGMON_ENABLED;
 
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD32
 int
 fill_regs32(struct thread *td, struct reg32 *regs)
 {
 	int i;
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 	for (i = 0; i < 13; i++)
 		regs->r[i] = tf->tf_x[i];
 	/* For arm32, SP is r13 and LR is r14 */
 	regs->r_sp = tf->tf_x[13];
 	regs->r_lr = tf->tf_x[14];
 	regs->r_pc = tf->tf_elr;
 	regs->r_cpsr = tf->tf_spsr;
 
 	return (0);
 }
 
 int
 set_regs32(struct thread *td, struct reg32 *regs)
 {
 	int i;
 	struct trapframe *tf;
 
 	tf = td->td_frame;
 	for (i = 0; i < 13; i++)
 		tf->tf_x[i] = regs->r[i];
 	/* For arm 32, SP is r13 an LR is r14 */
 	tf->tf_x[13] = regs->r_sp;
 	tf->tf_x[14] = regs->r_lr;
 	tf->tf_elr = regs->r_pc;
 	tf->tf_spsr = regs->r_cpsr;
 
 
 	return (0);
 }
 
 int
 fill_fpregs32(struct thread *td, struct fpreg32 *regs)
 {
 
 	printf("ARM64TODO: fill_fpregs32");
 	return (EDOOFUS);
 }
 
 int
 set_fpregs32(struct thread *td, struct fpreg32 *regs)
 {
 
 	printf("ARM64TODO: set_fpregs32");
 	return (EDOOFUS);
 }
 
 int
 fill_dbregs32(struct thread *td, struct dbreg32 *regs)
 {
 
 	printf("ARM64TODO: fill_dbregs32");
 	return (EDOOFUS);
 }
 
 int
 set_dbregs32(struct thread *td, struct dbreg32 *regs)
 {
 
 	printf("ARM64TODO: set_dbregs32");
 	return (EDOOFUS);
 }
 #endif
 
 int
 ptrace_set_pc(struct thread *td, u_long addr)
 {
 
 	td->td_frame->tf_elr = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(struct thread *td)
 {
 
 	td->td_frame->tf_spsr |= PSR_SS;
 	td->td_pcb->pcb_flags |= PCB_SINGLE_STEP;
 	return (0);
 }
 
 int
 ptrace_clear_single_step(struct thread *td)
 {
 
 	td->td_frame->tf_spsr &= ~PSR_SS;
 	td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP;
 	return (0);
 }
 
 void
 exec_setregs(struct thread *td, struct image_params *imgp, uintptr_t stack)
 {
 	struct trapframe *tf = td->td_frame;
 
 	memset(tf, 0, sizeof(struct trapframe));
 
 	tf->tf_x[0] = stack;
 	tf->tf_sp = STACKALIGN(stack);
 	tf->tf_lr = imgp->entry_addr;
 	tf->tf_elr = imgp->entry_addr;
 }
 
 /* Sanity check these are the same size, they will be memcpy'd to and fro */
 CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
     sizeof((struct gpregs *)0)->gp_x);
 CTASSERT(sizeof(((struct trapframe *)0)->tf_x) ==
     sizeof((struct reg *)0)->x);
 
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *tf = td->td_frame;
 
 	if (clear_ret & GET_MC_CLEAR_RET) {
 		mcp->mc_gpregs.gp_x[0] = 0;
 		mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C;
 	} else {
 		mcp->mc_gpregs.gp_x[0] = tf->tf_x[0];
 		mcp->mc_gpregs.gp_spsr = tf->tf_spsr;
 	}
 
 	memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1],
 	    sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1));
 
 	mcp->mc_gpregs.gp_sp = tf->tf_sp;
 	mcp->mc_gpregs.gp_lr = tf->tf_lr;
 	mcp->mc_gpregs.gp_elr = tf->tf_elr;
 	get_fpcontext(td, mcp);
 
 	return (0);
 }
 
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *tf = td->td_frame;
 	uint32_t spsr;
 
 	spsr = mcp->mc_gpregs.gp_spsr;
 	if ((spsr & PSR_M_MASK) != PSR_M_EL0t ||
 	    (spsr & PSR_AARCH32) != 0 ||
 	    (spsr & PSR_DAIF) != (td->td_frame->tf_spsr & PSR_DAIF))
 		return (EINVAL); 
 
 	memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x));
 
 	tf->tf_sp = mcp->mc_gpregs.gp_sp;
 	tf->tf_lr = mcp->mc_gpregs.gp_lr;
 	tf->tf_elr = mcp->mc_gpregs.gp_elr;
 	tf->tf_spsr = mcp->mc_gpregs.gp_spsr;
 	set_fpcontext(td, mcp);
 
 	return (0);
 }
 
 static void
 get_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef VFP
 	struct pcb *curpcb;
 
 	critical_enter();
 
 	curpcb = curthread->td_pcb;
 
 	if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
 		/*
 		 * If we have just been running VFP instructions we will
 		 * need to save the state to memcpy it below.
 		 */
 		vfp_save_state(td, curpcb);
 
 		KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate,
 		    ("Called get_fpcontext while the kernel is using the VFP"));
 		KASSERT((curpcb->pcb_fpflags & ~PCB_FP_USERMASK) == 0,
 		    ("Non-userspace FPU flags set in get_fpcontext"));
 		memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_fpustate.vfp_regs,
 		    sizeof(mcp->mc_fpregs));
 		mcp->mc_fpregs.fp_cr = curpcb->pcb_fpustate.vfp_fpcr;
 		mcp->mc_fpregs.fp_sr = curpcb->pcb_fpustate.vfp_fpsr;
 		mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags;
 		mcp->mc_flags |= _MC_FP_VALID;
 	}
 
 	critical_exit();
 #endif
 }
 
 static void
 set_fpcontext(struct thread *td, mcontext_t *mcp)
 {
 #ifdef VFP
 	struct pcb *curpcb;
 
 	critical_enter();
 
 	if ((mcp->mc_flags & _MC_FP_VALID) != 0) {
 		curpcb = curthread->td_pcb;
 
 		/*
 		 * Discard any vfp state for the current thread, we
 		 * are about to override it.
 		 */
 		vfp_discard(td);
 
 		KASSERT(curpcb->pcb_fpusaved == &curpcb->pcb_fpustate,
 		    ("Called set_fpcontext while the kernel is using the VFP"));
 		memcpy(curpcb->pcb_fpustate.vfp_regs, mcp->mc_fpregs.fp_q,
 		    sizeof(mcp->mc_fpregs));
 		curpcb->pcb_fpustate.vfp_fpcr = mcp->mc_fpregs.fp_cr;
 		curpcb->pcb_fpustate.vfp_fpsr = mcp->mc_fpregs.fp_sr;
 		curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags & PCB_FP_USERMASK;
 	}
 
 	critical_exit();
 #endif
 }
 
 void
 cpu_idle(int busy)
 {
 
 	spinlock_enter();
 	if (!busy)
 		cpu_idleclock();
 	if (!sched_runnable())
 		__asm __volatile(
 		    "dsb sy \n"
 		    "wfi    \n");
 	if (!busy)
 		cpu_activeclock();
 	spinlock_exit();
 }
 
 void
 cpu_halt(void)
 {
 
 	/* We should have shutdown by now, if not enter a low power sleep */
 	intr_disable();
 	while (1) {
 		__asm __volatile("wfi");
 	}
 }
 
 /*
  * Flush the D-cache for non-DMA I/O so that the I-cache can
  * be made coherent later.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 
 	/* ARM64TODO TBD */
 }
 
 /* Get current clock frequency for the given CPU ID. */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 	struct pcpu *pc;
 
 	pc = pcpu_find(cpu_id);
 	if (pc == NULL || rate == NULL)
 		return (EINVAL);
 
 	if (pc->pc_clock == 0)
 		return (EOPNOTSUPP);
 
 	*rate = pc->pc_clock;
 	return (0);
 }
 
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 
 	pcpu->pc_acpi_id = 0xffffffff;
 }
 
 void
 spinlock_enter(void)
 {
 	struct thread *td;
 	register_t daif;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count == 0) {
 		daif = intr_disable();
 		td->td_md.md_spinlock_count = 1;
 		td->td_md.md_saved_daif = daif;
 		critical_enter();
 	} else
 		td->td_md.md_spinlock_count++;
 }
 
 void
 spinlock_exit(void)
 {
 	struct thread *td;
 	register_t daif;
 
 	td = curthread;
 	daif = td->td_md.md_saved_daif;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count == 0) {
 		critical_exit();
 		intr_restore(daif);
 	}
 }
 
 #ifndef	_SYS_SYSPROTO_H_
 struct sigreturn_args {
 	ucontext_t *ucp;
 };
 #endif
 
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	int error;
 
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 
 	error = set_mcontext(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * Construct a PCB from a trapframe. This is called from kdb_trap() where
  * we want to start a backtrace from the function that caused us to enter
  * the debugger. We have the context in the trapframe, but base the trace
  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
  * enough for a backtrace.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 	int i;
 
 	for (i = 0; i < PCB_LR; i++)
 		pcb->pcb_x[i] = tf->tf_x[i];
 
 	pcb->pcb_x[PCB_LR] = tf->tf_lr;
 	pcb->pcb_pc = tf->tf_elr;
 	pcb->pcb_sp = tf->tf_sp;
 }
 
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	struct sysentvec *sysent;
 	int onstack, sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else {
 		fp = (struct sigframe *)td->td_frame->tf_sp;
 	}
 
 	/* Make room, keeping the stack aligned */
 	fp--;
 	fp = (struct sigframe *)STACKALIGN(fp);
 
 	/* Fill in the frame to copy out */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	tf->tf_x[0]= sig;
 	tf->tf_x[1] = (register_t)&fp->sf_si;
 	tf->tf_x[2] = (register_t)&fp->sf_uc;
 
 	tf->tf_elr = (register_t)catcher;
 	tf->tf_sp = (register_t)fp;
 	sysent = p->p_sysent;
 	if (sysent->sv_sigcode_base != 0)
 		tf->tf_lr = (register_t)sysent->sv_sigcode_base;
 	else
 		tf->tf_lr = (register_t)(sysent->sv_psstrings -
 		    *(sysent->sv_szsigcode));
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr,
 	    tf->tf_sp);
 
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 static void
 init_proc0(vm_offset_t kstack)
 {
 	struct pcpu *pcpup = &__pcpu[0];
 
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
 	thread0.td_kstack_pages = KSTACK_PAGES;
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_fpflags = 0;
 	thread0.td_pcb->pcb_fpusaved = &thread0.td_pcb->pcb_fpustate;
 	thread0.td_pcb->pcb_vfpcpu = UINT_MAX;
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = thread0.td_pcb;
 }
 
 typedef struct {
 	uint32_t type;
 	uint64_t phys_start;
 	uint64_t virt_start;
 	uint64_t num_pages;
 	uint64_t attr;
 } EFI_MEMORY_DESCRIPTOR;
 
 typedef void (*efi_map_entry_cb)(struct efi_md *);
 
 static void
 foreach_efi_map_entry(struct efi_map_header *efihdr, efi_map_entry_cb cb)
 {
 	struct efi_md *map, *p;
 	size_t efisz;
 	int ndesc, i;
 
 	/*
 	 * Memory map data provided by UEFI via the GetMemoryMap
 	 * Boot Services API.
 	 */
 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
 	map = (struct efi_md *)((uint8_t *)efihdr + efisz); 
 
 	if (efihdr->descriptor_size == 0)
 		return;
 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
 
 	for (i = 0, p = map; i < ndesc; i++,
 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
 		cb(p);
 	}
 }
 
 static void
 exclude_efi_map_entry(struct efi_md *p)
 {
 
 	switch (p->md_type) {
 	case EFI_MD_TYPE_CODE:
 	case EFI_MD_TYPE_DATA:
 	case EFI_MD_TYPE_BS_CODE:
 	case EFI_MD_TYPE_BS_DATA:
 	case EFI_MD_TYPE_FREE:
 		/*
 		 * We're allowed to use any entry with these types.
 		 */
 		break;
 	default:
-		arm_physmem_exclude_region(p->md_phys, p->md_pages * PAGE_SIZE,
+		physmem_exclude_region(p->md_phys, p->md_pages * PAGE_SIZE,
 		    EXFLAG_NOALLOC);
 	}
 }
 
 static void
 exclude_efi_map_entries(struct efi_map_header *efihdr)
 {
 
 	foreach_efi_map_entry(efihdr, exclude_efi_map_entry);
 }
 
 static void
 add_efi_map_entry(struct efi_md *p)
 {
 
 	switch (p->md_type) {
 	case EFI_MD_TYPE_RT_DATA:
 		/*
 		 * Runtime data will be excluded after the DMAP
 		 * region is created to stop it from being added
 		 * to phys_avail.
 		 */
 	case EFI_MD_TYPE_CODE:
 	case EFI_MD_TYPE_DATA:
 	case EFI_MD_TYPE_BS_CODE:
 	case EFI_MD_TYPE_BS_DATA:
 	case EFI_MD_TYPE_FREE:
 		/*
 		 * We're allowed to use any entry with these types.
 		 */
-		arm_physmem_hardware_region(p->md_phys,
+		physmem_hardware_region(p->md_phys,
 		    p->md_pages * PAGE_SIZE);
 		break;
 	}
 }
 
 static void
 add_efi_map_entries(struct efi_map_header *efihdr)
 {
 
 	foreach_efi_map_entry(efihdr, add_efi_map_entry);
 }
 
 static void
 print_efi_map_entry(struct efi_md *p)
 {
 	const char *type;
 	static const char *types[] = {
 		"Reserved",
 		"LoaderCode",
 		"LoaderData",
 		"BootServicesCode",
 		"BootServicesData",
 		"RuntimeServicesCode",
 		"RuntimeServicesData",
 		"ConventionalMemory",
 		"UnusableMemory",
 		"ACPIReclaimMemory",
 		"ACPIMemoryNVS",
 		"MemoryMappedIO",
 		"MemoryMappedIOPortSpace",
 		"PalCode",
 		"PersistentMemory"
 	};
 
 	if (p->md_type < nitems(types))
 		type = types[p->md_type];
 	else
 		type = "<INVALID>";
 	printf("%23s %012lx %12p %08lx ", type, p->md_phys,
 	    p->md_virt, p->md_pages);
 	if (p->md_attr & EFI_MD_ATTR_UC)
 		printf("UC ");
 	if (p->md_attr & EFI_MD_ATTR_WC)
 		printf("WC ");
 	if (p->md_attr & EFI_MD_ATTR_WT)
 		printf("WT ");
 	if (p->md_attr & EFI_MD_ATTR_WB)
 		printf("WB ");
 	if (p->md_attr & EFI_MD_ATTR_UCE)
 		printf("UCE ");
 	if (p->md_attr & EFI_MD_ATTR_WP)
 		printf("WP ");
 	if (p->md_attr & EFI_MD_ATTR_RP)
 		printf("RP ");
 	if (p->md_attr & EFI_MD_ATTR_XP)
 		printf("XP ");
 	if (p->md_attr & EFI_MD_ATTR_NV)
 		printf("NV ");
 	if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE)
 		printf("MORE_RELIABLE ");
 	if (p->md_attr & EFI_MD_ATTR_RO)
 		printf("RO ");
 	if (p->md_attr & EFI_MD_ATTR_RT)
 		printf("RUNTIME");
 	printf("\n");
 }
 
 static void
 print_efi_map_entries(struct efi_map_header *efihdr)
 {
 
 	printf("%23s %12s %12s %8s %4s\n",
 	    "Type", "Physical", "Virtual", "#Pages", "Attr");
 	foreach_efi_map_entry(efihdr, print_efi_map_entry);
 }
 
 #ifdef FDT
 static void
 try_load_dtb(caddr_t kmdp)
 {
 	vm_offset_t dtbp;
 
 	dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == 0)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (dtbp == (vm_offset_t)NULL) {
 		printf("ERROR loading DTB\n");
 		return;
 	}
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 	parse_fdt_bootargs();
 }
 #endif
 
 static bool
 bus_probe(void)
 {
 	bool has_acpi, has_fdt;
 	char *order, *env;
 
 	has_acpi = has_fdt = false;
 
 #ifdef FDT
 	has_fdt = (OF_peer(0) != 0);
 #endif
 #ifdef DEV_ACPI
 	has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0);
 #endif
 
 	env = kern_getenv("kern.cfg.order");
 	if (env != NULL) {
 		order = env;
 		while (order != NULL) {
 			if (has_acpi &&
 			    strncmp(order, "acpi", 4) == 0 &&
 			    (order[4] == ',' || order[4] == '\0')) {
 				arm64_bus_method = ARM64_BUS_ACPI;
 				break;
 			}
 			if (has_fdt &&
 			    strncmp(order, "fdt", 3) == 0 &&
 			    (order[3] == ',' || order[3] == '\0')) {
 				arm64_bus_method = ARM64_BUS_FDT;
 				break;
 			}
 			order = strchr(order, ',');
 		}
 		freeenv(env);
 
 		/* If we set the bus method it is valid */
 		if (arm64_bus_method != ARM64_BUS_NONE)
 			return (true);
 	}
 	/* If no order or an invalid order was set use the default */
 	if (arm64_bus_method == ARM64_BUS_NONE) {
 		if (has_fdt)
 			arm64_bus_method = ARM64_BUS_FDT;
 		else if (has_acpi)
 			arm64_bus_method = ARM64_BUS_ACPI;
 	}
 
 	/*
 	 * If no option was set the default is valid, otherwise we are
 	 * setting one to get cninit() working, then calling panic to tell
 	 * the user about the invalid bus setup.
 	 */
 	return (env == NULL);
 }
 
 static void
 cache_setup(void)
 {
 	int dczva_line_shift;
 	uint32_t dczid_el0;
 
 	identify_cache(READ_SPECIALREG(ctr_el0));
 
 	dczid_el0 = READ_SPECIALREG(dczid_el0);
 
 	/* Check if dc zva is not prohibited */
 	if (dczid_el0 & DCZID_DZP)
 		dczva_line_size = 0;
 	else {
 		/* Same as with above calculations */
 		dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
 		dczva_line_size = sizeof(int) << dczva_line_shift;
 
 		/* Change pagezero function */
 		pagezero = pagezero_cache;
 	}
 }
 
 void
 initarm(struct arm64_bootparams *abp)
 {
 	struct efi_fb *efifb;
 	struct efi_map_header *efihdr;
 	struct pcpu *pcpup;
 	char *env;
 #ifdef FDT
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	int mem_regions_sz;
 #endif
 	vm_offset_t lastaddr;
 	caddr_t kmdp;
 	bool valid;
 
 	boot_el = abp->boot_el;
 
 	/* Parse loader or FDT boot parametes. Determine last used address. */
 	lastaddr = parse_boot_param(abp);
 
 	/* Find the kernel address */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		kmdp = preload_search_by_type("elf64 kernel");
 
 	link_elf_ireloc(kmdp);
 	try_load_dtb(kmdp);
 
 	efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t);
 
 	/* Load the physical memory ranges */
 	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
 	if (efihdr != NULL)
 		add_efi_map_entries(efihdr);
 #ifdef FDT
 	else {
 		/* Grab physical memory regions information from device tree. */
 		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
 		    NULL) != 0)
 			panic("Cannot get physical memory regions");
-		arm_physmem_hardware_regions(mem_regions, mem_regions_sz);
+		physmem_hardware_regions(mem_regions, mem_regions_sz);
 	}
 	if (fdt_get_reserved_mem(mem_regions, &mem_regions_sz) == 0)
-		arm_physmem_exclude_regions(mem_regions, mem_regions_sz,
+		physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 #endif
 
 	/* Exclude the EFI framebuffer from our view of physical memory. */
 	efifb = (struct efi_fb *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_EFI_FB);
 	if (efifb != NULL)
-		arm_physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
+		physmem_exclude_region(efifb->fb_addr, efifb->fb_size,
 		    EXFLAG_NOALLOC);
 
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
 	pcpup = &__pcpu[0];
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 
 	/*
 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
 	 * loaded when entering the kernel from userland.
 	 */
 	__asm __volatile(
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	cache_setup();
 	pan_setup();
 
 	/* Bootstrap enough of pmap  to enter the kernel proper */
 	pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt,
 	    KERNBASE - abp->kern_delta, lastaddr - KERNBASE);
 	/* Exclude entries neexed in teh DMAP region, but not phys_avail */
 	if (efihdr != NULL)
 		exclude_efi_map_entries(efihdr);
-	arm_physmem_init_kernel_globals();
+	physmem_init_kernel_globals();
 
 	devmap_bootstrap(0, NULL);
 
 	valid = bus_probe();
 
 	cninit();
 
 	if (!valid)
 		panic("Invalid bus configuration: %s",
 		    kern_getenv("kern.cfg.order"));
 
 	init_proc0(abp->kern_stack);
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 	init_param2(physmem);
 
 	dbg_init();
 	kdb_init();
 	pan_enable();
 
 	kcsan_cpu_init(0);
 
 	env = kern_getenv("kernelname");
 	if (env != NULL)
 		strlcpy(kernelname, env, sizeof(kernelname));
 
 	if (boothowto & RB_VERBOSE) {
 		print_efi_map_entries(efihdr);
-		arm_physmem_print_tables();
+		physmem_print_tables();
 	}
 
 	early_boot = 0;
 }
 
 void
 dbg_init(void)
 {
 
 	/* Clear OS lock */
 	WRITE_SPECIALREG(oslar_el1, 0);
 
 	/* This permits DDB to use debug registers for watchpoints. */
 	dbg_monitor_init();
 
 	/* TODO: Eventually will need to initialize debug registers here. */
 }
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 DB_SHOW_COMMAND(specialregs, db_show_spregs)
 {
 #define	PRINT_REG(reg)	\
     db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg))
 
 	PRINT_REG(actlr_el1);
 	PRINT_REG(afsr0_el1);
 	PRINT_REG(afsr1_el1);
 	PRINT_REG(aidr_el1);
 	PRINT_REG(amair_el1);
 	PRINT_REG(ccsidr_el1);
 	PRINT_REG(clidr_el1);
 	PRINT_REG(contextidr_el1);
 	PRINT_REG(cpacr_el1);
 	PRINT_REG(csselr_el1);
 	PRINT_REG(ctr_el0);
 	PRINT_REG(currentel);
 	PRINT_REG(daif);
 	PRINT_REG(dczid_el0);
 	PRINT_REG(elr_el1);
 	PRINT_REG(esr_el1);
 	PRINT_REG(far_el1);
 #if 0
 	/* ARM64TODO: Enable VFP before reading floating-point registers */
 	PRINT_REG(fpcr);
 	PRINT_REG(fpsr);
 #endif
 	PRINT_REG(id_aa64afr0_el1);
 	PRINT_REG(id_aa64afr1_el1);
 	PRINT_REG(id_aa64dfr0_el1);
 	PRINT_REG(id_aa64dfr1_el1);
 	PRINT_REG(id_aa64isar0_el1);
 	PRINT_REG(id_aa64isar1_el1);
 	PRINT_REG(id_aa64pfr0_el1);
 	PRINT_REG(id_aa64pfr1_el1);
 	PRINT_REG(id_afr0_el1);
 	PRINT_REG(id_dfr0_el1);
 	PRINT_REG(id_isar0_el1);
 	PRINT_REG(id_isar1_el1);
 	PRINT_REG(id_isar2_el1);
 	PRINT_REG(id_isar3_el1);
 	PRINT_REG(id_isar4_el1);
 	PRINT_REG(id_isar5_el1);
 	PRINT_REG(id_mmfr0_el1);
 	PRINT_REG(id_mmfr1_el1);
 	PRINT_REG(id_mmfr2_el1);
 	PRINT_REG(id_mmfr3_el1);
 #if 0
 	/* Missing from llvm */
 	PRINT_REG(id_mmfr4_el1);
 #endif
 	PRINT_REG(id_pfr0_el1);
 	PRINT_REG(id_pfr1_el1);
 	PRINT_REG(isr_el1);
 	PRINT_REG(mair_el1);
 	PRINT_REG(midr_el1);
 	PRINT_REG(mpidr_el1);
 	PRINT_REG(mvfr0_el1);
 	PRINT_REG(mvfr1_el1);
 	PRINT_REG(mvfr2_el1);
 	PRINT_REG(revidr_el1);
 	PRINT_REG(sctlr_el1);
 	PRINT_REG(sp_el0);
 	PRINT_REG(spsel);
 	PRINT_REG(spsr_el1);
 	PRINT_REG(tcr_el1);
 	PRINT_REG(tpidr_el0);
 	PRINT_REG(tpidr_el1);
 	PRINT_REG(tpidrro_el0);
 	PRINT_REG(ttbr0_el1);
 	PRINT_REG(ttbr1_el1);
 	PRINT_REG(vbar_el1);
 #undef PRINT_REG
 }
 
 DB_SHOW_COMMAND(vtop, db_show_vtop)
 {
 	uint64_t phys;
 
 	if (have_addr) {
 		phys = arm64_address_translate_s1e1r(addr);
 		db_printf("EL1 physical address reg (read):  0x%016lx\n", phys);
 		phys = arm64_address_translate_s1e1w(addr);
 		db_printf("EL1 physical address reg (write): 0x%016lx\n", phys);
 		phys = arm64_address_translate_s1e0r(addr);
 		db_printf("EL0 physical address reg (read):  0x%016lx\n", phys);
 		phys = arm64_address_translate_s1e0w(addr);
 		db_printf("EL0 physical address reg (write): 0x%016lx\n", phys);
 	} else
 		db_printf("show vtop <virt_addr>\n");
 }
 #endif
Index: head/sys/arm64/arm64/pmap.c
===================================================================
--- head/sys/arm64/arm64/pmap.c	(revision 360081)
+++ head/sys/arm64/arm64/pmap.c	(revision 360082)
@@ -1,6488 +1,6487 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
  * Copyright (c) 2014-2016 The FreeBSD Foundation
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * This software was developed by Andrew Turner under sponsorship from
  * the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
  * Safeport Network Services, and Network Associates Laboratories, the
  * Security Research Division of Network Associates, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
  * CHATS research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Manages physical address maps.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/bitstring.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
+#include <sys/physmem.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/vmem.h>
 #include <sys/vmmeter.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/_unrhdr.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
 #include <vm/uma.h>
 
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
-#include <arm/include/physmem.h>
-
 #define	PMAP_ASSERT_STAGE1(pmap)	MPASS((pmap)->pm_stage == PM_STAGE1)
 
 #define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 
 #define	NUL0E		L0_ENTRIES
 #define	NUL1E		(NUL0E * NL1PG)
 #define	NUL2E		(NUL1E * NL2PG)
 
 #if !defined(DIAGNOSTIC)
 #ifdef __GNUC_GNU_INLINE__
 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
 #else
 #define PMAP_INLINE	extern inline
 #endif
 #else
 #define PMAP_INLINE
 #endif
 
 #ifdef PV_STATS
 #define PV_STAT(x)	do { x ; } while (0)
 #else
 #define PV_STAT(x)	do { } while (0)
 #endif
 
 #define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
 #define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])
 
 #define	NPV_LIST_LOCKS	MAXCPU
 
 #define	PHYS_TO_PV_LIST_LOCK(pa)	\
 			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
 
 #define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
 	struct rwlock **_lockp = (lockp);		\
 	struct rwlock *_new_lock;			\
 							\
 	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
 	if (_new_lock != *_lockp) {			\
 		if (*_lockp != NULL)			\
 			rw_wunlock(*_lockp);		\
 		*_lockp = _new_lock;			\
 		rw_wlock(*_lockp);			\
 	}						\
 } while (0)
 
 #define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
 			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
 
 #define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
 	struct rwlock **_lockp = (lockp);		\
 							\
 	if (*_lockp != NULL) {				\
 		rw_wunlock(*_lockp);			\
 		*_lockp = NULL;				\
 	}						\
 } while (0)
 
 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
 
 /*
  * The presence of this flag indicates that the mapping is writeable.
  * If the ATTR_S1_AP_RO bit is also set, then the mapping is clean, otherwise
  * it is dirty.  This flag may only be set on managed mappings.
  *
  * The DBM bit is reserved on ARMv8.0 but it seems we can safely treat it
  * as a software managed bit.
  */
 #define	ATTR_SW_DBM	ATTR_DBM
 
 struct pmap kernel_pmap_store;
 
 /* Used for mapping ACPI memory before VM is initialized */
 #define	PMAP_PREINIT_MAPPING_COUNT	32
 #define	PMAP_PREINIT_MAPPING_SIZE	(PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
 static vm_offset_t preinit_map_va;	/* Start VA of pre-init mapping space */
 static int vm_initialized = 0;		/* No need to use pre-init maps when set */
 
 /*
  * Reserve a few L2 blocks starting from 'preinit_map_va' pointer.
  * Always map entire L2 block for simplicity.
  * VA of L2 block = preinit_map_va + i * L2_SIZE
  */
 static struct pmap_preinit_mapping {
 	vm_paddr_t	pa;
 	vm_offset_t	va;
 	vm_size_t	size;
 } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
 
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 vm_offset_t kernel_vm_end = 0;
 
 /*
  * Data for the pv entry allocation mechanism.
  */
 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static struct mtx pv_chunks_mutex;
 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
 
 vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
 vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
 vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */
 
 /* This code assumes all L1 DMAP entries will be used */
 CTASSERT((DMAP_MIN_ADDRESS  & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
 CTASSERT((DMAP_MAX_ADDRESS  & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
 
 #define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
 extern pt_entry_t pagetable_dmap[];
 
 #define	PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))
 static vm_paddr_t physmap[PHYSMAP_SIZE];
 static u_int physmap_idx;
 
 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "VM/pmap parameters");
 
 /*
  * This ASID allocator uses a bit vector ("asid_set") to remember which ASIDs
  * that it has currently allocated to a pmap, a cursor ("asid_next") to
  * optimize its search for a free ASID in the bit vector, and an epoch number
  * ("asid_epoch") to indicate when it has reclaimed all previously allocated
  * ASIDs that are not currently active on a processor.
  *
  * The current epoch number is always in the range [0, INT_MAX).  Negative
  * numbers and INT_MAX are reserved for special cases that are described
  * below.
  */
 struct asid_set {
 	int asid_bits;
 	bitstr_t *asid_set;
 	int asid_set_size;
 	int asid_next;
 	int asid_epoch;
 	struct mtx asid_set_mutex;
 };
 
 static struct asid_set asids;
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, asid, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "ASID allocator");
 SYSCTL_INT(_vm_pmap_asid, OID_AUTO, bits, CTLFLAG_RD, &asids.asid_bits, 0,
     "The number of bits in an ASID");
 SYSCTL_INT(_vm_pmap_asid, OID_AUTO, next, CTLFLAG_RD, &asids.asid_next, 0,
     "The last allocated ASID plus one");
 SYSCTL_INT(_vm_pmap_asid, OID_AUTO, epoch, CTLFLAG_RD, &asids.asid_epoch, 0,
     "The current epoch number");
 
 /*
  * A pmap's cookie encodes an ASID and epoch number.  Cookies for reserved
  * ASIDs have a negative epoch number, specifically, INT_MIN.  Cookies for
  * dynamically allocated ASIDs have a non-negative epoch number.
  *
  * An invalid ASID is represented by -1.
  *
  * There are two special-case cookie values: (1) COOKIE_FROM(-1, INT_MIN),
  * which indicates that an ASID should never be allocated to the pmap, and
  * (2) COOKIE_FROM(-1, INT_MAX), which indicates that an ASID should be
  * allocated when the pmap is next activated.
  */
 #define	COOKIE_FROM(asid, epoch)	((long)((u_int)(asid) |	\
 					    ((u_long)(epoch) << 32)))
 #define	COOKIE_TO_ASID(cookie)		((int)(cookie))
 #define	COOKIE_TO_EPOCH(cookie)		((int)((u_long)(cookie) >> 32))
 
 static int superpages_enabled = 1;
 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
     "Are large page mappings enabled?");
 
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
 #define	PMAP_ENTER_NORECLAIM	0x1000000	/* Don't reclaim PV entries. */
 #define	PMAP_ENTER_NOREPLACE	0x2000000	/* Don't replace mappings. */
 
 static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
 		    vm_offset_t va);
 
 static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
 static bool pmap_activate_int(pmap_t pmap);
 static void pmap_alloc_asid(pmap_t pmap);
 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
     vm_offset_t va, struct rwlock **lockp);
 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
     u_int flags, vm_page_t m, struct rwlock **lockp);
 static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
     pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
 static void pmap_reset_asid_set(pmap_t pmap);
 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
     vm_page_t m, struct rwlock **lockp);
 
 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
 		struct rwlock **lockp);
 
 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct spglist *free);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
 /*
  * These load the old table data and store the new value.
  * They need to be atomic as the System MMU may write to the table at
  * the same time as the CPU.
  */
 #define	pmap_clear(table)		atomic_store_64(table, 0)
 #define	pmap_clear_bits(table, bits)	atomic_clear_64(table, bits)
 #define	pmap_load(table)		(*table)
 #define	pmap_load_clear(table)		atomic_swap_64(table, 0)
 #define	pmap_load_store(table, entry)	atomic_swap_64(table, entry)
 #define	pmap_set_bits(table, bits)	atomic_set_64(table, bits)
 #define	pmap_store(table, entry)	atomic_store_64(table, entry)
 
 /********************/
 /* Inline functions */
 /********************/
 
 static __inline void
 pagecopy(void *s, void *d)
 {
 
 	memcpy(d, s, PAGE_SIZE);
 }
 
 static __inline pd_entry_t *
 pmap_l0(pmap_t pmap, vm_offset_t va)
 {
 
 	return (&pmap->pm_l0[pmap_l0_index(va)]);
 }
 
 static __inline pd_entry_t *
 pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
 {
 	pd_entry_t *l1;
 
 	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
 	return (&l1[pmap_l1_index(va)]);
 }
 
 static __inline pd_entry_t *
 pmap_l1(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *l0;
 
 	l0 = pmap_l0(pmap, va);
 	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
 		return (NULL);
 
 	return (pmap_l0_to_l1(l0, va));
 }
 
 static __inline pd_entry_t *
 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
 {
 	pd_entry_t *l2;
 
 	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
 	return (&l2[pmap_l2_index(va)]);
 }
 
 static __inline pd_entry_t *
 pmap_l2(pmap_t pmap, vm_offset_t va)
 {
 	pd_entry_t *l1;
 
 	l1 = pmap_l1(pmap, va);
 	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
 		return (NULL);
 
 	return (pmap_l1_to_l2(l1, va));
 }
 
 static __inline pt_entry_t *
 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
 {
 	pt_entry_t *l3;
 
 	l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
 	return (&l3[pmap_l3_index(va)]);
 }
 
 /*
  * Returns the lowest valid pde for a given virtual address.
  * The next level may or may not point to a valid page or block.
  */
 static __inline pd_entry_t *
 pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
 {
 	pd_entry_t *l0, *l1, *l2, desc;
 
 	l0 = pmap_l0(pmap, va);
 	desc = pmap_load(l0) & ATTR_DESCR_MASK;
 	if (desc != L0_TABLE) {
 		*level = -1;
 		return (NULL);
 	}
 
 	l1 = pmap_l0_to_l1(l0, va);
 	desc = pmap_load(l1) & ATTR_DESCR_MASK;
 	if (desc != L1_TABLE) {
 		*level = 0;
 		return (l0);
 	}
 
 	l2 = pmap_l1_to_l2(l1, va);
 	desc = pmap_load(l2) & ATTR_DESCR_MASK;
 	if (desc != L2_TABLE) {
 		*level = 1;
 		return (l1);
 	}
 
 	*level = 2;
 	return (l2);
 }
 
 /*
  * Returns the lowest valid pte block or table entry for a given virtual
  * address. If there are no valid entries return NULL and set the level to
  * the first invalid level.
  */
 static __inline pt_entry_t *
 pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
 {
 	pd_entry_t *l1, *l2, desc;
 	pt_entry_t *l3;
 
 	l1 = pmap_l1(pmap, va);
 	if (l1 == NULL) {
 		*level = 0;
 		return (NULL);
 	}
 	desc = pmap_load(l1) & ATTR_DESCR_MASK;
 	if (desc == L1_BLOCK) {
 		*level = 1;
 		return (l1);
 	}
 
 	if (desc != L1_TABLE) {
 		*level = 1;
 		return (NULL);
 	}
 
 	l2 = pmap_l1_to_l2(l1, va);
 	desc = pmap_load(l2) & ATTR_DESCR_MASK;
 	if (desc == L2_BLOCK) {
 		*level = 2;
 		return (l2);
 	}
 
 	if (desc != L2_TABLE) {
 		*level = 2;
 		return (NULL);
 	}
 
 	*level = 3;
 	l3 = pmap_l2_to_l3(l2, va);
 	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
 		return (NULL);
 
 	return (l3);
 }
 
 bool
 pmap_ps_enabled(pmap_t pmap __unused)
 {
 
 	return (superpages_enabled != 0);
 }
 
 bool
 pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
     pd_entry_t **l2, pt_entry_t **l3)
 {
 	pd_entry_t *l0p, *l1p, *l2p;
 
 	if (pmap->pm_l0 == NULL)
 		return (false);
 
 	l0p = pmap_l0(pmap, va);
 	*l0 = l0p;
 
 	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
 		return (false);
 
 	l1p = pmap_l0_to_l1(l0p, va);
 	*l1 = l1p;
 
 	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
 		*l2 = NULL;
 		*l3 = NULL;
 		return (true);
 	}
 
 	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
 		return (false);
 
 	l2p = pmap_l1_to_l2(l1p, va);
 	*l2 = l2p;
 
 	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
 		*l3 = NULL;
 		return (true);
 	}
 
 	if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE)
 		return (false);
 
 	*l3 = pmap_l2_to_l3(l2p, va);
 
 	return (true);
 }
 
 static __inline int
 pmap_l3_valid(pt_entry_t l3)
 {
 
 	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
 }
 
 
 CTASSERT(L1_BLOCK == L2_BLOCK);
 
 /*
  * Checks if the PTE is dirty.
  */
 static inline int
 pmap_pte_dirty(pmap_t pmap, pt_entry_t pte)
 {
 
 	PMAP_ASSERT_STAGE1(pmap);
 	KASSERT((pte & ATTR_SW_MANAGED) != 0, ("pte %#lx is unmanaged", pte));
 	KASSERT((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) != 0,
 	    ("pte %#lx is writeable and missing ATTR_SW_DBM", pte));
 
 	return ((pte & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
 	    (ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_SW_DBM));
 }
 
 static __inline void
 pmap_resident_count_inc(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	pmap->pm_stats.resident_count += count;
 }
 
 static __inline void
 pmap_resident_count_dec(pmap_t pmap, int count)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(pmap->pm_stats.resident_count >= count,
 	    ("pmap %p resident count underflow %ld %d", pmap,
 	    pmap->pm_stats.resident_count, count));
 	pmap->pm_stats.resident_count -= count;
 }
 
 static pt_entry_t *
 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
     u_int *l2_slot)
 {
 	pt_entry_t *l2;
 	pd_entry_t *l1;
 
 	l1 = (pd_entry_t *)l1pt;
 	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
 
 	/* Check locore has used a table L1 map */
 	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
 	   ("Invalid bootstrap L1 table"));
 	/* Find the address of the L2 table */
 	l2 = (pt_entry_t *)init_pt_va;
 	*l2_slot = pmap_l2_index(va);
 
 	return (l2);
 }
 
 static vm_paddr_t
 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
 {
 	u_int l1_slot, l2_slot;
 	pt_entry_t *l2;
 
 	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
 
 	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
 }
 
 static vm_offset_t
 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa,
     vm_offset_t freemempos)
 {
 	pt_entry_t *l2;
 	vm_offset_t va;
 	vm_paddr_t l2_pa, pa;
 	u_int l1_slot, l2_slot, prev_l1_slot;
 	int i;
 
 	dmap_phys_base = min_pa & ~L1_OFFSET;
 	dmap_phys_max = 0;
 	dmap_max_addr = 0;
 	l2 = NULL;
 	prev_l1_slot = -1;
 
 #define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
 	memset(pagetable_dmap, 0, PAGE_SIZE * DMAP_TABLES);
 
 	for (i = 0; i < (physmap_idx * 2); i += 2) {
 		pa = physmap[i] & ~L2_OFFSET;
 		va = pa - dmap_phys_base + DMAP_MIN_ADDRESS;
 
 		/* Create L2 mappings at the start of the region */
 		if ((pa & L1_OFFSET) != 0) {
 			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
 			if (l1_slot != prev_l1_slot) {
 				prev_l1_slot = l1_slot;
 				l2 = (pt_entry_t *)freemempos;
 				l2_pa = pmap_early_vtophys(kern_l1,
 				    (vm_offset_t)l2);
 				freemempos += PAGE_SIZE;
 
 				pmap_store(&pagetable_dmap[l1_slot],
 				    (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);
 
 				memset(l2, 0, PAGE_SIZE);
 			}
 			KASSERT(l2 != NULL,
 			    ("pmap_bootstrap_dmap: NULL l2 map"));
 			for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
 			    pa += L2_SIZE, va += L2_SIZE) {
 				/*
 				 * We are on a boundary, stop to
 				 * create a level 1 block
 				 */
 				if ((pa & L1_OFFSET) == 0)
 					break;
 
 				l2_slot = pmap_l2_index(va);
 				KASSERT(l2_slot != 0, ("..."));
 				pmap_store(&l2[l2_slot],
 				    (pa & ~L2_OFFSET) | ATTR_DEFAULT |
 				    ATTR_S1_XN |
 				    ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
 				    L2_BLOCK);
 			}
 			KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS),
 			    ("..."));
 		}
 
 		for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] &&
 		    (physmap[i + 1] - pa) >= L1_SIZE;
 		    pa += L1_SIZE, va += L1_SIZE) {
 			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
 			pmap_store(&pagetable_dmap[l1_slot],
 			    (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_S1_XN |
 			    ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L1_BLOCK);
 		}
 
 		/* Create L2 mappings at the end of the region */
 		if (pa < physmap[i + 1]) {
 			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
 			if (l1_slot != prev_l1_slot) {
 				prev_l1_slot = l1_slot;
 				l2 = (pt_entry_t *)freemempos;
 				l2_pa = pmap_early_vtophys(kern_l1,
 				    (vm_offset_t)l2);
 				freemempos += PAGE_SIZE;
 
 				pmap_store(&pagetable_dmap[l1_slot],
 				    (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);
 
 				memset(l2, 0, PAGE_SIZE);
 			}
 			KASSERT(l2 != NULL,
 			    ("pmap_bootstrap_dmap: NULL l2 map"));
 			for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
 			    pa += L2_SIZE, va += L2_SIZE) {
 				l2_slot = pmap_l2_index(va);
 				pmap_store(&l2[l2_slot],
 				    (pa & ~L2_OFFSET) | ATTR_DEFAULT |
 				    ATTR_S1_XN |
 				    ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) |
 				    L2_BLOCK);
 			}
 		}
 
 		if (pa > dmap_phys_max) {
 			dmap_phys_max = pa;
 			dmap_max_addr = va;
 		}
 	}
 
 	cpu_tlb_flushID();
 
 	return (freemempos);
 }
 
 static vm_offset_t
 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
 {
 	vm_offset_t l2pt;
 	vm_paddr_t pa;
 	pd_entry_t *l1;
 	u_int l1_slot;
 
 	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
 
 	l1 = (pd_entry_t *)l1pt;
 	l1_slot = pmap_l1_index(va);
 	l2pt = l2_start;
 
 	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
 
 		pa = pmap_early_vtophys(l1pt, l2pt);
 		pmap_store(&l1[l1_slot],
 		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
 		l2pt += PAGE_SIZE;
 	}
 
 	/* Clean the L2 page table */
 	memset((void *)l2_start, 0, l2pt - l2_start);
 
 	return l2pt;
 }
 
 static vm_offset_t
 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
 {
 	vm_offset_t l3pt;
 	vm_paddr_t pa;
 	pd_entry_t *l2;
 	u_int l2_slot;
 
 	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
 
 	l2 = pmap_l2(kernel_pmap, va);
 	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
 	l2_slot = pmap_l2_index(va);
 	l3pt = l3_start;
 
 	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
 		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
 
 		pa = pmap_early_vtophys(l1pt, l3pt);
 		pmap_store(&l2[l2_slot],
 		    (pa & ~Ln_TABLE_MASK) | ATTR_S1_UXN | L2_TABLE);
 		l3pt += PAGE_SIZE;
 	}
 
 	/* Clean the L2 page table */
 	memset((void *)l3_start, 0, l3pt - l3_start);
 
 	return l3pt;
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  */
 void
 pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
     vm_size_t kernlen)
 {
 	u_int l1_slot, l2_slot;
 	pt_entry_t *l2;
 	vm_offset_t va, freemempos;
 	vm_offset_t dpcpu, msgbufpv;
 	vm_paddr_t start_pa, pa, min_pa;
 	uint64_t kern_delta;
 	int i;
 
 	/* Verify that the ASID is set through TTBR0. */
 	KASSERT((READ_SPECIALREG(tcr_el1) & TCR_A1) == 0,
 	    ("pmap_bootstrap: TCR_EL1.A1 != 0"));
 
 	kern_delta = KERNBASE - kernstart;
 
 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
 	printf("%lx\n", l1pt);
 	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
 
 	/* Set this early so we can use the pagetable walking functions */
 	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_l0_paddr = l0pt - kern_delta;
 	kernel_pmap->pm_cookie = COOKIE_FROM(-1, INT_MIN);
 	kernel_pmap->pm_stage = PM_STAGE1;
 	kernel_pmap->pm_asid_set = &asids;
 
 	/* Assume the address we were loaded to is a valid physical address */
 	min_pa = KERNBASE - kern_delta;
 
-	physmap_idx = arm_physmem_avail(physmap, nitems(physmap));
+	physmap_idx = physmem_avail(physmap, nitems(physmap));
 	physmap_idx /= 2;
 
 	/*
 	 * Find the minimum physical address. physmap is sorted,
 	 * but may contain empty ranges.
 	 */
 	for (i = 0; i < (physmap_idx * 2); i += 2) {
 		if (physmap[i] == physmap[i + 1])
 			continue;
 		if (physmap[i] <= min_pa)
 			min_pa = physmap[i];
 	}
 
 	freemempos = KERNBASE + kernlen;
 	freemempos = roundup2(freemempos, PAGE_SIZE);
 
 	/* Create a direct map region early so we can use it for pa -> va */
 	freemempos = pmap_bootstrap_dmap(l1pt, min_pa, freemempos);
 
 	va = KERNBASE;
 	start_pa = pa = KERNBASE - kern_delta;
 
 	/*
 	 * Read the page table to find out what is already mapped.
 	 * This assumes we have mapped a block of memory from KERNBASE
 	 * using a single L1 entry.
 	 */
 	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
 
 	/* Sanity check the index, KERNBASE should be the first VA */
 	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
 
 	/* Find how many pages we have mapped */
 	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
 		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
 			break;
 
 		/* Check locore used L2 blocks */
 		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
 		    ("Invalid bootstrap L2 table"));
 		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
 		    ("Incorrect PA in L2 table"));
 
 		va += L2_SIZE;
 		pa += L2_SIZE;
 	}
 
 	va = roundup2(va, L1_SIZE);
 
 	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
 	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
 	/* And the l3 tables for the early devmap */
 	freemempos = pmap_bootstrap_l3(l1pt,
 	    VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE), freemempos);
 
 	cpu_tlb_flushID();
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/* Allocate dynamic per-cpu area. */
 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
 
 	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
 	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	msgbufp = (void *)msgbufpv;
 
 	/* Reserve some VA space for early BIOS/ACPI mapping */
 	preinit_map_va = roundup2(freemempos, L2_SIZE);
 
 	virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE;
 	virtual_avail = roundup2(virtual_avail, L1_SIZE);
 	virtual_end = VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE);
 	kernel_vm_end = virtual_avail;
 
 	pa = pmap_early_vtophys(l1pt, freemempos);
 
-	arm_physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
+	physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);
 
 	cpu_tlb_flushID();
 }
 
 /*
  *	Initialize a vm_page's machine-dependent fields.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 
 	TAILQ_INIT(&m->md.pv_list);
 	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
 }
 
 static void
 pmap_init_asids(struct asid_set *set, int bits)
 {
 	int i;
 
 	set->asid_bits = bits;
 
 	/*
 	 * We may be too early in the overall initialization process to use
 	 * bit_alloc().
 	 */
 	set->asid_set_size = 1 << set->asid_bits;
 	set->asid_set = (bitstr_t *)kmem_malloc(bitstr_size(set->asid_set_size),
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < ASID_FIRST_AVAILABLE; i++)
 		bit_set(set->asid_set, i);
 	set->asid_next = ASID_FIRST_AVAILABLE;
 	mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
 }
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  */
 void
 pmap_init(void)
 {
 	vm_size_t s;
 	int i, pv_npg;
 
 	/*
 	 * Are large page mappings enabled?
 	 */
 	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
 	if (superpages_enabled) {
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = L2_SIZE;
 	}
 
 	/*
 	 * Initialize the ASID allocator.
 	 */
 	pmap_init_asids(&asids,
 	    (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
 
 	/*
 	 * Initialize the pv chunk list mutex.
 	 */
 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
 
 	/*
 	 * Initialize the pool of pv list locks.
 	 */
 	for (i = 0; i < NPV_LIST_LOCKS; i++)
 		rw_init(&pv_list_locks[i], "pmap pv list");
 
 	/*
 	 * Calculate the size of the pv head table for superpages.
 	 */
 	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
 
 	/*
 	 * Allocate memory for the pv head table for superpages.
 	 */
 	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
 	s = round_page(s);
 	pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
 	TAILQ_INIT(&pv_dummy.pv_list);
 
 	vm_initialized = 1;
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "2MB page mapping counters");
 
 static u_long pmap_l2_demotions;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
     &pmap_l2_demotions, 0, "2MB page demotions");
 
 static u_long pmap_l2_mappings;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
     &pmap_l2_mappings, 0, "2MB page mappings");
 
 static u_long pmap_l2_p_failures;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
     &pmap_l2_p_failures, 0, "2MB page promotion failures");
 
 static u_long pmap_l2_promotions;
 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
     &pmap_l2_promotions, 0, "2MB page promotions");
 
 /*
  * Invalidate a single TLB entry.
  */
 static __inline void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 	uint64_t r;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	dsb(ishst);
 	if (pmap == kernel_pmap) {
 		r = atop(va);
 		__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
 	} else {
 		r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) | atop(va);
 		__asm __volatile("tlbi vae1is, %0" : : "r" (r));
 	}
 	dsb(ish);
 	isb();
 }
 
 static __inline void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	uint64_t end, r, start;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	dsb(ishst);
 	if (pmap == kernel_pmap) {
 		start = atop(sva);
 		end = atop(eva);
 		for (r = start; r < end; r++)
 			__asm __volatile("tlbi vaae1is, %0" : : "r" (r));
 	} else {
 		start = end = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
 		start |= atop(sva);
 		end |= atop(eva);
 		for (r = start; r < end; r++)
 			__asm __volatile("tlbi vae1is, %0" : : "r" (r));
 	}
 	dsb(ish);
 	isb();
 }
 
 static __inline void
 pmap_invalidate_all(pmap_t pmap)
 {
 	uint64_t r;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	dsb(ishst);
 	if (pmap == kernel_pmap) {
 		__asm __volatile("tlbi vmalle1is");
 	} else {
 		r = ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie));
 		__asm __volatile("tlbi aside1is, %0" : : "r" (r));
 	}
 	dsb(ish);
 	isb();
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Extract the physical page address associated
  *		with the given map/virtual_address pair.
  */
 vm_paddr_t
 pmap_extract(pmap_t pmap, vm_offset_t va)
 {
 	pt_entry_t *pte, tpte;
 	vm_paddr_t pa;
 	int lvl;
 
 	pa = 0;
 	PMAP_LOCK(pmap);
 	/*
 	 * Find the block or page map for this virtual address. pmap_pte
 	 * will return either a valid block/page entry, or NULL.
 	 */
 	pte = pmap_pte(pmap, va, &lvl);
 	if (pte != NULL) {
 		tpte = pmap_load(pte);
 		pa = tpte & ~ATTR_MASK;
 		switch(lvl) {
 		case 1:
 			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
 			    ("pmap_extract: Invalid L1 pte found: %lx",
 			    tpte & ATTR_DESCR_MASK));
 			pa |= (va & L1_OFFSET);
 			break;
 		case 2:
 			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
 			    ("pmap_extract: Invalid L2 pte found: %lx",
 			    tpte & ATTR_DESCR_MASK));
 			pa |= (va & L2_OFFSET);
 			break;
 		case 3:
 			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
 			    ("pmap_extract: Invalid L3 pte found: %lx",
 			    tpte & ATTR_DESCR_MASK));
 			pa |= (va & L3_OFFSET);
 			break;
 		}
 	}
 	PMAP_UNLOCK(pmap);
 	return (pa);
 }
 
 /*
  *	Routine:	pmap_extract_and_hold
  *	Function:
  *		Atomically extract and hold the physical page
  *		with the given pmap and virtual address pair
  *		if that mapping permits the given protection.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pt_entry_t *pte, tpte;
 	vm_offset_t off;
 	vm_page_t m;
 	int lvl;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	m = NULL;
 	PMAP_LOCK(pmap);
 	pte = pmap_pte(pmap, va, &lvl);
 	if (pte != NULL) {
 		tpte = pmap_load(pte);
 
 		KASSERT(lvl > 0 && lvl <= 3,
 		    ("pmap_extract_and_hold: Invalid level %d", lvl));
 		CTASSERT(L1_BLOCK == L2_BLOCK);
 		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
 		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
 		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
 		     tpte & ATTR_DESCR_MASK));
 		if (((tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)) ||
 		    ((prot & VM_PROT_WRITE) == 0)) {
 			switch(lvl) {
 			case 1:
 				off = va & L1_OFFSET;
 				break;
 			case 2:
 				off = va & L2_OFFSET;
 				break;
 			case 3:
 			default:
 				off = 0;
 			}
 			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
 			if (!vm_page_wire_mapped(m))
 				m = NULL;
 		}
 	}
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	pt_entry_t *pte, tpte;
 
 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
 		return (DMAP_TO_PHYS(va));
 	pte = pmap_l1(kernel_pmap, va);
 	if (pte == NULL)
 		return (0);
 
 	/*
 	 * A concurrent pmap_update_entry() will clear the entry's valid bit
 	 * but leave the rest of the entry unchanged.  Therefore, we treat a
 	 * non-zero entry as being valid, and we ignore the valid bit when
 	 * determining whether the entry maps a block, page, or table.
 	 */
 	tpte = pmap_load(pte);
 	if (tpte == 0)
 		return (0);
 	if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK)
 		return ((tpte & ~ATTR_MASK) | (va & L1_OFFSET));
 	pte = pmap_l1_to_l2(&tpte, va);
 	tpte = pmap_load(pte);
 	if (tpte == 0)
 		return (0);
 	if ((tpte & ATTR_DESCR_TYPE_MASK) == ATTR_DESCR_TYPE_BLOCK)
 		return ((tpte & ~ATTR_MASK) | (va & L2_OFFSET));
 	pte = pmap_l2_to_l3(&tpte, va);
 	tpte = pmap_load(pte);
 	if (tpte == 0)
 		return (0);
 	return ((tpte & ~ATTR_MASK) | (va & L3_OFFSET));
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 void
 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte, attr;
 	vm_offset_t va;
 	int lvl;
 
 	KASSERT((pa & L3_OFFSET) == 0,
 	   ("pmap_kenter: Invalid physical address"));
 	KASSERT((sva & L3_OFFSET) == 0,
 	   ("pmap_kenter: Invalid virtual address"));
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("pmap_kenter: Mapping is not page-sized"));
 
 	attr = ATTR_DEFAULT | ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
 	    ATTR_S1_IDX(mode) | L3_PAGE;
 	va = sva;
 	while (size != 0) {
 		pde = pmap_pde(kernel_pmap, va, &lvl);
 		KASSERT(pde != NULL,
 		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
 		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
 
 		pte = pmap_l2_to_l3(pde, va);
 		pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
 
 		va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 void
 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 {
 
 	pmap_kenter(sva, size, pa, VM_MEMATTR_DEVICE);
 }
 
 /*
  * Remove a page from the kernel pagetables.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
 {
 	pt_entry_t *pte;
 	int lvl;
 
 	pte = pmap_pte(kernel_pmap, va, &lvl);
 	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
 	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
 
 	pmap_clear(pte);
 	pmap_invalidate_page(kernel_pmap, va);
 }
 
 void
 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 {
 	pt_entry_t *pte;
 	vm_offset_t va;
 	int lvl;
 
 	KASSERT((sva & L3_OFFSET) == 0,
 	   ("pmap_kremove_device: Invalid virtual address"));
 	KASSERT((size & PAGE_MASK) == 0,
 	    ("pmap_kremove_device: Mapping is not page-sized"));
 
 	va = sva;
 	while (size != 0) {
 		pte = pmap_pte(kernel_pmap, va, &lvl);
 		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
 		KASSERT(lvl == 3,
 		    ("Invalid device pagetable level: %d != 3", lvl));
 		pmap_clear(pte);
 
 		va += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /*
  *	Used to map a range of physical addresses into kernel
  *	virtual address space.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping. Architectures which can support a direct-mapped
  *	physical to virtual region can return the appropriate address
  *	within that region, leaving '*virt' unchanged. Other
  *	architectures should map the pages starting at '*virt' and
  *	update '*virt' with the first usable address after the mapped
  *	region.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	return PHYS_TO_DMAP(start);
 }
 
 
 /*
  * Add a list of wired pages to the kva
  * this routine is only used for temporary
  * kernel mappings that do not need to have
  * page modification or references recorded.
  * Note that old mappings are simply written
  * over.  The page *must* be wired.
  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte, pa;
 	vm_offset_t va;
 	vm_page_t m;
 	int i, lvl;
 
 	va = sva;
 	for (i = 0; i < count; i++) {
 		pde = pmap_pde(kernel_pmap, va, &lvl);
 		KASSERT(pde != NULL,
 		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
 		KASSERT(lvl == 2,
 		    ("pmap_qenter: Invalid level %d", lvl));
 
 		m = ma[i];
 		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
 		    ATTR_S1_AP(ATTR_S1_AP_RW) | ATTR_S1_XN |
 		    ATTR_S1_IDX(m->md.pv_memattr) | L3_PAGE;
 		pte = pmap_l2_to_l3(pde, va);
 		pmap_load_store(pte, pa);
 
 		va += L3_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	pt_entry_t *pte;
 	vm_offset_t va;
 	int lvl;
 
 	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 
 	va = sva;
 	while (count-- > 0) {
 		pte = pmap_pte(kernel_pmap, va, &lvl);
 		KASSERT(lvl == 3,
 		    ("Invalid device pagetable level: %d != 3", lvl));
 		if (pte != NULL) {
 			pmap_clear(pte);
 		}
 
 		va += PAGE_SIZE;
 	}
 	pmap_invalidate_range(kernel_pmap, sva, va);
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 /*
  * Schedule the specified unused page table page to be freed.  Specifically,
  * add the page to the specified list of pages that will be released to the
  * physical memory manager after the TLB has been updated.
  */
 static __inline void
 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
     boolean_t set_PG_ZERO)
 {
 
 	if (set_PG_ZERO)
 		m->flags |= PG_ZERO;
 	else
 		m->flags &= ~PG_ZERO;
 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 }
 
 /*
  * Decrements a page table page's reference count, which is used to record the
  * number of valid page table entries within the page.  If the reference count
  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
  * page table page was unmapped and FALSE otherwise.
  */
 static inline boolean_t
 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 
 	--m->ref_count;
 	if (m->ref_count == 0) {
 		_pmap_unwire_l3(pmap, va, m, free);
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 static void
 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/*
 	 * unmap the page table page
 	 */
 	if (m->pindex >= (NUL2E + NUL1E)) {
 		/* l1 page */
 		pd_entry_t *l0;
 
 		l0 = pmap_l0(pmap, va);
 		pmap_clear(l0);
 	} else if (m->pindex >= NUL2E) {
 		/* l2 page */
 		pd_entry_t *l1;
 
 		l1 = pmap_l1(pmap, va);
 		pmap_clear(l1);
 	} else {
 		/* l3 page */
 		pd_entry_t *l2;
 
 		l2 = pmap_l2(pmap, va);
 		pmap_clear(l2);
 	}
 	pmap_resident_count_dec(pmap, 1);
 	if (m->pindex < NUL2E) {
 		/* We just released an l3, unhold the matching l2 */
 		pd_entry_t *l1, tl1;
 		vm_page_t l2pg;
 
 		l1 = pmap_l1(pmap, va);
 		tl1 = pmap_load(l1);
 		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
 		pmap_unwire_l3(pmap, va, l2pg, free);
 	} else if (m->pindex < (NUL2E + NUL1E)) {
 		/* We just released an l2, unhold the matching l1 */
 		pd_entry_t *l0, tl0;
 		vm_page_t l1pg;
 
 		l0 = pmap_l0(pmap, va);
 		tl0 = pmap_load(l0);
 		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
 		pmap_unwire_l3(pmap, va, l1pg, free);
 	}
 	pmap_invalidate_page(pmap, va);
 
 	/*
 	 * Put page on a list so that it is released after
 	 * *ALL* TLB shootdown is done
 	 */
 	pmap_add_delayed_free_list(m, free, TRUE);
 }
 
 /*
  * After removing a page table entry, this routine is used to
  * conditionally free the page, and manage the reference count.
  */
 static int
 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
     struct spglist *free)
 {
 	vm_page_t mpte;
 
 	if (va >= VM_MAXUSER_ADDRESS)
 		return (0);
 	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
 	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
 	return (pmap_unwire_l3(pmap, va, mpte, free));
 }
 
 /*
  * Release a page table page reference after a failed attempt to create a
  * mapping.
  */
 static void
 pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
 {
 	struct spglist free;
 
 	SLIST_INIT(&free);
 	if (pmap_unwire_l3(pmap, va, mpte, &free)) {
 		/*
 		 * Although "va" was never mapped, the TLB could nonetheless
 		 * have intermediate entries that refer to the freed page
 		 * table pages.  Invalidate those entries.
 		 *
 		 * XXX redundant invalidation (See _pmap_unwire_l3().)
 		 */
 		pmap_invalidate_page(pmap, va);
 		vm_page_free_pages_toq(&free, true);
 	}
 }
 
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	pmap->pm_l0_paddr = READ_SPECIALREG(ttbr0_el1);
 	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 	pmap->pm_root.rt_root = 0;
 	pmap->pm_cookie = COOKIE_FROM(ASID_RESERVED_FOR_PID_0, INT_MIN);
 	pmap->pm_stage = PM_STAGE1;
 	pmap->pm_asid_set = &asids;
 
 	PCPU_SET(curpmap, pmap);
 }
 
 int
 pmap_pinit(pmap_t pmap)
 {
 	vm_page_t l0pt;
 
 	/*
 	 * allocate the l0 page
 	 */
 	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 		vm_wait(NULL);
 
 	pmap->pm_l0_paddr = VM_PAGE_TO_PHYS(l0pt);
 	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(pmap->pm_l0_paddr);
 
 	if ((l0pt->flags & PG_ZERO) == 0)
 		pagezero(pmap->pm_l0);
 
 	pmap->pm_root.rt_root = 0;
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	pmap->pm_cookie = COOKIE_FROM(-1, INT_MAX);
 	pmap->pm_stage = PM_STAGE1;
 	pmap->pm_asid_set = &asids;
 	/* XXX Temporarily disable deferred ASID allocation. */
 	pmap_alloc_asid(pmap);
 
 	return (1);
 }
 
 /*
  * This routine is called if the desired page table page does not exist.
  *
  * If page table page allocation fails, this routine may sleep before
  * returning NULL.  It sleeps only if a lock pointer was given.
  *
  * Note: If a page allocation fails at page table level two or three,
  * one or two pages may be held during the wait, only to be released
  * afterwards.  This conservative approach is easily argued to avoid
  * race conditions.
  */
 static vm_page_t
 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 {
 	vm_page_t m, l1pg, l2pg;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Allocate a page table page.
 	 */
 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 		if (lockp != NULL) {
 			RELEASE_PV_LIST_LOCK(lockp);
 			PMAP_UNLOCK(pmap);
 			vm_wait(NULL);
 			PMAP_LOCK(pmap);
 		}
 
 		/*
 		 * Indicate the need to retry.  While waiting, the page table
 		 * page may have been allocated.
 		 */
 		return (NULL);
 	}
 	if ((m->flags & PG_ZERO) == 0)
 		pmap_zero_page(m);
 
 	/*
 	 * Because of AArch64's weak memory consistency model, we must have a
 	 * barrier here to ensure that the stores for zeroing "m", whether by
 	 * pmap_zero_page() or an earlier function, are visible before adding
 	 * "m" to the page table.  Otherwise, a page table walk by another
 	 * processor's MMU could see the mapping to "m" and a stale, non-zero
 	 * PTE within "m".
 	 */
 	dmb(ishst);
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 
 	if (ptepindex >= (NUL2E + NUL1E)) {
 		pd_entry_t *l0;
 		vm_pindex_t l0index;
 
 		l0index = ptepindex - (NUL2E + NUL1E);
 		l0 = &pmap->pm_l0[l0index];
 		pmap_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
 	} else if (ptepindex >= NUL2E) {
 		vm_pindex_t l0index, l1index;
 		pd_entry_t *l0, *l1;
 		pd_entry_t tl0;
 
 		l1index = ptepindex - NUL2E;
 		l0index = l1index >> L0_ENTRIES_SHIFT;
 
 		l0 = &pmap->pm_l0[l0index];
 		tl0 = pmap_load(l0);
 		if (tl0 == 0) {
 			/* recurse for allocating page dir */
 			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
 			    lockp) == NULL) {
 				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
 		} else {
 			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
 			l1pg->ref_count++;
 		}
 
 		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
 		l1 = &l1[ptepindex & Ln_ADDR_MASK];
 		pmap_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
 	} else {
 		vm_pindex_t l0index, l1index;
 		pd_entry_t *l0, *l1, *l2;
 		pd_entry_t tl0, tl1;
 
 		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
 		l0index = l1index >> L0_ENTRIES_SHIFT;
 
 		l0 = &pmap->pm_l0[l0index];
 		tl0 = pmap_load(l0);
 		if (tl0 == 0) {
 			/* recurse for allocating page dir */
 			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 			    lockp) == NULL) {
 				vm_page_unwire_noq(m);
 				vm_page_free_zero(m);
 				return (NULL);
 			}
 			tl0 = pmap_load(l0);
 			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
 			l1 = &l1[l1index & Ln_ADDR_MASK];
 		} else {
 			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
 			l1 = &l1[l1index & Ln_ADDR_MASK];
 			tl1 = pmap_load(l1);
 			if (tl1 == 0) {
 				/* recurse for allocating page dir */
 				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 				    lockp) == NULL) {
 					vm_page_unwire_noq(m);
 					vm_page_free_zero(m);
 					return (NULL);
 				}
 			} else {
 				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
 				l2pg->ref_count++;
 			}
 		}
 
 		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
 		l2 = &l2[ptepindex & Ln_ADDR_MASK];
 		pmap_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
 	}
 
 	pmap_resident_count_inc(pmap, 1);
 
 	return (m);
 }
 
 static pd_entry_t *
 pmap_alloc_l2(pmap_t pmap, vm_offset_t va, vm_page_t *l2pgp,
     struct rwlock **lockp)
 {
 	pd_entry_t *l1, *l2;
 	vm_page_t l2pg;
 	vm_pindex_t l2pindex;
 
 retry:
 	l1 = pmap_l1(pmap, va);
 	if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) {
 		l2 = pmap_l1_to_l2(l1, va);
 		if (va < VM_MAXUSER_ADDRESS) {
 			/* Add a reference to the L2 page. */
 			l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
 			l2pg->ref_count++;
 		} else
 			l2pg = NULL;
 	} else if (va < VM_MAXUSER_ADDRESS) {
 		/* Allocate a L2 page. */
 		l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
 		l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
 		if (l2pg == NULL) {
 			if (lockp != NULL)
 				goto retry;
 			else
 				return (NULL);
 		}
 		l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
 		l2 = &l2[pmap_l2_index(va)];
 	} else
 		panic("pmap_alloc_l2: missing page table page for va %#lx",
 		    va);
 	*l2pgp = l2pg;
 	return (l2);
 }
 
 static vm_page_t
 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 {
 	vm_pindex_t ptepindex;
 	pd_entry_t *pde, tpde;
 #ifdef INVARIANTS
 	pt_entry_t *pte;
 #endif
 	vm_page_t m;
 	int lvl;
 
 	/*
 	 * Calculate pagetable page index
 	 */
 	ptepindex = pmap_l2_pindex(va);
 retry:
 	/*
 	 * Get the page directory entry
 	 */
 	pde = pmap_pde(pmap, va, &lvl);
 
 	/*
 	 * If the page table page is mapped, we just increment the hold count,
 	 * and activate it. If we get a level 2 pde it will point to a level 3
 	 * table.
 	 */
 	switch (lvl) {
 	case -1:
 		break;
 	case 0:
 #ifdef INVARIANTS
 		pte = pmap_l0_to_l1(pde, va);
 		KASSERT(pmap_load(pte) == 0,
 		    ("pmap_alloc_l3: TODO: l0 superpages"));
 #endif
 		break;
 	case 1:
 #ifdef INVARIANTS
 		pte = pmap_l1_to_l2(pde, va);
 		KASSERT(pmap_load(pte) == 0,
 		    ("pmap_alloc_l3: TODO: l1 superpages"));
 #endif
 		break;
 	case 2:
 		tpde = pmap_load(pde);
 		if (tpde != 0) {
 			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
 			m->ref_count++;
 			return (m);
 		}
 		break;
 	default:
 		panic("pmap_alloc_l3: Invalid level %d", lvl);
 	}
 
 	/*
 	 * Here if the pte page isn't mapped, or if it has been deallocated.
 	 */
 	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
 	if (m == NULL && lockp != NULL)
 		goto retry;
 
 	return (m);
 }
 
 /***************************************************
  * Pmap allocation/deallocation routines.
  ***************************************************/
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  */
 void
 pmap_release(pmap_t pmap)
 {
 	struct asid_set *set;
 	vm_page_t m;
 	int asid;
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 	KASSERT(vm_radix_is_empty(&pmap->pm_root),
 	    ("pmap_release: pmap has reserved page table page(s)"));
 	PMAP_ASSERT_STAGE1(pmap);
 
 	set = pmap->pm_asid_set;
 	KASSERT(set != NULL, ("%s: NULL asid set", __func__));
 
 	mtx_lock_spin(&set->asid_set_mutex);
 	if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch) {
 		asid = COOKIE_TO_ASID(pmap->pm_cookie);
 		KASSERT(asid >= ASID_FIRST_AVAILABLE &&
 		    asid < set->asid_set_size,
 		    ("pmap_release: pmap cookie has out-of-range asid"));
 		bit_clear(set->asid_set, asid);
 	}
 	mtx_unlock_spin(&set->asid_set_mutex);
 
 	m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
 	vm_page_unwire_noq(m);
 	vm_page_free_zero(m);
 }
 
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
 
 	return sysctl_handle_long(oidp, &ksize, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
     0, 0, kvm_size, "LU",
     "Size of KVM");
 
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 
 	return sysctl_handle_long(oidp, &kfree, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE,
     0, 0, kvm_free, "LU",
     "Amount of KVM free");
 
 /*
  * grow the number of kernel page table entries, if needed
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	vm_paddr_t paddr;
 	vm_page_t nkpg;
 	pd_entry_t *l0, *l1, *l2;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 
 	addr = roundup2(addr, L2_SIZE);
 	if (addr - 1 >= vm_map_max(kernel_map))
 		addr = vm_map_max(kernel_map);
 	while (kernel_vm_end < addr) {
 		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
 		KASSERT(pmap_load(l0) != 0,
 		    ("pmap_growkernel: No level 0 kernel entry"));
 
 		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
 		if (pmap_load(l1) == 0) {
 			/* We need a new PDP entry */
 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 			if (nkpg == NULL)
 				panic("pmap_growkernel: no memory to grow kernel");
 			if ((nkpg->flags & PG_ZERO) == 0)
 				pmap_zero_page(nkpg);
 			/* See the dmb() in _pmap_alloc_l3(). */
 			dmb(ishst);
 			paddr = VM_PAGE_TO_PHYS(nkpg);
 			pmap_store(l1, paddr | L1_TABLE);
 			continue; /* try again */
 		}
 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
 		if (pmap_load(l2) != 0) {
 			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 				kernel_vm_end = vm_map_max(kernel_map);
 				break;
 			}
 			continue;
 		}
 
 		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
 		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 		    VM_ALLOC_ZERO);
 		if (nkpg == NULL)
 			panic("pmap_growkernel: no memory to grow kernel");
 		if ((nkpg->flags & PG_ZERO) == 0)
 			pmap_zero_page(nkpg);
 		/* See the dmb() in _pmap_alloc_l3(). */
 		dmb(ishst);
 		paddr = VM_PAGE_TO_PHYS(nkpg);
 		pmap_store(l2, paddr | L2_TABLE);
 
 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 			kernel_vm_end = vm_map_max(kernel_map);
 			break;
 		}
 	}
 }
 
 
 /***************************************************
  * page management routines.
  ***************************************************/
 
 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 CTASSERT(_NPCM == 3);
 CTASSERT(_NPCPV == 168);
 
 static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 
 #define	PC_FREE0	0xfffffffffffffffful
 #define	PC_FREE1	0xfffffffffffffffful
 #define	PC_FREE2	0x000000fffffffffful
 
 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 
 #if 0
 #ifdef PV_STATS
 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 	"Current number of pv entry chunks");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 	"Current number of pv entry chunks allocated");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 	"Current number of pv entry chunks frees");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 	"Number of times tried to get a chunk page but failed.");
 
 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
 static int pv_entry_spare;
 
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 	"Current number of pv entry frees");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 	"Current number of pv entry allocs");
 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 	"Current number of pv entries");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 	"Current number of spare pv entries");
 #endif
 #endif /* 0 */
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
  * another pv entry chunk.
  *
  * Returns NULL if PV entries were reclaimed from the specified pmap.
  *
  * We do not, however, unmap 2mpages because subsequent accesses will
  * allocate per-page pv entries until repromotion occurs, thereby
  * exacerbating the shortage of free pv entries.
  */
 static vm_page_t
 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 {
 	struct pv_chunk *pc, *pc_marker, *pc_marker_end;
 	struct pv_chunk_header pc_marker_b, pc_marker_end_b;
 	struct md_page *pvh;
 	pd_entry_t *pde;
 	pmap_t next_pmap, pmap;
 	pt_entry_t *pte, tpte;
 	pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m, m_pc;
 	struct spglist free;
 	uint64_t inuse;
 	int bit, field, freed, lvl;
 	static int active_reclaims = 0;
 
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
 
 	pmap = NULL;
 	m_pc = NULL;
 	SLIST_INIT(&free);
 	bzero(&pc_marker_b, sizeof(pc_marker_b));
 	bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
 	pc_marker = (struct pv_chunk *)&pc_marker_b;
 	pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
 
 	mtx_lock(&pv_chunks_mutex);
 	active_reclaims++;
 	TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
 	TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
 	while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
 	    SLIST_EMPTY(&free)) {
 		next_pmap = pc->pc_pmap;
 		if (next_pmap == NULL) {
 			/*
 			 * The next chunk is a marker.  However, it is
 			 * not our marker, so active_reclaims must be
 			 * > 1.  Consequently, the next_chunk code
 			 * will not rotate the pv_chunks list.
 			 */
 			goto next_chunk;
 		}
 		mtx_unlock(&pv_chunks_mutex);
 
 		/*
 		 * A pv_chunk can only be removed from the pc_lru list
 		 * when both pv_chunks_mutex is owned and the
 		 * corresponding pmap is locked.
 		 */
 		if (pmap != next_pmap) {
 			if (pmap != NULL && pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 			pmap = next_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap) {
 				RELEASE_PV_LIST_LOCK(lockp);
 				PMAP_LOCK(pmap);
 				mtx_lock(&pv_chunks_mutex);
 				continue;
 			} else if (pmap != locked_pmap) {
 				if (PMAP_TRYLOCK(pmap)) {
 					mtx_lock(&pv_chunks_mutex);
 					continue;
 				} else {
 					pmap = NULL; /* pmap is not locked */
 					mtx_lock(&pv_chunks_mutex);
 					pc = TAILQ_NEXT(pc_marker, pc_lru);
 					if (pc == NULL ||
 					    pc->pc_pmap != next_pmap)
 						continue;
 					goto next_chunk;
 				}
 			}
 		}
 
 		/*
 		 * Destroy every non-wired, 4 KB page mapping in the chunk.
 		 */
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
 			    inuse != 0; inuse &= ~(1UL << bit)) {
 				bit = ffsl(inuse) - 1;
 				pv = &pc->pc_pventry[field * 64 + bit];
 				va = pv->pv_va;
 				pde = pmap_pde(pmap, va, &lvl);
 				if (lvl != 2)
 					continue;
 				pte = pmap_l2_to_l3(pde, va);
 				tpte = pmap_load(pte);
 				if ((tpte & ATTR_SW_WIRED) != 0)
 					continue;
 				tpte = pmap_load_clear(pte);
 				m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
 				if (pmap_pte_dirty(pmap, tpte))
 					vm_page_dirty(m);
 				if ((tpte & ATTR_AF) != 0) {
 					pmap_invalidate_page(pmap, va);
 					vm_page_aflag_set(m, PGA_REFERENCED);
 				}
 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 				m->md.pv_gen++;
 				if (TAILQ_EMPTY(&m->md.pv_list) &&
 				    (m->flags & PG_FICTITIOUS) == 0) {
 					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						vm_page_aflag_clear(m,
 						    PGA_WRITEABLE);
 					}
 				}
 				pc->pc_map[field] |= 1UL << bit;
 				pmap_unuse_pt(pmap, va, pmap_load(pde), &free);
 				freed++;
 			}
 		}
 		if (freed == 0) {
 			mtx_lock(&pv_chunks_mutex);
 			goto next_chunk;
 		}
 		/* Every freed mapping is for a 4 KB page. */
 		pmap_resident_count_dec(pmap, freed);
 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
 		    pc->pc_map[2] == PC_FREE2) {
 			PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 			PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 			PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 			/* Entire chunk is free; return it. */
 			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 			dump_drop_page(m_pc->phys_addr);
 			mtx_lock(&pv_chunks_mutex);
 			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 			break;
 		}
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		mtx_lock(&pv_chunks_mutex);
 		/* One freed pv entry in locked_pmap is sufficient. */
 		if (pmap == locked_pmap)
 			break;
 
 next_chunk:
 		TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
 		TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
 		if (active_reclaims == 1 && pmap != NULL) {
 			/*
 			 * Rotate the pv chunks list so that we do not
 			 * scan the same pv chunks that could not be
 			 * freed (because they contained a wired
 			 * and/or superpage mapping) on every
 			 * invocation of reclaim_pv_chunk().
 			 */
 			while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
 				MPASS(pc->pc_pmap != NULL);
 				TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 				TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 			}
 		}
 	}
 	TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
 	TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
 	active_reclaims--;
 	mtx_unlock(&pv_chunks_mutex);
 	if (pmap != NULL && pmap != locked_pmap)
 		PMAP_UNLOCK(pmap);
 	if (m_pc == NULL && !SLIST_EMPTY(&free)) {
 		m_pc = SLIST_FIRST(&free);
 		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->ref_count = 1;
 	}
 	vm_page_free_pages_toq(&free, true);
 	return (m_pc);
 }
 
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *pc;
 	int idx, field, bit;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
 	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
 	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
 	pc = pv_to_chunk(pv);
 	idx = pv - &pc->pc_pventry[0];
 	field = idx / 64;
 	bit = idx % 64;
 	pc->pc_map[field] |= 1ul << bit;
 	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
 	    pc->pc_map[2] != PC_FREE2) {
 		/* 98% of the time, pc is already at the head of the list. */
 		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		}
 		return;
 	}
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	free_pv_chunk(pc);
 }
 
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
 	vm_page_t m;
 
 	mtx_lock(&pv_chunks_mutex);
  	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 	mtx_unlock(&pv_chunks_mutex);
 	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 	dump_drop_page(m->phys_addr);
 	vm_page_unwire_noq(m);
 	vm_page_free(m);
 }
 
 /*
  * Returns a new PV entry, allocating a new PV chunk from the system when
  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
  * returned.
  *
  * The given PV list lock may be released.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 {
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
 retry:
 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 	if (pc != NULL) {
 		for (field = 0; field < _NPCM; field++) {
 			if (pc->pc_map[field]) {
 				bit = ffsl(pc->pc_map[field]) - 1;
 				break;
 			}
 		}
 		if (field < _NPCM) {
 			pv = &pc->pc_pventry[field * 64 + bit];
 			pc->pc_map[field] &= ~(1ul << bit);
 			/* If this was the last item, move it to tail */
 			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
 			    pc->pc_map[2] == 0) {
 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
 				    pc_list);
 			}
 			PV_STAT(atomic_add_long(&pv_entry_count, 1));
 			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
 			return (pv);
 		}
 	}
 	/* No free items, allocate another chunk */
 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 	    VM_ALLOC_WIRED);
 	if (m == NULL) {
 		if (lockp == NULL) {
 			PV_STAT(pc_chunk_tryfail++);
 			return (NULL);
 		}
 		m = reclaim_pv_chunk(pmap, lockp);
 		if (m == NULL)
 			goto retry;
 	}
 	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 	dump_add_page(m->phys_addr);
 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
 	pc->pc_map[1] = PC_FREE1;
 	pc->pc_map[2] = PC_FREE2;
 	mtx_lock(&pv_chunks_mutex);
 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 	mtx_unlock(&pv_chunks_mutex);
 	pv = &pc->pc_pventry[0];
 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	PV_STAT(atomic_add_long(&pv_entry_count, 1));
 	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
 	return (pv);
 }
 
 /*
  * Ensure that the number of spare PV entries in the specified pmap meets or
  * exceeds the given count, "needed".
  *
  * The given PV list lock may be released.
  */
 static void
 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 {
 	struct pch new_tail;
 	struct pv_chunk *pc;
 	vm_page_t m;
 	int avail, free;
 	bool reclaimed;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
 
 	/*
 	 * Newly allocated PV chunks must be stored in a private list until
 	 * the required number of PV chunks have been allocated.  Otherwise,
 	 * reclaim_pv_chunk() could recycle one of these chunks.  In
 	 * contrast, these chunks must be added to the pmap upon allocation.
 	 */
 	TAILQ_INIT(&new_tail);
 retry:
 	avail = 0;
 	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 		bit_count((bitstr_t *)pc->pc_map, 0,
 		    sizeof(pc->pc_map) * NBBY, &free);
 		if (free == 0)
 			break;
 		avail += free;
 		if (avail >= needed)
 			break;
 	}
 	for (reclaimed = false; avail < needed; avail += _NPCPV) {
 		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 		    VM_ALLOC_WIRED);
 		if (m == NULL) {
 			m = reclaim_pv_chunk(pmap, lockp);
 			if (m == NULL)
 				goto retry;
 			reclaimed = true;
 		}
 		PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 		PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 		dump_add_page(m->phys_addr);
 		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 		pc->pc_pmap = pmap;
 		pc->pc_map[0] = PC_FREE0;
 		pc->pc_map[1] = PC_FREE1;
 		pc->pc_map[2] = PC_FREE2;
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 		PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
 
 		/*
 		 * The reclaim might have freed a chunk from the current pmap.
 		 * If that chunk contained available entries, we need to
 		 * re-count the number of available entries.
 		 */
 		if (reclaimed)
 			goto retry;
 	}
 	if (!TAILQ_EMPTY(&new_tail)) {
 		mtx_lock(&pv_chunks_mutex);
 		TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
 		mtx_unlock(&pv_chunks_mutex);
 	}
 }
 
 /*
  * First find and then remove the pv entry for the specified pmap and virtual
  * address from the specified pv list.  Returns the pv entry if found and NULL
  * otherwise.  This operation can be performed on pv lists for either 4KB or
  * 2MB page mappings.
  */
 static __inline pv_entry_t
 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			pvh->pv_gen++;
 			break;
 		}
 	}
 	return (pv);
 }
 
 /*
  * After demotion from a 2MB page mapping to 512 4KB page mappings,
  * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
  * entries for each of the 4KB page mappings.
  */
 static void
 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	struct pv_chunk *pc;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 	int bit, field;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((va & L2_OFFSET) == 0,
 	    ("pmap_pv_demote_l2: va is not 2mpage aligned"));
 	KASSERT((pa & L2_OFFSET) == 0,
 	    ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 
 	/*
 	 * Transfer the 2mpage's pv entry for this mapping to the first
 	 * page's pv list.  Once this transfer begins, the pv list lock
 	 * must not be released until the last pv entry is reinstantiated.
 	 */
 	pvh = pa_to_pvh(pa);
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
 	m = PHYS_TO_VM_PAGE(pa);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 	m->md.pv_gen++;
 	/* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
 	PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
 	va_last = va + L2_SIZE - PAGE_SIZE;
 	for (;;) {
 		pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 		KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
 		    pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
 		for (field = 0; field < _NPCM; field++) {
 			while (pc->pc_map[field]) {
 				bit = ffsl(pc->pc_map[field]) - 1;
 				pc->pc_map[field] &= ~(1ul << bit);
 				pv = &pc->pc_pventry[field * 64 + bit];
 				va += PAGE_SIZE;
 				pv->pv_va = va;
 				m++;
 				KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 			    ("pmap_pv_demote_l2: page %p is not managed", m));
 				TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 				m->md.pv_gen++;
 				if (va == va_last)
 					goto out;
 			}
 		}
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 	}
 out:
 	if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 	}
 	PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
 	PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
 }
 
 /*
  * First find and then destroy the pv entry for the specified pmap and virtual
  * address.  This operation can be performed on pv lists for either 4KB or 2MB
  * page mappings.
  */
 static void
 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 {
 	pv_entry_t pv;
 
 	pv = pmap_pvh_remove(pvh, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 	free_pv_entry(pmap, pv);
 }
 
 /*
  * Conditionally create the PV entry for a 4KB page mapping if the required
  * memory can be allocated without resorting to reclamation.
  */
 static boolean_t
 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
     struct rwlock **lockp)
 {
 	pv_entry_t pv;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Pass NULL instead of the lock pointer to disable reclamation. */
 	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
 		pv->pv_va = va;
 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		return (TRUE);
 	} else
 		return (FALSE);
 }
 
 /*
  * Create the PV entry for a 2MB page mapping.  Always returns true unless the
  * flag PMAP_ENTER_NORECLAIM is specified.  If that flag is specified, returns
  * false if the PV entry cannot be allocated without resorting to reclamation.
  */
 static bool
 pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_paddr_t pa;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	/* Pass NULL instead of the lock pointer to disable reclamation. */
 	if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
 	    NULL : lockp)) == NULL)
 		return (false);
 	pv->pv_va = va;
 	pa = l2e & ~ATTR_MASK;
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	pvh->pv_gen++;
 	return (true);
 }
 
 static void
 pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 {
 	pt_entry_t newl2, oldl2;
 	vm_page_t ml3;
 	vm_paddr_t ml3pa;
 
 	KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
 	KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	ml3 = pmap_remove_pt_page(pmap, va);
 	if (ml3 == NULL)
 		panic("pmap_remove_kernel_l2: Missing pt page");
 
 	ml3pa = VM_PAGE_TO_PHYS(ml3);
 	newl2 = ml3pa | L2_TABLE;
 
 	/*
 	 * If this page table page was unmapped by a promotion, then it
 	 * contains valid mappings.  Zero it to invalidate those mappings.
 	 */
 	if (ml3->valid != 0)
 		pagezero((void *)PHYS_TO_DMAP(ml3pa));
 
 	/*
 	 * Demote the mapping.  The caller must have already invalidated the
 	 * mapping (i.e., the "break" in break-before-make).
 	 */
 	oldl2 = pmap_load_store(l2, newl2);
 	KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
 	    __func__, l2, oldl2));
 }
 
 /*
  * pmap_remove_l2: Do the things to unmap a level 2 superpage.
  */
 static int
 pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
     pd_entry_t l1e, struct spglist *free, struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pt_entry_t old_l2;
 	vm_offset_t eva, va;
 	vm_page_t m, ml3;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
 	old_l2 = pmap_load_clear(l2);
 	KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
 	    ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2));
 
 	/*
 	 * Since a promotion must break the 4KB page mappings before making
 	 * the 2MB page mapping, a pmap_invalidate_page() suffices.
 	 */
 	pmap_invalidate_page(pmap, sva);
 
 	if (old_l2 & ATTR_SW_WIRED)
 		pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
 	pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
 	if (old_l2 & ATTR_SW_MANAGED) {
 		CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK);
 		pvh = pa_to_pvh(old_l2 & ~ATTR_MASK);
 		pmap_pvh_free(pvh, pmap, sva);
 		eva = sva + L2_SIZE;
 		for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
 		    va < eva; va += PAGE_SIZE, m++) {
 			if (pmap_pte_dirty(pmap, old_l2))
 				vm_page_dirty(m);
 			if (old_l2 & ATTR_AF)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 	if (pmap == kernel_pmap) {
 		pmap_remove_kernel_l2(pmap, l2, sva);
 	} else {
 		ml3 = pmap_remove_pt_page(pmap, sva);
 		if (ml3 != NULL) {
 			KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
 			    ("pmap_remove_l2: l3 page not promoted"));
 			pmap_resident_count_dec(pmap, 1);
 			KASSERT(ml3->ref_count == NL3PG,
 			    ("pmap_remove_l2: l3 page ref count error"));
 			ml3->ref_count = 0;
 			pmap_add_delayed_free_list(ml3, free, FALSE);
 		}
 	}
 	return (pmap_unuse_pt(pmap, sva, l1e, free));
 }
 
 /*
  * pmap_remove_l3: do the things to unmap a page in a process
  */
 static int
 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pt_entry_t old_l3;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	old_l3 = pmap_load_clear(l3);
 	pmap_invalidate_page(pmap, va);
 	if (old_l3 & ATTR_SW_WIRED)
 		pmap->pm_stats.wired_count -= 1;
 	pmap_resident_count_dec(pmap, 1);
 	if (old_l3 & ATTR_SW_MANAGED) {
 		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
 		if (pmap_pte_dirty(pmap, old_l3))
 			vm_page_dirty(m);
 		if (old_l3 & ATTR_AF)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 		pmap_pvh_free(&m->md, pmap, va);
 		if (TAILQ_EMPTY(&m->md.pv_list) &&
 		    (m->flags & PG_FICTITIOUS) == 0) {
 			pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 			if (TAILQ_EMPTY(&pvh->pv_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 		}
 	}
 	return (pmap_unuse_pt(pmap, va, l2e, free));
 }
 
 /*
  * Remove the specified range of addresses from the L3 page table that is
  * identified by the given L2 entry.
  */
 static void
 pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
     vm_offset_t eva, struct spglist *free, struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	struct rwlock *new_lock;
 	pt_entry_t *l3, old_l3;
 	vm_offset_t va;
 	vm_page_t l3pg, m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(rounddown2(sva, L2_SIZE) + L2_SIZE == roundup2(eva, L2_SIZE),
 	    ("pmap_remove_l3_range: range crosses an L3 page table boundary"));
 	l3pg = sva < VM_MAXUSER_ADDRESS ? PHYS_TO_VM_PAGE(l2e & ~ATTR_MASK) :
 	    NULL;
 	va = eva;
 	for (l3 = pmap_l2_to_l3(&l2e, sva); sva != eva; l3++, sva += L3_SIZE) {
 		if (!pmap_l3_valid(pmap_load(l3))) {
 			if (va != eva) {
 				pmap_invalidate_range(pmap, va, sva);
 				va = eva;
 			}
 			continue;
 		}
 		old_l3 = pmap_load_clear(l3);
 		if ((old_l3 & ATTR_SW_WIRED) != 0)
 			pmap->pm_stats.wired_count--;
 		pmap_resident_count_dec(pmap, 1);
 		if ((old_l3 & ATTR_SW_MANAGED) != 0) {
 			m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
 			if (pmap_pte_dirty(pmap, old_l3))
 				vm_page_dirty(m);
 			if ((old_l3 & ATTR_AF) != 0)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			new_lock = PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m));
 			if (new_lock != *lockp) {
 				if (*lockp != NULL) {
 					/*
 					 * Pending TLB invalidations must be
 					 * performed before the PV list lock is
 					 * released.  Otherwise, a concurrent
 					 * pmap_remove_all() on a physical page
 					 * could return while a stale TLB entry
 					 * still provides access to that page. 
 					 */
 					if (va != eva) {
 						pmap_invalidate_range(pmap, va,
 						    sva);
 						va = eva;
 					}
 					rw_wunlock(*lockp);
 				}
 				*lockp = new_lock;
 				rw_wlock(*lockp);
 			}
 			pmap_pvh_free(&m->md, pmap, sva);
 			if (TAILQ_EMPTY(&m->md.pv_list) &&
 			    (m->flags & PG_FICTITIOUS) == 0) {
 				pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 				if (TAILQ_EMPTY(&pvh->pv_list))
 					vm_page_aflag_clear(m, PGA_WRITEABLE);
 			}
 		}
 		if (va == eva)
 			va = sva;
 		if (l3pg != NULL && pmap_unwire_l3(pmap, sva, l3pg, free)) {
 			sva += L3_SIZE;
 			break;
 		}
 	}
 	if (va != eva)
 		pmap_invalidate_range(pmap, va, sva);
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  */
 void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	struct rwlock *lock;
 	vm_offset_t va_next;
 	pd_entry_t *l0, *l1, *l2;
 	pt_entry_t l3_paddr;
 	struct spglist free;
 
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	SLIST_INIT(&free);
 
 	PMAP_LOCK(pmap);
 
 	lock = NULL;
 	for (; sva < eva; sva = va_next) {
 
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		l0 = pmap_l0(pmap, sva);
 		if (pmap_load(l0) == 0) {
 			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		l1 = pmap_l0_to_l1(l0, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if (l2 == NULL)
 			continue;
 
 		l3_paddr = pmap_load(l2);
 
 		if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 				pmap_remove_l2(pmap, l2, sva, pmap_load(l1),
 				    &free, &lock);
 				continue;
 			} else if (pmap_demote_l2_locked(pmap, l2, sva,
 			    &lock) == NULL)
 				continue;
 			l3_paddr = pmap_load(l2);
 		}
 
 		/*
 		 * Weed out invalid mappings.
 		 */
 		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
 			continue;
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (va_next > eva)
 			va_next = eva;
 
 		pmap_remove_l3_range(pmap, l3_paddr, sva, va_next, &free,
 		    &lock);
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, true);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  */
 
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	pmap_t pmap;
 	struct rwlock *lock;
 	pd_entry_t *pde, tpde;
 	pt_entry_t *pte, tpte;
 	vm_offset_t va;
 	struct spglist free;
 	int lvl, pvh_gen, md_gen;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_all: page %p is not managed", m));
 	SLIST_INIT(&free);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 retry:
 	rw_wlock(lock);
 	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				rw_wunlock(lock);
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		va = pv->pv_va;
 		pte = pmap_pte(pmap, va, &lvl);
 		KASSERT(pte != NULL,
 		    ("pmap_remove_all: no page table entry found"));
 		KASSERT(lvl == 2,
 		    ("pmap_remove_all: invalid pte level %d", lvl));
 
 		pmap_demote_l2_locked(pmap, pte, va, &lock);
 		PMAP_UNLOCK(pmap);
 	}
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			md_gen = m->md.pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				rw_wunlock(lock);
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		pmap_resident_count_dec(pmap, 1);
 
 		pde = pmap_pde(pmap, pv->pv_va, &lvl);
 		KASSERT(pde != NULL,
 		    ("pmap_remove_all: no page directory entry found"));
 		KASSERT(lvl == 2,
 		    ("pmap_remove_all: invalid pde level %d", lvl));
 		tpde = pmap_load(pde);
 
 		pte = pmap_l2_to_l3(pde, pv->pv_va);
 		tpte = pmap_load_clear(pte);
 		if (tpte & ATTR_SW_WIRED)
 			pmap->pm_stats.wired_count--;
 		if ((tpte & ATTR_AF) != 0) {
 			pmap_invalidate_page(pmap, pv->pv_va);
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		}
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (pmap_pte_dirty(pmap, tpte))
 			vm_page_dirty(m);
 		pmap_unuse_pt(pmap, pv->pv_va, tpde, &free);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		free_pv_entry(pmap, pv);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(lock);
 	vm_page_free_pages_toq(&free, true);
 }
 
 /*
  * pmap_protect_l2: do the things to protect a 2MB page in a pmap
  */
 static void
 pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
     pt_entry_t nbits)
 {
 	pd_entry_t old_l2;
 	vm_page_t m, mt;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
 	KASSERT((sva & L2_OFFSET) == 0,
 	    ("pmap_protect_l2: sva is not 2mpage aligned"));
 	old_l2 = pmap_load(l2);
 	KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
 	    ("pmap_protect_l2: L2e %lx is not a block mapping", old_l2));
 
 	/*
 	 * Return if the L2 entry already has the desired access restrictions
 	 * in place.
 	 */
 retry:
 	if ((old_l2 & mask) == nbits)
 		return;
 
 	/*
 	 * When a dirty read/write superpage mapping is write protected,
 	 * update the dirty field of each of the superpage's constituent 4KB
 	 * pages.
 	 */
 	if ((old_l2 & ATTR_SW_MANAGED) != 0 &&
 	    (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
 	    pmap_pte_dirty(pmap, old_l2)) {
 		m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
 		for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 			vm_page_dirty(mt);
 	}
 
 	if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
 		goto retry;
 
 	/*
 	 * Since a promotion must break the 4KB page mappings before making
 	 * the 2MB page mapping, a pmap_invalidate_page() suffices.
 	 */
 	pmap_invalidate_page(pmap, sva);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	vm_offset_t va, va_next;
 	pd_entry_t *l0, *l1, *l2;
 	pt_entry_t *l3p, l3, mask, nbits;
 
 	PMAP_ASSERT_STAGE1(pmap);
 	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
 	if (prot == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	mask = nbits = 0;
 	if ((prot & VM_PROT_WRITE) == 0) {
 		mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM;
 		nbits |= ATTR_S1_AP(ATTR_S1_AP_RO);
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0) {
 		mask |= ATTR_S1_XN;
 		nbits |= ATTR_S1_XN;
 	}
 	if (mask == 0)
 		return;
 
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 
 		l0 = pmap_l0(pmap, sva);
 		if (pmap_load(l0) == 0) {
 			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		l1 = pmap_l0_to_l1(l0, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if (pmap_load(l2) == 0)
 			continue;
 
 		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 				pmap_protect_l2(pmap, l2, sva, mask, nbits);
 				continue;
 			} else if (pmap_demote_l2(pmap, l2, sva) == NULL)
 				continue;
 		}
 		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
 		    ("pmap_protect: Invalid L2 entry after demotion"));
 
 		if (va_next > eva)
 			va_next = eva;
 
 		va = va_next;
 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
 		    sva += L3_SIZE) {
 			l3 = pmap_load(l3p);
 retry:
 			/*
 			 * Go to the next L3 entry if the current one is
 			 * invalid or already has the desired access
 			 * restrictions in place.  (The latter case occurs
 			 * frequently.  For example, in a "buildworld"
 			 * workload, almost 1 out of 4 L3 entries already
 			 * have the desired restrictions.)
 			 */
 			if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
 				if (va != va_next) {
 					pmap_invalidate_range(pmap, va, sva);
 					va = va_next;
 				}
 				continue;
 			}
 
 			/*
 			 * When a dirty read/write mapping is write protected,
 			 * update the page's dirty field.
 			 */
 			if ((l3 & ATTR_SW_MANAGED) != 0 &&
 			    (nbits & ATTR_S1_AP(ATTR_S1_AP_RO)) != 0 &&
 			    pmap_pte_dirty(pmap, l3))
 				vm_page_dirty(PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK));
 
 			if (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | nbits))
 				goto retry;
 			if (va == va_next)
 				va = sva;
 		}
 		if (va != va_next)
 			pmap_invalidate_range(pmap, va, sva);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Inserts the specified page table page into the specified pmap's collection
  * of idle page table pages.  Each of a pmap's page table pages is responsible
  * for mapping a distinct range of virtual addresses.  The pmap's collection is
  * ordered by this virtual address range.
  *
  * If "promoted" is false, then the page table page "mpte" must be zero filled.
  */
 static __inline int
 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mpte->valid = promoted ? VM_PAGE_BITS_ALL : 0;
 	return (vm_radix_insert(&pmap->pm_root, mpte));
 }
 
 /*
  * Removes the page table page mapping the specified virtual address from the
  * specified pmap's collection of idle page table pages, and returns it.
  * Otherwise, returns NULL if there is no page table page corresponding to the
  * specified virtual address.
  */
 static __inline vm_page_t
 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
 {
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
 }
 
 /*
  * Performs a break-before-make update of a pmap entry. This is needed when
  * either promoting or demoting pages to ensure the TLB doesn't get into an
  * inconsistent state.
  */
 static void
 pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
     vm_offset_t va, vm_size_t size)
 {
 	register_t intr;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * Ensure we don't get switched out with the page table in an
 	 * inconsistent state. We also need to ensure no interrupts fire
 	 * as they may make use of an address we are about to invalidate.
 	 */
 	intr = intr_disable();
 
 	/*
 	 * Clear the old mapping's valid bit, but leave the rest of the entry
 	 * unchanged, so that a lockless, concurrent pmap_kextract() can still
 	 * lookup the physical address.
 	 */
 	pmap_clear_bits(pte, ATTR_DESCR_VALID);
 	pmap_invalidate_range(pmap, va, va + size);
 
 	/* Create the new mapping */
 	pmap_store(pte, newpte);
 	dsb(ishst);
 
 	intr_restore(intr);
 }
 
 #if VM_NRESERVLEVEL > 0
 /*
  * After promotion from 512 4KB page mappings to a single 2MB page mapping,
  * replace the many pv entries for the 4KB page mappings by a single pv entry
  * for the 2MB page mapping.
  */
 static void
 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
     struct rwlock **lockp)
 {
 	struct md_page *pvh;
 	pv_entry_t pv;
 	vm_offset_t va_last;
 	vm_page_t m;
 
 	KASSERT((pa & L2_OFFSET) == 0,
 	    ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
 	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 
 	/*
 	 * Transfer the first page's pv entry for this mapping to the 2mpage's
 	 * pv list.  Aside from avoiding the cost of a call to get_pv_entry(),
 	 * a transfer avoids the possibility that get_pv_entry() calls
 	 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
 	 * mappings that is being promoted.
 	 */
 	m = PHYS_TO_VM_PAGE(pa);
 	va = va & ~L2_OFFSET;
 	pv = pmap_pvh_remove(&m->md, pmap, va);
 	KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
 	pvh = pa_to_pvh(pa);
 	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 	pvh->pv_gen++;
 	/* Free the remaining NPTEPG - 1 pv entries. */
 	va_last = va + L2_SIZE - PAGE_SIZE;
 	do {
 		m++;
 		va += PAGE_SIZE;
 		pmap_pvh_free(&m->md, pmap, va);
 	} while (va < va_last);
 }
 
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single level 2 table entry to a single 2MB page mapping.  For promotion
  * to occur, two conditions must be met: (1) the 4KB page mappings must map
  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
  * identical characteristics.
  */
 static void
 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
 	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
 	vm_page_t mpte;
 	vm_offset_t sva;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
 
 	sva = va & ~L2_OFFSET;
 	firstl3 = pmap_l2_to_l3(l2, sva);
 	newl2 = pmap_load(firstl3);
 
 setl2:
 	if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) {
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return;
 	}
 
 	if ((newl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
 	    (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
 		if (!atomic_fcmpset_64(l2, &newl2, newl2 & ~ATTR_SW_DBM))
 			goto setl2;
 		newl2 &= ~ATTR_SW_DBM;
 	}
 
 	pa = newl2 + L2_SIZE - PAGE_SIZE;
 	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
 		oldl3 = pmap_load(l3);
 setl3:
 		if ((oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) ==
 		    (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM)) {
 			if (!atomic_fcmpset_64(l3, &oldl3, oldl3 &
 			    ~ATTR_SW_DBM))
 				goto setl3;
 			oldl3 &= ~ATTR_SW_DBM;
 		}
 		if (oldl3 != pa) {
 			atomic_add_long(&pmap_l2_p_failures, 1);
 			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return;
 		}
 		pa -= PAGE_SIZE;
 	}
 
 	/*
 	 * Save the page table page in its current state until the L2
 	 * mapping the superpage is demoted by pmap_demote_l2() or
 	 * destroyed by pmap_remove_l3().
 	 */
 	mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
 	KASSERT(mpte >= vm_page_array &&
 	    mpte < &vm_page_array[vm_page_array_size],
 	    ("pmap_promote_l2: page table page is out of range"));
 	KASSERT(mpte->pindex == pmap_l2_pindex(va),
 	    ("pmap_promote_l2: page table page's pindex is wrong"));
 	if (pmap_insert_pt_page(pmap, mpte, true)) {
 		atomic_add_long(&pmap_l2_p_failures, 1);
 		CTR2(KTR_PMAP,
 		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
 		    pmap);
 		return;
 	}
 
 	if ((newl2 & ATTR_SW_MANAGED) != 0)
 		pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);
 
 	newl2 &= ~ATTR_DESCR_MASK;
 	newl2 |= L2_BLOCK;
 
 	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
 
 	atomic_add_long(&pmap_l2_promotions, 1);
 	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
 		    pmap);
 }
 #endif /* VM_NRESERVLEVEL > 0 */
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind)
 {
 	struct rwlock *lock;
 	pd_entry_t *pde;
 	pt_entry_t new_l3, orig_l3;
 	pt_entry_t *l2, *l3;
 	pv_entry_t pv;
 	vm_paddr_t opa, pa;
 	vm_page_t mpte, om;
 	boolean_t nosleep;
 	int lvl, rv;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	va = trunc_page(va);
 	if ((m->oflags & VPO_UNMANAGED) == 0)
 		VM_PAGE_OBJECT_BUSY_ASSERT(m);
 	pa = VM_PAGE_TO_PHYS(m);
 	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) |
 	    L3_PAGE);
 	if ((prot & VM_PROT_WRITE) == 0)
 		new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
 	if ((prot & VM_PROT_EXECUTE) == 0 ||
 	    m->md.pv_memattr == VM_MEMATTR_DEVICE)
 		new_l3 |= ATTR_S1_XN;
 	if ((flags & PMAP_ENTER_WIRED) != 0)
 		new_l3 |= ATTR_SW_WIRED;
 	if (va < VM_MAXUSER_ADDRESS)
 		new_l3 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
 	else
 		new_l3 |= ATTR_S1_UXN;
 	if (pmap != kernel_pmap)
 		new_l3 |= ATTR_S1_nG;
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		new_l3 |= ATTR_SW_MANAGED;
 		if ((prot & VM_PROT_WRITE) != 0) {
 			new_l3 |= ATTR_SW_DBM;
 			if ((flags & VM_PROT_WRITE) == 0)
 				new_l3 |= ATTR_S1_AP(ATTR_S1_AP_RO);
 		}
 	}
 
 	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
 
 	lock = NULL;
 	PMAP_LOCK(pmap);
 	if (psind == 1) {
 		/* Assert the required virtual and physical alignment. */
 		KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
 		KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
 		rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
 		    flags, m, &lock);
 		goto out;
 	}
 	mpte = NULL;
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 retry:
 	pde = pmap_pde(pmap, va, &lvl);
 	if (pde != NULL && lvl == 2) {
 		l3 = pmap_l2_to_l3(pde, va);
 		if (va < VM_MAXUSER_ADDRESS && mpte == NULL) {
 			mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
 			mpte->ref_count++;
 		}
 		goto havel3;
 	} else if (pde != NULL && lvl == 1) {
 		l2 = pmap_l1_to_l2(pde, va);
 		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
 		    (l3 = pmap_demote_l2_locked(pmap, l2, va, &lock)) != NULL) {
 			l3 = &l3[pmap_l3_index(va)];
 			if (va < VM_MAXUSER_ADDRESS) {
 				mpte = PHYS_TO_VM_PAGE(
 				    pmap_load(l2) & ~ATTR_MASK);
 				mpte->ref_count++;
 			}
 			goto havel3;
 		}
 		/* We need to allocate an L3 table. */
 	}
 	if (va < VM_MAXUSER_ADDRESS) {
 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
 
 		/*
 		 * We use _pmap_alloc_l3() instead of pmap_alloc_l3() in order
 		 * to handle the possibility that a superpage mapping for "va"
 		 * was created while we slept.
 		 */
 		mpte = _pmap_alloc_l3(pmap, pmap_l2_pindex(va),
 		    nosleep ? NULL : &lock);
 		if (mpte == NULL && nosleep) {
 			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
 			rv = KERN_RESOURCE_SHORTAGE;
 			goto out;
 		}
 		goto retry;
 	} else
 		panic("pmap_enter: missing L3 table for kernel va %#lx", va);
 
 havel3:
 	orig_l3 = pmap_load(l3);
 	opa = orig_l3 & ~ATTR_MASK;
 	pv = NULL;
 
 	/*
 	 * Is the specified virtual address already mapped?
 	 */
 	if (pmap_l3_valid(orig_l3)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if ((flags & PMAP_ENTER_WIRED) != 0 &&
 		    (orig_l3 & ATTR_SW_WIRED) == 0)
 			pmap->pm_stats.wired_count++;
 		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
 		    (orig_l3 & ATTR_SW_WIRED) != 0)
 			pmap->pm_stats.wired_count--;
 
 		/*
 		 * Remove the extra PT page reference.
 		 */
 		if (mpte != NULL) {
 			mpte->ref_count--;
 			KASSERT(mpte->ref_count > 0,
 			    ("pmap_enter: missing reference to page table page,"
 			     " va: 0x%lx", va));
 		}
 
 		/*
 		 * Has the physical page changed?
 		 */
 		if (opa == pa) {
 			/*
 			 * No, might be a protection or wiring change.
 			 */
 			if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
 			    (new_l3 & ATTR_SW_DBM) != 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 			goto validate;
 		}
 
 		/*
 		 * The physical page has changed.  Temporarily invalidate
 		 * the mapping.
 		 */
 		orig_l3 = pmap_load_clear(l3);
 		KASSERT((orig_l3 & ~ATTR_MASK) == opa,
 		    ("pmap_enter: unexpected pa update for %#lx", va));
 		if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
 			om = PHYS_TO_VM_PAGE(opa);
 
 			/*
 			 * The pmap lock is sufficient to synchronize with
 			 * concurrent calls to pmap_page_test_mappings() and
 			 * pmap_ts_referenced().
 			 */
 			if (pmap_pte_dirty(pmap, orig_l3))
 				vm_page_dirty(om);
 			if ((orig_l3 & ATTR_AF) != 0) {
 				pmap_invalidate_page(pmap, va);
 				vm_page_aflag_set(om, PGA_REFERENCED);
 			}
 			CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
 			pv = pmap_pvh_remove(&om->md, pmap, va);
 			if ((m->oflags & VPO_UNMANAGED) != 0)
 				free_pv_entry(pmap, pv);
 			if ((om->a.flags & PGA_WRITEABLE) != 0 &&
 			    TAILQ_EMPTY(&om->md.pv_list) &&
 			    ((om->flags & PG_FICTITIOUS) != 0 ||
 			    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
 				vm_page_aflag_clear(om, PGA_WRITEABLE);
 		} else {
 			KASSERT((orig_l3 & ATTR_AF) != 0,
 			    ("pmap_enter: unmanaged mapping lacks ATTR_AF"));
 			pmap_invalidate_page(pmap, va);
 		}
 		orig_l3 = 0;
 	} else {
 		/*
 		 * Increment the counters.
 		 */
 		if ((new_l3 & ATTR_SW_WIRED) != 0)
 			pmap->pm_stats.wired_count++;
 		pmap_resident_count_inc(pmap, 1);
 	}
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		if (pv == NULL) {
 			pv = get_pv_entry(pmap, &lock);
 			pv->pv_va = va;
 		}
 		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 		m->md.pv_gen++;
 		if ((new_l3 & ATTR_SW_DBM) != 0)
 			vm_page_aflag_set(m, PGA_WRITEABLE);
 	}
 
 validate:
 	/*
 	 * Sync icache if exec permission and attribute VM_MEMATTR_WRITE_BACK
 	 * is set. Do it now, before the mapping is stored and made
 	 * valid for hardware table walk. If done later, then other can
 	 * access this page before caches are properly synced.
 	 * Don't do it for kernel memory which is mapped with exec
 	 * permission even if the memory isn't going to hold executable
 	 * code. The only time when icache sync is needed is after
 	 * kernel module is loaded and the relocation info is processed.
 	 * And it's done in elf_cpu_load_file().
 	*/
 	if ((prot & VM_PROT_EXECUTE) &&  pmap != kernel_pmap &&
 	    m->md.pv_memattr == VM_MEMATTR_WRITE_BACK &&
 	    (opa != pa || (orig_l3 & ATTR_S1_XN)))
 		cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
 
 	/*
 	 * Update the L3 entry
 	 */
 	if (pmap_l3_valid(orig_l3)) {
 		KASSERT(opa == pa, ("pmap_enter: invalid update"));
 		if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
 			/* same PA, different attributes */
 			orig_l3 = pmap_load_store(l3, new_l3);
 			pmap_invalidate_page(pmap, va);
 			if ((orig_l3 & ATTR_SW_MANAGED) != 0 &&
 			    pmap_pte_dirty(pmap, orig_l3))
 				vm_page_dirty(m);
 		} else {
 			/*
 			 * orig_l3 == new_l3
 			 * This can happens if multiple threads simultaneously
 			 * access not yet mapped page. This bad for performance
 			 * since this can cause full demotion-NOP-promotion
 			 * cycle.
 			 * Another possible reasons are:
 			 * - VM and pmap memory layout are diverged
 			 * - tlb flush is missing somewhere and CPU doesn't see
 			 *   actual mapping.
 			 */
 			CTR4(KTR_PMAP, "%s: already mapped page - "
 			    "pmap %p va 0x%#lx pte 0x%lx",
 			    __func__, pmap, va, new_l3);
 		}
 	} else {
 		/* New mapping */
 		pmap_store(l3, new_l3);
 		dsb(ishst);
 	}
 
 #if VM_NRESERVLEVEL > 0
 	if ((mpte == NULL || mpte->ref_count == NL3PG) &&
 	    pmap_ps_enabled(pmap) &&
 	    (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0) {
 		pmap_promote_l2(pmap, pde, va, &lock);
 	}
 #endif
 
 	rv = KERN_SUCCESS;
 out:
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
  * if successful.  Returns false if (1) a page table page cannot be allocated
  * without sleeping, (2) a mapping already exists at the specified virtual
  * address, or (3) a PV entry cannot be allocated without reclaiming another
  * PV entry.
  */
 static bool
 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     struct rwlock **lockp)
 {
 	pd_entry_t new_l2;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
 
 	new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
 	    ATTR_S1_IDX(m->md.pv_memattr) | ATTR_S1_AP(ATTR_S1_AP_RO) |
 	    L2_BLOCK);
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		new_l2 |= ATTR_SW_MANAGED;
 		new_l2 &= ~ATTR_AF;
 	}
 	if ((prot & VM_PROT_EXECUTE) == 0 ||
 	    m->md.pv_memattr == VM_MEMATTR_DEVICE)
 		new_l2 |= ATTR_S1_XN;
 	if (va < VM_MAXUSER_ADDRESS)
 		new_l2 |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
 	else
 		new_l2 |= ATTR_S1_UXN;
 	if (pmap != kernel_pmap)
 		new_l2 |= ATTR_S1_nG;
 	return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
 	    PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
 	    KERN_SUCCESS);
 }
 
 /*
  * Returns true if every page table entry in the specified page table is
  * zero.
  */
 static bool
 pmap_every_pte_zero(vm_paddr_t pa)
 {
 	pt_entry_t *pt_end, *pte;
 
 	KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned"));
 	pte = (pt_entry_t *)PHYS_TO_DMAP(pa);
 	for (pt_end = pte + Ln_ENTRIES; pte < pt_end; pte++) {
 		if (*pte != 0)
 			return (false);
 	}
 	return (true);
 }
 
 /*
  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
  * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
  * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
  * a mapping already exists at the specified virtual address.  Returns
  * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
  * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
  * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
  *
  * The parameter "m" is only used when creating a managed, writeable mapping.
  */
 static int
 pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
     vm_page_t m, struct rwlock **lockp)
 {
 	struct spglist free;
 	pd_entry_t *l2, old_l2;
 	vm_page_t l2pg, mt;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	if ((l2 = pmap_alloc_l2(pmap, va, &l2pg, (flags &
 	    PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
 		    va, pmap);
 		return (KERN_RESOURCE_SHORTAGE);
 	}
 
 	/*
 	 * If there are existing mappings, either abort or remove them.
 	 */
 	if ((old_l2 = pmap_load(l2)) != 0) {
 		KASSERT(l2pg == NULL || l2pg->ref_count > 1,
 		    ("pmap_enter_l2: l2pg's ref count is too low"));
 		if ((flags & PMAP_ENTER_NOREPLACE) != 0 && (va <
 		    VM_MAXUSER_ADDRESS || (old_l2 & ATTR_DESCR_MASK) ==
 		    L2_BLOCK || !pmap_every_pte_zero(old_l2 & ~ATTR_MASK))) {
 			if (l2pg != NULL)
 				l2pg->ref_count--;
 			CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (KERN_FAILURE);
 		}
 		SLIST_INIT(&free);
 		if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK)
 			(void)pmap_remove_l2(pmap, l2, va,
 			    pmap_load(pmap_l1(pmap, va)), &free, lockp);
 		else
 			pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE,
 			    &free, lockp);
 		if (va < VM_MAXUSER_ADDRESS) {
 			vm_page_free_pages_toq(&free, true);
 			KASSERT(pmap_load(l2) == 0,
 			    ("pmap_enter_l2: non-zero L2 entry %p", l2));
 		} else {
 			KASSERT(SLIST_EMPTY(&free),
 			    ("pmap_enter_l2: freed kernel page table page"));
 
 			/*
 			 * Both pmap_remove_l2() and pmap_remove_l3_range()
 			 * will leave the kernel page table page zero filled.
 			 * Nonetheless, the TLB could have an intermediate
 			 * entry for the kernel page table page.
 			 */
 			mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
 			if (pmap_insert_pt_page(pmap, mt, false))
 				panic("pmap_enter_l2: trie insert failed");
 			pmap_clear(l2);
 			pmap_invalidate_page(pmap, va);
 		}
 	}
 
 	if ((new_l2 & ATTR_SW_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
 		 */
 		if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
 			if (l2pg != NULL)
 				pmap_abort_ptp(pmap, va, l2pg);
 			CTR2(KTR_PMAP,
 			    "pmap_enter_l2: failure for va %#lx in pmap %p",
 			    va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
 		if ((new_l2 & ATTR_SW_DBM) != 0)
 			for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 				vm_page_aflag_set(mt, PGA_WRITEABLE);
 	}
 
 	/*
 	 * Increment counters.
 	 */
 	if ((new_l2 & ATTR_SW_WIRED) != 0)
 		pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
 	pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
 
 	/*
 	 * Map the superpage.
 	 */
 	pmap_store(l2, new_l2);
 	dsb(ishst);
 
 	atomic_add_long(&pmap_l2_mappings, 1);
 	CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p",
 	    va, pmap);
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Maps a sequence of resident pages belonging to the same object.
  * The sequence begins with the given page m_start.  This page is
  * mapped at the given virtual address start.  Each subsequent page is
  * mapped at a virtual address that is offset from start by the same
  * amount as the page is offset from m_start within the object.  The
  * last page in the sequence is the page with the largest offset from
  * m_start that can be mapped at a virtual address less than the given
  * virtual address end.  Not every virtual page between start and end
  * is mapped; only those for which a resident page exists with the
  * corresponding offset from m_start are mapped.
  */
 void
 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	struct rwlock *lock;
 	vm_offset_t va;
 	vm_page_t m, mpte;
 	vm_pindex_t diff, psize;
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 
 	psize = atop(end - start);
 	mpte = NULL;
 	m = m_start;
 	lock = NULL;
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
 		if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
 		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[L2_SIZE / PAGE_SIZE - 1];
 		else
 			mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
 			    &lock);
 		m = TAILQ_NEXT(m, listq);
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * but is *MUCH* faster than pmap_enter...
  */
 
 void
 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	struct rwlock *lock;
 
 	lock = NULL;
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(pmap);
 }
 
 static vm_page_t
 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 {
 	pd_entry_t *pde;
 	pt_entry_t *l2, *l3, l3_val;
 	vm_paddr_t pa;
 	int lvl;
 
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
 
 	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
 		vm_pindex_t l2pindex;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		l2pindex = pmap_l2_pindex(va);
 		if (mpte && (mpte->pindex == l2pindex)) {
 			mpte->ref_count++;
 		} else {
 			/*
 			 * Get the l2 entry
 			 */
 			pde = pmap_pde(pmap, va, &lvl);
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.  Otherwise, we
 			 * attempt to allocate a page table page.  If this
 			 * attempt fails, we don't retry.  Instead, we give up.
 			 */
 			if (lvl == 1) {
 				l2 = pmap_l1_to_l2(pde, va);
 				if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
 				    L2_BLOCK)
 					return (NULL);
 			}
 			if (lvl == 2 && pmap_load(pde) != 0) {
 				mpte =
 				    PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
 				mpte->ref_count++;
 			} else {
 				/*
 				 * Pass NULL instead of the PV list lock
 				 * pointer, because we don't intend to sleep.
 				 */
 				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
 				if (mpte == NULL)
 					return (mpte);
 			}
 		}
 		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
 		l3 = &l3[pmap_l3_index(va)];
 	} else {
 		mpte = NULL;
 		pde = pmap_pde(kernel_pmap, va, &lvl);
 		KASSERT(pde != NULL,
 		    ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
 		     va));
 		KASSERT(lvl == 2,
 		    ("pmap_enter_quick_locked: Invalid level %d", lvl));
 		l3 = pmap_l2_to_l3(pde, va);
 	}
 
 	/*
 	 * Abort if a mapping already exists.
 	 */
 	if (pmap_load(l3) != 0) {
 		if (mpte != NULL)
 			mpte->ref_count--;
 		return (NULL);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory.
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
 		if (mpte != NULL)
 			pmap_abort_ptp(pmap, va, mpte);
 		return (NULL);
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap_resident_count_inc(pmap, 1);
 
 	pa = VM_PAGE_TO_PHYS(m);
 	l3_val = pa | ATTR_DEFAULT | ATTR_S1_IDX(m->md.pv_memattr) |
 	    ATTR_S1_AP(ATTR_S1_AP_RO) | L3_PAGE;
 	if ((prot & VM_PROT_EXECUTE) == 0 ||
 	    m->md.pv_memattr == VM_MEMATTR_DEVICE)
 		l3_val |= ATTR_S1_XN;
 	if (va < VM_MAXUSER_ADDRESS)
 		l3_val |= ATTR_S1_AP(ATTR_S1_AP_USER) | ATTR_S1_PXN;
 	else
 		l3_val |= ATTR_S1_UXN;
 	if (pmap != kernel_pmap)
 		l3_val |= ATTR_S1_nG;
 
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		l3_val |= ATTR_SW_MANAGED;
 		l3_val &= ~ATTR_AF;
 	}
 
 	/* Sync icache before the mapping is stored to PTE */
 	if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap &&
 	    m->md.pv_memattr == VM_MEMATTR_WRITE_BACK)
 		cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE);
 
 	pmap_store(l3, l3_val);
 	dsb(ishst);
 
 	return (mpte);
 }
 
 /*
  * This code maps large physical mmap regions into the
  * processor address space.  Note that some shortcuts
  * are taken, but the code works.
  */
 void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 }
 
 /*
  *	Clear the wired attribute from the mappings for the specified range of
  *	addresses in the given pmap.  Every valid mapping within that range
  *	must have the wired attribute set.  In contrast, invalid mappings
  *	cannot have the wired attribute set, so they are ignored.
  *
  *	The wired attribute of the page table entry is not a hardware feature,
  *	so there is no need to invalidate any TLB entries.
  */
 void
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t va_next;
 	pd_entry_t *l0, *l1, *l2;
 	pt_entry_t *l3;
 
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		l0 = pmap_l0(pmap, sva);
 		if (pmap_load(l0) == 0) {
 			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		l1 = pmap_l0_to_l1(l0, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 
 		l2 = pmap_l1_to_l2(l1, sva);
 		if (pmap_load(l2) == 0)
 			continue;
 
 		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
 			if ((pmap_load(l2) & ATTR_SW_WIRED) == 0)
 				panic("pmap_unwire: l2 %#jx is missing "
 				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l2));
 
 			/*
 			 * Are we unwiring the entire large page?  If not,
 			 * demote the mapping and fall through.
 			 */
 			if (sva + L2_SIZE == va_next && eva >= va_next) {
 				pmap_clear_bits(l2, ATTR_SW_WIRED);
 				pmap->pm_stats.wired_count -= L2_SIZE /
 				    PAGE_SIZE;
 				continue;
 			} else if (pmap_demote_l2(pmap, l2, sva) == NULL)
 				panic("pmap_unwire: demotion failed");
 		}
 		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
 		    ("pmap_unwire: Invalid l2 entry after demotion"));
 
 		if (va_next > eva)
 			va_next = eva;
 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 		    sva += L3_SIZE) {
 			if (pmap_load(l3) == 0)
 				continue;
 			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
 				panic("pmap_unwire: l3 %#jx is missing "
 				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
 
 			/*
 			 * ATTR_SW_WIRED must be cleared atomically.  Although
 			 * the pmap lock synchronizes access to ATTR_SW_WIRED,
 			 * the System MMU may write to the entry concurrently.
 			 */
 			pmap_clear_bits(l3, ATTR_SW_WIRED);
 			pmap->pm_stats.wired_count--;
 		}
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  *
  *	Because the executable mappings created by this routine are copied,
  *	it should not have to flush the instruction cache.
  */
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
     vm_offset_t src_addr)
 {
 	struct rwlock *lock;
 	pd_entry_t *l0, *l1, *l2, srcptepaddr;
 	pt_entry_t *dst_pte, mask, nbits, ptetemp, *src_pte;
 	vm_offset_t addr, end_addr, va_next;
 	vm_page_t dst_l2pg, dstmpte, srcmpte;
 
 	PMAP_ASSERT_STAGE1(dst_pmap);
 	PMAP_ASSERT_STAGE1(src_pmap);
 
 	if (dst_addr != src_addr)
 		return;
 	end_addr = src_addr + len;
 	lock = NULL;
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	for (addr = src_addr; addr < end_addr; addr = va_next) {
 		l0 = pmap_l0(src_pmap, addr);
 		if (pmap_load(l0) == 0) {
 			va_next = (addr + L0_SIZE) & ~L0_OFFSET;
 			if (va_next < addr)
 				va_next = end_addr;
 			continue;
 		}
 		l1 = pmap_l0_to_l1(l0, addr);
 		if (pmap_load(l1) == 0) {
 			va_next = (addr + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < addr)
 				va_next = end_addr;
 			continue;
 		}
 		va_next = (addr + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < addr)
 			va_next = end_addr;
 		l2 = pmap_l1_to_l2(l1, addr);
 		srcptepaddr = pmap_load(l2);
 		if (srcptepaddr == 0)
 			continue;
 		if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {
 			if ((addr & L2_OFFSET) != 0 ||
 			    addr + L2_SIZE > end_addr)
 				continue;
 			l2 = pmap_alloc_l2(dst_pmap, addr, &dst_l2pg, NULL);
 			if (l2 == NULL)
 				break;
 			if (pmap_load(l2) == 0 &&
 			    ((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
 			    pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
 			    PMAP_ENTER_NORECLAIM, &lock))) {
 				mask = ATTR_AF | ATTR_SW_WIRED;
 				nbits = 0;
 				if ((srcptepaddr & ATTR_SW_DBM) != 0)
 					nbits |= ATTR_S1_AP_RW_BIT;
 				pmap_store(l2, (srcptepaddr & ~mask) | nbits);
 				pmap_resident_count_inc(dst_pmap, L2_SIZE /
 				    PAGE_SIZE);
 				atomic_add_long(&pmap_l2_mappings, 1);
 			} else
 				pmap_abort_ptp(dst_pmap, addr, dst_l2pg);
 			continue;
 		}
 		KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
 		    ("pmap_copy: invalid L2 entry"));
 		srcptepaddr &= ~ATTR_MASK;
 		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
 		KASSERT(srcmpte->ref_count > 0,
 		    ("pmap_copy: source page table page is unused"));
 		if (va_next > end_addr)
 			va_next = end_addr;
 		src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr);
 		src_pte = &src_pte[pmap_l3_index(addr)];
 		dstmpte = NULL;
 		for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {
 			ptetemp = pmap_load(src_pte);
 
 			/*
 			 * We only virtual copy managed pages.
 			 */
 			if ((ptetemp & ATTR_SW_MANAGED) == 0)
 				continue;
 
 			if (dstmpte != NULL) {
 				KASSERT(dstmpte->pindex == pmap_l2_pindex(addr),
 				    ("dstmpte pindex/addr mismatch"));
 				dstmpte->ref_count++;
 			} else if ((dstmpte = pmap_alloc_l3(dst_pmap, addr,
 			    NULL)) == NULL)
 				goto out;
 			dst_pte = (pt_entry_t *)
 			    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
 			dst_pte = &dst_pte[pmap_l3_index(addr)];
 			if (pmap_load(dst_pte) == 0 &&
 			    pmap_try_insert_pv_entry(dst_pmap, addr,
 			    PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) {
 				/*
 				 * Clear the wired, modified, and accessed
 				 * (referenced) bits during the copy.
 				 */
 				mask = ATTR_AF | ATTR_SW_WIRED;
 				nbits = 0;
 				if ((ptetemp & ATTR_SW_DBM) != 0)
 					nbits |= ATTR_S1_AP_RW_BIT;
 				pmap_store(dst_pte, (ptetemp & ~mask) | nbits);
 				pmap_resident_count_inc(dst_pmap, 1);
 			} else {
 				pmap_abort_ptp(dst_pmap, addr, dstmpte);
 				goto out;
 			}
 			/* Have we copied all of the valid mappings? */ 
 			if (dstmpte->ref_count >= srcmpte->ref_count)
 				break;
 		}
 	}
 out:
 	/*
 	 * XXX This barrier may not be needed because the destination pmap is
 	 * not active.
 	 */
 	dsb(ishst);
 
 	if (lock != NULL)
 		rw_wunlock(lock);
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 
 	pagezero((void *)va);
 }
 
 /*
  *	pmap_zero_page_area zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 
 	if (off == 0 && size == PAGE_SIZE)
 		pagezero((void *)va);
 	else
 		bzero((char *)va + off, size);
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  */
 void
 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 {
 	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 
 	pagecopy((void *)src, (void *)dst);
 }
 
 int unmapped_buf_allowed = 1;
 
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
 	void *a_cp, *b_cp;
 	vm_page_t m_a, m_b;
 	vm_paddr_t p_a, p_b;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	int cnt;
 
 	while (xfersize > 0) {
 		a_pg_offset = a_offset & PAGE_MASK;
 		m_a = ma[a_offset >> PAGE_SHIFT];
 		p_a = m_a->phys_addr;
 		b_pg_offset = b_offset & PAGE_MASK;
 		m_b = mb[b_offset >> PAGE_SHIFT];
 		p_b = m_b->phys_addr;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
 			panic("!DMAP a %lx", p_a);
 		} else {
 			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
 		}
 		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
 			panic("!DMAP b %lx", p_b);
 		} else {
 			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
 		}
 		bcopy(a_cp, b_cp, cnt);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
 	}
 }
 
 vm_offset_t
 pmap_quick_enter_page(vm_page_t m)
 {
 
 	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 }
 
 void
 pmap_quick_remove_page(vm_offset_t addr)
 {
 }
 
 /*
  * Returns true if the pmap's pv is one of the first
  * 16 pvs linked to from this page.  This count may
  * be changed upwards or downwards in the future; it
  * is only necessary that true be returned for a small
  * subset of pmaps for proper page aging.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pv_entry_t pv;
 	int loops = 0;
 	boolean_t rv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	rv = FALSE;
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		if (PV_PMAP(pv) == pmap) {
 			rv = TRUE;
 			break;
 		}
 		loops++;
 		if (loops >= 16)
 			break;
 	}
 	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			if (PV_PMAP(pv) == pmap) {
 				rv = TRUE;
 				break;
 			}
 			loops++;
 			if (loops >= 16)
 				break;
 		}
 	}
 	rw_runlock(lock);
 	return (rv);
 }
 
 /*
  *	pmap_page_wired_mappings:
  *
  *	Return the number of managed mappings to the given physical page
  *	that are wired.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	struct rwlock *lock;
 	struct md_page *pvh;
 	pmap_t pmap;
 	pt_entry_t *pte;
 	pv_entry_t pv;
 	int count, lvl, md_gen, pvh_gen;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (0);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 restart:
 	count = 0;
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			rw_runlock(lock);
 			PMAP_LOCK(pmap);
 			rw_rlock(lock);
 			if (md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		pte = pmap_pte(pmap, pv->pv_va, &lvl);
 		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
 			count++;
 		PMAP_UNLOCK(pmap);
 	}
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			pmap = PV_PMAP(pv);
 			if (!PMAP_TRYLOCK(pmap)) {
 				md_gen = m->md.pv_gen;
 				pvh_gen = pvh->pv_gen;
 				rw_runlock(lock);
 				PMAP_LOCK(pmap);
 				rw_rlock(lock);
 				if (md_gen != m->md.pv_gen ||
 				    pvh_gen != pvh->pv_gen) {
 					PMAP_UNLOCK(pmap);
 					goto restart;
 				}
 			}
 			pte = pmap_pte(pmap, pv->pv_va, &lvl);
 			if (pte != NULL &&
 			    (pmap_load(pte) & ATTR_SW_WIRED) != 0)
 				count++;
 			PMAP_UNLOCK(pmap);
 		}
 	}
 	rw_runlock(lock);
 	return (count);
 }
 
 /*
  * Returns true if the given page is mapped individually or as part of
  * a 2mpage.  Otherwise, returns false.
  */
 bool
 pmap_page_is_mapped(vm_page_t m)
 {
 	struct rwlock *lock;
 	bool rv;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (false);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 	rv = !TAILQ_EMPTY(&m->md.pv_list) ||
 	    ((m->flags & PG_FICTITIOUS) == 0 &&
 	    !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
 	rw_runlock(lock);
 	return (rv);
 }
 
 /*
  * Destroy all managed, non-wired mappings in the given user-space
  * pmap.  This pmap cannot be active on any processor besides the
  * caller.
  *
  * This function cannot be applied to the kernel pmap.  Moreover, it
  * is not intended for general use.  It is only to be used during
  * process termination.  Consequently, it can be implemented in ways
  * that make it faster than pmap_remove().  First, it can more quickly
  * destroy mappings by iterating over the pmap's collection of PV
  * entries, rather than searching the page table.  Second, it doesn't
  * have to test and clear the page table entries atomically, because
  * no processor is currently accessing the user address space.  In
  * particular, a page table entry's dirty bit won't change state once
  * this function starts.
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pd_entry_t *pde;
 	pt_entry_t *pte, tpte;
 	struct spglist free;
 	vm_page_t m, ml3, mt;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	struct pv_chunk *pc, *npc;
 	struct rwlock *lock;
 	int64_t bit;
 	uint64_t inuse, bitmask;
 	int allfree, field, freed, idx, lvl;
 	vm_paddr_t pa;
 
 	KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap));
 
 	lock = NULL;
 
 	SLIST_INIT(&free);
 	PMAP_LOCK(pmap);
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		allfree = 1;
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			inuse = ~pc->pc_map[field] & pc_freemask[field];
 			while (inuse != 0) {
 				bit = ffsl(inuse) - 1;
 				bitmask = 1UL << bit;
 				idx = field * 64 + bit;
 				pv = &pc->pc_pventry[idx];
 				inuse &= ~bitmask;
 
 				pde = pmap_pde(pmap, pv->pv_va, &lvl);
 				KASSERT(pde != NULL,
 				    ("Attempting to remove an unmapped page"));
 
 				switch(lvl) {
 				case 1:
 					pte = pmap_l1_to_l2(pde, pv->pv_va);
 					tpte = pmap_load(pte); 
 					KASSERT((tpte & ATTR_DESCR_MASK) ==
 					    L2_BLOCK,
 					    ("Attempting to remove an invalid "
 					    "block: %lx", tpte));
 					break;
 				case 2:
 					pte = pmap_l2_to_l3(pde, pv->pv_va);
 					tpte = pmap_load(pte);
 					KASSERT((tpte & ATTR_DESCR_MASK) ==
 					    L3_PAGE,
 					    ("Attempting to remove an invalid "
 					     "page: %lx", tpte));
 					break;
 				default:
 					panic(
 					    "Invalid page directory level: %d",
 					    lvl);
 				}
 
 /*
  * We cannot remove wired pages from a process' mapping at this time
  */
 				if (tpte & ATTR_SW_WIRED) {
 					allfree = 0;
 					continue;
 				}
 
 				pa = tpte & ~ATTR_MASK;
 
 				m = PHYS_TO_VM_PAGE(pa);
 				KASSERT(m->phys_addr == pa,
 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 				    m, (uintmax_t)m->phys_addr,
 				    (uintmax_t)tpte));
 
 				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 				    m < &vm_page_array[vm_page_array_size],
 				    ("pmap_remove_pages: bad pte %#jx",
 				    (uintmax_t)tpte));
 
 				/*
 				 * Because this pmap is not active on other
 				 * processors, the dirty bit cannot have
 				 * changed state since we last loaded pte.
 				 */
 				pmap_clear(pte);
 
 				/*
 				 * Update the vm_page_t clean/reference bits.
 				 */
 				if (pmap_pte_dirty(pmap, tpte)) {
 					switch (lvl) {
 					case 1:
 						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 							vm_page_dirty(mt);
 						break;
 					case 2:
 						vm_page_dirty(m);
 						break;
 					}
 				}
 
 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 
 				/* Mark free */
 				pc->pc_map[field] |= bitmask;
 				switch (lvl) {
 				case 1:
 					pmap_resident_count_dec(pmap,
 					    L2_SIZE / PAGE_SIZE);
 					pvh = pa_to_pvh(tpte & ~ATTR_MASK);
 					TAILQ_REMOVE(&pvh->pv_list, pv,pv_next);
 					pvh->pv_gen++;
 					if (TAILQ_EMPTY(&pvh->pv_list)) {
 						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 							if ((mt->a.flags & PGA_WRITEABLE) != 0 &&
 							    TAILQ_EMPTY(&mt->md.pv_list))
 								vm_page_aflag_clear(mt, PGA_WRITEABLE);
 					}
 					ml3 = pmap_remove_pt_page(pmap,
 					    pv->pv_va);
 					if (ml3 != NULL) {
 						KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
 						    ("pmap_remove_pages: l3 page not promoted"));
 						pmap_resident_count_dec(pmap,1);
 						KASSERT(ml3->ref_count == NL3PG,
 						    ("pmap_remove_pages: l3 page ref count error"));
 						ml3->ref_count = 0;
 						pmap_add_delayed_free_list(ml3,
 						    &free, FALSE);
 					}
 					break;
 				case 2:
 					pmap_resident_count_dec(pmap, 1);
 					TAILQ_REMOVE(&m->md.pv_list, pv,
 					    pv_next);
 					m->md.pv_gen++;
 					if ((m->a.flags & PGA_WRITEABLE) != 0 &&
 					    TAILQ_EMPTY(&m->md.pv_list) &&
 					    (m->flags & PG_FICTITIOUS) == 0) {
 						pvh = pa_to_pvh(
 						    VM_PAGE_TO_PHYS(m));
 						if (TAILQ_EMPTY(&pvh->pv_list))
 							vm_page_aflag_clear(m,
 							    PGA_WRITEABLE);
 					}
 					break;
 				}
 				pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde),
 				    &free);
 				freed++;
 			}
 		}
 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 		if (allfree) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 			free_pv_chunk(pc);
 		}
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	pmap_invalidate_all(pmap);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, true);
 }
 
 /*
  * This is used to check if a page has been accessed or modified.
  */
 static boolean_t
 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
 {
 	struct rwlock *lock;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	pt_entry_t *pte, mask, value;
 	pmap_t pmap;
 	int lvl, md_gen, pvh_gen;
 	boolean_t rv;
 
 	rv = FALSE;
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_rlock(lock);
 restart:
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			rw_runlock(lock);
 			PMAP_LOCK(pmap);
 			rw_rlock(lock);
 			if (md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		pte = pmap_pte(pmap, pv->pv_va, &lvl);
 		KASSERT(lvl == 3,
 		    ("pmap_page_test_mappings: Invalid level %d", lvl));
 		mask = 0;
 		value = 0;
 		if (modified) {
 			mask |= ATTR_S1_AP_RW_BIT;
 			value |= ATTR_S1_AP(ATTR_S1_AP_RW);
 		}
 		if (accessed) {
 			mask |= ATTR_AF | ATTR_DESCR_MASK;
 			value |= ATTR_AF | L3_PAGE;
 		}
 		rv = (pmap_load(pte) & mask) == value;
 		PMAP_UNLOCK(pmap);
 		if (rv)
 			goto out;
 	}
 	if ((m->flags & PG_FICTITIOUS) == 0) {
 		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 			pmap = PV_PMAP(pv);
 			PMAP_ASSERT_STAGE1(pmap);
 			if (!PMAP_TRYLOCK(pmap)) {
 				md_gen = m->md.pv_gen;
 				pvh_gen = pvh->pv_gen;
 				rw_runlock(lock);
 				PMAP_LOCK(pmap);
 				rw_rlock(lock);
 				if (md_gen != m->md.pv_gen ||
 				    pvh_gen != pvh->pv_gen) {
 					PMAP_UNLOCK(pmap);
 					goto restart;
 				}
 			}
 			pte = pmap_pte(pmap, pv->pv_va, &lvl);
 			KASSERT(lvl == 2,
 			    ("pmap_page_test_mappings: Invalid level %d", lvl));
 			mask = 0;
 			value = 0;
 			if (modified) {
 				mask |= ATTR_S1_AP_RW_BIT;
 				value |= ATTR_S1_AP(ATTR_S1_AP_RW);
 			}
 			if (accessed) {
 				mask |= ATTR_AF | ATTR_DESCR_MASK;
 				value |= ATTR_AF | L2_BLOCK;
 			}
 			rv = (pmap_load(pte) & mask) == value;
 			PMAP_UNLOCK(pmap);
 			if (rv)
 				goto out;
 		}
 	}
 out:
 	rw_runlock(lock);
 	return (rv);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 
 	/*
 	 * If the page is not busied then this check is racy.
 	 */
 	if (!pmap_page_is_write_mapped(m))
 		return (FALSE);
 	return (pmap_page_test_mappings(m, FALSE, TRUE));
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	pt_entry_t *pte;
 	boolean_t rv;
 	int lvl;
 
 	rv = FALSE;
 	PMAP_LOCK(pmap);
 	pte = pmap_pte(pmap, addr, &lvl);
 	if (pte != NULL && pmap_load(pte) != 0) {
 		rv = TRUE;
 	}
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
 
 /*
  *	pmap_is_referenced:
  *
  *	Return whether or not the specified physical page was referenced
  *	in any physical maps.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 	return (pmap_page_test_mappings(m, TRUE, FALSE));
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct md_page *pvh;
 	pmap_t pmap;
 	struct rwlock *lock;
 	pv_entry_t next_pv, pv;
 	pt_entry_t oldpte, *pte;
 	vm_offset_t va;
 	int lvl, md_gen, pvh_gen;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 	vm_page_assert_busied(m);
 
 	if (!pmap_page_is_write_mapped(m))
 		return;
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 retry_pv_loop:
 	rw_wlock(lock);
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(lock);
 				goto retry_pv_loop;
 			}
 		}
 		va = pv->pv_va;
 		pte = pmap_pte(pmap, pv->pv_va, &lvl);
 		if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
 			(void)pmap_demote_l2_locked(pmap, pte, va, &lock);
 		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 		    ("inconsistent pv lock %p %p for page %p",
 		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 		PMAP_UNLOCK(pmap);
 	}
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			md_gen = m->md.pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen ||
 			    md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				rw_wunlock(lock);
 				goto retry_pv_loop;
 			}
 		}
 		pte = pmap_pte(pmap, pv->pv_va, &lvl);
 		oldpte = pmap_load(pte);
 retry:
 		if ((oldpte & ATTR_SW_DBM) != 0) {
 			if (!atomic_fcmpset_long(pte, &oldpte,
 			    (oldpte | ATTR_S1_AP_RW_BIT) & ~ATTR_SW_DBM))
 				goto retry;
 			if ((oldpte & ATTR_S1_AP_RW_BIT) ==
 			    ATTR_S1_AP(ATTR_S1_AP_RW))
 				vm_page_dirty(m);
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	rw_wunlock(lock);
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of reference bits for a page, clearing those bits.
  *	It is not necessary for every reference bit to be cleared, but it
  *	is necessary that 0 only be returned when there are truly no
  *	reference bits set.
  *
  *	As an optimization, update the page's dirty field if a modified bit is
  *	found while counting reference bits.  This opportunistic update can be
  *	performed at low cost and can eliminate the need for some future calls
  *	to pmap_is_modified().  However, since this function stops after
  *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
  *	dirty pages.  Those dirty pages will only be detected by a future call
  *	to pmap_is_modified().
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct md_page *pvh;
 	pv_entry_t pv, pvf;
 	pmap_t pmap;
 	struct rwlock *lock;
 	pd_entry_t *pde, tpde;
 	pt_entry_t *pte, tpte;
 	vm_offset_t va;
 	vm_paddr_t pa;
 	int cleared, lvl, md_gen, not_cleared, pvh_gen;
 	struct spglist free;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	SLIST_INIT(&free);
 	cleared = 0;
 	pa = VM_PAGE_TO_PHYS(m);
 	lock = PHYS_TO_PV_LIST_LOCK(pa);
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
 	rw_wlock(lock);
 retry:
 	not_cleared = 0;
 	if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 		goto small_mappings;
 	pv = pvf;
 	do {
 		if (pvf == NULL)
 			pvf = pv;
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		va = pv->pv_va;
 		pde = pmap_pde(pmap, pv->pv_va, &lvl);
 		KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
 		KASSERT(lvl == 1,
 		    ("pmap_ts_referenced: invalid pde level %d", lvl));
 		tpde = pmap_load(pde);
 		KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
 		    ("pmap_ts_referenced: found an invalid l1 table"));
 		pte = pmap_l1_to_l2(pde, pv->pv_va);
 		tpte = pmap_load(pte);
 		if (pmap_pte_dirty(pmap, tpte)) {
 			/*
 			 * Although "tpte" is mapping a 2MB page, because
 			 * this function is called at a 4KB page granularity,
 			 * we only update the 4KB page under test.
 			 */
 			vm_page_dirty(m);
 		}
 
 		if ((tpte & ATTR_AF) != 0) {
 			/*
 			 * Since this reference bit is shared by 512 4KB pages,
 			 * it should not be cleared every time it is tested.
 			 * Apply a simple "hash" function on the physical page
 			 * number, the virtual superpage number, and the pmap
 			 * address to select one 4KB page out of the 512 on
 			 * which testing the reference bit will result in
 			 * clearing that reference bit.  This function is
 			 * designed to avoid the selection of the same 4KB page
 			 * for every 2MB page mapping.
 			 *
 			 * On demotion, a mapping that hasn't been referenced
 			 * is simply destroyed.  To avoid the possibility of a
 			 * subsequent page fault on a demoted wired mapping,
 			 * always leave its reference bit set.  Moreover,
 			 * since the superpage is wired, the current state of
 			 * its reference bit won't affect page replacement.
 			 */
 			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
 			    (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
 			    (tpte & ATTR_SW_WIRED) == 0) {
 				pmap_clear_bits(pte, ATTR_AF);
 				pmap_invalidate_page(pmap, pv->pv_va);
 				cleared++;
 			} else
 				not_cleared++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 			pvh->pv_gen++;
 		}
 		if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
 			goto out;
 	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 small_mappings:
 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 		goto out;
 	pv = pvf;
 	do {
 		if (pvf == NULL)
 			pvf = pv;
 		pmap = PV_PMAP(pv);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			md_gen = m->md.pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto retry;
 			}
 		}
 		pde = pmap_pde(pmap, pv->pv_va, &lvl);
 		KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
 		KASSERT(lvl == 2,
 		    ("pmap_ts_referenced: invalid pde level %d", lvl));
 		tpde = pmap_load(pde);
 		KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
 		    ("pmap_ts_referenced: found an invalid l2 table"));
 		pte = pmap_l2_to_l3(pde, pv->pv_va);
 		tpte = pmap_load(pte);
 		if (pmap_pte_dirty(pmap, tpte))
 			vm_page_dirty(m);
 		if ((tpte & ATTR_AF) != 0) {
 			if ((tpte & ATTR_SW_WIRED) == 0) {
 				pmap_clear_bits(pte, ATTR_AF);
 				pmap_invalidate_page(pmap, pv->pv_va);
 				cleared++;
 			} else
 				not_cleared++;
 		}
 		PMAP_UNLOCK(pmap);
 		/* Rotate the PV list if it has more than one entry. */
 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 			m->md.pv_gen++;
 		}
 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
 	    not_cleared < PMAP_TS_REFERENCED_MAX);
 out:
 	rw_wunlock(lock);
 	vm_page_free_pages_toq(&free, true);
 	return (cleared + not_cleared);
 }
 
 /*
  *	Apply the given advice to the specified range of addresses within the
  *	given pmap.  Depending on the advice, clear the referenced and/or
  *	modified flags in each mapping and set the mapped page's dirty field.
  */
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
 	struct rwlock *lock;
 	vm_offset_t va, va_next;
 	vm_page_t m;
 	pd_entry_t *l0, *l1, *l2, oldl2;
 	pt_entry_t *l3, oldl3;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	if (advice != MADV_DONTNEED && advice != MADV_FREE)
 		return;
 
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		l0 = pmap_l0(pmap, sva);
 		if (pmap_load(l0) == 0) {
 			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 		l1 = pmap_l0_to_l1(l0, sva);
 		if (pmap_load(l1) == 0) {
 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}
 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 		if (va_next < sva)
 			va_next = eva;
 		l2 = pmap_l1_to_l2(l1, sva);
 		oldl2 = pmap_load(l2);
 		if (oldl2 == 0)
 			continue;
 		if ((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK) {
 			if ((oldl2 & ATTR_SW_MANAGED) == 0)
 				continue;
 			lock = NULL;
 			if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) {
 				if (lock != NULL)
 					rw_wunlock(lock);
 
 				/*
 				 * The 2MB page mapping was destroyed.
 				 */
 				continue;
 			}
 
 			/*
 			 * Unless the page mappings are wired, remove the
 			 * mapping to a single page so that a subsequent
 			 * access may repromote.  Choosing the last page
 			 * within the address range [sva, min(va_next, eva))
 			 * generally results in more repromotions.  Since the
 			 * underlying page table page is fully populated, this
 			 * removal never frees a page table page.
 			 */
 			if ((oldl2 & ATTR_SW_WIRED) == 0) {
 				va = eva;
 				if (va > va_next)
 					va = va_next;
 				va -= PAGE_SIZE;
 				KASSERT(va >= sva,
 				    ("pmap_advise: no address gap"));
 				l3 = pmap_l2_to_l3(l2, va);
 				KASSERT(pmap_load(l3) != 0,
 				    ("pmap_advise: invalid PTE"));
 				pmap_remove_l3(pmap, l3, va, pmap_load(l2),
 				    NULL, &lock);
 			}
 			if (lock != NULL)
 				rw_wunlock(lock);
 		}
 		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
 		    ("pmap_advise: invalid L2 entry after demotion"));
 		if (va_next > eva)
 			va_next = eva;
 		va = va_next;
 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 		    sva += L3_SIZE) {
 			oldl3 = pmap_load(l3);
 			if ((oldl3 & (ATTR_SW_MANAGED | ATTR_DESCR_MASK)) !=
 			    (ATTR_SW_MANAGED | L3_PAGE))
 				goto maybe_invlrng;
 			else if (pmap_pte_dirty(pmap, oldl3)) {
 				if (advice == MADV_DONTNEED) {
 					/*
 					 * Future calls to pmap_is_modified()
 					 * can be avoided by making the page
 					 * dirty now.
 					 */
 					m = PHYS_TO_VM_PAGE(oldl3 & ~ATTR_MASK);
 					vm_page_dirty(m);
 				}
 				while (!atomic_fcmpset_long(l3, &oldl3,
 				    (oldl3 & ~ATTR_AF) |
 				    ATTR_S1_AP(ATTR_S1_AP_RO)))
 					cpu_spinwait();
 			} else if ((oldl3 & ATTR_AF) != 0)
 				pmap_clear_bits(l3, ATTR_AF);
 			else
 				goto maybe_invlrng;
 			if (va == va_next)
 				va = sva;
 			continue;
 maybe_invlrng:
 			if (va != va_next) {
 				pmap_invalidate_range(pmap, va, sva);
 				va = va_next;
 			}
 		}
 		if (va != va_next)
 			pmap_invalidate_range(pmap, va, sva);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct md_page *pvh;
 	struct rwlock *lock;
 	pmap_t pmap;
 	pv_entry_t next_pv, pv;
 	pd_entry_t *l2, oldl2;
 	pt_entry_t *l3, oldl3;
 	vm_offset_t va;
 	int md_gen, pvh_gen;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	vm_page_assert_busied(m);
 
 	if (!pmap_page_is_write_mapped(m))
 		return;
 	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_wlock(lock);
 restart:
 	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		va = pv->pv_va;
 		l2 = pmap_l2(pmap, va);
 		oldl2 = pmap_load(l2);
 		/* If oldl2 has ATTR_SW_DBM set, then it is also dirty. */
 		if ((oldl2 & ATTR_SW_DBM) != 0 &&
 		    pmap_demote_l2_locked(pmap, l2, va, &lock) &&
 		    (oldl2 & ATTR_SW_WIRED) == 0) {
 			/*
 			 * Write protect the mapping to a single page so that
 			 * a subsequent write access may repromote.
 			 */
 			va += VM_PAGE_TO_PHYS(m) - (oldl2 & ~ATTR_MASK);
 			l3 = pmap_l2_to_l3(l2, va);
 			oldl3 = pmap_load(l3);
 			while (!atomic_fcmpset_long(l3, &oldl3,
 			    (oldl3 & ~ATTR_SW_DBM) | ATTR_S1_AP(ATTR_S1_AP_RO)))
 				cpu_spinwait();
 			vm_page_dirty(m);
 			pmap_invalidate_page(pmap, va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 		pmap = PV_PMAP(pv);
 		PMAP_ASSERT_STAGE1(pmap);
 		if (!PMAP_TRYLOCK(pmap)) {
 			md_gen = m->md.pv_gen;
 			pvh_gen = pvh->pv_gen;
 			rw_wunlock(lock);
 			PMAP_LOCK(pmap);
 			rw_wlock(lock);
 			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 				PMAP_UNLOCK(pmap);
 				goto restart;
 			}
 		}
 		l2 = pmap_l2(pmap, pv->pv_va);
 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
 		oldl3 = pmap_load(l3);
 		if (pmap_l3_valid(oldl3) &&
 		    (oldl3 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM){
 			pmap_set_bits(l3, ATTR_S1_AP(ATTR_S1_AP_RO));
 			pmap_invalidate_page(pmap, pv->pv_va);
 		}
 		PMAP_UNLOCK(pmap);
 	}
 	rw_wunlock(lock);
 }
 
 void *
 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 {
 	struct pmap_preinit_mapping *ppim;
 	vm_offset_t va, offset;
 	pd_entry_t *pde;
 	pt_entry_t *l2;
 	int i, lvl, l2_blocks, free_l2_count, start_idx;
 
 	if (!vm_initialized) {
 		/*
 		 * No L3 ptables so map entire L2 blocks where start VA is:
 		 * 	preinit_map_va + start_idx * L2_SIZE
 		 * There may be duplicate mappings (multiple VA -> same PA) but
 		 * ARM64 dcache is always PIPT so that's acceptable.
 		 */
 		 if (size == 0)
 			 return (NULL);
 
 		 /* Calculate how many L2 blocks are needed for the mapping */
 		l2_blocks = (roundup2(pa + size, L2_SIZE) -
 		    rounddown2(pa, L2_SIZE)) >> L2_SHIFT;
 
 		offset = pa & L2_OFFSET;
 
 		if (preinit_map_va == 0)
 			return (NULL);
 
 		/* Map 2MiB L2 blocks from reserved VA space */
 
 		free_l2_count = 0;
 		start_idx = -1;
 		/* Find enough free contiguous VA space */
 		for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
 			ppim = pmap_preinit_mapping + i;
 			if (free_l2_count > 0 && ppim->pa != 0) {
 				/* Not enough space here */
 				free_l2_count = 0;
 				start_idx = -1;
 				continue;
 			}
 
 			if (ppim->pa == 0) {
 				/* Free L2 block */
 				if (start_idx == -1)
 					start_idx = i;
 				free_l2_count++;
 				if (free_l2_count == l2_blocks)
 					break;
 			}
 		}
 		if (free_l2_count != l2_blocks)
 			panic("%s: too many preinit mappings", __func__);
 
 		va = preinit_map_va + (start_idx * L2_SIZE);
 		for (i = start_idx; i < start_idx + l2_blocks; i++) {
 			/* Mark entries as allocated */
 			ppim = pmap_preinit_mapping + i;
 			ppim->pa = pa;
 			ppim->va = va + offset;
 			ppim->size = size;
 		}
 
 		/* Map L2 blocks */
 		pa = rounddown2(pa, L2_SIZE);
 		for (i = 0; i < l2_blocks; i++) {
 			pde = pmap_pde(kernel_pmap, va, &lvl);
 			KASSERT(pde != NULL,
 			    ("pmap_mapbios: Invalid page entry, va: 0x%lx",
 			    va));
 			KASSERT(lvl == 1,
 			    ("pmap_mapbios: Invalid level %d", lvl));
 
 			/* Insert L2_BLOCK */
 			l2 = pmap_l1_to_l2(pde, va);
 			pmap_load_store(l2,
 			    pa | ATTR_DEFAULT | ATTR_S1_XN |
 			    ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK) | L2_BLOCK);
 
 			va += L2_SIZE;
 			pa += L2_SIZE;
 		}
 		pmap_invalidate_all(kernel_pmap);
 
 		va = preinit_map_va + (start_idx * L2_SIZE);
 
 	} else {
 		/* kva_alloc may be used to map the pages */
 		offset = pa & PAGE_MASK;
 		size = round_page(offset + size);
 
 		va = kva_alloc(size);
 		if (va == 0)
 			panic("%s: Couldn't allocate KVA", __func__);
 
 		pde = pmap_pde(kernel_pmap, va, &lvl);
 		KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl));
 
 		/* L3 table is linked */
 		va = trunc_page(va);
 		pa = trunc_page(pa);
 		pmap_kenter(va, size, pa, VM_MEMATTR_WRITE_BACK);
 	}
 
 	return ((void *)(va + offset));
 }
 
 void
 pmap_unmapbios(vm_offset_t va, vm_size_t size)
 {
 	struct pmap_preinit_mapping *ppim;
 	vm_offset_t offset, tmpsize, va_trunc;
 	pd_entry_t *pde;
 	pt_entry_t *l2;
 	int i, lvl, l2_blocks, block;
 	bool preinit_map;
 
 	l2_blocks =
 	   (roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT;
 	KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size));
 
 	/* Remove preinit mapping */
 	preinit_map = false;
 	block = 0;
 	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
 		ppim = pmap_preinit_mapping + i;
 		if (ppim->va == va) {
 			KASSERT(ppim->size == size,
 			    ("pmap_unmapbios: size mismatch"));
 			ppim->va = 0;
 			ppim->pa = 0;
 			ppim->size = 0;
 			preinit_map = true;
 			offset = block * L2_SIZE;
 			va_trunc = rounddown2(va, L2_SIZE) + offset;
 
 			/* Remove L2_BLOCK */
 			pde = pmap_pde(kernel_pmap, va_trunc, &lvl);
 			KASSERT(pde != NULL,
 			    ("pmap_unmapbios: Invalid page entry, va: 0x%lx",
 			    va_trunc));
 			l2 = pmap_l1_to_l2(pde, va_trunc);
 			pmap_clear(l2);
 
 			if (block == (l2_blocks - 1))
 				break;
 			block++;
 		}
 	}
 	if (preinit_map) {
 		pmap_invalidate_all(kernel_pmap);
 		return;
 	}
 
 	/* Unmap the pages reserved with kva_alloc. */
 	if (vm_initialized) {
 		offset = va & PAGE_MASK;
 		size = round_page(offset + size);
 		va = trunc_page(va);
 
 		pde = pmap_pde(kernel_pmap, va, &lvl);
 		KASSERT(pde != NULL,
 		    ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va));
 		KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl));
 
 		/* Unmap and invalidate the pages */
                 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
 			pmap_kremove(va + tmpsize);
 
 		kva_free(va, size);
 	}
 }
 
 /*
  * Sets the memory attribute for the specified page.
  */
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
 
 	m->md.pv_memattr = ma;
 
 	/*
 	 * If "m" is a normal page, update its direct mapping.  This update
 	 * can be relied upon to perform any cache operations that are
 	 * required for data coherence.
 	 */
 	if ((m->flags & PG_FICTITIOUS) == 0 &&
 	    pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
 	    m->md.pv_memattr) != 0)
 		panic("memory attribute change on the direct map failed");
 }
 
 /*
  * Changes the specified virtual address range's memory type to that given by
  * the parameter "mode".  The specified virtual address range must be
  * completely contained within either the direct map or the kernel map.  If
  * the virtual address range is contained within the kernel map, then the
  * memory type for each of the corresponding ranges of the direct map is also
  * changed.  (The corresponding ranges of the direct map are those ranges that
  * map the same physical pages as the specified virtual address range.)  These
  * changes to the direct map are necessary because Intel describes the
  * behavior of their processors as "undefined" if two or more mappings to the
  * same physical page have different memory types.
  *
  * Returns zero if the change completed successfully, and either EINVAL or
  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
  * of the virtual address range was not mapped, and ENOMEM is returned if
  * there was insufficient memory available to complete the change.  In the
  * latter case, the memory type may have been changed on some part of the
  * virtual address range or the direct map.
  */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	int error;
 
 	PMAP_LOCK(kernel_pmap);
 	error = pmap_change_attr_locked(va, size, mode);
 	PMAP_UNLOCK(kernel_pmap);
 	return (error);
 }
 
 static int
 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
 	pt_entry_t l3, *pte, *newpte;
 	int lvl;
 
 	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
 	base = trunc_page(va);
 	offset = va & PAGE_MASK;
 	size = round_page(offset + size);
 
 	if (!VIRT_IN_DMAP(base) &&
 	    !(base >= VM_MIN_KERNEL_ADDRESS && base < VM_MAX_KERNEL_ADDRESS))
 		return (EINVAL);
 
 	for (tmpva = base; tmpva < base + size; ) {
 		pte = pmap_pte(kernel_pmap, tmpva, &lvl);
 		if (pte == NULL)
 			return (EINVAL);
 
 		if ((pmap_load(pte) & ATTR_S1_IDX_MASK) == ATTR_S1_IDX(mode)) {
 			/*
 			 * We already have the correct attribute,
 			 * ignore this entry.
 			 */
 			switch (lvl) {
 			default:
 				panic("Invalid DMAP table level: %d\n", lvl);
 			case 1:
 				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
 				break;
 			case 2:
 				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
 				break;
 			case 3:
 				tmpva += PAGE_SIZE;
 				break;
 			}
 		} else {
 			/*
 			 * Split the entry to an level 3 table, then
 			 * set the new attribute.
 			 */
 			switch (lvl) {
 			default:
 				panic("Invalid DMAP table level: %d\n", lvl);
 			case 1:
 				newpte = pmap_demote_l1(kernel_pmap, pte,
 				    tmpva & ~L1_OFFSET);
 				if (newpte == NULL)
 					return (EINVAL);
 				pte = pmap_l1_to_l2(pte, tmpva);
 			case 2:
 				newpte = pmap_demote_l2(kernel_pmap, pte,
 				    tmpva);
 				if (newpte == NULL)
 					return (EINVAL);
 				pte = pmap_l2_to_l3(pte, tmpva);
 			case 3:
 				/* Update the entry */
 				l3 = pmap_load(pte);
 				l3 &= ~ATTR_S1_IDX_MASK;
 				l3 |= ATTR_S1_IDX(mode);
 				if (mode == VM_MEMATTR_DEVICE)
 					l3 |= ATTR_S1_XN;
 
 				pmap_update_entry(kernel_pmap, pte, l3, tmpva,
 				    PAGE_SIZE);
 
 				/*
 				 * If moving to a non-cacheable entry flush
 				 * the cache.
 				 */
 				if (mode == VM_MEMATTR_UNCACHEABLE)
 					cpu_dcache_wbinv_range(tmpva, L3_SIZE);
 
 				break;
 			}
 			tmpva += PAGE_SIZE;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Create an L2 table to map all addresses within an L1 mapping.
  */
 static pt_entry_t *
 pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
 {
 	pt_entry_t *l2, newl2, oldl1;
 	vm_offset_t tmpl1;
 	vm_paddr_t l2phys, phys;
 	vm_page_t ml2;
 	int i;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	oldl1 = pmap_load(l1);
 	KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
 	    ("pmap_demote_l1: Demoting a non-block entry"));
 	KASSERT((va & L1_OFFSET) == 0,
 	    ("pmap_demote_l1: Invalid virtual address %#lx", va));
 	KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
 	    ("pmap_demote_l1: Level 1 table shouldn't be managed"));
 
 	tmpl1 = 0;
 	if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
 		tmpl1 = kva_alloc(PAGE_SIZE);
 		if (tmpl1 == 0)
 			return (NULL);
 	}
 
 	if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
 		    " in pmap %p", va, pmap);
 		return (NULL);
 	}
 
 	l2phys = VM_PAGE_TO_PHYS(ml2);
 	l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
 
 	/* Address the range points at */
 	phys = oldl1 & ~ATTR_MASK;
 	/* The attributed from the old l1 table to be copied */
 	newl2 = oldl1 & ATTR_MASK;
 
 	/* Create the new entries */
 	for (i = 0; i < Ln_ENTRIES; i++) {
 		l2[i] = newl2 | phys;
 		phys += L2_SIZE;
 	}
 	KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
 	    ("Invalid l2 page (%lx != %lx)", l2[0],
 	    (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
 
 	if (tmpl1 != 0) {
 		pmap_kenter(tmpl1, PAGE_SIZE,
 		    DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET,
 		    VM_MEMATTR_WRITE_BACK);
 		l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
 	}
 
 	pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
 
 	if (tmpl1 != 0) {
 		pmap_kremove(tmpl1);
 		kva_free(tmpl1, PAGE_SIZE);
 	}
 
 	return (l2);
 }
 
 static void
 pmap_fill_l3(pt_entry_t *firstl3, pt_entry_t newl3)
 {
 	pt_entry_t *l3;
 
 	for (l3 = firstl3; l3 - firstl3 < Ln_ENTRIES; l3++) {
 		*l3 = newl3;
 		newl3 += L3_SIZE;
 	}
 }
 
 static void
 pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2,
     struct rwlock **lockp)
 {
 	struct spglist free;
 
 	SLIST_INIT(&free);
 	(void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free,
 	    lockp);
 	vm_page_free_pages_toq(&free, true);
 }
 
 /*
  * Create an L3 table to map all addresses within an L2 mapping.
  */
 static pt_entry_t *
 pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
     struct rwlock **lockp)
 {
 	pt_entry_t *l3, newl3, oldl2;
 	vm_offset_t tmpl2;
 	vm_paddr_t l3phys;
 	vm_page_t ml3;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PMAP_ASSERT_STAGE1(pmap);
 	l3 = NULL;
 	oldl2 = pmap_load(l2);
 	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
 	    ("pmap_demote_l2: Demoting a non-block entry"));
 	va &= ~L2_OFFSET;
 
 	tmpl2 = 0;
 	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
 		tmpl2 = kva_alloc(PAGE_SIZE);
 		if (tmpl2 == 0)
 			return (NULL);
 	}
 
 	/*
 	 * Invalidate the 2MB page mapping and return "failure" if the
 	 * mapping was never accessed.
 	 */
 	if ((oldl2 & ATTR_AF) == 0) {
 		KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
 		    ("pmap_demote_l2: a wired mapping is missing ATTR_AF"));
 		pmap_demote_l2_abort(pmap, va, l2, lockp);
 		CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx in pmap %p",
 		    va, pmap);
 		goto fail;
 	}
 
 	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
 		KASSERT((oldl2 & ATTR_SW_WIRED) == 0,
 		    ("pmap_demote_l2: page table page for a wired mapping"
 		    " is missing"));
 
 		/*
 		 * If the page table page is missing and the mapping
 		 * is for a kernel address, the mapping must belong to
 		 * the direct map.  Page table pages are preallocated
 		 * for every other part of the kernel address space,
 		 * so the direct map region is the only part of the
 		 * kernel address space that must be handled here.
 		 */
 		KASSERT(va < VM_MAXUSER_ADDRESS || VIRT_IN_DMAP(va),
 		    ("pmap_demote_l2: No saved mpte for va %#lx", va));
 
 		/*
 		 * If the 2MB page mapping belongs to the direct map
 		 * region of the kernel's address space, then the page
 		 * allocation request specifies the highest possible
 		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
 		 * priority is normal.
 		 */
 		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
 		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
 
 		/*
 		 * If the allocation of the new page table page fails,
 		 * invalidate the 2MB page mapping and return "failure".
 		 */
 		if (ml3 == NULL) {
 			pmap_demote_l2_abort(pmap, va, l2, lockp);
 			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			goto fail;
 		}
 
 		if (va < VM_MAXUSER_ADDRESS) {
 			ml3->ref_count = NL3PG;
 			pmap_resident_count_inc(pmap, 1);
 		}
 	}
 	l3phys = VM_PAGE_TO_PHYS(ml3);
 	l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
 	newl3 = (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE;
 	KASSERT((oldl2 & (ATTR_S1_AP_RW_BIT | ATTR_SW_DBM)) !=
 	    (ATTR_S1_AP(ATTR_S1_AP_RO) | ATTR_SW_DBM),
 	    ("pmap_demote_l2: L2 entry is writeable but not dirty"));
 
 	/*
 	 * If the page table page is not leftover from an earlier promotion,
 	 * or the mapping attributes have changed, (re)initialize the L3 table.
 	 *
 	 * When pmap_update_entry() clears the old L2 mapping, it (indirectly)
 	 * performs a dsb().  That dsb() ensures that the stores for filling
 	 * "l3" are visible before "l3" is added to the page table.
 	 */
 	if (ml3->valid == 0 || (l3[0] & ATTR_MASK) != (newl3 & ATTR_MASK))
 		pmap_fill_l3(l3, newl3);
 
 	/*
 	 * Map the temporary page so we don't lose access to the l2 table.
 	 */
 	if (tmpl2 != 0) {
 		pmap_kenter(tmpl2, PAGE_SIZE,
 		    DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET,
 		    VM_MEMATTR_WRITE_BACK);
 		l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
 	}
 
 	/*
 	 * The spare PV entries must be reserved prior to demoting the
 	 * mapping, that is, prior to changing the PDE.  Otherwise, the state
 	 * of the L2 and the PV lists will be inconsistent, which can result
 	 * in reclaim_pv_chunk() attempting to remove a PV entry from the
 	 * wrong PV list and pmap_pv_demote_l2() failing to find the expected
 	 * PV entry for the 2MB page mapping that is being demoted.
 	 */
 	if ((oldl2 & ATTR_SW_MANAGED) != 0)
 		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 
 	/*
 	 * Pass PAGE_SIZE so that a single TLB invalidation is performed on
 	 * the 2MB page mapping.
 	 */
 	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
 
 	/*
 	 * Demote the PV entry.
 	 */
 	if ((oldl2 & ATTR_SW_MANAGED) != 0)
 		pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
 
 	atomic_add_long(&pmap_l2_demotions, 1);
 	CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
 	    " in pmap %p %lx", va, pmap, l3[0]);
 
 fail:
 	if (tmpl2 != 0) {
 		pmap_kremove(tmpl2);
 		kva_free(tmpl2, PAGE_SIZE);
 	}
 
 	return (l3);
 
 }
 
 static pt_entry_t *
 pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 {
 	struct rwlock *lock;
 	pt_entry_t *l3;
 
 	lock = NULL;
 	l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
 	return (l3);
 }
 
 /*
  * Perform the pmap work for mincore(2).  If the page is not both referenced and
  * modified by this pmap, returns its physical address so that the caller can
  * find other mappings.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
 {
 	pt_entry_t *pte, tpte;
 	vm_paddr_t mask, pa;
 	int lvl, val;
 	bool managed;
 
 	PMAP_ASSERT_STAGE1(pmap);
 	PMAP_LOCK(pmap);
 	pte = pmap_pte(pmap, addr, &lvl);
 	if (pte != NULL) {
 		tpte = pmap_load(pte);
 
 		switch (lvl) {
 		case 3:
 			mask = L3_OFFSET;
 			break;
 		case 2:
 			mask = L2_OFFSET;
 			break;
 		case 1:
 			mask = L1_OFFSET;
 			break;
 		default:
 			panic("pmap_mincore: invalid level %d", lvl);
 		}
 
 		managed = (tpte & ATTR_SW_MANAGED) != 0;
 		val = MINCORE_INCORE;
 		if (lvl != 3)
 			val |= MINCORE_SUPER;
 		if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
 		    (tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 		if ((tpte & ATTR_AF) == ATTR_AF)
 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 
 		pa = (tpte & ~ATTR_MASK) | (addr & mask);
 	} else {
 		managed = false;
 		val = 0;
 	}
 
 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 		*pap = pa;
 	}
 	PMAP_UNLOCK(pmap);
 	return (val);
 }
 
 /*
  * Garbage collect every ASID that is neither active on a processor nor
  * reserved.
  */
 static void
 pmap_reset_asid_set(pmap_t pmap)
 {
 	pmap_t curpmap;
 	int asid, cpuid, epoch;
 	struct asid_set *set;
 
 	PMAP_ASSERT_STAGE1(pmap);
 
 	set = pmap->pm_asid_set;
 	KASSERT(set != NULL, ("%s: NULL asid set", __func__));
 	mtx_assert(&set->asid_set_mutex, MA_OWNED);
 
 	/*
 	 * Ensure that the store to asid_epoch is globally visible before the
 	 * loads from pc_curpmap are performed.
 	 */
 	epoch = set->asid_epoch + 1;
 	if (epoch == INT_MAX)
 		epoch = 0;
 	set->asid_epoch = epoch;
 	dsb(ishst);
 	__asm __volatile("tlbi vmalle1is");
 	dsb(ish);
 	bit_nclear(set->asid_set, ASID_FIRST_AVAILABLE,
 	    set->asid_set_size - 1);
 	CPU_FOREACH(cpuid) {
 		if (cpuid == curcpu)
 			continue;
 		curpmap = pcpu_find(cpuid)->pc_curpmap;
 		KASSERT(curpmap->pm_asid_set == set, ("Incorrect set"));
 		asid = COOKIE_TO_ASID(curpmap->pm_cookie);
 		if (asid == -1)
 			continue;
 		bit_set(set->asid_set, asid);
 		curpmap->pm_cookie = COOKIE_FROM(asid, epoch);
 	}
 }
 
 /*
  * Allocate a new ASID for the specified pmap.
  */
 static void
 pmap_alloc_asid(pmap_t pmap)
 {
 	struct asid_set *set;
 	int new_asid;
 
 	PMAP_ASSERT_STAGE1(pmap);
 	set = pmap->pm_asid_set;
 	KASSERT(set != NULL, ("%s: NULL asid set", __func__));
 
 	mtx_lock_spin(&set->asid_set_mutex);
 
 	/*
 	 * While this processor was waiting to acquire the asid set mutex,
 	 * pmap_reset_asid_set() running on another processor might have
 	 * updated this pmap's cookie to the current epoch.  In which case, we
 	 * don't need to allocate a new ASID.
 	 */
 	if (COOKIE_TO_EPOCH(pmap->pm_cookie) == set->asid_epoch)
 		goto out;
 
 	bit_ffc_at(set->asid_set, set->asid_next, set->asid_set_size,
 	    &new_asid);
 	if (new_asid == -1) {
 		bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
 		    set->asid_next, &new_asid);
 		if (new_asid == -1) {
 			pmap_reset_asid_set(pmap);
 			bit_ffc_at(set->asid_set, ASID_FIRST_AVAILABLE,
 			    set->asid_set_size, &new_asid);
 			KASSERT(new_asid != -1, ("ASID allocation failure"));
 		}
 	}
 	bit_set(set->asid_set, new_asid);
 	set->asid_next = new_asid + 1;
 	pmap->pm_cookie = COOKIE_FROM(new_asid, set->asid_epoch);
 out:
 	mtx_unlock_spin(&set->asid_set_mutex);
 }
 
 /*
  * Compute the value that should be stored in ttbr0 to activate the specified
  * pmap.  This value may change from time to time.
  */
 uint64_t
 pmap_to_ttbr0(pmap_t pmap)
 {
 
 	PMAP_ASSERT_STAGE1(pmap);
 	return (ASID_TO_OPERAND(COOKIE_TO_ASID(pmap->pm_cookie)) |
 	    pmap->pm_l0_paddr);
 }
 
 static bool
 pmap_activate_int(pmap_t pmap)
 {
 	struct asid_set *set;
 	int epoch;
 
 	PMAP_ASSERT_STAGE1(pmap);
 	KASSERT(PCPU_GET(curpmap) != NULL, ("no active pmap"));
 	KASSERT(pmap != kernel_pmap, ("kernel pmap activation"));
 	if (pmap == PCPU_GET(curpmap)) {
 		/*
 		 * Handle the possibility that the old thread was preempted
 		 * after an "ic" or "tlbi" instruction but before it performed
 		 * a "dsb" instruction.  If the old thread migrates to a new
 		 * processor, its completion of a "dsb" instruction on that
 		 * new processor does not guarantee that the "ic" or "tlbi"
 		 * instructions performed on the old processor have completed.
 		 */
 		dsb(ish);
 		return (false);
 	}
 
 	set = pmap->pm_asid_set;
 	KASSERT(set != NULL, ("%s: NULL asid set", __func__));
 
 	/*
 	 * Ensure that the store to curpmap is globally visible before the
 	 * load from asid_epoch is performed.
 	 */
 	PCPU_SET(curpmap, pmap);
 	dsb(ish);
 	epoch = COOKIE_TO_EPOCH(pmap->pm_cookie);
 	if (epoch >= 0 && epoch != set->asid_epoch)
 		pmap_alloc_asid(pmap);
 
 	set_ttbr0(pmap_to_ttbr0(pmap));
 	if (PCPU_GET(bcast_tlbi_workaround) != 0)
 		invalidate_local_icache();
 	return (true);
 }
 
 void
 pmap_activate(struct thread *td)
 {
 	pmap_t	pmap;
 
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	PMAP_ASSERT_STAGE1(pmap);
 	critical_enter();
 	(void)pmap_activate_int(pmap);
 	critical_exit();
 }
 
 /*
  * To eliminate the unused parameter "old", we would have to add an instruction
  * to cpu_switch().
  */
 struct pcb *
 pmap_switch(struct thread *old __unused, struct thread *new)
 {
 	pcpu_bp_harden bp_harden;
 	struct pcb *pcb;
 
 	/* Store the new curthread */
 	PCPU_SET(curthread, new);
 
 	/* And the new pcb */
 	pcb = new->td_pcb;
 	PCPU_SET(curpcb, pcb);
 
 	/*
 	 * TODO: We may need to flush the cache here if switching
 	 * to a user process.
 	 */
 
 	if (pmap_activate_int(vmspace_pmap(new->td_proc->p_vmspace))) {
 		/*
 		 * Stop userspace from training the branch predictor against
 		 * other processes. This will call into a CPU specific
 		 * function that clears the branch predictor state.
 		 */
 		bp_harden = PCPU_GET(bp_harden);
 		if (bp_harden != NULL)
 			bp_harden();
 	}
 
 	return (pcb);
 }
 
 void
 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
 {
 
 	PMAP_ASSERT_STAGE1(pmap);
 	if (va >= VM_MIN_KERNEL_ADDRESS) {
 		cpu_icache_sync_range(va, sz);
 	} else {
 		u_int len, offset;
 		vm_paddr_t pa;
 
 		/* Find the length of data in this page to flush */
 		offset = va & PAGE_MASK;
 		len = imin(PAGE_SIZE - offset, sz);
 
 		while (sz != 0) {
 			/* Extract the physical address & find it in the DMAP */
 			pa = pmap_extract(pmap, va);
 			if (pa != 0)
 				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
 
 			/* Move to the next page */
 			sz -= len;
 			va += len;
 			/* Set the length for the next iteration */
 			len = imin(PAGE_SIZE, sz);
 		}
 	}
 }
 
 int
 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
 {
 	pt_entry_t pte, *ptep;
 	register_t intr;
 	uint64_t ec, par;
 	int lvl, rv;
 
 	PMAP_ASSERT_STAGE1(pmap);
 	rv = KERN_FAILURE;
 
 	ec = ESR_ELx_EXCEPTION(esr);
 	switch (ec) {
 	case EXCP_INSN_ABORT_L:
 	case EXCP_INSN_ABORT:
 	case EXCP_DATA_ABORT_L:
 	case EXCP_DATA_ABORT:
 		break;
 	default:
 		return (rv);
 	}
 
 	/* Data and insn aborts use same encoding for FSC field. */
 	switch (esr & ISS_DATA_DFSC_MASK) {
 	case ISS_DATA_DFSC_AFF_L1:
 	case ISS_DATA_DFSC_AFF_L2:
 	case ISS_DATA_DFSC_AFF_L3:
 		PMAP_LOCK(pmap);
 		ptep = pmap_pte(pmap, far, &lvl);
 		if (ptep != NULL) {
 			pmap_set_bits(ptep, ATTR_AF);
 			rv = KERN_SUCCESS;
 			/*
 			 * XXXMJ as an optimization we could mark the entry
 			 * dirty if this is a write fault.
 			 */
 		}
 		PMAP_UNLOCK(pmap);
 		break;
 	case ISS_DATA_DFSC_PF_L1:
 	case ISS_DATA_DFSC_PF_L2:
 	case ISS_DATA_DFSC_PF_L3:
 		if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) ||
 		    (esr & ISS_DATA_WnR) == 0)
 			return (rv);
 		PMAP_LOCK(pmap);
 		ptep = pmap_pte(pmap, far, &lvl);
 		if (ptep != NULL &&
 		    ((pte = pmap_load(ptep)) & ATTR_SW_DBM) != 0) {
 			if ((pte & ATTR_S1_AP_RW_BIT) ==
 			    ATTR_S1_AP(ATTR_S1_AP_RO)) {
 				pmap_clear_bits(ptep, ATTR_S1_AP_RW_BIT);
 				pmap_invalidate_page(pmap, far);
 			}
 			rv = KERN_SUCCESS;
 		}
 		PMAP_UNLOCK(pmap);
 		break;
 	case ISS_DATA_DFSC_TF_L0:
 	case ISS_DATA_DFSC_TF_L1:
 	case ISS_DATA_DFSC_TF_L2:
 	case ISS_DATA_DFSC_TF_L3:
 		/*
 		 * Retry the translation.  A break-before-make sequence can
 		 * produce a transient fault.
 		 */
 		if (pmap == kernel_pmap) {
 			/*
 			 * The translation fault may have occurred within a
 			 * critical section.  Therefore, we must check the
 			 * address without acquiring the kernel pmap's lock.
 			 */
 			if (pmap_kextract(far) != 0)
 				rv = KERN_SUCCESS;
 		} else {
 			PMAP_LOCK(pmap);
 			/* Ask the MMU to check the address. */
 			intr = intr_disable();
 			par = arm64_address_translate_s1e0r(far);
 			intr_restore(intr);
 			PMAP_UNLOCK(pmap);
 
 			/*
 			 * If the translation was successful, then we can
 			 * return success to the trap handler.
 			 */
 			if (PAR_SUCCESS(par))
 				rv = KERN_SUCCESS;
 		}
 		break;
 	}
 
 	return (rv);
 }
 
 /*
  *	Increase the starting virtual address of the given mapping if a
  *	different alignment might result in more superpage mappings.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 	vm_offset_t superpage_offset;
 
 	if (size < L2_SIZE)
 		return;
 	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 		offset += ptoa(object->pg_color);
 	superpage_offset = offset & L2_OFFSET;
 	if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
 	    (*addr & L2_OFFSET) == superpage_offset)
 		return;
 	if ((*addr & L2_OFFSET) < superpage_offset)
 		*addr = (*addr & ~L2_OFFSET) + superpage_offset;
 	else
 		*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
 }
 
 /**
  * Get the kernel virtual address of a set of physical pages. If there are
  * physical addresses not covered by the DMAP perform a transient mapping
  * that will be removed when calling pmap_unmap_io_transient.
  *
  * \param page        The pages the caller wishes to obtain the virtual
  *                    address on the kernel memory map.
  * \param vaddr       On return contains the kernel virtual memory address
  *                    of the pages passed in the page parameter.
  * \param count       Number of pages passed in.
  * \param can_fault   TRUE if the thread using the mapped pages can take
  *                    page faults, FALSE otherwise.
  *
  * \returns TRUE if the caller must call pmap_unmap_io_transient when
  *          finished or FALSE otherwise.
  *
  */
 boolean_t
 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
     boolean_t can_fault)
 {
 	vm_paddr_t paddr;
 	boolean_t needs_mapping;
 	int error, i;
 
 	/*
 	 * Allocate any KVA space that we need, this is done in a separate
 	 * loop to prevent calling vmem_alloc while pinned.
 	 */
 	needs_mapping = FALSE;
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (__predict_false(!PHYS_IN_DMAP(paddr))) {
 			error = vmem_alloc(kernel_arena, PAGE_SIZE,
 			    M_BESTFIT | M_WAITOK, &vaddr[i]);
 			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
 			needs_mapping = TRUE;
 		} else {
 			vaddr[i] = PHYS_TO_DMAP(paddr);
 		}
 	}
 
 	/* Exit early if everything is covered by the DMAP */
 	if (!needs_mapping)
 		return (FALSE);
 
 	if (!can_fault)
 		sched_pin();
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (!PHYS_IN_DMAP(paddr)) {
 			panic(
 			   "pmap_map_io_transient: TODO: Map out of DMAP data");
 		}
 	}
 
 	return (needs_mapping);
 }
 
 void
 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
     boolean_t can_fault)
 {
 	vm_paddr_t paddr;
 	int i;
 
 	if (!can_fault)
 		sched_unpin();
 	for (i = 0; i < count; i++) {
 		paddr = VM_PAGE_TO_PHYS(page[i]);
 		if (!PHYS_IN_DMAP(paddr)) {
 			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
 		}
 	}
 }
 
 boolean_t
 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 {
 
 	return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH);
 }
 
 /*
  * Track a range of the kernel's virtual address space that is contiguous
  * in various mapping attributes.
  */
 struct pmap_kernel_map_range {
 	vm_offset_t sva;
 	pt_entry_t attrs;
 	int l3pages;
 	int l3contig;
 	int l2blocks;
 	int l1blocks;
 };
 
 static void
 sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
     vm_offset_t eva)
 {
 	const char *mode;
 	int index;
 
 	if (eva <= range->sva)
 		return;
 
 	index = range->attrs & ATTR_S1_IDX_MASK;
 	switch (index) {
 	case ATTR_S1_IDX(VM_MEMATTR_DEVICE):
 		mode = "DEV";
 		break;
 	case ATTR_S1_IDX(VM_MEMATTR_UNCACHEABLE):
 		mode = "UC";
 		break;
 	case ATTR_S1_IDX(VM_MEMATTR_WRITE_BACK):
 		mode = "WB";
 		break;
 	case ATTR_S1_IDX(VM_MEMATTR_WRITE_THROUGH):
 		mode = "WT";
 		break;
 	default:
 		printf(
 		    "%s: unknown memory type %x for range 0x%016lx-0x%016lx\n",
 		    __func__, index, range->sva, eva);
 		mode = "??";
 		break;
 	}
 
 	sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c %3s %d %d %d %d\n",
 	    range->sva, eva,
 	    (range->attrs & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP_RW ? 'w' : '-',
 	    (range->attrs & ATTR_S1_PXN) != 0 ? '-' : 'x',
 	    (range->attrs & ATTR_S1_AP_USER) != 0 ? 'u' : 's',
 	    mode, range->l1blocks, range->l2blocks, range->l3contig,
 	    range->l3pages);
 
 	/* Reset to sentinel value. */
 	range->sva = 0xfffffffffffffffful;
 }
 
 /*
  * Determine whether the attributes specified by a page table entry match those
  * being tracked by the current range.
  */
 static bool
 sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
 {
 
 	return (range->attrs == attrs);
 }
 
 static void
 sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
     pt_entry_t attrs)
 {
 
 	memset(range, 0, sizeof(*range));
 	range->sva = va;
 	range->attrs = attrs;
 }
 
 /*
  * Given a leaf PTE, derive the mapping's attributes.  If they do not match
  * those of the current run, dump the address range and its attributes, and
  * begin a new run.
  */
 static void
 sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
     vm_offset_t va, pd_entry_t l0e, pd_entry_t l1e, pd_entry_t l2e,
     pt_entry_t l3e)
 {
 	pt_entry_t attrs;
 
 	attrs = l0e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
 	attrs |= l1e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
 	if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK)
 		attrs |= l1e & ATTR_S1_IDX_MASK;
 	attrs |= l2e & (ATTR_S1_AP_MASK | ATTR_S1_XN);
 	if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK)
 		attrs |= l2e & ATTR_S1_IDX_MASK;
 	attrs |= l3e & (ATTR_S1_AP_MASK | ATTR_S1_XN | ATTR_S1_IDX_MASK);
 
 	if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
 		sysctl_kmaps_dump(sb, range, va);
 		sysctl_kmaps_reinit(range, va, attrs);
 	}
 }
 
 static int
 sysctl_kmaps(SYSCTL_HANDLER_ARGS)
 {
 	struct pmap_kernel_map_range range;
 	struct sbuf sbuf, *sb;
 	pd_entry_t l0e, *l1, l1e, *l2, l2e;
 	pt_entry_t *l3, l3e;
 	vm_offset_t sva;
 	vm_paddr_t pa;
 	int error, i, j, k, l;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sb = &sbuf;
 	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
 
 	/* Sentinel value. */
 	range.sva = 0xfffffffffffffffful;
 
 	/*
 	 * Iterate over the kernel page tables without holding the kernel pmap
 	 * lock.  Kernel page table pages are never freed, so at worst we will
 	 * observe inconsistencies in the output.
 	 */
 	for (sva = 0xffff000000000000ul, i = pmap_l0_index(sva); i < Ln_ENTRIES;
 	    i++) {
 		if (i == pmap_l0_index(DMAP_MIN_ADDRESS))
 			sbuf_printf(sb, "\nDirect map:\n");
 		else if (i == pmap_l0_index(VM_MIN_KERNEL_ADDRESS))
 			sbuf_printf(sb, "\nKernel map:\n");
 
 		l0e = kernel_pmap->pm_l0[i];
 		if ((l0e & ATTR_DESCR_VALID) == 0) {
 			sysctl_kmaps_dump(sb, &range, sva);
 			sva += L0_SIZE;
 			continue;
 		}
 		pa = l0e & ~ATTR_MASK;
 		l1 = (pd_entry_t *)PHYS_TO_DMAP(pa);
 
 		for (j = pmap_l1_index(sva); j < Ln_ENTRIES; j++) {
 			l1e = l1[j];
 			if ((l1e & ATTR_DESCR_VALID) == 0) {
 				sysctl_kmaps_dump(sb, &range, sva);
 				sva += L1_SIZE;
 				continue;
 			}
 			if ((l1e & ATTR_DESCR_MASK) == L1_BLOCK) {
 				sysctl_kmaps_check(sb, &range, sva, l0e, l1e,
 				    0, 0);
 				range.l1blocks++;
 				sva += L1_SIZE;
 				continue;
 			}
 			pa = l1e & ~ATTR_MASK;
 			l2 = (pd_entry_t *)PHYS_TO_DMAP(pa);
 
 			for (k = pmap_l2_index(sva); k < Ln_ENTRIES; k++) {
 				l2e = l2[k];
 				if ((l2e & ATTR_DESCR_VALID) == 0) {
 					sysctl_kmaps_dump(sb, &range, sva);
 					sva += L2_SIZE;
 					continue;
 				}
 				if ((l2e & ATTR_DESCR_MASK) == L2_BLOCK) {
 					sysctl_kmaps_check(sb, &range, sva,
 					    l0e, l1e, l2e, 0);
 					range.l2blocks++;
 					sva += L2_SIZE;
 					continue;
 				}
 				pa = l2e & ~ATTR_MASK;
 				l3 = (pt_entry_t *)PHYS_TO_DMAP(pa);
 
 				for (l = pmap_l3_index(sva); l < Ln_ENTRIES;
 				    l++, sva += L3_SIZE) {
 					l3e = l3[l];
 					if ((l3e & ATTR_DESCR_VALID) == 0) {
 						sysctl_kmaps_dump(sb, &range,
 						    sva);
 						continue;
 					}
 					sysctl_kmaps_check(sb, &range, sva,
 					    l0e, l1e, l2e, l3e);
 					if ((l3e & ATTR_CONTIGUOUS) != 0)
 						range.l3contig += l % 16 == 0 ?
 						    1 : 0;
 					else
 						range.l3pages++;
 				}
 			}
 		}
 	}
 
 	error = sbuf_finish(sb);
 	sbuf_delete(sb);
 	return (error);
 }
 SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_kmaps, "A",
     "Dump kernel address layout");
Index: head/sys/conf/files.arm
===================================================================
--- head/sys/conf/files.arm	(revision 360081)
+++ head/sys/conf/files.arm	(revision 360082)
@@ -1,166 +1,166 @@
 # $FreeBSD$
 
 kern/kern_clocksource.c			standard
 
 arm/arm/autoconf.c		standard
 arm/arm/bcopy_page.S		standard
 arm/arm/bcopyinout.S		standard
 arm/arm/blockio.S		standard
 arm/arm/bus_space_asm_generic.S	standard
 arm/arm/bus_space_base.c	optional	fdt
 arm/arm/bus_space_generic.c	standard
 arm/arm/busdma_machdep.c 	standard
 arm/arm/copystr.S		standard
 arm/arm/cpufunc.c		standard
 arm/arm/cpufunc_asm.S		standard
 arm/arm/cpufunc_asm_arm9.S 	optional	cpu_arm9e
 arm/arm/cpufunc_asm_arm11x6.S	optional	cpu_arm1176
 arm/arm/cpufunc_asm_armv4.S 	optional	cpu_arm9e
 arm/arm/cpufunc_asm_armv5_ec.S 	optional	cpu_arm9e
 arm/arm/cpufunc_asm_armv7.S	optional	cpu_cortexa | cpu_krait | cpu_mv_pj4b
 arm/arm/cpufunc_asm_pj4b.S	optional	cpu_mv_pj4b
 arm/arm/cpufunc_asm_sheeva.S 	optional	cpu_arm9e
 arm/arm/cpuinfo.c		standard
 arm/arm/cpu_asm-v6.S		optional	armv7 | armv6
 arm/arm/db_disasm.c		optional	ddb
 arm/arm/db_interface.c		optional	ddb
 arm/arm/db_trace.c		optional	ddb
 arm/arm/debug_monitor.c		optional	ddb armv6
 arm/arm/debug_monitor.c		optional	ddb armv7
 arm/arm/disassem.c		optional	ddb
 arm/arm/dump_machdep.c		standard
 arm/arm/elf_machdep.c		standard
 arm/arm/elf_note.S		standard
 arm/arm/exception.S		standard
 arm/arm/fiq.c			standard
 arm/arm/fiq_subr.S		standard
 arm/arm/fusu.S			standard
 arm/arm/gdb_machdep.c		optional	gdb
 arm/arm/generic_timer.c		optional	generic_timer
 arm/arm/gic.c			optional	gic
 arm/arm/gic_fdt.c		optional	gic fdt
 arm/arm/identcpu-v4.c		optional	!armv7 !armv6
 arm/arm/identcpu-v6.c		optional	armv7 | armv6
 arm/arm/in_cksum.c		optional	inet | inet6
 arm/arm/in_cksum_arm.S		optional	inet | inet6
 arm/arm/intr.c			optional	!intrng
 kern/subr_intr.c		optional	intrng
 arm/arm/locore.S		standard	no-obj
 arm/arm/hypervisor-stub.S	optional	armv7 | armv6
 arm/arm/machdep.c		standard
 arm/arm/machdep_boot.c		standard
 arm/arm/machdep_kdb.c		standard
 arm/arm/machdep_intr.c		standard
 arm/arm/machdep_ptrace.c	standard
 arm/arm/mem.c			optional	mem
 arm/arm/minidump_machdep.c	standard
 arm/arm/mp_machdep.c		optional	smp
 arm/arm/mpcore_timer.c		optional	mpcore_timer
 arm/arm/nexus.c			standard
 arm/arm/ofw_machdep.c		optional	fdt
-arm/arm/physmem.c		standard
 arm/arm/pl190.c			optional	pl190
 arm/arm/pl310.c			optional	pl310
 arm/arm/platform.c		optional	platform
 arm/arm/platform_if.m		optional	platform
 arm/arm/platform_pl310_if.m	optional	platform pl310
 arm/arm/pmap-v4.c		optional	!armv7 !armv6
 arm/arm/pmap-v6.c		optional	armv7 | armv6
 arm/arm/pmu.c			optional	pmu | fdt hwpmc
 arm/arm/ptrace_machdep.c	standard
 arm/arm/sc_machdep.c		optional	sc
 arm/arm/setcpsr.S		standard
 arm/arm/setstack.s		standard
 arm/arm/stack_machdep.c		optional	ddb | stack
 arm/arm/stdatomic.c		standard \
 	compile-with "${NORMAL_C:N-Wmissing-prototypes}"
 arm/arm/support.S		standard
 arm/arm/swtch.S			standard
 arm/arm/swtch-v4.S		optional	!armv7 !armv6
 arm/arm/swtch-v6.S		optional	armv7 | armv6
 arm/arm/sys_machdep.c		standard
 arm/arm/syscall.c		standard
 arm/arm/trap-v4.c		optional	!armv7 !armv6
 arm/arm/trap-v6.c		optional	armv7 | armv6
 arm/arm/uio_machdep.c		standard
 arm/arm/undefined.c		standard
 arm/arm/unwind.c		optional	ddb | kdtrace_hooks | stack
 arm/arm/vm_machdep.c		standard
 arm/arm/vfp.c			standard
 arm/cloudabi32/cloudabi32_sysvec.c	optional compat_cloudabi32
 cddl/compat/opensolaris/kern/opensolaris_atomic.c	optional !armv7 !armv6 zfs | !armv7 !armv6 dtrace compile-with "${CDDL_C}"
 cddl/dev/dtrace/arm/dtrace_asm.S			optional dtrace compile-with "${DTRACE_S}"
 cddl/dev/dtrace/arm/dtrace_subr.c			optional dtrace compile-with "${DTRACE_C}"
 cddl/dev/fbt/arm/fbt_isa.c				optional dtrace_fbt | dtraceall compile-with "${FBT_C}"
 crypto/blowfish/bf_enc.c	optional	crypto | ipsec | ipsec_support
 crypto/des/des_enc.c		optional	crypto | ipsec | ipsec_support | netsmb
 dev/cpufreq/cpufreq_dt.c	optional	cpufreq fdt
 dev/dwc/if_dwc.c		optional	dwc
 dev/dwc/if_dwc_if.m		optional	dwc
 dev/fb/fb.c			optional	sc
 dev/fdt/fdt_arm_platform.c	optional	platform fdt
 dev/hdmi/hdmi_if.m		optional	hdmi
 dev/hwpmc/hwpmc_arm.c		optional	hwpmc
 dev/hwpmc/hwpmc_armv7.c		optional	hwpmc armv6
 dev/hwpmc/hwpmc_armv7.c		optional	hwpmc armv7
 dev/iicbus/twsi/twsi.c		optional	twsi
 dev/ofw/ofwpci.c		optional	fdt pci
 dev/pci/pci_host_generic.c	optional	pci_host_generic pci
 dev/pci/pci_host_generic_fdt.c	optional	pci_host_generic pci fdt
 dev/psci/psci.c			optional	psci
 dev/psci/smccc_arm.S		optional	psci
 dev/syscons/scgfbrndr.c		optional	sc
 dev/uart/uart_cpu_fdt.c		optional	uart fdt
 
 kern/msi_if.m			optional	intrng
 kern/pic_if.m			optional	intrng
 kern/subr_busdma_bufalloc.c	standard
 kern/subr_devmap.c		standard
+kern/subr_physmem.c		standard
 kern/subr_sfbuf.c		standard
 libkern/arm/aeabi_unwind.c	standard
 libkern/arm/divsi3.S		standard
 libkern/arm/ffs.S		optional	!armv7 !armv6
 libkern/arm/ldivmod.S		standard
 libkern/arm/ldivmod_helper.c	standard
 libkern/arm/memclr.S		standard
 libkern/arm/memcpy.S		standard
 libkern/arm/memset.S		standard
 libkern/arm/muldi3.c		standard
 libkern/ashldi3.c		standard
 libkern/ashrdi3.c		standard
 libkern/divdi3.c		standard
 libkern/ffsl.c			optional	!armv7 !armv6
 libkern/ffsll.c			optional	!armv7 !armv6
 libkern/fls.c			optional	!armv7 !armv6
 libkern/flsl.c			optional	!armv7 !armv6
 libkern/flsll.c			optional	!armv7 !armv6
 libkern/lshrdi3.c		standard
 libkern/memcmp.c		standard
 libkern/moddi3.c		standard
 libkern/qdivrem.c		standard
 libkern/ucmpdi2.c		standard
 libkern/udivdi3.c		standard
 libkern/umoddi3.c		standard
 
 # CloudABI support
 cloudabi32_vdso.o		optional	compat_cloudabi32	\
 	dependency	"$S/contrib/cloudabi/cloudabi_vdso_armv6.S" \
 	compile-with	"${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_armv6.S -o ${.TARGET}" \
 	no-obj no-implicit-rule						\
 	clean		"cloudabi32_vdso.o"
 #
 cloudabi32_vdso_blob.o		optional	compat_cloudabi32	\
 	dependency 	"cloudabi32_vdso.o"				\
 	compile-with	"${OBJCOPY} --input-target binary --output-target elf32-littlearm --binary-architecture arm cloudabi32_vdso.o ${.TARGET}" \
 	no-implicit-rule						\
 	clean		"cloudabi32_vdso_blob.o"
 #
 
 # Annapurna support
 arm/annapurna/alpine/alpine_ccu.c		optional al_ccu fdt
 arm/annapurna/alpine/alpine_nb_service.c	optional al_nb_service fdt
 arm/annapurna/alpine/alpine_pci.c		optional al_pci fdt
 arm/annapurna/alpine/alpine_pci_msix.c		optional al_pci fdt
 arm/annapurna/alpine/alpine_serdes.c		optional al_serdes fdt		\
 	no-depend	\
 	compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}"
Index: head/sys/conf/files.arm64
===================================================================
--- head/sys/conf/files.arm64	(revision 360081)
+++ head/sys/conf/files.arm64	(revision 360082)
@@ -1,335 +1,335 @@
 # $FreeBSD$
 cloudabi32_vdso.o		optional	compat_cloudabi32	\
 	dependency	"$S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S"	\
 	compile-with	"${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_armv6_on_64bit.S -o ${.TARGET}" \
 	no-obj no-implicit-rule						\
 	clean		"cloudabi32_vdso.o"
 #
 cloudabi32_vdso_blob.o		optional	compat_cloudabi32	\
 	dependency 	"cloudabi32_vdso.o"			\
 	compile-with	"${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi32_vdso.o ${.TARGET}" \
 	no-implicit-rule						\
 	clean		"cloudabi32_vdso_blob.o"
 #
 cloudabi64_vdso.o		optional	compat_cloudabi64	\
 	dependency	"$S/contrib/cloudabi/cloudabi_vdso_aarch64.S"	\
 	compile-with	"${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib -Wl,-T$S/compat/cloudabi/cloudabi_vdso.lds $S/contrib/cloudabi/cloudabi_vdso_aarch64.S -o ${.TARGET}" \
 	no-obj no-implicit-rule						\
 	clean		"cloudabi64_vdso.o"
 #
 cloudabi64_vdso_blob.o		optional	compat_cloudabi64	\
 	dependency 	"cloudabi64_vdso.o"			\
 	compile-with	"${OBJCOPY} --input-target binary --output-target elf64-littleaarch64 --binary-architecture aarch64 cloudabi64_vdso.o ${.TARGET}" \
 	no-implicit-rule						\
 	clean		"cloudabi64_vdso_blob.o"
 #
 
 # Allwinner common files
 arm/allwinner/a10_timer.c	optional	a10_timer fdt
 arm/allwinner/a10_codec.c	optional	sound a10_codec
 arm/allwinner/a31_dmac.c	optional	a31_dmac
 arm/allwinner/sunxi_dma_if.m	optional	a31_dmac
 arm/allwinner/aw_cir.c		optional	evdev aw_cir fdt
 arm/allwinner/aw_dwc3.c		optional	aw_dwc3 fdt
 arm/allwinner/aw_gpio.c		optional	gpio aw_gpio fdt
 arm/allwinner/aw_mmc.c		optional	mmc aw_mmc fdt | mmccam aw_mmc fdt
 arm/allwinner/aw_nmi.c		optional	aw_nmi fdt \
 	compile-with "${NORMAL_C} -I$S/gnu/dts/include"
 arm/allwinner/aw_pwm.c		optional	aw_pwm fdt
 arm/allwinner/aw_rsb.c		optional	aw_rsb fdt
 arm/allwinner/aw_rtc.c		optional	aw_rtc fdt
 arm/allwinner/aw_sid.c		optional	aw_sid nvmem fdt
 arm/allwinner/aw_spi.c		optional	aw_spi fdt
 arm/allwinner/aw_syscon.c	optional	aw_syscon ext_resources syscon fdt
 arm/allwinner/aw_thermal.c	optional	aw_thermal nvmem fdt
 arm/allwinner/aw_usbphy.c	optional	ehci aw_usbphy fdt
 arm/allwinner/aw_usb3phy.c	optional	xhci aw_usbphy fdt
 arm/allwinner/aw_wdog.c		optional	aw_wdog fdt
 arm/allwinner/axp81x.c		optional	axp81x fdt
 arm/allwinner/if_awg.c		optional	awg ext_resources syscon aw_sid nvmem fdt
 
 # Allwinner clock driver
 arm/allwinner/clkng/aw_ccung.c		optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_frac.c	optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_m.c		optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_mipi.c	optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_nkmp.c	optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_nm.c		optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_nmm.c	optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_np.c		optional	aw_ccu fdt
 arm/allwinner/clkng/aw_clk_prediv_mux.c	optional	aw_ccu fdt
 arm/allwinner/clkng/ccu_a64.c		optional	soc_allwinner_a64 aw_ccu fdt
 arm/allwinner/clkng/ccu_h3.c		optional	soc_allwinner_h5 aw_ccu fdt
 arm/allwinner/clkng/ccu_h6.c		optional	soc_allwinner_h6 aw_ccu fdt
 arm/allwinner/clkng/ccu_h6_r.c		optional	soc_allwinner_h6 aw_ccu fdt
 arm/allwinner/clkng/ccu_sun8i_r.c	optional	aw_ccu fdt
 arm/allwinner/clkng/ccu_de2.c		optional	aw_ccu fdt
 
 # Allwinner padconf files
 arm/allwinner/a64/a64_padconf.c	optional	soc_allwinner_a64 fdt
 arm/allwinner/a64/a64_r_padconf.c optional	soc_allwinner_a64 fdt
 arm/allwinner/h3/h3_padconf.c	optional	soc_allwinner_h5 fdt
 arm/allwinner/h3/h3_r_padconf.c optional	soc_allwinner_h5 fdt
 arm/allwinner/h6/h6_padconf.c optional		soc_allwinner_h6 fdt
 arm/allwinner/h6/h6_r_padconf.c optional	soc_allwinner_h6 fdt
 
 arm/annapurna/alpine/alpine_ccu.c		optional	al_ccu fdt
 arm/annapurna/alpine/alpine_nb_service.c	optional	al_nb_service fdt
 arm/annapurna/alpine/alpine_pci.c		optional	al_pci fdt
 arm/annapurna/alpine/alpine_pci_msix.c		optional	al_pci fdt
 arm/annapurna/alpine/alpine_serdes.c		optional al_serdes fdt		\
 	no-depend	\
 	compile-with "${CC} -c -o ${.TARGET} ${CFLAGS} -I$S/contrib/alpine-hal -I$S/contrib/alpine-hal/eth ${PROF} ${.IMPSRC}"
 arm/arm/generic_timer.c		standard
 arm/arm/gic.c			standard
 arm/arm/gic_acpi.c		optional	acpi
 arm/arm/gic_fdt.c		optional	fdt
 arm/arm/pmu.c			standard
-arm/arm/physmem.c		standard
 arm/broadcom/bcm2835/bcm2835_audio.c		optional sound vchiq fdt \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 arm/broadcom/bcm2835/bcm2835_bsc.c		optional bcm2835_bsc fdt
 arm/broadcom/bcm2835/bcm2835_clkman.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_cpufreq.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_dma.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_fbd.c		optional vt soc_brcm_bcm2837 fdt | vt soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_ft5406.c		optional evdev bcm2835_ft5406 fdt
 arm/broadcom/bcm2835/bcm2835_gpio.c		optional gpio soc_brcm_bcm2837 fdt | gpio soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_intr.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_mbox.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_rng.c		optional !random_loadable soc_brcm_bcm2837 fdt | !random_loadable soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_sdhci.c		optional sdhci soc_brcm_bcm2837 fdt | sdhci soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_sdhost.c		optional sdhci soc_brcm_bcm2837 fdt | sdhci soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_spi.c		optional bcm2835_spi fdt
 arm/broadcom/bcm2835/bcm2835_vcbus.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_vcio.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2835_wdog.c		optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm2836.c			optional soc_brcm_bcm2837 fdt | soc_brcm_bcm2838 fdt
 arm/broadcom/bcm2835/bcm283x_dwc_fdt.c		optional dwcotg fdt soc_brcm_bcm2837 | dwcotg fdt soc_brcm_bcm2838
 arm/mv/a37x0_gpio.c				optional a37x0_gpio gpio fdt
 arm/mv/a37x0_iic.c				optional a37x0_iic iicbus fdt
 arm/mv/a37x0_spi.c				optional a37x0_spi spibus fdt
 arm/mv/armada38x/armada38x_rtc.c		optional mv_rtc fdt
 arm/mv/gpio.c					optional mv_gpio fdt
 arm/mv/mvebu_pinctrl.c				optional mvebu_pinctrl fdt
 arm/mv/mv_ap806_clock.c				optional SOC_MARVELL_8K fdt
 arm/mv/mv_ap806_gicp.c				optional mv_ap806_gicp fdt
 arm/mv/mv_ap806_sei.c				optional mv_ap806_sei fdt
 arm/mv/mv_cp110_clock.c				optional SOC_MARVELL_8K fdt
 arm/mv/mv_cp110_icu.c				optional mv_cp110_icu fdt
 arm/mv/mv_cp110_icu_bus.c			optional mv_cp110_icu fdt
 arm/mv/mv_thermal.c				optional SOC_MARVELL_8K mv_thermal fdt
 arm/mv/armada38x/armada38x_rtc.c		optional mv_rtc fdt
 arm/xilinx/uart_dev_cdnc.c			optional uart soc_xilinx_zynq
 arm64/acpica/acpi_iort.c	optional	acpi
 arm64/acpica/acpi_machdep.c	optional	acpi
 arm64/acpica/OsdEnvironment.c	optional	acpi
 arm64/acpica/acpi_wakeup.c	optional	acpi
 arm64/acpica/pci_cfgreg.c	optional	acpi	pci
 arm64/arm64/autoconf.c		standard
 arm64/arm64/bus_machdep.c	standard
 arm64/arm64/bus_space_asm.S	standard
 arm64/arm64/busdma_bounce.c	standard
 arm64/arm64/busdma_machdep.c	standard
 arm64/arm64/bzero.S		standard
 arm64/arm64/clock.c		standard
 arm64/arm64/copyinout.S		standard
 arm64/arm64/copystr.c		standard
 arm64/arm64/cpu_errata.c	standard
 arm64/arm64/cpufunc_asm.S	standard
 arm64/arm64/db_disasm.c		optional	ddb
 arm64/arm64/db_interface.c	optional	ddb
 arm64/arm64/db_trace.c		optional	ddb
 arm64/arm64/debug_monitor.c	standard
 arm64/arm64/disassem.c		optional	ddb
 arm64/arm64/dump_machdep.c	standard
 arm64/arm64/efirt_machdep.c	optional	efirt
 arm64/arm64/elf32_machdep.c	optional	compat_freebsd32
 arm64/arm64/elf_machdep.c	standard
 arm64/arm64/exception.S		standard
 arm64/arm64/freebsd32_machdep.c	optional	compat_freebsd32
 arm64/arm64/gicv3_its.c		optional	intrng fdt
 arm64/arm64/gic_v3.c		standard
 arm64/arm64/gic_v3_acpi.c	optional	acpi
 arm64/arm64/gic_v3_fdt.c	optional	fdt
 arm64/arm64/identcpu.c		standard
 arm64/arm64/in_cksum.c		optional	inet | inet6
 arm64/arm64/locore.S		standard	no-obj
 arm64/arm64/machdep.c		standard
 arm64/arm64/machdep_boot.c	standard
 arm64/arm64/mem.c		standard
 arm64/arm64/memcpy.S		standard
 arm64/arm64/memmove.S		standard
 arm64/arm64/minidump_machdep.c	standard
 arm64/arm64/mp_machdep.c	optional	smp
 arm64/arm64/nexus.c		standard
 arm64/arm64/ofw_machdep.c	optional	fdt
 arm64/arm64/pmap.c		standard
 arm64/arm64/stack_machdep.c	optional	ddb | stack
 arm64/arm64/support.S		standard
 arm64/arm64/swtch.S		standard
 arm64/arm64/sys_machdep.c	standard
 arm64/arm64/trap.c		standard
 arm64/arm64/uio_machdep.c	standard
 arm64/arm64/uma_machdep.c	standard
 arm64/arm64/undefined.c		standard
 arm64/arm64/unwind.c		optional	ddb | kdtrace_hooks | stack
 arm64/arm64/vfp.c		standard
 arm64/arm64/vm_machdep.c	standard
 arm64/broadcom/brcmmdio/mdio_mux_iproc.c	optional	fdt
 arm64/broadcom/brcmmdio/mdio_nexus_iproc.c	optional	fdt
 arm64/broadcom/brcmmdio/mdio_ns2_pcie_phy.c	optional	fdt pci
 arm64/cavium/thunder_pcie_fdt.c		optional	soc_cavm_thunderx pci fdt
 arm64/cavium/thunder_pcie_pem.c		optional	soc_cavm_thunderx pci
 arm64/cavium/thunder_pcie_pem_fdt.c	optional	soc_cavm_thunderx pci fdt
 arm64/cavium/thunder_pcie_common.c	optional	soc_cavm_thunderx pci
 arm64/cloudabi32/cloudabi32_sysvec.c	optional compat_cloudabi32
 arm64/cloudabi64/cloudabi64_sysvec.c	optional compat_cloudabi64
 arm64/coresight/coresight.c			standard
 arm64/coresight/coresight_if.m			standard
 arm64/coresight/coresight-cmd.c			standard
 arm64/coresight/coresight-cpu-debug.c		standard
 arm64/coresight/coresight-dynamic-replicator.c	standard
 arm64/coresight/coresight-etm4x.c		standard
 arm64/coresight/coresight-funnel.c		standard
 arm64/coresight/coresight-tmc.c			standard
 arm64/intel/firmware.c				optional soc_intel_stratix10
 arm64/intel/stratix10-soc-fpga-mgr.c		optional soc_intel_stratix10
 arm64/intel/stratix10-svc.c			optional soc_intel_stratix10
 arm64/qualcomm/qcom_gcc.c			optional qcom_gcc fdt
 contrib/vchiq/interface/compat/vchi_bsd.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_arm.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -Wno-unused -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_connected.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_core.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_kern_lib.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_kmod.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_shim.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 contrib/vchiq/interface/vchiq_arm/vchiq_util.c	optional vchiq soc_brcm_bcm2837 \
 	compile-with "${NORMAL_C} -DUSE_VCHIQ_ARM -D__VCCOREVER__=0x04000000 -I$S/contrib/vchiq"
 crypto/armv8/armv8_crypto.c	optional	armv8crypto
 armv8_crypto_wrap.o		optional	armv8crypto		\
 	dependency	"$S/crypto/armv8/armv8_crypto_wrap.c"		\
 	compile-with	"${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8/ ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \
 	no-implicit-rule						\
 	clean		"armv8_crypto_wrap.o"
 crypto/blowfish/bf_enc.c	optional	crypto | ipsec | ipsec_support
 crypto/des/des_enc.c		optional	crypto | ipsec | ipsec_support | netsmb
 dev/acpica/acpi_bus_if.m	optional	acpi
 dev/acpica/acpi_if.m		optional	acpi
 dev/acpica/acpi_pci_link.c	optional	acpi pci
 dev/acpica/acpi_pcib.c		optional	acpi pci
 dev/acpica/acpi_pxm.c		optional	acpi
 dev/ahci/ahci_generic.c		optional	ahci
 dev/altera/dwc/if_dwc_socfpga.c	optional	fdt dwc_socfpga
 dev/axgbe/if_axgbe.c		optional	axgbe
 dev/axgbe/xgbe-desc.c		optional	axgbe
 dev/axgbe/xgbe-dev.c		optional	axgbe
 dev/axgbe/xgbe-drv.c		optional	axgbe
 dev/axgbe/xgbe-mdio.c		optional	axgbe
 dev/cpufreq/cpufreq_dt.c	optional	cpufreq fdt
 dev/iicbus/sy8106a.c		optional	sy8106a fdt
 dev/iicbus/twsi/mv_twsi.c	optional	twsi fdt
 dev/iicbus/twsi/a10_twsi.c	optional	twsi fdt
 dev/iicbus/twsi/twsi.c		optional	twsi fdt
 dev/hwpmc/hwpmc_arm64.c		optional	hwpmc
 dev/hwpmc/hwpmc_arm64_md.c	optional	hwpmc
 dev/mbox/mbox_if.m		optional	soc_brcm_bcm2837
 dev/mmc/host/dwmmc.c		optional	dwmmc fdt
 dev/mmc/host/dwmmc_altera.c	optional	dwmmc dwmmc_altera fdt
 dev/mmc/host/dwmmc_hisi.c	optional	dwmmc dwmmc_hisi fdt
 dev/mmc/host/dwmmc_rockchip.c	optional	dwmmc rk_dwmmc fdt
 dev/neta/if_mvneta_fdt.c	optional	neta fdt
 dev/neta/if_mvneta.c		optional	neta mdio mii
 dev/ofw/ofw_cpu.c		optional	fdt
 dev/ofw/ofwpci.c		optional 	fdt pci
 dev/pci/controller/pci_n1sdp.c	optional	pci_n1sdp acpi
 dev/pci/pci_host_generic.c	optional	pci
 dev/pci/pci_host_generic_acpi.c	optional	pci acpi
 dev/pci/pci_host_generic_fdt.c	optional	pci fdt
 dev/pci/pci_dw_mv.c		optional	pci fdt
 dev/pci/pci_dw.c		optional	pci fdt
 dev/pci/pci_dw_if.m		optional	pci fdt
 dev/psci/psci.c			standard
 dev/psci/smccc_arm64.S		standard
 dev/psci/smccc.c		standard
 dev/sdhci/sdhci_xenon.c		optional	sdhci_xenon sdhci fdt
 dev/uart/uart_cpu_arm64.c	optional	uart
 dev/uart/uart_dev_mu.c		optional	uart uart_mu
 dev/uart/uart_dev_pl011.c	optional	uart pl011
 dev/usb/controller/dwc_otg_hisi.c optional	dwcotg fdt soc_hisi_hi6220
 dev/usb/controller/dwc3.c	optional fdt dwc3
 dev/usb/controller/ehci_mv.c	optional	ehci_mv fdt
 dev/usb/controller/generic_ehci.c optional	ehci
 dev/usb/controller/generic_ehci_acpi.c optional	ehci acpi
 dev/usb/controller/generic_ehci_fdt.c optional	ehci fdt
 dev/usb/controller/generic_ohci.c optional	ohci fdt
 dev/usb/controller/generic_usb_if.m optional	ohci fdt
 dev/usb/controller/usb_nop_xceiv.c	optional fdt ext_resources
 dev/usb/controller/generic_xhci.c	optional	xhci
 dev/usb/controller/generic_xhci_acpi.c	optional	xhci acpi
 dev/usb/controller/generic_xhci_fdt.c	optional	xhci fdt
 dev/vnic/mrml_bridge.c		optional	vnic fdt
 dev/vnic/nic_main.c		optional	vnic pci
 dev/vnic/nicvf_main.c		optional	vnic pci pci_iov
 dev/vnic/nicvf_queues.c		optional	vnic pci pci_iov
 dev/vnic/thunder_bgx_fdt.c	optional	vnic fdt
 dev/vnic/thunder_bgx.c		optional	vnic pci
 dev/vnic/thunder_mdio_fdt.c	optional	vnic fdt
 dev/vnic/thunder_mdio.c		optional	vnic
 dev/vnic/lmac_if.m		optional	inet | inet6 | vnic
 kern/kern_clocksource.c		standard
 kern/msi_if.m			optional	intrng
 kern/pic_if.m			optional	intrng
 kern/subr_devmap.c		standard
 kern/subr_intr.c		optional	intrng
+kern/subr_physmem.c		standard
 libkern/bcmp.c			standard
 libkern/memcmp.c		standard				\
 	compile-with "${NORMAL_C:N-fsanitize*}"
 libkern/memset.c		standard				\
 	compile-with "${NORMAL_C:N-fsanitize*}"
 libkern/arm64/crc32c_armv8.S	standard
 cddl/dev/dtrace/aarch64/dtrace_asm.S			optional dtrace compile-with "${DTRACE_S}"
 cddl/dev/dtrace/aarch64/dtrace_subr.c			optional dtrace compile-with "${DTRACE_C}"
 cddl/dev/fbt/aarch64/fbt_isa.c				optional dtrace_fbt | dtraceall compile-with "${FBT_C}"
 
 # RockChip Drivers
 arm64/rockchip/rk3399_emmcphy.c		optional fdt rk_emmcphy soc_rockchip_rk3399
 arm64/rockchip/rk_dwc3.c		optional fdt rk_dwc3 soc_rockchip_rk3399
 arm64/rockchip/rk_i2c.c			optional fdt rk_i2c soc_rockchip_rk3328 | fdt rk_i2c soc_rockchip_rk3399
 arm64/rockchip/rk805.c			optional fdt rk805 soc_rockchip_rk3328 | fdt rk805 soc_rockchip_rk3399
 arm64/rockchip/rk_grf.c			optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/rk_pinctrl.c		optional fdt rk_pinctrl soc_rockchip_rk3328 | fdt rk_pinctrl soc_rockchip_rk3399
 arm64/rockchip/rk_gpio.c		optional fdt rk_gpio soc_rockchip_rk3328 | fdt rk_gpio soc_rockchip_rk3399
 arm64/rockchip/rk_iodomain.c		optional fdt rk_iodomain
 arm64/rockchip/rk_spi.c			optional fdt rk_spi
 arm64/rockchip/rk_usb2phy.c		optional fdt rk_usb2phy soc_rockchip_rk3328 | soc_rockchip_rk3399
 arm64/rockchip/rk_typec_phy.c		optional fdt rk_typec_phy soc_rockchip_rk3399
 arm64/rockchip/if_dwc_rk.c		optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399
 arm64/rockchip/rk_tsadc_if.m		optional fdt soc_rockchip_rk3399
 arm64/rockchip/rk_tsadc.c		optional fdt soc_rockchip_rk3399
 arm64/rockchip/rk_pwm.c			optional fdt rk_pwm
 arm64/rockchip/rk_pcie.c		optional fdt pci soc_rockchip_rk3399
 arm64/rockchip/rk_pcie_phy.c		optional fdt pci soc_rockchip_rk3399
 dev/dwc/if_dwc.c			optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399
 dev/dwc/if_dwc_if.m			optional fdt dwc_rk soc_rockchip_rk3328 | fdt dwc_rk soc_rockchip_rk3399
 
 # RockChip Clock support
 arm64/rockchip/clk/rk_cru.c		optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_armclk.c	optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_composite.c	optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_fract.c	optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_gate.c	optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_mux.c		optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk_clk_pll.c		optional fdt soc_rockchip_rk3328 | fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk3328_cru.c		optional fdt soc_rockchip_rk3328
 arm64/rockchip/clk/rk3399_cru.c		optional fdt soc_rockchip_rk3399
 arm64/rockchip/clk/rk3399_pmucru.c	optional fdt soc_rockchip_rk3399
Index: head/sys/kern/subr_physmem.c
===================================================================
--- head/sys/kern/subr_physmem.c	(nonexistent)
+++ head/sys/kern/subr_physmem.c	(revision 360082)
@@ -0,0 +1,399 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#include "opt_ddb.h"
+
+/*
+ * Routines for describing and initializing anything related to physical memory.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/physmem.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+#include <machine/md_var.h>
+
+/*
+ * These structures are used internally to keep track of regions of physical
+ * ram, and regions within the physical ram that need to be excluded.  An
+ * exclusion region can be excluded from crash dumps, from the vm pool of pages
+ * that can be allocated, or both, depending on the exclusion flags associated
+ * with the region.
+ */
+#ifdef DEV_ACPI
+#define	MAX_HWCNT	32	/* ACPI needs more regions */
+#define	MAX_EXCNT	32
+#else
+#define	MAX_HWCNT	16
+#define	MAX_EXCNT	16
+#endif
+
+#if defined(__arm__)
+#define	MAX_PHYS_ADDR	0xFFFFFFFFull
+#elif defined(__aarch64__) || defined(__riscv)
+#define	MAX_PHYS_ADDR	0xFFFFFFFFFFFFFFFFull
+#endif
+
+struct region {
+	vm_paddr_t	addr;
+	vm_size_t	size;
+	uint32_t	flags;
+};
+
+static struct region hwregions[MAX_HWCNT];
+static struct region exregions[MAX_EXCNT];
+
+static size_t hwcnt;
+static size_t excnt;
+
+/*
+ * realmem is the total number of hardware pages, excluded or not.
+ * Maxmem is one greater than the last physical page number.
+ */
+long realmem;
+long Maxmem;
+
+/*
+ * Print the contents of the physical and excluded region tables using the
+ * provided printf-like output function (which will be either printf or
+ * db_printf).
+ */
+static void
+physmem_dump_tables(int (*prfunc)(const char *, ...))
+{
+	int flags, i;
+	uintmax_t addr, size;
+	const unsigned int mbyte = 1024 * 1024;
+
+	prfunc("Physical memory chunk(s):\n");
+	for (i = 0; i < hwcnt; ++i) {
+		addr = hwregions[i].addr;
+		size = hwregions[i].size;
+		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages)\n", addr,
+		    addr + size - 1, size / mbyte, size / PAGE_SIZE);
+	}
+
+	prfunc("Excluded memory regions:\n");
+	for (i = 0; i < excnt; ++i) {
+		addr  = exregions[i].addr;
+		size  = exregions[i].size;
+		flags = exregions[i].flags;
+		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages) %s %s\n",
+		    addr, addr + size - 1, size / mbyte, size / PAGE_SIZE,
+		    (flags & EXFLAG_NOALLOC) ? "NoAlloc" : "",
+		    (flags & EXFLAG_NODUMP)  ? "NoDump" : "");
+	}
+
+#ifdef DEBUG
+	prfunc("Avail lists:\n");
+	for (i = 0; phys_avail[i] != 0; ++i) {
+		prfunc("  phys_avail[%d] 0x%08x\n", i, phys_avail[i]);
+	}
+	for (i = 0; dump_avail[i] != 0; ++i) {
+		prfunc("  dump_avail[%d] 0x%08x\n", i, dump_avail[i]);
+	}
+#endif
+}
+
+/*
+ * Print the contents of the static mapping table.  Used for bootverbose.
+ */
+void
+physmem_print_tables(void)
+{
+
+	physmem_dump_tables(printf);
+}
+
+/*
+ * Walk the list of hardware regions, processing it against the list of
+ * exclusions that contain the given exflags, and generating an "avail list".
+ *
+ * Updates the value at *pavail with the sum of all pages in all hw regions.
+ *
+ * Returns the number of pages of non-excluded memory added to the avail list.
+ */
+static size_t
+regions_to_avail(vm_paddr_t *avail, uint32_t exflags, size_t maxavail,
+    long *pavail, long *prealmem)
+{
+	size_t acnt, exi, hwi;
+	uint64_t end, start, xend, xstart;
+	long availmem, totalmem;
+	const struct region *exp, *hwp;
+
+	totalmem = 0;
+	availmem = 0;
+	acnt = 0;
+	for (hwi = 0, hwp = hwregions; hwi < hwcnt; ++hwi, ++hwp) {
+		start = hwp->addr;
+		end   = hwp->size + start;
+		totalmem += atop((vm_offset_t)(end - start));
+		for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) {
+			/*
+			 * If the excluded region does not match given flags,
+			 * continue checking with the next excluded region.
+			 */
+			if ((exp->flags & exflags) == 0)
+				continue;
+			xstart = exp->addr;
+			xend   = exp->size + xstart;
+			/*
+			 * If the excluded region ends before this hw region,
+			 * continue checking with the next excluded region.
+			 */
+			if (xend <= start)
+				continue;
+			/*
+			 * If the excluded region begins after this hw region
+			 * we're done because both lists are sorted.
+			 */
+			if (xstart >= end)
+				break;
+			/*
+			 * If the excluded region completely covers this hw
+			 * region, shrink this hw region to zero size.
+			 */
+			if ((start >= xstart) && (end <= xend)) {
+				start = xend;
+				end = xend;
+				break;
+			}
+			/*
+			 * If the excluded region falls wholly within this hw
+			 * region without abutting or overlapping the beginning
+			 * or end, create an available entry from the leading
+			 * fragment, then adjust the start of this hw region to
+			 * the end of the excluded region, and continue checking
+			 * the next excluded region because another exclusion
+			 * could affect the remainder of this hw region.
+			 */
+			if ((xstart > start) && (xend < end)) {
+				if (acnt > 0 &&
+				    avail[acnt - 1] == (vm_paddr_t)start) {
+					avail[acnt - 1] = (vm_paddr_t)xstart;
+				} else {
+					avail[acnt++] = (vm_paddr_t)start;
+					avail[acnt++] = (vm_paddr_t)xstart;
+				}
+				availmem += atop((vm_offset_t)(xstart - start));
+				start = xend;
+				continue;
+			}
+			/*
+			 * We know the excluded region overlaps either the start
+			 * or end of this hardware region (but not both), trim
+			 * the excluded portion off the appropriate end.
+			 */
+			if (xstart <= start)
+				start = xend;
+			else
+				end = xstart;
+		}
+		/*
+		 * If the trimming actions above left a non-zero size, create an
+		 * available entry for it.
+		 */
+		if (end > start) {
+			if (acnt > 0 && avail[acnt - 1] == (vm_paddr_t)start) {
+				avail[acnt - 1] = (vm_paddr_t)end;
+			} else {
+				avail[acnt++] = (vm_paddr_t)start;
+				avail[acnt++] = (vm_paddr_t)end;
+			}
+			availmem += atop((vm_offset_t)(end - start));
+		}
+		if (acnt >= maxavail)
+			panic("Not enough space in the dump/phys_avail arrays");
+	}
+
+	if (pavail != NULL)
+		*pavail = availmem;
+	if (prealmem != NULL)
+		*prealmem = totalmem;
+	return (acnt);
+}
+
+/*
+ * Insertion-sort a new entry into a regions list; sorted by start address.
+ */
+static size_t
+insert_region(struct region *regions, size_t rcnt, vm_paddr_t addr,
+    vm_size_t size, uint32_t flags)
+{
+	size_t i;
+	struct region *ep, *rp;
+
+	ep = regions + rcnt;
+	for (i = 0, rp = regions; i < rcnt; ++i, ++rp) {
+		if (rp->addr == addr && rp->size == size) /* Pure dup. */
+			return (rcnt);
+		if (flags == rp->flags) {
+			if (addr + size == rp->addr) {
+				rp->addr = addr;
+				rp->size += size;
+				return (rcnt);
+			} else if (rp->addr + rp->size == addr) {
+				rp->size += size;
+				return (rcnt);
+			}
+		}
+		if (addr < rp->addr) {
+			bcopy(rp, rp + 1, (ep - rp) * sizeof(*rp));
+			break;
+		}
+	}
+	rp->addr  = addr;
+	rp->size  = size;
+	rp->flags = flags;
+	rcnt++;
+
+	return (rcnt);
+}
+
+/*
+ * Add a hardware memory region.
+ */
+void
+physmem_hardware_region(uint64_t pa, uint64_t sz)
+{
+	vm_offset_t adj;
+
+	/*
+	 * Filter out the page at PA 0x00000000.  The VM can't handle it, as
+	 * pmap_extract() == 0 means failure.
+	 */
+	if (pa == 0) {
+		if (sz <= PAGE_SIZE)
+			return;
+		pa  = PAGE_SIZE;
+		sz -= PAGE_SIZE;
+	} else if (pa > MAX_PHYS_ADDR) {
+		/* This range is past usable memory, ignore it */
+		return;
+	}
+
+	/*
+	 * Also filter out the page at the end of the physical address space --
+	 * if addr is non-zero and addr+size is zero we wrapped to the next byte
+	 * beyond what vm_paddr_t can express.  That leads to a NULL pointer
+	 * deref early in startup; work around it by leaving the last page out.
+	 *
+	 * XXX This just in:  subtract out a whole megabyte, not just 1 page.
+	 * Reducing the size by anything less than 1MB results in the NULL
+	 * pointer deref in _vm_map_lock_read().  Better to give up a megabyte
+	 * than leave some folks with an unusable system while we investigate.
+	 */
+	if ((pa + sz) > (MAX_PHYS_ADDR - 1024 * 1024)) {
+		sz = MAX_PHYS_ADDR - pa + 1;
+		if (sz <= 1024 * 1024)
+			return;
+		sz -= 1024 * 1024;
+	}
+
+	/*
+	 * Round the starting address up to a page boundary, and truncate the
+	 * ending page down to a page boundary.
+	 */
+	adj = round_page(pa) - pa;
+	pa  = round_page(pa);
+	sz  = trunc_page(sz - adj);
+
+	if (sz > 0 && hwcnt < nitems(hwregions))
+		hwcnt = insert_region(hwregions, hwcnt, pa, sz, 0);
+}
+
+/*
+ * Add an exclusion region.
+ */
+void
+physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t exflags)
+{
+	vm_offset_t adj;
+
+	/*
+	 * Truncate the starting address down to a page boundary, and round the
+	 * ending page up to a page boundary.
+	 */
+	adj = pa - trunc_page(pa);
+	pa  = trunc_page(pa);
+	sz  = round_page(sz + adj);
+
+	if (excnt >= nitems(exregions))
+		panic("failed to exclude region %#jx-%#jx", (uintmax_t)pa,
+		    (uintmax_t)(pa + sz));
+	excnt = insert_region(exregions, excnt, pa, sz, exflags);
+}
+
+size_t
+physmem_avail(vm_paddr_t *avail, size_t maxavail)
+{
+
+	return (regions_to_avail(avail, EXFLAG_NOALLOC, maxavail, NULL, NULL));
+}
+
+/*
+ * Process all the regions added earlier into the global avail lists.
+ *
+ * Updates the kernel global 'physmem' with the number of physical pages
+ * available for use (all pages not in any exclusion region).
+ *
+ * Updates the kernel global 'Maxmem' with the page number one greater then the
+ * last page of physical memory in the system.
+ */
+void
+physmem_init_kernel_globals(void)
+{
+	size_t nextidx;
+
+	regions_to_avail(dump_avail, EXFLAG_NODUMP, PHYS_AVAIL_ENTRIES, NULL,
+	    NULL);
+	nextidx = regions_to_avail(phys_avail, EXFLAG_NOALLOC,
+	    PHYS_AVAIL_ENTRIES, &physmem, &realmem);
+	if (nextidx == 0)
+		panic("No memory entries in phys_avail");
+	Maxmem = atop(phys_avail[nextidx - 1]);
+}
+
+#ifdef DDB
+#include <ddb/ddb.h>
+
+DB_SHOW_COMMAND(physmem, db_show_physmem)
+{
+
+	physmem_dump_tables(db_printf);
+}
+
+#endif /* DDB */

Property changes on: head/sys/kern/subr_physmem.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/sys/physmem.h
===================================================================
--- head/sys/sys/physmem.h	(nonexistent)
+++ head/sys/sys/physmem.h	(revision 360082)
@@ -0,0 +1,88 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_SYS_PHYSMEM_H_
+#define	_SYS_PHYSMEM_H_
+
+/*
+ * Routines to help configure physical ram.
+ *
+ * Multiple regions of contiguous physical ram can be added (in any order).
+ *
+ * Multiple regions of physical ram that should be excluded from crash dumps, or
+ * memory allocation, or both, can be added (in any order).
+ *
+ * After all early kernel init is done and it's time to configure all
+ * remainining non-excluded physical ram for use by other parts of the kernel,
+ * physmem_init_kernel_globals() processes the hardware regions and
+ * exclusion regions to generate the global dump_avail and phys_avail arrays
+ * that communicate physical ram configuration to other parts of the kernel.
+ */
+
+#define	EXFLAG_NODUMP	0x01
+#define	EXFLAG_NOALLOC	0x02
+
+void physmem_hardware_region(uint64_t pa, uint64_t sz);
+void physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t flags);
+size_t physmem_avail(vm_paddr_t *avail, size_t maxavail);
+void physmem_init_kernel_globals(void);
+void physmem_print_tables(void);
+
+/*
+ * Convenience routines for FDT.
+ */
+
+#ifdef FDT
+
+#include <machine/ofw_machdep.h>
+
+static inline void
+physmem_hardware_regions(struct mem_region * mrptr, int mrcount)
+{
+	while (mrcount--) {
+		physmem_hardware_region(mrptr->mr_start, mrptr->mr_size);
+		++mrptr;
+	}
+}
+
+static inline void
+physmem_exclude_regions(struct mem_region * mrptr, int mrcount,
+    uint32_t exflags)
+{
+	while (mrcount--) {
+		physmem_exclude_region(mrptr->mr_start, mrptr->mr_size,
+		    exflags);
+		++mrptr;
+	}
+}
+
+#endif /* FDT */
+
+#endif /* !_SYS_PHYSMEM_H_ */

Property changes on: head/sys/sys/physmem.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property