diff --git a/sys/arm/at91/at91_machdep.c b/sys/arm/at91/at91_machdep.c index 3df922ef3365..27e88447d441 100644 --- a/sys/arm/at91/at91_machdep.c +++ b/sys/arm/at91/at91_machdep.c @@ -1,685 +1,684 @@ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * machdep.c * * Machine dependant functions for kernel setup * * This file needs a lot of work. * * Created : 17/09/94 */ #include __FBSDID("$FreeBSD$"); #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MAXCPU #define MAXCPU 1 #endif /* Page table for mapping proc0 zero page */ #define KERNEL_PT_SYS 0 #define KERNEL_PT_KERN 1 #define KERNEL_PT_KERN_NUM 22 /* L2 table for mapping after kernel */ #define KERNEL_PT_AFKERNEL KERNEL_PT_KERN + KERNEL_PT_KERN_NUM #define KERNEL_PT_AFKERNEL_NUM 5 /* this should be evenly divisable by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */ #define NUM_KERNEL_PTS (KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM) extern u_int data_abort_handler_address; extern u_int prefetch_abort_handler_address; extern u_int undefined_handler_address; struct pv_addr kernel_pt_table[NUM_KERNEL_PTS]; /* Physical and virtual addresses for some global pages */ vm_paddr_t phys_avail[10]; vm_paddr_t dump_avail[4]; struct pv_addr systempage; struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; struct pv_addr kernelstack; /* Static device mappings. 
*/ const struct pmap_devmap at91_devmap[] = { /* * Map the on-board devices VA == PA so that we can access them * with the MMU on or off. */ { /* * This at least maps the interrupt controller, the UART * and the timer. Other devices should use newbus to * map their memory anyway. */ 0xdff00000, 0xfff00000, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * We can't just map the OHCI registers VA == PA, because * AT91xx_xxx_BASE belongs to the userland address space. * We could just choose a different virtual address, but a better * solution would probably be to just use pmap_mapdev() to allocate * KVA, as we don't need the OHCI controller before the vm * initialization is done. However, the AT91 resource allocation * system doesn't know how to use pmap_mapdev() yet. * Care must be taken to ensure PA and VM address do not overlap * between entries. */ { /* * Add the ohci controller, and anything else that might be * on this chip select for a VA/PA mapping. */ /* Internal Memory 1MB */ AT91RM92_OHCI_BASE, AT91RM92_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { /* CompactFlash controller. Portion of EBI CS4 1MB */ AT91RM92_CF_BASE, AT91RM92_CF_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * The next two should be good for the 9260, 9261 and 9G20 since * addresses mapping is the same. */ { /* Internal Memory 1MB */ AT91SAM9G20_OHCI_BASE, AT91SAM9G20_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { /* EBI CS3 256MB */ AT91SAM9G20_NAND_BASE, AT91SAM9G20_NAND_PA_BASE, AT91SAM9G20_NAND_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, /* * The next should be good for the 9G45. */ { /* Internal Memory 1MB */ AT91SAM9G45_OHCI_BASE, AT91SAM9G45_OHCI_PA_BASE, 0x00100000, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { 0, 0, 0, 0, 0, } }; #ifdef LINUX_BOOT_ABI extern int membanks; extern int memstart[]; extern int memsize[]; #endif long at91_ramsize(void) { uint32_t cr, mdr, mr, *SDRAMC; int banks, rows, cols, bw; #ifdef LINUX_BOOT_ABI /* * If we found any ATAGs that were for memory, return the first bank. */ if (membanks > 0) return (memsize[0]); #endif if (at91_is_rm92()) { SDRAMC = (uint32_t *)(AT91_BASE + AT91RM92_SDRAMC_BASE); cr = SDRAMC[AT91RM92_SDRAMC_CR / 4]; mr = SDRAMC[AT91RM92_SDRAMC_MR / 4]; banks = (cr & AT91RM92_SDRAMC_CR_NB_4) ? 2 : 1; rows = ((cr & AT91RM92_SDRAMC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91RM92_SDRAMC_CR_NC_MASK) + 8; bw = (mr & AT91RM92_SDRAMC_MR_DBW_16) ? 1 : 2; } else if (at91_cpu_is(AT91_T_SAM9G45)) { SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G45_DDRSDRC0_BASE); cr = SDRAMC[AT91SAM9G45_DDRSDRC_CR / 4]; mdr = SDRAMC[AT91SAM9G45_DDRSDRC_MDR / 4]; banks = 0; rows = ((cr & AT91SAM9G45_DDRSDRC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91SAM9G45_DDRSDRC_CR_NC_MASK) + 8; bw = (mdr & AT91SAM9G45_DDRSDRC_MDR_DBW_16) ? 1 : 2; /* Fix the calculation for DDR memory */ mdr &= AT91SAM9G45_DDRSDRC_MDR_MASK; if (mdr & AT91SAM9G45_DDRSDRC_MDR_LPDDR1 || mdr & AT91SAM9G45_DDRSDRC_MDR_DDR2) { /* The cols value is 1 higher for DDR */ cols += 1; /* DDR has 4 internal banks. */ banks = 2; } } else { /* * This should be good for the 9260, 9261, 9G20, 9G35 and 9X25 * as addresses and registers are the same. */ SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G20_SDRAMC_BASE); cr = SDRAMC[AT91SAM9G20_SDRAMC_CR / 4]; mr = SDRAMC[AT91SAM9G20_SDRAMC_MR / 4]; banks = (cr & AT91SAM9G20_SDRAMC_CR_NB_4) ? 
2 : 1; rows = ((cr & AT91SAM9G20_SDRAMC_CR_NR_MASK) >> 2) + 11; cols = (cr & AT91SAM9G20_SDRAMC_CR_NC_MASK) + 8; bw = (cr & AT91SAM9G20_SDRAMC_CR_DBW_16) ? 1 : 2; } return (1 << (cols + rows + banks + bw)); } static const char *soc_type_name[] = { [AT91_T_CAP9] = "at91cap9", [AT91_T_RM9200] = "at91rm9200", [AT91_T_SAM9260] = "at91sam9260", [AT91_T_SAM9261] = "at91sam9261", [AT91_T_SAM9263] = "at91sam9263", [AT91_T_SAM9G10] = "at91sam9g10", [AT91_T_SAM9G20] = "at91sam9g20", [AT91_T_SAM9G45] = "at91sam9g45", [AT91_T_SAM9N12] = "at91sam9n12", [AT91_T_SAM9RL] = "at91sam9rl", [AT91_T_SAM9X5] = "at91sam9x5", [AT91_T_NONE] = "UNKNOWN" }; static const char *soc_subtype_name[] = { [AT91_ST_NONE] = "UNKNOWN", [AT91_ST_RM9200_BGA] = "at91rm9200_bga", [AT91_ST_RM9200_PQFP] = "at91rm9200_pqfp", [AT91_ST_SAM9XE] = "at91sam9xe", [AT91_ST_SAM9G45] = "at91sam9g45", [AT91_ST_SAM9M10] = "at91sam9m10", [AT91_ST_SAM9G46] = "at91sam9g46", [AT91_ST_SAM9M11] = "at91sam9m11", [AT91_ST_SAM9G15] = "at91sam9g15", [AT91_ST_SAM9G25] = "at91sam9g25", [AT91_ST_SAM9G35] = "at91sam9g35", [AT91_ST_SAM9X25] = "at91sam9x25", [AT91_ST_SAM9X35] = "at91sam9x35", }; struct at91_soc_info soc_info; /* * Read the SoC ID from the CIDR register and try to match it against the * values we know. If we find a good one, we return true. If not, we * return false. When we find a good one, we also find the subtype * and CPU family. */ static int at91_try_id(uint32_t dbgu_base) { uint32_t socid; soc_info.cidr = *(volatile uint32_t *)(AT91_BASE + dbgu_base + DBGU_C1R); socid = soc_info.cidr & ~AT91_CPU_VERSION_MASK; soc_info.type = AT91_T_NONE; soc_info.subtype = AT91_ST_NONE; soc_info.family = (soc_info.cidr & AT91_CPU_FAMILY_MASK) >> 20; soc_info.exid = *(volatile uint32_t *)(AT91_BASE + dbgu_base + DBGU_C2R); switch (socid) { case AT91_CPU_CAP9: soc_info.type = AT91_T_CAP9; break; case AT91_CPU_RM9200: soc_info.type = AT91_T_RM9200; break; case AT91_CPU_SAM9XE128: case AT91_CPU_SAM9XE256: case AT91_CPU_SAM9XE512: case AT91_CPU_SAM9260: soc_info.type = AT91_T_SAM9260; if (soc_info.family == AT91_FAMILY_SAM9XE) soc_info.subtype = AT91_ST_SAM9XE; break; case AT91_CPU_SAM9261: soc_info.type = AT91_T_SAM9261; break; case AT91_CPU_SAM9263: soc_info.type = AT91_T_SAM9263; break; case AT91_CPU_SAM9G10: soc_info.type = AT91_T_SAM9G10; break; case AT91_CPU_SAM9G20: soc_info.type = AT91_T_SAM9G20; break; case AT91_CPU_SAM9G45: soc_info.type = AT91_T_SAM9G45; break; case AT91_CPU_SAM9N12: soc_info.type = AT91_T_SAM9N12; break; case AT91_CPU_SAM9RL64: soc_info.type = AT91_T_SAM9RL; break; case AT91_CPU_SAM9X5: soc_info.type = AT91_T_SAM9X5; break; default: return (0); } switch (soc_info.type) { case AT91_T_SAM9G45: switch (soc_info.exid) { case AT91_EXID_SAM9G45: soc_info.subtype = AT91_ST_SAM9G45; break; case AT91_EXID_SAM9G46: soc_info.subtype = AT91_ST_SAM9G46; break; case AT91_EXID_SAM9M10: soc_info.subtype = AT91_ST_SAM9M10; break; case AT91_EXID_SAM9M11: soc_info.subtype = AT91_ST_SAM9M11; break; } break; case AT91_T_SAM9X5: switch (soc_info.exid) { case AT91_EXID_SAM9G15: soc_info.subtype = AT91_ST_SAM9G15; break; case AT91_EXID_SAM9G25: soc_info.subtype = AT91_ST_SAM9G25; break; case AT91_EXID_SAM9G35: soc_info.subtype = AT91_ST_SAM9G35; break; case AT91_EXID_SAM9X25: soc_info.subtype = AT91_ST_SAM9X25; break; case AT91_EXID_SAM9X35: soc_info.subtype = AT91_ST_SAM9X35; break; } break; default: break; } /* * Disable interrupts in the DBGU unit... 
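 *
 * (Aside on at91_ramsize() above, with illustrative figures that are not
 * taken from this change: a part wired with 9 column bits, 13 row bits,
 * 4 internal banks (banks == 2) and a 32-bit data bus (bw == 2) gives
 * 1 << (9 + 13 + 2 + 2) = 1 << 26 bytes, i.e. 64 MB.)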
*/ *(volatile uint32_t *)(AT91_BASE + dbgu_base + USART_IDR) = 0xffffffff; /* * Save the name for later... */ snprintf(soc_info.name, sizeof(soc_info.name), "%s%s%s", soc_type_name[soc_info.type], soc_info.subtype == AT91_ST_NONE ? "" : " subtype ", soc_info.subtype == AT91_ST_NONE ? "" : soc_subtype_name[soc_info.subtype]); /* * try to get the matching CPU support. */ soc_info.soc_data = at91_match_soc(soc_info.type, soc_info.subtype); soc_info.dbgu_base = AT91_BASE + dbgu_base; return (1); } static void at91_soc_id(void) { if (!at91_try_id(AT91_DBGU0)) at91_try_id(AT91_DBGU1); } #ifdef ARM_MANY_BOARD /* likely belongs in arm/arm/machdep.c, but since board_init is still at91 only... */ SET_DECLARE(arm_board_set, const struct arm_board); /* Not yet fully functional, but enough to build ATMEL config */ static long board_init(void) { return -1; } #endif void * initarm(struct arm_boot_params *abp) { struct pv_addr kernel_l1pt; struct pv_addr dpcpu; int i; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t afterkern; uint32_t memsize; vm_offset_t lastaddr; lastaddr = parse_boot_param(abp); set_cpufuncs(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (KERNPHYSADDR - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0; i < NUM_KERNEL_PTS; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); } else { kernel_pt_table[i].pv_va = freemempos - (i % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * L2_TABLE_SIZE_REAL; kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + KERNPHYSADDR; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* Map the L2 pages tables in the L1 page table */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[KERNEL_PT_SYS]); for (i = 0; i < KERNEL_PT_KERN_NUM; i++) pmap_link_l2pt(l1pagetable, KERNBASE + i * L1_S_SIZE, &kernel_pt_table[KERNEL_PT_KERN + i]); pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR, (((uint32_t)lastaddr - KERNBASE) + PAGE_SIZE) & ~(PAGE_SIZE - 1), VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); afterkern = round_page((lastaddr + L1_S_SIZE) & ~(L1_S_SIZE - 1)); for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) { pmap_link_l2pt(l1pagetable, afterkern + i * L1_S_SIZE, &kernel_pt_table[KERNEL_PT_AFKERNEL + i]); } /* Map the vector page. 
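 * (For reference: ARM_VECTORS_HIGH is the 0xffff0000 "high vectors" base,
 * so the single systempage allocated earlier ends up holding the exception
 * vectors shared by all processes; the sa11x0 port further down maps its
 * vector page at ARM_VECTORS_LOW, i.e. address 0, instead.)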
*/ pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the DPCPU pages */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, DPCPU_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the stack pages */ pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa, IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa, ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa, UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa, KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa, msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); for (i = 0; i < NUM_KERNEL_PTS; ++i) { pmap_map_chunk(l1pagetable, kernel_pt_table[i].pv_va, kernel_pt_table[i].pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); } pmap_devmap_bootstrap(l1pagetable, at91_devmap); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); at91_soc_id(); /* Initialize all the clocks, so that the console can work */ at91_pmc_init_clock(); cninit(); if (soc_info.soc_data == NULL) printf("Warning: No soc support for %s found.\n", soc_info.name); memsize = board_init(); physmem = memsize / PAGE_SIZE; /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). 
*/ cpu_idcache_wbinv_all(); /* Set stack for exception handlers */ data_abort_handler_address = (u_int)data_abort_handler; prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_curmaxkvaddr = afterkern + L1_S_SIZE * (KERNEL_PT_KERN_NUM - 1); arm_dump_avail_init(memsize, sizeof(dump_avail)/sizeof(dump_avail[0])); vm_max_kernel_address = KERNVIRTADDR + 3 * memsize; pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void*)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); i = 0; #if PHYSADDR != KERNPHYSADDR phys_avail[i++] = PHYSADDR; phys_avail[i++] = KERNPHYSADDR; #endif phys_avail[i++] = virtual_avail - KERNVIRTADDR + KERNPHYSADDR; phys_avail[i++] = PHYSADDR + memsize; phys_avail[i++] = 0; phys_avail[i++] = 0; init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } /* * These functions are handled elsewhere, so make them nops here. */ void cpu_startprofclock(void) { } void cpu_stopprofclock(void) { } void cpu_initclocks(void) { } void DELAY(int n) { if (soc_info.soc_data) soc_info.soc_data->soc_delay(n); } void cpu_reset(void) { if (soc_info.soc_data) soc_info.soc_data->soc_reset(); while (1) continue; } diff --git a/sys/arm/sa11x0/assabet_machdep.c b/sys/arm/sa11x0/assabet_machdep.c index 9266939bbdf2..986395340d39 100644 --- a/sys/arm/sa11x0/assabet_machdep.c +++ b/sys/arm/sa11x0/assabet_machdep.c @@ -1,391 +1,390 @@ /* $NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $ */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * RiscBSD kernel project * * machdep.c * * Machine dependant functions for kernel setup * * This file needs a lot of work. 
* * Created : 17/09/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_md.h" #define _ARM32_BUS_DMA_PRIVATE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #define MDROOT_ADDR 0xd0400000 #define KERNEL_PT_VMEM 0 /* Page table for mapping video memory */ #define KERNEL_PT_SYS 0 /* Page table for mapping proc0 zero page */ #define KERNEL_PT_IO 3 /* Page table for mapping IO */ #define KERNEL_PT_IRQ 2 /* Page table for mapping irq handler */ #define KERNEL_PT_KERNEL 1 /* Page table for mapping kernel */ #define KERNEL_PT_L1 4 /* Page table for mapping l1pt */ #define KERNEL_PT_VMDATA 5 /* Page tables for mapping kernel VM */ #define KERNEL_PT_VMDATA_NUM 7 /* start with 16MB of KVM */ #define NUM_KERNEL_PTS (KERNEL_PT_VMDATA + KERNEL_PT_VMDATA_NUM) #define KERNEL_VM_BASE (KERNBASE + 0x00100000) #define KERNEL_VM_SIZE 0x05000000 extern u_int data_abort_handler_address; extern u_int prefetch_abort_handler_address; extern u_int undefined_handler_address; struct pv_addr kernel_pt_table[NUM_KERNEL_PTS]; extern vm_offset_t sa1110_uart_vaddr; extern vm_offset_t sa1_cache_clean_addr; #ifndef MD_ROOT_SIZE #define MD_ROOT_SIZE 65535 #endif /* Physical and virtual addresses for some global pages */ vm_paddr_t phys_avail[10]; vm_paddr_t dump_avail[4]; vm_paddr_t physical_start; vm_paddr_t physical_end; vm_paddr_t physical_freestart; struct pv_addr systempage; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; struct pv_addr kernelstack; /* Static device mappings. */ static const struct pmap_devmap assabet_devmap[] = { /* * Map the on-board devices VA == PA so that we can access them * with the MMU on or off. 
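 * (The SACOM1 entry below backs the early console: sa1110_uart_vaddr is
 * pointed at SACOM1_VBASE in initarm() before the VM is initialized, so
 * the UART has to be mapped statically here rather than obtained through
 * pmap_mapdev().)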
*/ { SACOM1_VBASE, SACOM1_BASE, SACOM1_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { SAIPIC_BASE, SAIPIC_BASE, SAIPIC_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_NOCACHE, }, { 0, 0, 0, 0, 0, } }; struct arm32_dma_range * bus_dma_get_range(void) { return (NULL); } int bus_dma_get_range_nb(void) { return (0); } void cpu_reset() { cpu_halt(); while (1); } #define CPU_SA110_CACHE_CLEAN_SIZE (0x4000 * 2) void * initarm(struct arm_boot_params *abp) { struct pv_addr kernel_l1pt; struct pv_addr md_addr; struct pv_addr md_bla; struct pv_addr dpcpu; int loop; u_int l1pagetable; vm_offset_t freemempos; vm_offset_t lastalloced; vm_offset_t lastaddr; uint32_t memsize = 32 * 1024 * 1024; sa1110_uart_vaddr = SACOM1_VBASE; boothowto = RB_VERBOSE | RB_SINGLE; /* Default value */ lastaddr = parse_boot_param(abp); cninit(); set_cpufuncs(); physmem = memsize / PAGE_SIZE; pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); physical_start = (vm_offset_t) KERNBASE; physical_end = lastaddr; physical_freestart = (((vm_offset_t)physical_end) + PAGE_MASK) & ~PAGE_MASK; md_addr.pv_va = md_addr.pv_pa = MDROOT_ADDR; freemempos = (vm_offset_t)round_page(physical_freestart); memset((void *)freemempos, 0, 256*1024); /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_pa, (np)); \ (var).pv_va = (var).pv_pa; #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += ((np) * PAGE_SIZE);\ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while ((freemempos & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); valloc_pages(md_bla, L2_TABLE_SIZE / PAGE_SIZE); alloc_pages(sa1_cache_clean_addr, CPU_SA110_CACHE_CLEAN_SIZE / PAGE_SIZE); for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[loop], L2_TABLE_SIZE / PAGE_SIZE); } else { kernel_pt_table[loop].pv_pa = freemempos + (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) * L2_TABLE_SIZE_REAL; kernel_pt_table[loop].pv_va = kernel_pt_table[loop].pv_pa; } } /* * Allocate a page for the system page mapped to V0x00000000 * This page will just contain the system vectors and can be * shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE); valloc_pages(abtstack, ABT_STACK_SIZE); valloc_pages(undstack, UND_STACK_SIZE); valloc_pages(kernelstack, KSTACK_PAGES); lastalloced = kernelstack.pv_va; /* * Allocate memory for the l1 and l2 page tables. The scheme to avoid * wasting memory by allocating the l1pt on the first 16k memory was * taken from NetBSD rpc_machdep.c. NKPT should be greater than 12 for * this to work (which is supposed to be the case). */ /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. 
* This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_pa; /* Map the L2 pages tables in the L1 page table */ pmap_link_l2pt(l1pagetable, 0x00000000, &kernel_pt_table[KERNEL_PT_SYS]); pmap_link_l2pt(l1pagetable, KERNBASE, &kernel_pt_table[KERNEL_PT_KERNEL]); pmap_link_l2pt(l1pagetable, 0xd0000000, &kernel_pt_table[KERNEL_PT_IO]); pmap_link_l2pt(l1pagetable, lastalloced & ~((L1_S_SIZE * 4) - 1), &kernel_pt_table[KERNEL_PT_L1]); pmap_link_l2pt(l1pagetable, 0x90000000, &kernel_pt_table[KERNEL_PT_IRQ]); pmap_link_l2pt(l1pagetable, MDROOT_ADDR, &md_bla); for (loop = 0; loop < KERNEL_PT_VMDATA_NUM; ++loop) pmap_link_l2pt(l1pagetable, KERNEL_VM_BASE + loop * 0x00100000, &kernel_pt_table[KERNEL_PT_VMDATA + loop]); pmap_map_chunk(l1pagetable, KERNBASE, KERNBASE, ((uint32_t)lastaddr - KERNBASE), VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the DPCPU pages */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, DPCPU_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the stack pages */ pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa, IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, md_addr.pv_va, md_addr.pv_pa, MD_ROOT_SIZE * 1024, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa, ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa, UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa, KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) { pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va, kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); } pmap_map_chunk(l1pagetable, md_bla.pv_va, md_bla.pv_pa, L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map the vector page. */ pmap_map_entry(l1pagetable, vector_page, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map the statically mapped devices. */ pmap_devmap_bootstrap(l1pagetable, assabet_devmap); pmap_map_chunk(l1pagetable, sa1_cache_clean_addr, 0xf0000000, CPU_SA110_CACHE_CLEAN_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); data_abort_handler_address = (u_int)data_abort_handler; prefetch_abort_handler_address = (u_int)prefetch_abort_handler; undefined_handler_address = (u_int)undefinedinstruction_bounce; undefined_init(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT); setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. 
* After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); bootverbose = 1; /* Set stack for exception handlers */ init_proc0(kernelstack.pv_va); /* Enable MMU, I-cache, D-cache, write buffer. */ cpufunc_control(0x337f, 0x107d); arm_vector_init(ARM_VECTORS_LOW, ARM_VEC_ALL); pmap_curmaxkvaddr = freemempos + KERNEL_PT_VMDATA_NUM * 0x400000; dump_avail[0] = phys_avail[0] = round_page(virtual_avail); dump_avail[1] = phys_avail[1] = 0xc0000000 + 0x02000000 - 1; dump_avail[2] = phys_avail[2] = 0; dump_avail[3] = phys_avail[3] = 0; mutex_init(); vm_max_kernel_address = 0xd0000000; pmap_bootstrap(freemempos, &kernel_l1pt); init_param2(physmem); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index e5765687145d..06ad6346adbe 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -1,812 +1,810 @@ /* * Copyright (c) 2007-2009 Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include #include #include "fuse.h" #include "fuse_file.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_ipc.h" #include "fuse_io.h" #define FUSE_DEBUG_MODULE IO #include "fuse_debug.h" static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag); int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred) { struct fuse_filehandle *fufh; int err, directio; MPASS(vp->v_type == VREG || vp->v_type == VDIR); err = fuse_filehandle_getrw(vp, (uio->uio_rw == UIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); if (err) { printf("FUSE: io dispatch: filehandles are closed\n"); return err; } /* * Ideally, when the daemon asks for direct io at open time, the * standard file flag should be set according to this, so that would * just change the default mode, which later on could be changed via * fcntl(2). * But this doesn't work, the O_DIRECT flag gets cleared at some point * (don't know where). So to make any use of the Fuse direct_io option, * we hardwire it into the file's private data (similarly to Linux, * btw.). 
*/ directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); switch (uio->uio_rw) { case UIO_READ: if (directio) { FS_DEBUG("direct read of vnode %ju via file handle %ju\n", (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); err = fuse_read_directbackend(vp, uio, cred, fufh); } else { FS_DEBUG("buffered read of vnode %ju\n", (uintmax_t)VTOILLU(vp)); err = fuse_read_biobackend(vp, uio, cred, fufh); } break; case UIO_WRITE: if (directio) { FS_DEBUG("direct write of vnode %ju via file handle %ju\n", (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); err = fuse_write_directbackend(vp, uio, cred, fufh); } else { FS_DEBUG("buffered write of vnode %ju\n", (uintmax_t)VTOILLU(vp)); err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag); } break; default: panic("uninterpreted mode passed to fuse_io_dispatch"); } return (err); } static int fuse_read_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct buf *bp; daddr_t lbn; int bcount; int err = 0, n = 0, on = 0; off_t filesize; const int biosize = fuse_iosize(vp); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, VTOFUD(vp)->filesize); if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bcount = MIN(MAXBSIZE, biosize); filesize = VTOFUD(vp)->filesize; do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); FS_DEBUG2G("biosize %d, lbn %d, on %d\n", biosize, (int)lbn, on); /* * Obtain the buffer cache block. Figure out the buffer size * when we are at EOF. If we are modifying the size of the * buffer based on an EOF condition we need to hold * nfs_rslock() through obtaining the buffer to prevent * a potential writer-appender from messing with n_size. * Otherwise we may accidently truncate the buffer and * lose dirty data. * * Note that bcount is *not* DEV_BSIZE aligned. */ if ((off_t)lbn * biosize >= filesize) { bcount = 0; } else if ((off_t)(lbn + 1) * biosize > filesize) { bcount = filesize - (off_t)lbn *biosize; } bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (!bp) return (EINTR); /* * If B_CACHE is not set, we must issue the read. If this * fails, we return an error. */ if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); err = fuse_io_strategy(vp, bp); if (err) { brelse(bp); return (err); } } /* * on is the offset into the current bp. Figure out how many * bytes we can copy out of the bp. Note that bcount is * NOT DEV_BSIZE aligned. * * Then figure out how many bytes we can copy into the uio. */ n = 0; if (on < bcount) n = MIN((unsigned)(bcount - on), uio->uio_resid); if (n > 0) { FS_DEBUG2G("feeding buffeater with %d bytes of buffer %p," " saying %d was asked for\n", n, bp->b_data + on, n + (int)bp->b_resid); err = uiomove(bp->b_data + on, n, uio); } brelse(bp); FS_DEBUG2G("end of turn, err %d, uio->uio_resid %zd, n %d\n", err, uio->uio_resid, n); } while (err == 0 && uio->uio_resid > 0 && n > 0); return (err); } static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_dispatcher fdi; struct fuse_read_in *fri; int err = 0; if (uio->uio_resid == 0) return (0); fdisp_init(&fdi, 0); /* * XXX In "normal" case we use an intermediate kernel buffer for * transmitting data from daemon's context to ours. Eventually, we should * get rid of this. 
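 * (Illustration of the loop below, assuming a hypothetical max_read of
 * 32k: a 100k request goes out as FUSE_READ messages of 32k, 32k, 32k and
 * 4k; a reply shorter than the requested fri->size, e.g. at end of file,
 * terminates the loop early.)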
Anyway, if the target uio lives in sysspace (we are * called from pageops), and the input data doesn't need kernel-side * processing (we are not called from readdir) we can already invoke * an optimized, "peer-to-peer" I/O routine. */ while (uio->uio_resid > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); fri = fdi.indata; fri->fh = fufh->fh_id; fri->offset = uio->uio_offset; fri->size = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_read); FS_DEBUG2G("fri->fh %ju, fri->offset %ju, fri->size %ju\n", (uintmax_t)fri->fh, (uintmax_t)fri->offset, (uintmax_t)fri->size); if ((err = fdisp_wait_answ(&fdi))) goto out; FS_DEBUG2G("complete: got iosize=%d, requested fri.size=%zd; " "resid=%zd offset=%ju\n", fri->size, fdi.iosize, uio->uio_resid, (uintmax_t)uio->uio_offset); if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) break; if (fdi.iosize < fri->size) break; } out: fdisp_destroy(&fdi); return (err); } static int fuse_write_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_write_in *fwi; struct fuse_dispatcher fdi; size_t chunksize; int diff; int err = 0; if (!uio->uio_resid) return (0); fdisp_init(&fdi, 0); while (uio->uio_resid > 0) { chunksize = MIN(uio->uio_resid, fuse_get_mpdata(vp->v_mount)->max_write); fdi.iosize = sizeof(*fwi) + chunksize; fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); fwi = fdi.indata; fwi->fh = fufh->fh_id; fwi->offset = uio->uio_offset; fwi->size = chunksize; if ((err = uiomove((char *)fdi.indata + sizeof(*fwi), chunksize, uio))) break; if ((err = fdisp_wait_answ(&fdi))) break; diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size; if (diff < 0) { err = EINVAL; break; } uio->uio_resid += diff; uio->uio_offset -= diff; if (uio->uio_offset > fvdat->filesize) fuse_vnode_setsize(vp, cred, uio->uio_offset); } fdisp_destroy(&fdi); return (err); } static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct buf *bp; daddr_t lbn; int bcount; int n, on, err = 0; const int biosize = fuse_iosize(vp); KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", uio->uio_resid, uio->uio_offset, fvdat->filesize); if (vp->v_type != VREG) return (EIO); if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio_setoffset(uio, fvdat->filesize); /* * Find all of this file's B_NEEDCOMMIT buffers. If our writes * would exceed the local maximum per-file write commit size when * combined with those, we must decide whether to flush, * go synchronous, or return err. We don't bother checking * IO_UNIT -- we just make all writes atomic anyway, as there's * no point optimizing for something that really won't ever happen. */ do { if (fuse_isdeadfs(vp)) { err = ENXIO; break; } lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); n = MIN((unsigned)(biosize - on), uio->uio_resid); FS_DEBUG2G("lbn %ju, on %d, n %d, uio offset %ju, uio resid %zd\n", (uintmax_t)lbn, on, n, (uintmax_t)uio->uio_offset, uio->uio_resid); again: /* * Handle direct append and file extension cases, calculate * unaligned buffer size. */ if (uio->uio_offset == fvdat->filesize && n) { /* * Get the buffer (in its pre-append state to maintain * B_CACHE if it was previously set). 
Resize the * nfsnode after we have locked the buffer to prevent * readers from reading garbage. */ bcount = on; FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp != NULL) { long save; err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } save = bp->b_flags & B_CACHE; bcount += n; allocbuf(bp, bcount); bp->b_flags |= save; } } else { /* * Obtain the locked cache block first, and then * adjust the file's size as appropriate. */ bcount = on + n; if ((off_t)lbn * biosize + bcount < fvdat->filesize) { if ((off_t)(lbn + 1) * biosize < fvdat->filesize) bcount = biosize; else bcount = fvdat->filesize - (off_t)lbn *biosize; } FS_DEBUG("getting block from OS, bcount %d\n", bcount); bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); if (bp && uio->uio_offset + n > fvdat->filesize) { err = fuse_vnode_setsize(vp, cred, uio->uio_offset + n); if (err) { brelse(bp); break; } } } if (!bp) { err = EINTR; break; } /* * Issue a READ if B_CACHE is not set. In special-append * mode, B_CACHE is based on the buffer prior to the write * op and is typically set, avoiding the read. If a read * is required in special append mode, the server will * probably send us a short-read since we extended the file * on our end, resulting in b_resid == 0 and, thusly, * B_CACHE getting set. * * We can also avoid issuing the read if the write covers * the entire buffer. We have to make sure the buffer state * is reasonable in this case since we will not be initiating * I/O. See the comments in kern/vfs_bio.c's getblk() for * more information. * * B_CACHE may also be set due to the buffer being cached * normally. */ if (on == 0 && n == bcount) { bp->b_flags |= B_CACHE; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_iocmd = BIO_READ; vfs_busy_pages(bp, 0); fuse_io_strategy(vp, bp); if ((err = bp->b_error)) { brelse(bp); break; } } if (bp->b_wcred == NOCRED) bp->b_wcred = crhold(cred); /* * If dirtyend exceeds file size, chop it down. This should * not normally occur but there is an append race where it * might occur XXX, so we log it. * * If the chopping creates a reverse-indexed or degenerate * situation with dirtyoff/end, we 0 both of them. */ if (bp->b_dirtyend > bcount) { FS_DEBUG("FUSE append race @%lx:%d\n", (long)bp->b_blkno * biosize, bp->b_dirtyend - bcount); bp->b_dirtyend = bcount; } if (bp->b_dirtyoff >= bp->b_dirtyend) bp->b_dirtyoff = bp->b_dirtyend = 0; /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. * * While it is possible to merge discontiguous writes due to * our having a B_CACHE buffer ( and thus valid read data * for the hole), we don't because it could lead to * significant cache coherency problems with multiple clients, * especially if locking is implemented later on. * * as an optimization we could theoretically maintain * a linked list of discontinuous areas, but we would still * have to commit them separately so there isn't much * advantage to it except perhaps a bit of asynchronization. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { /* * Yes, we mean it. Write out everything to "storage" * immediatly, without hesitation. (Apart from other * reasons: the only way to know if a write is valid * if its actually written out.) 
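 * (Worked example: with an existing dirty range of [0, 512) in this
 * buffer, a new write at on == 1024 satisfies "on > bp->b_dirtyend", so
 * the old range is bwrite()n here and the write retried via the goto
 * just below; a new write at on == 256 would instead simply extend the
 * range through the MIN()/MAX() update further down.)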
*/ bwrite(bp); if (bp->b_error == EINTR) { err = EINTR; break; } goto again; } err = uiomove((char *)bp->b_data + on, n, uio); /* * Since this block is being modified, it must be written * again and not just committed. Since write clustering does * not work for the stage 1 data write, only the stage 2 * commit rpc, we have to clear B_CLUSTEROK as well. */ bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (err) { bp->b_ioflags |= BIO_ERROR; bp->b_error = err; brelse(bp); break; } /* * Only update dirtyoff/dirtyend if not a degenerate * condition. */ if (n) { if (bp->b_dirtyend > 0) { bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } vfs_bio_set_valid(bp, on, n); } err = bwrite(bp); if (err) break; } while (uio->uio_resid > 0 && n > 0); if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) fuse_vnode_savesize(vp, cred); return (err); } int fuse_io_strategy(struct vnode *vp, struct buf *bp) { struct fuse_filehandle *fufh; struct fuse_vnode_data *fvdat = VTOFUD(vp); struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; int error = 0; const int biosize = fuse_iosize(vp); MPASS(vp->v_type == VREG || vp->v_type == VDIR); MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); FS_DEBUG("inode=%ju offset=%jd resid=%ld\n", (uintmax_t)VTOI(vp), (intmax_t)(((off_t)bp->b_blkno) * biosize), bp->b_bcount); error = fuse_filehandle_getrw(vp, (bp->b_iocmd == BIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); if (error) { printf("FUSE: strategy: filehandles are closed\n"); bp->b_ioflags |= BIO_ERROR; bp->b_error = error; return (error); } cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = curthread; /* * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; KASSERT(!(bp->b_flags & B_DONE), ("fuse_io_strategy: bp %p already marked done", bp)); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; error = fuse_read_directbackend(vp, uiop, cred, fufh); if ((!error && uiop->uio_resid) || (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && uiop->uio_offset >= fvdat->cached_attrs.va_size)) { /* * If we had a short read with no error, we must have * hit a file hole. We should zero-fill the remainder. * This can also occur if the server hits the file EOF. * * Holes used to be able to occur due to pending * writes, but that is not possible any longer. 
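 * (Worked example: a 4096-byte buffer read that the daemon answers with
 * 1500 bytes and no error leaves uiop->uio_resid == 2596, so nread below
 * is 1500 and the trailing 2596 bytes are bzero()ed before the buffer is
 * handed back.)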
*/ int nread = bp->b_bcount - uiop->uio_resid; int left = uiop->uio_resid; if (error != 0) { printf("FUSE: Fix broken io: offset %ju, " " resid %zd, file size %ju/%ju\n", (uintmax_t)uiop->uio_offset, uiop->uio_resid, fvdat->filesize, fvdat->cached_attrs.va_size); error = 0; } if (left > 0) bzero((char *)bp->b_data + nread, left); uiop->uio_resid = 0; } if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } } else { /* * If we only need to commit, try to commit */ if (bp->b_flags & B_NEEDCOMMIT) { FS_DEBUG("write: B_NEEDCOMMIT flags set\n"); } /* * Setup for actual write */ if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > fvdat->filesize) bp->b_dirtyend = fvdat->filesize - (off_t)bp->b_blkno * biosize; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = (off_t)bp->b_blkno * biosize + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = fuse_write_directbackend(vp, uiop, cred, fufh); if (error == EINTR || error == ETIMEDOUT || (!error && (bp->b_flags & B_NEEDCOMMIT))) { bp->b_flags &= ~(B_INVAL | B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((error == EINTR || error == ETIMEDOUT) && (bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_flags |= B_INVAL; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); return (0); } } bp->b_resid = uiop->uio_resid; bufdone(bp); return (error); } int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) { struct vop_fsync_args a = { .a_vp = vp, .a_waitfor = waitfor, .a_td = td, }; return (vop_stdfsync(&a)); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int fuse_io_invalbuf(struct vnode *vp, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (vp->v_iflag & VI_DOOMED) return 0; ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); while (fvdat->flag & FN_FLUSHINPROG) { struct proc *p = td->td_proc; if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) return EIO; fvdat->flag |= FN_FLUSHWANT; tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); error = 0; if (p != NULL) { PROC_LOCK(p); if (SIGNOTEMPTY(p->p_siglist) || SIGNOTEMPTY(td->td_siglist)) error = EINTR; PROC_UNLOCK(p); } if (error == EINTR) return EINTR; } fvdat->flag |= FN_FLUSHINPROG; if (vp->v_bufobj.bo_object != NULL) { VM_OBJECT_LOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); while (error) { if (error == ERESTART || error == EINTR) { fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return EINTR; } error = vinvalbuf(vp, V_SAVE, PCATCH, 0); } fvdat->flag &= ~FN_FLUSHINPROG; if (fvdat->flag & FN_FLUSHWANT) { fvdat->flag &= ~FN_FLUSHWANT; wakeup(&fvdat->flag); } return (error); } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 63de37a27de4..e70e60e1997d 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -1,1242 +1,1241 @@ /*- * Copyright (c) 1994, Sean Eric Fagan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #ifdef COMPAT_FREEBSD32 #include #include struct ptrace_io_desc32 { int piod_op; uint32_t piod_offs; uint32_t piod_addr; uint32_t piod_len; }; struct ptrace_vm_entry32 { int pve_entry; int pve_timestamp; uint32_t pve_start; uint32_t pve_end; uint32_t pve_offset; u_int pve_prot; u_int pve_pathlen; int32_t pve_fileid; u_int pve_fsid; uint32_t pve_path; }; struct ptrace_lwpinfo32 { lwpid_t pl_lwpid; /* LWP described. */ int pl_event; /* Event that stopped the LWP. */ int pl_flags; /* LWP flags. */ sigset_t pl_sigmask; /* LWP signal mask */ sigset_t pl_siglist; /* LWP pending signal */ struct siginfo32 pl_siginfo; /* siginfo for signal */ char pl_tdname[MAXCOMLEN + 1]; /* LWP name. */ int pl_child_pid; /* New child pid */ }; #endif /* * Functions implemented using PROC_ACTION(): * * proc_read_regs(proc, regs) * Get the current user-visible register set from the process * and copy it into the regs structure (). * The process is stopped at the time read_regs is called. * * proc_write_regs(proc, regs) * Update the current register set from the passed in regs * structure. Take care to avoid clobbering special CPU * registers or privileged bits in the PSL. * Depending on the architecture this may have fix-up work to do, * especially if the IAR or PCW are modified. * The process is stopped at the time write_regs is called. * * proc_read_fpregs, proc_write_fpregs * deal with the floating point register set, otherwise as above. * * proc_read_dbregs, proc_write_dbregs * deal with the processor debug register set, otherwise as above. * * proc_sstep(proc) * Arrange for the process to trap after executing a single instruction. 
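 *
 * (All of the above are thin wrappers around the PROC_ACTION() macro
 * defined below; proc_read_regs(proc, regs), for instance, boils down to:
 * assert that the proc lock is held, return EIO if the process is not
 * resident in memory (P_INMEM clear), otherwise return the result of
 * fill_regs().)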
*/ #define PROC_ACTION(action) do { \ int error; \ \ PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); \ if ((td->td_proc->p_flag & P_INMEM) == 0) \ error = EIO; \ else \ error = (action); \ return (error); \ } while(0) int proc_read_regs(struct thread *td, struct reg *regs) { PROC_ACTION(fill_regs(td, regs)); } int proc_write_regs(struct thread *td, struct reg *regs) { PROC_ACTION(set_regs(td, regs)); } int proc_read_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(fill_dbregs(td, dbregs)); } int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { PROC_ACTION(set_dbregs(td, dbregs)); } /* * Ptrace doesn't support fpregs at all, and there are no security holes * or translations for fpregs, so we can just copy them. */ int proc_read_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(fill_fpregs(td, fpregs)); } int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { PROC_ACTION(set_fpregs(td, fpregs)); } #ifdef COMPAT_FREEBSD32 /* For 32 bit binaries, we need to expose the 32 bit regs layouts. */ int proc_read_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(fill_regs32(td, regs32)); } int proc_write_regs32(struct thread *td, struct reg32 *regs32) { PROC_ACTION(set_regs32(td, regs32)); } int proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(fill_dbregs32(td, dbregs32)); } int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { PROC_ACTION(set_dbregs32(td, dbregs32)); } int proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(fill_fpregs32(td, fpregs32)); } int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { PROC_ACTION(set_fpregs32(td, fpregs32)); } #endif int proc_sstep(struct thread *td) { PROC_ACTION(ptrace_single_step(td)); } int proc_rwmem(struct proc *p, struct uio *uio) { vm_map_t map; vm_offset_t pageno; /* page number */ vm_prot_t reqprot; int error, fault_flags, page_offset, writing; /* * Assert that someone has locked this vmspace. (Should be * curthread but we can't assert that.) This keeps the process * from exiting out from under us until this operation completes. */ KASSERT(p->p_lock >= 1, ("%s: process %p (pid %d) not held", __func__, p, p->p_pid)); /* * The map we want... */ map = &p->p_vmspace->vm_map; /* * If we are writing, then we request vm_fault() to create a private * copy of each page. Since these copies will not be writeable by the * process, we must explicity request that they be dirtied. */ writing = uio->uio_rw == UIO_WRITE; reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ; fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL; /* * Only map in one page at a time. We don't have to, but it * makes things easier. This way is trivial - right? */ do { vm_offset_t uva; u_int len; vm_page_t m; uva = (vm_offset_t)uio->uio_offset; /* * Get the page number of this segment. */ pageno = trunc_page(uva); page_offset = uva - pageno; /* * How many bytes to copy */ len = min(PAGE_SIZE - page_offset, uio->uio_resid); /* * Fault and hold the page on behalf of the process. */ error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; else error = EFAULT; break; } /* * Now do the i/o move. */ error = uiomove_fromphys(&m, page_offset, len, uio); /* Make the I-cache coherent for breakpoints. 
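 * (For example, when a debugger pokes a breakpoint opcode into a text page
 * via PT_IO or PT_WRITE_I, the newly written bytes sit in the data cache,
 * so any executable mapping of the range must have its instruction cache
 * synced before the traced thread runs again; that is what the
 * vm_sync_icache() call below is for.)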
*/ if (writing && error == 0) { vm_map_lock_read(map); if (vm_map_check_protection(map, pageno, pageno + PAGE_SIZE, VM_PROT_EXECUTE)) vm_sync_icache(map, uva, len); vm_map_unlock_read(map); } /* * Release the page. */ vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); return (error); } static int ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve) { struct vattr vattr; vm_map_t map; vm_map_entry_t entry; vm_object_t obj, tobj, lobj; struct vmspace *vm; struct vnode *vp; char *freepath, *fullpath; u_int pathlen; int error, index; error = 0; obj = NULL; vm = vmspace_acquire_ref(p); map = &vm->vm_map; vm_map_lock_read(map); do { entry = map->header.next; index = 0; while (index < pve->pve_entry && entry != &map->header) { entry = entry->next; index++; } if (index != pve->pve_entry) { error = EINVAL; break; } while (entry != &map->header && (entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) { entry = entry->next; index++; } if (entry == &map->header) { error = ENOENT; break; } /* We got an entry. */ pve->pve_entry = index + 1; pve->pve_timestamp = map->timestamp; pve->pve_start = entry->start; pve->pve_end = entry->end - 1; pve->pve_offset = entry->offset; pve->pve_prot = entry->protection; /* Backing object's path needed? */ if (pve->pve_pathlen == 0) break; pathlen = pve->pve_pathlen; pve->pve_pathlen = 0; obj = entry->object.vm_object; if (obj != NULL) VM_OBJECT_LOCK(obj); } while (0); vm_map_unlock_read(map); vmspace_free(vm); pve->pve_fsid = VNOVAL; pve->pve_fileid = VNOVAL; if (error == 0 && obj != NULL) { lobj = obj; for (tobj = obj; tobj != NULL; tobj = tobj->backing_object) { if (tobj != obj) VM_OBJECT_LOCK(tobj); if (lobj != obj) VM_OBJECT_UNLOCK(lobj); lobj = tobj; pve->pve_offset += tobj->backing_object_offset; } vp = (lobj->type == OBJT_VNODE) ? 
lobj->handle : NULL; if (vp != NULL) vref(vp); if (lobj != obj) VM_OBJECT_UNLOCK(lobj); VM_OBJECT_UNLOCK(obj); if (vp != NULL) { freepath = NULL; fullpath = NULL; vn_fullpath(td, vp, &fullpath, &freepath); vn_lock(vp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(vp, &vattr, td->td_ucred) == 0) { pve->pve_fileid = vattr.va_fileid; pve->pve_fsid = vattr.va_fsid; } vput(vp); if (fullpath != NULL) { pve->pve_pathlen = strlen(fullpath) + 1; if (pve->pve_pathlen <= pathlen) { error = copyout(fullpath, pve->pve_path, pve->pve_pathlen); } else error = ENAMETOOLONG; } if (freepath != NULL) free(freepath, M_TEMP); } } return (error); } #ifdef COMPAT_FREEBSD32 static int ptrace_vm_entry32(struct thread *td, struct proc *p, struct ptrace_vm_entry32 *pve32) { struct ptrace_vm_entry pve; int error; pve.pve_entry = pve32->pve_entry; pve.pve_pathlen = pve32->pve_pathlen; pve.pve_path = (void *)(uintptr_t)pve32->pve_path; error = ptrace_vm_entry(td, p, &pve); if (error == 0) { pve32->pve_entry = pve.pve_entry; pve32->pve_timestamp = pve.pve_timestamp; pve32->pve_start = pve.pve_start; pve32->pve_end = pve.pve_end; pve32->pve_offset = pve.pve_offset; pve32->pve_prot = pve.pve_prot; pve32->pve_fileid = pve.pve_fileid; pve32->pve_fsid = pve.pve_fsid; } pve32->pve_pathlen = pve.pve_pathlen; return (error); } static void ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; } #endif /* COMPAT_FREEBSD32 */ /* * Process debugging system call. */ #ifndef _SYS_SYSPROTO_H_ struct ptrace_args { int req; pid_t pid; caddr_t addr; int data; }; #endif #ifdef COMPAT_FREEBSD32 /* * This CPP subterfuge is to try and reduce the number of ifdefs in * the body of the code. * COPYIN(uap->addr, &r.reg, sizeof r.reg); * becomes either: * copyin(uap->addr, &r.reg, sizeof r.reg); * or * copyin(uap->addr, &r.reg32, sizeof r.reg32); * .. except this is done at runtime. */ #define COPYIN(u, k, s) wrap32 ? \ copyin(u, k ## 32, s ## 32) : \ copyin(u, k, s) #define COPYOUT(k, u, s) wrap32 ? \ copyout(k ## 32, u, s ## 32) : \ copyout(k, u, s) #else #define COPYIN(u, k, s) copyin(u, k, s) #define COPYOUT(k, u, s) copyout(k, u, s) #endif int sys_ptrace(struct thread *td, struct ptrace_args *uap) { /* * XXX this obfuscation is to reduce stack usage, but the register * structs may be too large to put on the stack anyway. 
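 * The union declared just below overlays all of the request-specific
 * structures, so the stack cost is only that of the largest member rather
 * than their sum; under COMPAT_FREEBSD32 the 32-bit variants simply share
 * the same storage.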
*/ union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct dbreg dbreg; struct fpreg fpreg; struct reg reg; #ifdef COMPAT_FREEBSD32 struct dbreg32 dbreg32; struct fpreg32 fpreg32; struct reg32 reg32; struct ptrace_io_desc32 piod32; struct ptrace_lwpinfo32 pl32; struct ptrace_vm_entry32 pve32; #endif } r; void *addr; int error = 0; #ifdef COMPAT_FREEBSD32 int wrap32 = 0; if (SV_CURPROC_FLAG(SV_ILP32)) wrap32 = 1; #endif AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; switch (uap->req) { case PT_GETREGS: case PT_GETFPREGS: case PT_GETDBREGS: case PT_LWPINFO: break; case PT_SETREGS: error = COPYIN(uap->addr, &r.reg, sizeof r.reg); break; case PT_SETFPREGS: error = COPYIN(uap->addr, &r.fpreg, sizeof r.fpreg); break; case PT_SETDBREGS: error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg); break; case PT_IO: error = COPYIN(uap->addr, &r.piod, sizeof r.piod); break; case PT_VM_ENTRY: error = COPYIN(uap->addr, &r.pve, sizeof r.pve); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: error = COPYOUT(&r.pve, uap->addr, sizeof r.pve); break; case PT_IO: error = COPYOUT(&r.piod, uap->addr, sizeof r.piod); break; case PT_GETREGS: error = COPYOUT(&r.reg, uap->addr, sizeof r.reg); break; case PT_GETFPREGS: error = COPYOUT(&r.fpreg, uap->addr, sizeof r.fpreg); break; case PT_GETDBREGS: error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg); break; case PT_LWPINFO: error = copyout(&r.pl, uap->addr, uap->data); break; } return (error); } #undef COPYIN #undef COPYOUT #ifdef COMPAT_FREEBSD32 /* * PROC_READ(regs, td2, addr); * becomes either: * proc_read_regs(td2, addr); * or * proc_read_regs32(td2, addr); * .. except this is done at runtime. There is an additional * complication in that PROC_WRITE disallows 32 bit consumers * from writing to 64 bit address space targets. */ #define PROC_READ(w, t, a) wrap32 ? \ proc_read_ ## w ## 32(t, a) : \ proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) wrap32 ? \ (safe ? proc_write_ ## w ## 32(t, a) : EINVAL ) : \ proc_write_ ## w (t, a) #else #define PROC_READ(w, t, a) proc_read_ ## w (t, a) #define PROC_WRITE(w, t, a) proc_write_ ## w (t, a) #endif int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) { struct iovec iov; struct uio uio; struct proc *curp, *p, *pp; struct thread *td2 = NULL, *td3; struct ptrace_io_desc *piod = NULL; struct ptrace_lwpinfo *pl; int error, write, tmp, num; int proctree_locked = 0; lwpid_t tid = 0, *buf; #ifdef COMPAT_FREEBSD32 int wrap32 = 0, safe = 0; struct ptrace_io_desc32 *piod32 = NULL; struct ptrace_lwpinfo32 *pl32 = NULL; struct ptrace_lwpinfo plr; #endif curp = td->td_proc; /* Lock proctree before locking the process. 
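 * The requests that may re-parent or resume the target (PT_ATTACH,
 * PT_DETACH, PT_CONTINUE and friends) take their locks in a fixed order,
 * roughly:
 *
 *	sx_xlock(&proctree_lock);	(tree lock first)
 *	PROC_LOCK(p);			(then the per-process lock)
 *	...				(handle the request)
 *	PROC_UNLOCK(p);
 *	sx_xunlock(&proctree_lock);
 *
 * keeping proctree_lock ahead of any process lock, the same order the
 * fork and exit paths use.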
*/ switch (req) { case PT_TRACE_ME: case PT_ATTACH: case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_FOLLOW_FORK: case PT_DETACH: sx_xlock(&proctree_lock); proctree_locked = 1; break; default: break; } write = 0; if (req == PT_TRACE_ME) { p = td->td_proc; PROC_LOCK(p); } else { if (pid <= PID_MAX) { if ((p = pfind(pid)) == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } } else { td2 = tdfind(pid, -1); if (td2 == NULL) { if (proctree_locked) sx_xunlock(&proctree_lock); return (ESRCH); } p = td2->td_proc; tid = pid; pid = p->p_pid; } } AUDIT_ARG_PROCESS(p); if ((p->p_flag & P_WEXIT) != 0) { error = ESRCH; goto fail; } if ((error = p_cansee(td, p)) != 0) goto fail; if ((error = p_candebug(td, p)) != 0) goto fail; /* * System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } if (tid == 0) { if ((p->p_flag & P_STOPPED_TRACE) != 0) { KASSERT(p->p_xthread != NULL, ("NULL p_xthread")); td2 = p->p_xthread; } else { td2 = FIRST_THREAD_IN_PROC(p); } tid = td2->td_tid; } #ifdef COMPAT_FREEBSD32 /* * Test if we're a 32 bit client and what the target is. * Set the wrap controls accordingly. */ if (SV_CURPROC_FLAG(SV_ILP32)) { if (SV_PROC_FLAG(td2->td_proc, SV_ILP32)) safe = 1; wrap32 = 1; } #endif /* * Permissions check */ switch (req) { case PT_TRACE_ME: /* Always legal. */ break; case PT_ATTACH: /* Self */ if (p->p_pid == td->td_proc->p_pid) { error = EINVAL; goto fail; } /* Already traced */ if (p->p_flag & P_TRACED) { error = EBUSY; goto fail; } /* Can't trace an ancestor if you're being traced. */ if (curp->p_flag & P_TRACED) { for (pp = curp->p_pptr; pp != NULL; pp = pp->p_pptr) { if (pp == p) { error = EINVAL; goto fail; } } } /* OK */ break; case PT_CLEARSTEP: /* Allow thread to clear single step for itself */ if (td->td_tid == tid) break; /* FALLTHROUGH */ default: /* not being traced... */ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != td->td_proc) { error = EBUSY; goto fail; } /* not currently stopped */ if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) == 0 || p->p_suspcount != p->p_numthreads || (p->p_flag & P_WAITED) == 0) { error = EBUSY; goto fail; } if ((p->p_flag & P_STOPPED_TRACE) == 0) { static int count = 0; if (count++ == 0) printf("P_STOPPED_TRACE not set.\n"); } /* OK */ break; } /* Keep this process around until we finish this request. */ _PHOLD(p); #ifdef FIX_SSTEP /* * Single step fixup ala procfs */ FIX_SSTEP(td2); #endif /* * Actually do the requests */ td->td_retval[0] = 0; switch (req) { case PT_TRACE_ME: /* set my trace flag and "owner" so it can read/write me */ p->p_flag |= P_TRACED; if (p->p_flag & P_PPWAIT) p->p_flag |= P_PPTRACE; p->p_oppid = p->p_pptr->p_pid; break; case PT_ATTACH: /* security check done above */ /* * It would be nice if the tracing relationship was separate * from the parent relationship but that would require * another set of links in the proc struct or for "wait" * to scan the entire proc table. To make life easier, * we just re-parent the process we're trying to trace. * The old parent is remembered so we can put things back * on a "detach". 
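 * In outline (sketch only), the attach/detach bookkeeping is:
 *
 *	attach:	p->p_oppid = p->p_pptr->p_pid;	(remember the old parent)
 *		proc_reparent(p, td->td_proc);	(debugger becomes parent)
 *	detach:	pp = pfind(p->p_oppid);		(look the old parent up)
 *		proc_reparent(p, pp != NULL ? pp : initproc);
 *		p->p_oppid = 0;
 *
 * so the debugger's wait(2) sees the tracee's stops while it is attached,
 * and the original parent gets the child back once tracing ends.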
*/ p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; if (p->p_pptr != td->td_proc) { proc_reparent(p, td->td_proc); } data = SIGSTOP; goto sendsig; /* in PT_CONTINUE below */ case PT_CLEARSTEP: error = ptrace_clear_single_step(td2); break; case PT_SETSTEP: error = ptrace_single_step(td2); break; case PT_SUSPEND: td2->td_dbgflags |= TDB_SUSPEND; thread_lock(td2); td2->td_flags |= TDF_NEEDSUSPCHK; thread_unlock(td2); break; case PT_RESUME: td2->td_dbgflags &= ~TDB_SUSPEND; break; case PT_FOLLOW_FORK: if (data) p->p_flag |= P_FOLLOWFORK; else p->p_flag &= ~P_FOLLOWFORK; break; case PT_STEP: case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: case PT_DETACH: /* Zero means do not send any signal */ if (data < 0 || data > _SIG_MAXSIG) { error = EINVAL; break; } switch (req) { case PT_STEP: error = ptrace_single_step(td2); if (error) goto out; break; case PT_CONTINUE: case PT_TO_SCE: case PT_TO_SCX: case PT_SYSCALL: if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); if (error) goto out; } switch (req) { case PT_TO_SCE: p->p_stops |= S_PT_SCE; break; case PT_TO_SCX: p->p_stops |= S_PT_SCX; break; case PT_SYSCALL: p->p_stops |= S_PT_SCE | S_PT_SCX; break; } break; case PT_DETACH: /* reset process parent */ if (p->p_oppid != p->p_pptr->p_pid) { struct proc *pp; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); PROC_UNLOCK(p); pp = pfind(p->p_oppid); if (pp == NULL) pp = initproc; else PROC_UNLOCK(pp); PROC_LOCK(p); proc_reparent(p, pp); if (pp == initproc) p->p_sigparent = SIGCHLD; } p->p_oppid = 0; p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK); /* should we send SIGCHLD? */ /* childproc_continued(p); */ break; } sendsig: if (proctree_locked) { sx_xunlock(&proctree_lock); proctree_locked = 0; } p->p_xstat = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { /* deliver or queue signal */ td2->td_dbgflags &= ~TDB_XSIG; td2->td_xsig = data; if (req == PT_DETACH) { FOREACH_THREAD_IN_PROC(p, td3) td3->td_dbgflags &= ~TDB_SUSPEND; } /* * unsuspend all threads, to not let a thread run, * you should use PT_SUSPEND to suspend it before * continuing process. */ PROC_SLOCK(p); p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); thread_unsuspend(p); PROC_SUNLOCK(p); if (req == PT_ATTACH) kern_psignal(p, data); } else { if (data) kern_psignal(p, data); } break; case PT_WRITE_I: case PT_WRITE_D: td2->td_dbgflags |= TDB_USERWR; write = 1; /* FALLTHROUGH */ case PT_READ_I: case PT_READ_D: PROC_UNLOCK(p); tmp = 0; /* write = 0 set above */ iov.iov_base = write ? (caddr_t)&data : (caddr_t)&tmp; iov.iov_len = sizeof(int); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(uintptr_t)addr; uio.uio_resid = sizeof(int); uio.uio_segflg = UIO_SYSSPACE; /* i.e.: the uap */ uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_td = td; error = proc_rwmem(p, &uio); if (uio.uio_resid != 0) { /* * XXX proc_rwmem() doesn't currently return ENOSPC, * so I think write() can bogusly return 0. * XXX what happens for short writes? We don't want * to write partial data. * XXX proc_rwmem() returns EPERM for other invalid * addresses. Convert this to EINVAL. Does this * clobber returns of EPERM for other reasons? 
*/ if (error == 0 || error == ENOSPC || error == EPERM) error = EINVAL; /* EOF */ } if (!write) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_IO: #ifdef COMPAT_FREEBSD32 if (wrap32) { piod32 = addr; iov.iov_base = (void *)(uintptr_t)piod32->piod_addr; iov.iov_len = piod32->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod32->piod_offs; uio.uio_resid = piod32->piod_len; } else #endif { piod = addr; iov.iov_base = piod->piod_addr; iov.iov_len = piod->piod_len; uio.uio_offset = (off_t)(uintptr_t)piod->piod_offs; uio.uio_resid = piod->piod_len; } uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_USERSPACE; uio.uio_td = td; #ifdef COMPAT_FREEBSD32 tmp = wrap32 ? piod32->piod_op : piod->piod_op; #else tmp = piod->piod_op; #endif switch (tmp) { case PIOD_READ_D: case PIOD_READ_I: uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: td2->td_dbgflags |= TDB_USERWR; uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; goto out; } PROC_UNLOCK(p); error = proc_rwmem(p, &uio); #ifdef COMPAT_FREEBSD32 if (wrap32) piod32->piod_len -= uio.uio_resid; else #endif piod->piod_len -= uio.uio_resid; PROC_LOCK(p); break; case PT_KILL: data = SIGKILL; goto sendsig; /* in PT_CONTINUE above */ case PT_SETREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(regs, td2, addr); break; case PT_GETREGS: error = PROC_READ(regs, td2, addr); break; case PT_SETFPREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(fpregs, td2, addr); break; case PT_GETFPREGS: error = PROC_READ(fpregs, td2, addr); break; case PT_SETDBREGS: td2->td_dbgflags |= TDB_USERWR; error = PROC_WRITE(dbregs, td2, addr); break; case PT_GETDBREGS: error = PROC_READ(dbregs, td2, addr); break; case PT_LWPINFO: if (data <= 0 || #ifdef COMPAT_FREEBSD32 (!wrap32 && data > sizeof(*pl)) || (wrap32 && data > sizeof(*pl32))) { #else data > sizeof(*pl)) { #endif error = EINVAL; break; } #ifdef COMPAT_FREEBSD32 if (wrap32) { pl = &plr; pl32 = addr; } else #endif pl = addr; pl->pl_lwpid = td2->td_tid; pl->pl_event = PL_EVENT_NONE; pl->pl_flags = 0; if (td2->td_dbgflags & TDB_XSIG) { pl->pl_event = PL_EVENT_SIGNAL; if (td2->td_dbgksi.ksi_signo != 0 && #ifdef COMPAT_FREEBSD32 ((!wrap32 && data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo)) || (wrap32 && data >= offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct siginfo32))) #else data >= offsetof(struct ptrace_lwpinfo, pl_siginfo) + sizeof(pl->pl_siginfo) #endif ){ pl->pl_flags |= PL_FLAG_SI; pl->pl_siginfo = td2->td_dbgksi.ksi_info; } } if ((pl->pl_flags & PL_FLAG_SI) == 0) bzero(&pl->pl_siginfo, sizeof(pl->pl_siginfo)); if (td2->td_dbgflags & TDB_SCE) pl->pl_flags |= PL_FLAG_SCE; else if (td2->td_dbgflags & TDB_SCX) pl->pl_flags |= PL_FLAG_SCX; if (td2->td_dbgflags & TDB_EXEC) pl->pl_flags |= PL_FLAG_EXEC; if (td2->td_dbgflags & TDB_FORK) { pl->pl_flags |= PL_FLAG_FORKED; pl->pl_child_pid = td2->td_dbg_forked; } if (td2->td_dbgflags & TDB_CHILD) pl->pl_flags |= PL_FLAG_CHILD; pl->pl_sigmask = td2->td_sigmask; pl->pl_siglist = td2->td_siglist; strcpy(pl->pl_tdname, td2->td_name); #ifdef COMPAT_FREEBSD32 if (wrap32) ptrace_lwpinfo_to32(pl, pl32); #endif break; case PT_GETNUMLWPS: td->td_retval[0] = p->p_numthreads; break; case PT_GETLWPLIST: if (data <= 0) { error = EINVAL; break; } num = imin(p->p_numthreads, data); PROC_UNLOCK(p); buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } PROC_UNLOCK(p); error = copyout(buf, addr, tmp * 
sizeof(lwpid_t)); free(buf, M_TEMP); if (!error) td->td_retval[0] = tmp; PROC_LOCK(p); break; case PT_VM_TIMESTAMP: td->td_retval[0] = p->p_vmspace->vm_map.timestamp; break; case PT_VM_ENTRY: PROC_UNLOCK(p); #ifdef COMPAT_FREEBSD32 if (wrap32) error = ptrace_vm_entry32(td, p, addr); else #endif error = ptrace_vm_entry(td, p, addr); PROC_LOCK(p); break; default: #ifdef __HAVE_PTRACE_MACHDEP if (req >= PT_FIRSTMACH) { PROC_UNLOCK(p); error = cpu_ptrace(td2, req, addr, data); PROC_LOCK(p); } else #endif /* Unknown request. */ error = EINVAL; break; } out: /* Drop our hold on this process now that the request has completed. */ _PRELE(p); fail: PROC_UNLOCK(p); if (proctree_locked) sx_xunlock(&proctree_lock); return (error); } #undef PROC_READ #undef PROC_WRITE /* * Stop a process because of a debugging event; * stay stopped until p->p_step is cleared * (cleared by PIOCCONT in procfs). */ void stopevent(struct proc *p, unsigned int event, unsigned int val) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_step = 1; do { p->p_xstat = val; p->p_xthread = NULL; p->p_stype = event; /* Which event caused the stop? */ wakeup(&p->p_stype); /* Wake up any PIOCWAIT'ing procs */ msleep(&p->p_step, &p->p_mtx, PWAIT, "stopevent", 0); } while (p->p_step); } diff --git a/sys/mips/adm5120/adm5120_machdep.c b/sys/mips/adm5120/adm5120_machdep.c index 29079c804cbf..2713618607eb 100644 --- a/sys/mips/adm5120/adm5120_machdep.c +++ b/sys/mips/adm5120/adm5120_machdep.c @@ -1,145 +1,144 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(16 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = 175 * 1000 * 1000; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); cninit(); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/alchemy/alchemy_machdep.c b/sys/mips/alchemy/alchemy_machdep.c index 0593f83cddd2..d7f242ae0ad9 100644 --- a/sys/mips/alchemy/alchemy_machdep.c +++ b/sys/mips/alchemy/alchemy_machdep.c @@ -1,149 +1,148 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(16 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = 175 * 1000 * 1000; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); cninit(); mips_init(); /* Set counter_freq for tick_init_params() */ platform_counter_freq = 175 * 1000 * 1000; mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/beri/beri_machdep.c b/sys/mips/beri/beri_machdep.c index 98fcb85aa3aa..f36b0ea1bb12 100644 --- a/sys/mips/beri/beri_machdep.c +++ b/sys/mips/beri/beri_machdep.c @@ -1,209 +1,208 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * Copyright (c) 2012 Robert N. M. Watson * All rights reserved. * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) * ("CTSRD"), as part of the DARPA CRASH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #include #include #include -#include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * * XXXRW: BERI doesn't yet have a board-level soft-reset. */ void platform_reset(void) { panic("%s: not yet", __func__); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; unsigned int memsize = a3; #ifdef FDT vm_offset_t dtbp; void *kmdp; #endif int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); #ifdef FDT /* * Find the dtb passed in by the boot loader (currently fictional). */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #else #error "Non-static FDT not yet supported on BERI" #endif if (OF_install(OFW_FDT, 0) == FALSE) while (1); if (OF_init(&fdt_static_dtb) != 0) while (1); #endif /* * XXXRW: We have no way to compare wallclock time to cycle rate on * BERI, so for now assume we run at the MALTA default (100MHz). */ platform_counter_freq = MIPS_DEFAULT_HZ; mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); printf("memsize = %08x\n", memsize); } realmem = btoc(memsize); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/cavium/octeon_machdep.c b/sys/mips/cavium/octeon_machdep.c index fcf3ab66da14..53a84427a0a6 100644 --- a/sys/mips/cavium/octeon_machdep.c +++ b/sys/mips/cavium/octeon_machdep.c @@ -1,663 +1,662 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__mips_n64) #define MAX_APP_DESC_ADDR 0xffffffffafffffff #else #define MAX_APP_DESC_ADDR 0xafffffff #endif struct octeon_feature_description { octeon_feature_t ofd_feature; const char *ofd_string; }; extern int *end; extern char cpu_model[]; extern char cpu_board[]; static const struct octeon_feature_description octeon_feature_descriptions[] = { { OCTEON_FEATURE_SAAD, "SAAD" }, { OCTEON_FEATURE_ZIP, "ZIP" }, { OCTEON_FEATURE_CRYPTO, "CRYPTO" }, { OCTEON_FEATURE_DORM_CRYPTO, "DORM_CRYPTO" }, { OCTEON_FEATURE_PCIE, "PCIE" }, { OCTEON_FEATURE_SRIO, "SRIO" }, { OCTEON_FEATURE_KEY_MEMORY, "KEY_MEMORY" }, { OCTEON_FEATURE_LED_CONTROLLER, "LED_CONTROLLER" }, { OCTEON_FEATURE_TRA, "TRA" }, { OCTEON_FEATURE_MGMT_PORT, "MGMT_PORT" }, { OCTEON_FEATURE_RAID, "RAID" }, { OCTEON_FEATURE_USB, "USB" }, { OCTEON_FEATURE_NO_WPTR, "NO_WPTR" }, { OCTEON_FEATURE_DFA, "DFA" }, { OCTEON_FEATURE_MDIO_CLAUSE_45, "MDIO_CLAUSE_45" }, { OCTEON_FEATURE_NPEI, "NPEI" }, { OCTEON_FEATURE_ILK, "ILK" }, { OCTEON_FEATURE_HFA, "HFA" }, { OCTEON_FEATURE_DFM, "DFM" }, { OCTEON_FEATURE_CIU2, "CIU2" }, { OCTEON_FEATURE_DICI_MODE, "DICI_MODE" }, { OCTEON_FEATURE_BIT_EXTRACTOR, "BIT_EXTRACTOR" }, { OCTEON_FEATURE_NAND, "NAND" }, { OCTEON_FEATURE_MMC, "MMC" }, { OCTEON_FEATURE_PKND, "PKND" }, { OCTEON_FEATURE_CN68XX_WQE, "CN68XX_WQE" }, { 0, NULL } }; static uint64_t octeon_get_ticks(void); static unsigned octeon_get_timecount(struct timecounter *tc); static void octeon_boot_params_init(register_t ptr); static struct timecounter octeon_timecounter = { octeon_get_timecount, /* get_timecount */ 0, /* no poll_pps */ 0xffffffffu, /* octeon_mask */ 0, /* frequency */ "Octeon", /* name */ 900, /* quality (adjusted in code) */ }; void platform_cpu_init() { /* Nothing special yet */ } /* * Perform a board-level soft-reset. */ void platform_reset(void) { cvmx_write_csr(CVMX_CIU_SOFT_RST, 1); } /* * octeon_debug_symbol * * Does nothing. 
* Used to mark the point for simulator to begin tracing */ void octeon_debug_symbol(void) { } /* * octeon_ciu_reset * * Shutdown all CIU to IP2, IP3 mappings */ void octeon_ciu_reset(void) { uint64_t cvmctl; /* Disable all CIU interrupts by default */ cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2), 0); cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2+1), 0); cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num()*2), 0); cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num()*2+1), 0); #ifdef SMP /* Enable the MBOX interrupts. */ cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num()*2+1), (1ull << (OCTEON_IRQ_MBOX0 - 8)) | (1ull << (OCTEON_IRQ_MBOX1 - 8))); #endif /* * Move the Performance Counter interrupt to OCTEON_PMC_IRQ */ cvmctl = mips_rd_cvmctl(); cvmctl &= ~(7 << 7); cvmctl |= (OCTEON_PMC_IRQ + 2) << 7; mips_wr_cvmctl(cvmctl); } static void octeon_memory_init(void) { vm_paddr_t phys_end; int64_t addr; unsigned i, j; phys_end = round_page(MIPS_KSEG0_TO_PHYS((vm_offset_t)&end)); if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_SIM) { /* Simulator we limit to 96 meg */ phys_avail[0] = phys_end; phys_avail[1] = 96 << 20; dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; realmem = physmem = btoc(phys_avail[1] - phys_avail[0]); return; } /* * Allocate memory from bootmem 1MB at a time and merge * adjacent entries. */ i = 0; while (i < PHYS_AVAIL_ENTRIES) { /* * If there is less than 2MB of memory available in 128-byte * blocks, do not steal any more memory. We need to leave some * memory for the command queues to be allocated out of. */ if (cvmx_bootmem_available_mem(128) < 2 << 20) break; addr = cvmx_bootmem_phy_alloc(1 << 20, phys_end, ~(vm_paddr_t)0, PAGE_SIZE, 0); if (addr == -1) break; /* * The SDK needs to be able to easily map any memory that might * come to it e.g. in the form of an mbuf. Because on !n64 we * can't direct-map some addresses and we don't want to manage * temporary mappings within the SDK, don't feed memory that * can't be direct-mapped to the kernel. */ #if !defined(__mips_n64) if (!MIPS_DIRECT_MAPPABLE(addr + (1 << 20) - 1)) continue; #endif physmem += btoc(1 << 20); if (i > 0 && phys_avail[i - 1] == addr) { phys_avail[i - 1] += 1 << 20; continue; } phys_avail[i + 0] = addr; phys_avail[i + 1] = addr + (1 << 20); i += 2; } for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; realmem = physmem; } void platform_start(__register_t a0, __register_t a1, __register_t a2 __unused, __register_t a3) { const struct octeon_feature_description *ofd; uint64_t platform_counter_freq; int rv; mips_postboot_fixup(); /* * Initialize boot parameters so that we can determine things like * which console we shoud use, etc. */ octeon_boot_params_init(a3); /* Initialize pcpu stuff */ mips_pcpu0_init(); mips_timer_early_init(cvmx_sysinfo_get()->cpu_clock_hz); /* Initialize console. */ cninit(); /* * Display information about the CPU. */ #if !defined(OCTEON_MODEL) printf("Using runtime CPU model checks.\n"); #else printf("Compiled for CPU model: " __XSTRING(OCTEON_MODEL) "\n"); #endif strcpy(cpu_model, octeon_model_get_string(cvmx_get_proc_id())); printf("CPU Model: %s\n", cpu_model); printf("CPU clock: %uMHz Core Mask: %#x\n", cvmx_sysinfo_get()->cpu_clock_hz / 1000000, cvmx_sysinfo_get()->core_mask); rv = octeon_model_version_check(cvmx_get_proc_id()); if (rv == -1) panic("%s: kernel not compatible with this processor.", __func__); /* * Display information about the board. 
*/ #if defined(OCTEON_BOARD_CAPK_0100ND) strcpy(cpu_board, "CAPK-0100ND"); if (cvmx_sysinfo_get()->board_type != CVMX_BOARD_TYPE_CN3010_EVB_HS5) { panic("Compiled for %s, but board type is %s.", cpu_board, cvmx_board_type_to_string(cvmx_sysinfo_get()->board_type)); } #else strcpy(cpu_board, cvmx_board_type_to_string(cvmx_sysinfo_get()->board_type)); #endif printf("Board: %s\n", cpu_board); printf("Board Type: %u Revision: %u/%u\n", cvmx_sysinfo_get()->board_type, cvmx_sysinfo_get()->board_rev_major, cvmx_sysinfo_get()->board_rev_minor); printf("Serial number: %s\n", cvmx_sysinfo_get()->board_serial_number); /* * Additional on-chip hardware/settings. * * XXX Display PCI host/target? What else? */ printf("MAC address base: %6D (%u configured)\n", cvmx_sysinfo_get()->mac_addr_base, ":", cvmx_sysinfo_get()->mac_addr_count); octeon_ciu_reset(); /* * XXX * We can certainly parse command line arguments or U-Boot environment * to determine whether to bootverbose / single user / ... I think * stass has patches to add support for loader things to U-Boot even. */ bootverbose = 1; /* * For some reason on the cn38xx simulator ebase register is set to * 0x80001000 at bootup time. Move it back to the default, but * when we move to having support for multiple executives, we need * to rethink this. */ mips_wr_ebase(0x80000000); octeon_memory_init(); init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif cpu_clock = cvmx_sysinfo_get()->cpu_clock_hz; platform_counter_freq = cpu_clock; octeon_timecounter.tc_frequency = cpu_clock; platform_timecounter = &octeon_timecounter; mips_timer_init_params(platform_counter_freq, 0); set_cputicker(octeon_get_ticks, cpu_clock, 0); #ifdef SMP /* * Clear any pending IPIs. */ cvmx_write_csr(CVMX_CIU_MBOX_CLRX(0), 0xffffffff); #endif printf("Octeon SDK: %s\n", OCTEON_SDK_VERSION_STRING); printf("Available Octeon features:"); for (ofd = octeon_feature_descriptions; ofd->ofd_string != NULL; ofd++) if (octeon_has_feature(ofd->ofd_feature)) printf(" %s", ofd->ofd_string); printf("\n"); } static uint64_t octeon_get_ticks(void) { uint64_t cvmcount; CVMX_MF_CYCLE(cvmcount); return (cvmcount); } static unsigned octeon_get_timecount(struct timecounter *tc) { return ((unsigned)octeon_get_ticks()); } static int sysctl_machdep_led_display(SYSCTL_HANDLER_ARGS) { size_t buflen; char buf[9]; int error; if (req->newptr == NULL) return (EINVAL); if (cvmx_sysinfo_get()->led_display_base_addr == 0) return (ENODEV); /* * Revision 1.x of the EBT3000 only supports 4 characters, but * other devices support 8. */ if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_EBT3000 && cvmx_sysinfo_get()->board_rev_major == 1) buflen = 4; else buflen = 8; if (req->newlen > buflen) return (E2BIG); error = SYSCTL_IN(req, buf, req->newlen); if (error != 0) return (error); buf[req->newlen] = '\0'; ebt3000_str_write(buf); return (0); } SYSCTL_PROC(_machdep, OID_AUTO, led_display, CTLTYPE_STRING | CTLFLAG_WR, NULL, 0, sysctl_machdep_led_display, "A", "String to display on LED display"); void cvmx_dvprintf(const char *fmt, va_list ap) { if (!bootverbose) return; vprintf(fmt, ap); } void cvmx_dprintf(const char *fmt, ...) { va_list ap; va_start(ap, fmt); cvmx_dvprintf(fmt, ap); va_end(ap); } /** * version of printf that works better in exception context. * * @param format * * XXX If this function weren't in cvmx-interrupt.c, we'd use the SDK version. 
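 * The version below formats into a fixed 256-byte buffer and then spins on
 * the UART's transmitter-empty bit (LSR.TEMT) for every character, writing
 * a CR before each LF, so output still works when the regular console path
 * or interrupts are unavailable.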
*/ void cvmx_safe_printf(const char *format, ...) { char buffer[256]; char *ptr = buffer; int count; va_list args; va_start(args, format); #ifndef __U_BOOT__ count = vsnprintf(buffer, sizeof(buffer), format, args); #else count = vsprintf(buffer, format, args); #endif va_end(args); while (count-- > 0) { cvmx_uart_lsr_t lsrval; /* Spin until there is room */ do { lsrval.u64 = cvmx_read_csr(CVMX_MIO_UARTX_LSR(0)); #if !defined(CONFIG_OCTEON_SIM_SPEED) if (lsrval.s.temt == 0) cvmx_wait(10000); /* Just to reduce the load on the system */ #endif } while (lsrval.s.temt == 0); if (*ptr == '\n') cvmx_write_csr(CVMX_MIO_UARTX_THR(0), '\r'); cvmx_write_csr(CVMX_MIO_UARTX_THR(0), *ptr++); } } /* impSTART: This stuff should move back into the Cavium SDK */ /* **************************************************************************************** * * APP/BOOT DESCRIPTOR STUFF * **************************************************************************************** */ /* Define the struct that is initialized by the bootloader used by the * startup code. * * Copyright (c) 2004, 2005, 2006 Cavium Networks. * * The authors hereby grant permission to use, copy, modify, distribute, * and license this software and its documentation for any purpose, provided * that existing copyright notices are retained in all copies and that this * notice is included verbatim in any distributions. No written agreement, * license, or royalty fee is required for any of the authorized uses. * Modifications to this software may be copyrighted by their authors * and need not follow the licensing terms described here, provided that * the new terms are clearly indicated on the first page of each file where * they apply. */ #define OCTEON_CURRENT_DESC_VERSION 6 #define OCTEON_ARGV_MAX_ARGS (64) #define OCTOEN_SERIAL_LEN 20 typedef struct { /* Start of block referenced by assembly code - do not change! */ uint32_t desc_version; uint32_t desc_size; uint64_t stack_top; uint64_t heap_base; uint64_t heap_end; uint64_t entry_point; /* Only used by bootloader */ uint64_t desc_vaddr; /* End of This block referenced by assembly code - do not change! */ uint32_t exception_base_addr; uint32_t stack_size; uint32_t heap_size; uint32_t argc; /* Argc count for application */ uint32_t argv[OCTEON_ARGV_MAX_ARGS]; uint32_t flags; uint32_t core_mask; uint32_t dram_size; /**< DRAM size in megabyes */ uint32_t phy_mem_desc_addr; /**< physical address of free memory descriptor block*/ uint32_t debugger_flags_base_addr; /**< used to pass flags from app to debugger */ uint32_t eclock_hz; /**< CPU clock speed, in hz */ uint32_t dclock_hz; /**< DRAM clock speed, in hz */ uint32_t spi_clock_hz; /**< SPI4 clock in hz */ uint16_t board_type; uint8_t board_rev_major; uint8_t board_rev_minor; uint16_t chip_type; uint8_t chip_rev_major; uint8_t chip_rev_minor; char board_serial_number[OCTOEN_SERIAL_LEN]; uint8_t mac_addr_base[6]; uint8_t mac_addr_count; uint64_t cvmx_desc_vaddr; } octeon_boot_descriptor_t; static cvmx_bootinfo_t * octeon_process_app_desc_ver_6(octeon_boot_descriptor_t *app_desc_ptr) { cvmx_bootinfo_t *octeon_bootinfo; /* XXX Why is 0x00000000ffffffffULL a bad value? 
*/ if (app_desc_ptr->cvmx_desc_vaddr == 0 || app_desc_ptr->cvmx_desc_vaddr == 0xfffffffful) { cvmx_safe_printf("Bad octeon_bootinfo %#jx\n", (uintmax_t)app_desc_ptr->cvmx_desc_vaddr); return (NULL); } octeon_bootinfo = cvmx_phys_to_ptr(app_desc_ptr->cvmx_desc_vaddr); if (octeon_bootinfo->major_version != 1) { cvmx_safe_printf("Incompatible CVMX descriptor from bootloader: %d.%d %p\n", (int) octeon_bootinfo->major_version, (int) octeon_bootinfo->minor_version, octeon_bootinfo); return (NULL); } cvmx_sysinfo_minimal_initialize(octeon_bootinfo->phy_mem_desc_addr, octeon_bootinfo->board_type, octeon_bootinfo->board_rev_major, octeon_bootinfo->board_rev_minor, octeon_bootinfo->eclock_hz); memcpy(cvmx_sysinfo_get()->mac_addr_base, octeon_bootinfo->mac_addr_base, 6); cvmx_sysinfo_get()->mac_addr_count = octeon_bootinfo->mac_addr_count; cvmx_sysinfo_get()->compact_flash_common_base_addr = octeon_bootinfo->compact_flash_common_base_addr; cvmx_sysinfo_get()->compact_flash_attribute_base_addr = octeon_bootinfo->compact_flash_attribute_base_addr; cvmx_sysinfo_get()->core_mask = octeon_bootinfo->core_mask; cvmx_sysinfo_get()->led_display_base_addr = octeon_bootinfo->led_display_base_addr; memcpy(cvmx_sysinfo_get()->board_serial_number, octeon_bootinfo->board_serial_number, sizeof cvmx_sysinfo_get()->board_serial_number); return (octeon_bootinfo); } static void octeon_boot_params_init(register_t ptr) { octeon_boot_descriptor_t *app_desc_ptr; cvmx_bootinfo_t *octeon_bootinfo; if (ptr == 0 || ptr >= MAX_APP_DESC_ADDR) { cvmx_safe_printf("app descriptor passed at invalid address %#jx\n", (uintmax_t)ptr); platform_reset(); } app_desc_ptr = (octeon_boot_descriptor_t *)(intptr_t)ptr; if (app_desc_ptr->desc_version < 6) { cvmx_safe_printf("Your boot code is too old to be supported.\n"); platform_reset(); } octeon_bootinfo = octeon_process_app_desc_ver_6(app_desc_ptr); if (octeon_bootinfo == NULL) { cvmx_safe_printf("Could not parse boot descriptor.\n"); platform_reset(); } if (cvmx_sysinfo_get()->led_display_base_addr != 0) { /* * Revision 1.x of the EBT3000 only supports 4 characters, but * other devices support 8. */ if (cvmx_sysinfo_get()->board_type == CVMX_BOARD_TYPE_EBT3000 && cvmx_sysinfo_get()->board_rev_major == 1) ebt3000_str_write("FBSD"); else ebt3000_str_write("FreeBSD!"); } if (cvmx_sysinfo_get()->phy_mem_desc_addr == (uint64_t)0) { cvmx_safe_printf("Your boot loader did not supply a memory descriptor.\n"); platform_reset(); } cvmx_bootmem_init(cvmx_sysinfo_get()->phy_mem_desc_addr); octeon_feature_init(); __cvmx_helper_cfg_init(); } /* impEND: This stuff should move back into the Cavium SDK */ diff --git a/sys/mips/gxemul/gxemul_machdep.c b/sys/mips/gxemul/gxemul_machdep.c index 8996919d3e6a..a068efc7b4ed 100644 --- a/sys/mips/gxemul/gxemul_machdep.c +++ b/sys/mips/gxemul/gxemul_machdep.c @@ -1,238 +1,237 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef SMP #include #include #endif #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * * XXXRW: Does gxemul have a moral equivalent to board-level reset? */ void platform_reset(void) { panic("%s: not yet", __func__); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); /* * XXXRW: Support for the gxemul real-time clock required in order to * usefully determine our emulated timer frequency. Go with something * classic as the default in the mean time. */ platform_counter_freq = MIPS_DEFAULT_HZ; mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); if (envp != NULL) { printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); } else { printf("no envp.\n"); } } realmem = btoc(GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_MEMORY)); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } #ifdef SMP void platform_ipi_send(int cpuid) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_IPI_ONE, (1 << 16) | cpuid); } void platform_ipi_clear(void) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_IPI_READ, 0); } int platform_ipi_intrnum(void) { return (GXEMUL_MP_DEV_IPI_INTERRUPT - 2); } struct cpu_group * platform_smp_topo(void) { return (smp_topo_none()); } void platform_init_ap(int cpuid) { int ipi_int_mask, clock_int_mask; /* * Unmask the clock and ipi interrupts. 
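 * On MIPS, hardware interrupt 5 conventionally carries the CP0
 * count/compare (clock) interrupt, and the IPI line is whatever
 * platform_ipi_intrnum() reports for the gxemul MP device, so the AP's
 * interrupt mask is set to accept exactly those two sources.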
*/ clock_int_mask = hard_int_mask(5); ipi_int_mask = hard_int_mask(platform_ipi_intrnum()); set_intr_mask(ipi_int_mask | clock_int_mask); } void platform_cpu_mask(cpuset_t *mask) { unsigned i, n; n = GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_NCPUS); CPU_ZERO(mask); for (i = 0; i < n; i++) CPU_SET(i, mask); } int platform_processor_id(void) { return (GXEMUL_MP_DEV_READ(GXEMUL_MP_DEV_WHOAMI)); } int platform_start_ap(int cpuid) { GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_STARTADDR, (intptr_t)mpentry); GXEMUL_MP_DEV_WRITE(GXEMUL_MP_DEV_START, cpuid); return (0); } #endif /* SMP */ diff --git a/sys/mips/idt/idt_machdep.c b/sys/mips/idt/idt_machdep.c index ac18748f5672..b48931987d29 100644 --- a/sys/mips/idt/idt_machdep.c +++ b/sys/mips/idt/idt_machdep.c @@ -1,179 +1,178 @@ /*- * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $Id: $ * */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } void platform_reset(void) { volatile unsigned int * p = (void *)0xb8008000; /* * TODO: we should take care of TLB stuff here. 
Otherwise * board does not boots properly next time */ /* Write 0x8000_0001 to the Reset register */ *p = 0x80000001; __asm __volatile("li $25, 0xbfc00000"); __asm __volatile("j $25"); } void platform_start(__register_t a0, __register_t a1, __register_t a2 __unused, __register_t a3 __unused) { uint64_t platform_counter_freq; vm_offset_t kernend; int argc = a0; char **argv = (char **)a1; int i, mem; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); /* * Looking for mem=XXM argument */ mem = 0; /* Just something to start with */ for (i=0; i < argc; i++) { if (strncmp(argv[i], "mem=", 4) == 0) { mem = strtol(argv[i] + 4, NULL, 0); break; } } bootverbose = 1; if (mem > 0) realmem = btoc(mem << 20); else realmem = btoc(32 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; /* * ns8250 uart code uses DELAY so ticker should be inititalized * before cninit. And tick_init_params refers to hz, so * init_param1 * should be called first. */ init_param1(); /* TODO: parse argc,argv */ platform_counter_freq = 330000000UL; mips_timer_init_params(platform_counter_freq, 1); cninit(); /* Panic here, after cninit */ if (mem == 0) panic("No mem=XX parameter in arguments"); printf("cmd line: "); for (i=0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } diff --git a/sys/mips/malta/malta_machdep.c b/sys/mips/malta/malta_machdep.c index 8efb98778b60..27cfa9f664cb 100644 --- a/sys/mips/malta/malta_machdep.c +++ b/sys/mips/malta/malta_machdep.c @@ -1,304 +1,303 @@ /*- * Copyright (c) 2006 Wojciech A. Koszek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #ifdef TICK_USE_YAMON_FREQ #include #endif #ifdef TICK_USE_MALTA_RTC #include #include #include #endif #include extern int *edata; extern int *end; void lcd_init(void); void lcd_puts(char *); void malta_reset(void); /* * Offsets to MALTA LCD characters. */ static int malta_lcd_offs[] = { MALTA_ASCIIPOS0, MALTA_ASCIIPOS1, MALTA_ASCIIPOS2, MALTA_ASCIIPOS3, MALTA_ASCIIPOS4, MALTA_ASCIIPOS5, MALTA_ASCIIPOS6, MALTA_ASCIIPOS7 }; void platform_cpu_init() { /* Nothing special */ } /* * Put character to Malta LCD at given position. */ static void malta_lcd_putc(int pos, char c) { void *addr; char *ch; if (pos < 0 || pos > 7) return; addr = (void *)(MALTA_ASCII_BASE + malta_lcd_offs[pos]); ch = (char *)MIPS_PHYS_TO_KSEG0(addr); *ch = c; } /* * Print given string on LCD. */ static void malta_lcd_print(char *str) { int i; if (str == NULL) return; for (i = 0; *str != '\0'; i++, str++) malta_lcd_putc(i, *str); } void lcd_init(void) { malta_lcd_print("FreeBSD_"); } void lcd_puts(char *s) { malta_lcd_print(s); } #ifdef TICK_USE_MALTA_RTC static __inline uint8_t rtcin(uint8_t addr) { *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCADR))) = addr; return (*((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCDAT)))); } static __inline void writertc(uint8_t addr, uint8_t val) { *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCADR))) = addr; *((volatile uint8_t *) MIPS_PHYS_TO_KSEG1(MALTA_PCI0_ADDR(MALTA_RTCDAT))) = val; } #endif static void mips_init(void) { int i; for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); dump_avail[0] = phys_avail[0]; dump_avail[1] = phys_avail[1]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } /* * Perform a board-level soft-reset. * Note that this is not emulated by gxemul. */ void platform_reset(void) { char *c; c = (char *)MIPS_PHYS_TO_KSEG0(MALTA_SOFTRES); *c = MALTA_GORESET; } static uint64_t malta_cpu_freq(void) { uint64_t platform_counter_freq = 0; #if defined(TICK_USE_YAMON_FREQ) /* * If we are running on a board which uses YAMON firmware, * then query CPU pipeline clock from the syscon object. * If unsuccessful, use hard-coded default. */ platform_counter_freq = yamon_getcpufreq(); #elif defined(TICK_USE_MALTA_RTC) /* * If we are running on a board with the MC146818 RTC, * use it to determine CPU pipeline clock frequency. */ u_int64_t counterval[2]; /* Set RTC to binary mode. */ writertc(RTC_STATUSB, (rtcin(RTC_STATUSB) | RTCSB_BCD)); /* Busy-wait for falling edge of RTC update. */ while (((rtcin(RTC_STATUSA) & RTCSA_TUP) == 0)) ; while (((rtcin(RTC_STATUSA)& RTCSA_TUP) != 0)) ; counterval[0] = mips_rd_count(); /* Busy-wait for falling edge of RTC update. 
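 * The RTC's update-in-progress bit cycles once per second, so sampling the
 * CP0 Count register at two consecutive falling edges brackets one second
 * of wall time; counterval[1] - counterval[0] is then the counter
 * frequency in Hz (a delta of roughly 100000000, for example, means the
 * counter ticks at about 100 MHz).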
*/ while (((rtcin(RTC_STATUSA) & RTCSA_TUP) == 0)) ; while (((rtcin(RTC_STATUSA)& RTCSA_TUP) != 0)) ; counterval[1] = mips_rd_count(); platform_counter_freq = counterval[1] - counterval[0]; #endif if (platform_counter_freq == 0) platform_counter_freq = MIPS_DEFAULT_HZ; return (platform_counter_freq); } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; int argc = a0; char **argv = (char **)a1; char **envp = (char **)a2; unsigned int memsize = a3; int i; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); mips_pcpu0_init(); platform_counter_freq = malta_cpu_freq(); mips_timer_early_init(platform_counter_freq); cninit(); printf("entry: platform_start()\n"); bootverbose = 1; if (bootverbose) { printf("cmd line: "); for (i = 0; i < argc; i++) printf("%s ", argv[i]); printf("\n"); printf("envp:\n"); for (i = 0; envp[i]; i += 2) printf("\t%s = %s\n", envp[i], envp[i+1]); printf("memsize = %08x\n", memsize); } realmem = btoc(memsize); mips_init(); mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/rt305x/rt305x_machdep.c b/sys/mips/rt305x/rt305x_machdep.c index 33c131cb0666..07342bde9eda 100644 --- a/sys/mips/rt305x/rt305x_machdep.c +++ b/sys/mips/rt305x/rt305x_machdep.c @@ -1,192 +1,191 @@ /*- * Copyright (C) 2010-2011 by Aleksandr Rybalko. All rights reserved. * Copyright (C) 2007 by Oleksandr Tymoshenko. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int *edata; extern int *end; static char boot1_env[0x1000]; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i; printf("entry: mips_init()\n"); bootverbose = 1; realmem = btoc(32 << 20); for (i = 0; i < 10; i++) { phys_avail[i] = 0; } /* phys_avail regions are in bytes */ dump_avail[0] = phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); dump_avail[1] = phys_avail[1] = ctob(realmem); physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { __asm __volatile("li $25, 0xbf000000"); __asm __volatile("j $25"); } void platform_start(__register_t a0 __unused, __register_t a1 __unused, __register_t a2 __unused, __register_t a3 __unused) { vm_offset_t kernend; uint64_t platform_counter_freq = PLATFORM_COUNTER_FREQ; int i; int argc = a0; char **argv = (char **)MIPS_PHYS_TO_KSEG0(a1); char **envp = (char **)MIPS_PHYS_TO_KSEG0(a2); /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); /* initialize console so that we have printf */ boothowto |= (RB_SERIAL | RB_MULTIPLE); /* Use multiple consoles */ boothowto |= (RB_VERBOSE); cninit(); init_static_kenv(boot1_env, sizeof(boot1_env)); printf("U-Boot args (from %d args):\n", argc - 1); if (argc == 1) printf("\tNone\n"); for (i = 1; i < argc; i++) { char *n = "argv ", *arg; if (i > 99) break; if (argv[i]) { arg = (char *)(intptr_t)MIPS_PHYS_TO_KSEG0(argv[i]); printf("\targv[%d] = %s\n", i, arg); sprintf(n, "argv%d", i); setenv(n, arg); } } printf("Environment:\n"); for (i = 0; envp[i] ; i++) { char *n, *arg; arg = (char *)(intptr_t)MIPS_PHYS_TO_KSEG0(envp[i]); printf("\t%s\n", arg); n = strsep(&arg, "="); if (arg == NULL) setenv(n, "1"); else setenv(n, arg); } mips_init(); mips_timer_init_params(platform_counter_freq, 2); } diff --git a/sys/mips/sentry5/s5_machdep.c b/sys/mips/sentry5/s5_machdep.c index cde081bc92d9..a10b48d2bf9b 100644 --- a/sys/mips/sentry5/s5_machdep.c +++ b/sys/mips/sentry5/s5_machdep.c @@ -1,224 +1,223 @@ /*- * Copyright (c) 2007 Bruce M. Simpson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CFE #include #endif extern int *edata; extern int *end; void platform_cpu_init() { /* Nothing special */ } static void mips_init(void) { int i, j; printf("entry: mips_init()\n"); #ifdef CFE /* * Query DRAM memory map from CFE. */ physmem = 0; for (i = 0; i < 10; i += 2) { int result; uint64_t addr, len, type; result = cfe_enummem(i, 0, &addr, &len, &type); if (result < 0) { phys_avail[i] = phys_avail[i + 1] = 0; break; } if (type != CFE_MI_AVAILABLE) continue; phys_avail[i] = addr; if (i == 0 && addr == 0) { /* * If this is the first physical memory segment probed * from CFE, omit the region at the start of physical * memory where the kernel has been loaded. */ phys_avail[i] += MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); } phys_avail[i + 1] = addr + len; physmem += len; } realmem = btoc(physmem); #endif for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { #if defined(CFE) cfe_exit(0, 0); #else *((volatile uint8_t *)MIPS_PHYS_TO_KSEG1(SENTRY5_EXTIFADR)) = 0x80; #endif } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { vm_offset_t kernend; uint64_t platform_counter_freq; /* clear the BSS and SBSS segments */ kernend = (vm_offset_t)&end; memset(&edata, 0, kernend - (vm_offset_t)(&edata)); mips_postboot_fixup(); /* Initialize pcpu stuff */ mips_pcpu0_init(); #ifdef CFE /* * Initialize CFE firmware trampolines before * we initialize the low-level console. * * CFE passes the following values in registers: * a0: firmware handle * a2: firmware entry point * a3: entry point seal */ if (a3 == CFE_EPTSEAL) cfe_init(a0, a2); #endif cninit(); mips_init(); # if 0 /* * Probe the Broadcom Sentry5's on-chip PLL clock registers * and discover the CPU pipeline clock and bus clock * multipliers from this. * XXX: Wrong place. You have to ask the ChipCommon * or External Interface cores on the SiBa. 
*/ uint32_t busmult, cpumult, refclock, clkcfg1; #define S5_CLKCFG1_REFCLOCK_MASK 0x0000001F #define S5_CLKCFG1_BUSMULT_MASK 0x000003E0 #define S5_CLKCFG1_BUSMULT_SHIFT 5 #define S5_CLKCFG1_CPUMULT_MASK 0xFFFFFC00 #define S5_CLKCFG1_CPUMULT_SHIFT 10 counter_freq = 100000000; /* XXX */ clkcfg1 = s5_rd_clkcfg1(); printf("clkcfg1 = 0x%08x\n", clkcfg1); refclock = clkcfg1 & 0x1F; busmult = ((clkcfg1 & 0x000003E0) >> 5) + 1; cpumult = ((clkcfg1 & 0xFFFFFC00) >> 10) + 1; printf("refclock = %u\n", refclock); printf("busmult = %u\n", busmult); printf("cpumult = %u\n", cpumult); counter_freq = cpumult * refclock; # else platform_counter_freq = 200 * 1000 * 1000; /* Sentry5 is 200MHz */ # endif mips_timer_init_params(platform_counter_freq, 0); } diff --git a/sys/mips/sibyte/sb_machdep.c b/sys/mips/sibyte/sb_machdep.c index 452eea6fa631..5e7c2fded872 100644 --- a/sys/mips/sibyte/sb_machdep.c +++ b/sys/mips/sibyte/sb_machdep.c @@ -1,433 +1,432 @@ /*- * Copyright (c) 2007 Bruce M. Simpson. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kdb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #include #endif #ifdef CFE #include #endif #include "sb_scd.h" #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #endif #ifdef CFE_ENV extern void cfe_env_init(void); #endif extern int *edata; extern int *end; extern char MipsTLBMiss[], MipsTLBMissEnd[]; void platform_cpu_init() { /* Nothing special */ } static void sb_intr_init(int cpuid) { int intrnum, intsrc; /* * Disable all sources to the interrupt mapper and setup the mapping * between an interrupt source and the mips hard interrupt number. */ for (intsrc = 0; intsrc < NUM_INTSRC; ++intsrc) { intrnum = sb_route_intsrc(intsrc); sb_disable_intsrc(cpuid, intsrc); sb_write_intmap(cpuid, intsrc, intrnum); #ifdef SMP /* * Set up the mailbox interrupt mapping. 
* * The mailbox interrupt is "special" in that it is not shared * with any other interrupt source. */ if (intsrc == INTSRC_MAILBOX3) { intrnum = platform_ipi_intrnum(); sb_write_intmap(cpuid, INTSRC_MAILBOX3, intrnum); sb_enable_intsrc(cpuid, INTSRC_MAILBOX3); } #endif } } static void mips_init(void) { int i, j, cfe_mem_idx, tmp; uint64_t maxmem; #ifdef CFE_ENV cfe_env_init(); #endif TUNABLE_INT_FETCH("boothowto", &boothowto); if (boothowto & RB_VERBOSE) bootverbose++; #ifdef MAXMEM tmp = MAXMEM; #else tmp = 0; #endif TUNABLE_INT_FETCH("hw.physmem", &tmp); maxmem = (uint64_t)tmp * 1024; /* * XXX * If we used vm_paddr_t consistently in pmap, etc., we could * use 64-bit page numbers on !n64 systems, too, like i386 * does with PAE. */ #if !defined(__mips_n64) if (maxmem == 0 || maxmem > 0xffffffff) maxmem = 0xffffffff; #endif #ifdef CFE /* * Query DRAM memory map from CFE. */ physmem = 0; cfe_mem_idx = 0; for (i = 0; i < 10; i += 2) { int result; uint64_t addr, len, type; result = cfe_enummem(cfe_mem_idx++, 0, &addr, &len, &type); if (result < 0) { phys_avail[i] = phys_avail[i + 1] = 0; break; } KASSERT(type == CFE_MI_AVAILABLE, ("CFE DRAM region is not available?")); if (bootverbose) printf("cfe_enummem: 0x%016jx/%ju.\n", addr, len); if (maxmem != 0) { if (addr >= maxmem) { printf("Ignoring %ju bytes of memory at 0x%jx " "that is above maxmem %dMB\n", len, addr, (int)(maxmem / (1024 * 1024))); continue; } if (addr + len > maxmem) { printf("Ignoring %ju bytes of memory " "that is above maxmem %dMB\n", (addr + len) - maxmem, (int)(maxmem / (1024 * 1024))); len = maxmem - addr; } } phys_avail[i] = addr; if (i == 0 && addr == 0) { /* * If this is the first physical memory segment probed * from CFE, omit the region at the start of physical * memory where the kernel has been loaded. */ phys_avail[i] += MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); } phys_avail[i + 1] = addr + len; physmem += len; } realmem = btoc(physmem); #endif for (j = 0; j < i; j++) dump_avail[j] = phys_avail[j]; physmem = realmem; init_param1(); init_param2(physmem); mips_cpu_init(); /* * Sibyte has a L1 data cache coherent with DMA. This includes * on-chip network interfaces as well as PCI/HyperTransport bus * masters. */ cpuinfo.cache_coherent_dma = TRUE; /* * XXX * The kernel is running in 32-bit mode but the CFE is running in * 64-bit mode. So the SR_KX bit in the status register is turned * on by the CFE every time we call into it - for e.g. CFE_CONSOLE. * * This means that if get a TLB miss for any address above 0xc0000000 * and the SR_KX bit is set then we will end up in the XTLB exception * vector. * * For now work around this by copying the TLB exception handling * code to the XTLB exception vector. 
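 *
 * Concretely, the block that follows copies the 32-bit TLB refill
 * handler (MipsTLBMiss .. MipsTLBMissEnd) over the XTLB miss vector
 * and then synchronizes the instruction and data caches so the copied
 * handler is actually fetched on the next miss.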
*/ { bcopy(MipsTLBMiss, (void *)MIPS_XTLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); mips_icache_sync_all(); mips_dcache_wbinv_all(); } pmap_bootstrap(); mips_proc0_init(); mutex_init(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void platform_reset(void) { /* * XXX SMP * XXX flush data caches */ sb_system_reset(); } static void kseg0_map_coherent(void) { uint32_t config; const int CFG_K0_COHERENT = 5; config = mips_rd_config(); config &= ~MIPS_CONFIG_K0_MASK; config |= CFG_K0_COHERENT; mips_wr_config(config); } #ifdef SMP void platform_ipi_send(int cpuid) { KASSERT(cpuid == 0 || cpuid == 1, ("platform_ipi_send: invalid cpuid %d", cpuid)); sb_set_mailbox(cpuid, 1ULL); } void platform_ipi_clear(void) { int cpuid; cpuid = PCPU_GET(cpuid); sb_clear_mailbox(cpuid, 1ULL); } int platform_ipi_intrnum(void) { return (4); } struct cpu_group * platform_smp_topo(void) { return (smp_topo_none()); } void platform_init_ap(int cpuid) { int ipi_int_mask, clock_int_mask; KASSERT(cpuid == 1, ("AP has an invalid cpu id %d", cpuid)); /* * Make sure that kseg0 is mapped cacheable-coherent */ kseg0_map_coherent(); sb_intr_init(cpuid); /* * Unmask the clock and ipi interrupts. */ clock_int_mask = hard_int_mask(5); ipi_int_mask = hard_int_mask(platform_ipi_intrnum()); set_intr_mask(ipi_int_mask | clock_int_mask); } int platform_start_ap(int cpuid) { #ifdef CFE int error; if ((error = cfe_cpu_start(cpuid, mpentry, 0, 0, 0))) { printf("cfe_cpu_start error: %d\n", error); return (-1); } else { return (0); } #else return (-1); #endif /* CFE */ } #endif /* SMP */ static u_int sb_get_timecount(struct timecounter *tc) { return ((u_int)sb_zbbus_cycle_count()); } static void sb_timecounter_init(void) { static struct timecounter sb_timecounter = { sb_get_timecount, NULL, ~0u, 0, "sibyte_zbbus_counter", 2000 }; /* * The ZBbus cycle counter runs at half the cpu frequency. */ sb_timecounter.tc_frequency = sb_cpu_speed() / 2; platform_timecounter = &sb_timecounter; } void platform_start(__register_t a0, __register_t a1, __register_t a2, __register_t a3) { /* * Make sure that kseg0 is mapped cacheable-coherent */ kseg0_map_coherent(); /* clear the BSS and SBSS segments */ memset(&edata, 0, (vm_offset_t)&end - (vm_offset_t)&edata); mips_postboot_fixup(); sb_intr_init(0); sb_timecounter_init(); /* Initialize pcpu stuff */ mips_pcpu0_init(); #ifdef CFE /* * Initialize CFE firmware trampolines before * we initialize the low-level console. * * CFE passes the following values in registers: * a0: firmware handle * a2: firmware entry point * a3: entry point seal */ if (a3 == CFE_EPTSEAL) cfe_init(a0, a2); #endif cninit(); mips_init(); mips_timer_init_params(sb_cpu_speed(), 0); } diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index b17376039ebb..24350c44f956 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1,2544 +1,2543 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_if.h" #define MOEA_DEBUG #define TODO panic("%s: not implemented", __func__); #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_SR(vsid) ((vsid) & 0xf) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) struct ofw_map { vm_offset_t om_va; vm_size_t om_len; vm_offset_t om_pa; u_int om_mode; }; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; static struct ofw_map *translations; /* * Lock for the pteg and pvo tables. */ struct mtx moea_table_mutex; struct mtx moea_vsid_mutex; /* tlbie instruction synchronization */ static struct mtx tlbie_mtx; /* * PTEG data. */ static struct pteg *moea_pteg_table; u_int moea_pteg_count; u_int moea_pteg_mask; /* * PVO data. */ struct pvo_head *moea_pvo_table; /* pvo entries by pteg index */ struct pvo_head moea_pvo_kunmanaged = LIST_HEAD_INITIALIZER(moea_pvo_kunmanaged); /* list of unmanaged pages */ static struct rwlock_padalign pvh_global_lock; uma_zone_t moea_upvo_zone; /* zone for pvo entries for unmanaged pages */ uma_zone_t moea_mpvo_zone; /* zone for pvo entries for managed pages */ #define BPVO_POOL_SIZE 32768 static struct pvo_entry *moea_bpvo_pool; static int moea_bpvo_pool_index = 0; #define VSID_NBPW (sizeof(u_int32_t) * 8) static u_int moea_vsid_bitmap[NPMAPS / VSID_NBPW]; static boolean_t moea_initialized = FALSE; /* * Statistics. 
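 *
 * The counters below are exported read-only through the machdep
 * sysctl tree (machdep.moea_pte_valid, machdep.moea_pte_overflow,
 * etc.).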
*/ u_int moea_pte_valid = 0; u_int moea_pte_overflow = 0; u_int moea_pte_replacements = 0; u_int moea_pvo_entries = 0; u_int moea_pvo_enter_calls = 0; u_int moea_pvo_remove_calls = 0; u_int moea_pte_spills = 0; SYSCTL_INT(_machdep, OID_AUTO, moea_pte_valid, CTLFLAG_RD, &moea_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_overflow, CTLFLAG_RD, &moea_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_replacements, CTLFLAG_RD, &moea_pte_replacements, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_entries, CTLFLAG_RD, &moea_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_enter_calls, CTLFLAG_RD, &moea_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pvo_remove_calls, CTLFLAG_RD, &moea_pvo_remove_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea_pte_spills, CTLFLAG_RD, &moea_pte_spills, 0, ""); /* * Allocate physical memory for use in moea_bootstrap. */ static vm_offset_t moea_bootstrap_alloc(vm_size_t, u_int); /* * PTE calls. */ static int moea_pte_insert(u_int, struct pte *); /* * PVO calls. */ static int moea_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *, vm_offset_t, vm_offset_t, u_int, int); static void moea_pvo_remove(struct pvo_entry *, int); static struct pvo_entry *moea_pvo_find_va(pmap_t, vm_offset_t, int *); static struct pte *moea_pvo_to_pte(const struct pvo_entry *, int); /* * Utility routines. */ static void moea_enter_locked(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); static void moea_syncicache(vm_offset_t, vm_size_t); static boolean_t moea_query_bit(vm_page_t, int); static u_int moea_clear_bit(vm_page_t, int); static void moea_kremove(mmu_t, vm_offset_t); int moea_pte_spill(vm_offset_t); /* * Kernel MMU interface */ void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea_clear_modify(mmu_t, vm_page_t); void moea_clear_reference(mmu_t, vm_page_t); void moea_copy_page(mmu_t, vm_page_t, vm_page_t); void moea_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea_init(mmu_t); boolean_t moea_is_modified(mmu_t, vm_page_t); boolean_t moea_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea_is_referenced(mmu_t, vm_page_t); int moea_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea_page_wired_mappings(mmu_t, vm_page_t); void moea_pinit(mmu_t, pmap_t); void moea_pinit0(mmu_t, pmap_t); void moea_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea_qremove(mmu_t, vm_offset_t, int); void moea_release(mmu_t, pmap_t); void moea_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea_remove_all(mmu_t, vm_page_t); void moea_remove_write(mmu_t, vm_page_t); void moea_zero_page(mmu_t, vm_page_t); void moea_zero_page_area(mmu_t, vm_page_t, int, int); void moea_zero_page_idle(mmu_t, vm_page_t); void moea_activate(mmu_t, struct thread *); void moea_deactivate(mmu_t, struct thread *); void moea_cpu_bootstrap(mmu_t, int); void moea_bootstrap(mmu_t, vm_offset_t, vm_offset_t); void *moea_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); void 
moea_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea_kextract(mmu_t, vm_offset_t); void moea_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t); void moea_kenter(mmu_t, vm_offset_t, vm_paddr_t); void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma); boolean_t moea_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); static mmu_method_t moea_methods[] = { MMUMETHOD(mmu_change_wiring, moea_change_wiring), MMUMETHOD(mmu_clear_modify, moea_clear_modify), MMUMETHOD(mmu_clear_reference, moea_clear_reference), MMUMETHOD(mmu_copy_page, moea_copy_page), MMUMETHOD(mmu_enter, moea_enter), MMUMETHOD(mmu_enter_object, moea_enter_object), MMUMETHOD(mmu_enter_quick, moea_enter_quick), MMUMETHOD(mmu_extract, moea_extract), MMUMETHOD(mmu_extract_and_hold, moea_extract_and_hold), MMUMETHOD(mmu_init, moea_init), MMUMETHOD(mmu_is_modified, moea_is_modified), MMUMETHOD(mmu_is_prefaultable, moea_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea_is_referenced), MMUMETHOD(mmu_ts_referenced, moea_ts_referenced), MMUMETHOD(mmu_map, moea_map), MMUMETHOD(mmu_page_exists_quick,moea_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea_page_wired_mappings), MMUMETHOD(mmu_pinit, moea_pinit), MMUMETHOD(mmu_pinit0, moea_pinit0), MMUMETHOD(mmu_protect, moea_protect), MMUMETHOD(mmu_qenter, moea_qenter), MMUMETHOD(mmu_qremove, moea_qremove), MMUMETHOD(mmu_release, moea_release), MMUMETHOD(mmu_remove, moea_remove), MMUMETHOD(mmu_remove_all, moea_remove_all), MMUMETHOD(mmu_remove_write, moea_remove_write), MMUMETHOD(mmu_sync_icache, moea_sync_icache), MMUMETHOD(mmu_zero_page, moea_zero_page), MMUMETHOD(mmu_zero_page_area, moea_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea_zero_page_idle), MMUMETHOD(mmu_activate, moea_activate), MMUMETHOD(mmu_deactivate, moea_deactivate), MMUMETHOD(mmu_page_set_memattr, moea_page_set_memattr), /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, moea_cpu_bootstrap), MMUMETHOD(mmu_mapdev_attr, moea_mapdev_attr), MMUMETHOD(mmu_mapdev, moea_mapdev), MMUMETHOD(mmu_unmapdev, moea_unmapdev), MMUMETHOD(mmu_kextract, moea_kextract), MMUMETHOD(mmu_kenter, moea_kenter), MMUMETHOD(mmu_kenter_attr, moea_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea_dev_direct_mapped), { 0, 0 } }; MMU_DEF(oea_mmu, MMU_TYPE_OEA, moea_methods, 0); static __inline uint32_t moea_calc_wimg(vm_offset_t pa, vm_memattr_t ma) { uint32_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (PTE_I | PTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (PTE_I); case VM_MEMATTR_WRITE_THROUGH: return (PTE_W | PTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. 
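 *
 * In other words: physical addresses that fall inside a known RAM
 * region get PTE_M (cacheable, coherence required), while everything
 * else, typically device space, keeps PTE_I | PTE_G.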
*/ pte_lo = PTE_I | PTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo = PTE_M; break; } } return pte_lo; } static void tlbie(vm_offset_t va) { mtx_lock_spin(&tlbie_mtx); __asm __volatile("ptesync"); __asm __volatile("tlbie %0" :: "r"(va)); __asm __volatile("eieio; tlbsync; ptesync"); mtx_unlock_spin(&tlbie_mtx); } static void tlbia(void) { vm_offset_t va; for (va = 0; va < 0x00040000; va += 0x00001000) { __asm __volatile("tlbie %0" :: "r"(va)); powerpc_sync(); } __asm __volatile("tlbsync"); powerpc_sync(); } static __inline int va_to_sr(u_int *sr, vm_offset_t va) { return (sr[(uintptr_t)va >> ADDR_SR_SHFT]); } static __inline u_int va_to_pteg(u_int sr, vm_offset_t addr) { u_int hash; hash = (sr & SR_VSID_MASK) ^ (((u_int)addr & ADDR_PIDX) >> ADDR_PIDX_SHFT); return (hash & moea_pteg_mask); } static __inline struct pvo_head * vm_page_to_pvoh(vm_page_t m) { return (&m->md.mdpg_pvoh); } static __inline void moea_attr_clear(vm_page_t m, int ptebit) { rw_assert(&pvh_global_lock, RA_WLOCKED); m->md.mdpg_attrs &= ~ptebit; } static __inline int moea_attr_fetch(vm_page_t m) { return (m->md.mdpg_attrs); } static __inline void moea_attr_save(vm_page_t m, int ptebit) { rw_assert(&pvh_global_lock, RA_WLOCKED); m->md.mdpg_attrs |= ptebit; } static __inline int moea_pte_compare(const struct pte *pt, const struct pte *pvo_pt) { if (pt->pte_hi == pvo_pt->pte_hi) return (1); return (0); } static __inline int moea_pte_match(struct pte *pt, u_int sr, vm_offset_t va, int which) { return (pt->pte_hi & ~PTE_VALID) == (((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | ((va >> ADDR_API_SHFT) & PTE_API) | which); } static __inline void moea_pte_create(struct pte *pt, u_int sr, vm_offset_t va, u_int pte_lo) { mtx_assert(&moea_table_mutex, MA_OWNED); /* * Construct a PTE. Default to IMB initially. Valid bit only gets * set when the real pte is set in memory. * * Note: Don't set the valid bit for correct operation of tlb update. */ pt->pte_hi = ((sr & SR_VSID_MASK) << PTE_VSID_SHFT) | (((va & ADDR_PIDX) >> ADDR_API_SHFT) & PTE_API); pt->pte_lo = pte_lo; } static __inline void moea_pte_synch(struct pte *pt, struct pte *pvo_pt) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_lo |= pt->pte_lo & (PTE_REF | PTE_CHG); } static __inline void moea_pte_clear(struct pte *pt, vm_offset_t va, int ptebit) { mtx_assert(&moea_table_mutex, MA_OWNED); /* * As shown in Section 7.6.3.2.3 */ pt->pte_lo &= ~ptebit; tlbie(va); } static __inline void moea_pte_set(struct pte *pt, struct pte *pvo_pt) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_hi |= PTE_VALID; /* * Update the PTE as defined in section 7.6.3.1. * Note that the REF/CHG bits are from pvo_pt and thus should havce * been saved so this routine can restore them (if desired). */ pt->pte_lo = pvo_pt->pte_lo; powerpc_sync(); pt->pte_hi = pvo_pt->pte_hi; powerpc_sync(); moea_pte_valid++; } static __inline void moea_pte_unset(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) { mtx_assert(&moea_table_mutex, MA_OWNED); pvo_pt->pte_hi &= ~PTE_VALID; /* * Force the reg & chg bits back into the PTEs. */ powerpc_sync(); /* * Invalidate the pte. */ pt->pte_hi &= ~PTE_VALID; tlbie(va); /* * Save the reg & chg bits. */ moea_pte_synch(pt, pvo_pt); moea_pte_valid--; } static __inline void moea_pte_change(struct pte *pt, struct pte *pvo_pt, vm_offset_t va) { /* * Invalidate the PTE */ moea_pte_unset(pt, pvo_pt, va); moea_pte_set(pt, pvo_pt); } /* * Quick sort callout for comparing memory regions. 
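 *
 * (om_cmp() is the comparator handed to qsort() in moea_bootstrap();
 * it orders the Open Firmware translation entries by physical address
 * before the non-identity entries are re-entered with moea_kenter().)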
*/ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa < mapb->om_pa) return (-1); else if (mapa->om_pa > mapb->om_pa) return (1); else return (0); } void moea_cpu_bootstrap(mmu_t mmup, int ap) { u_int sdr; int i; if (ap) { powerpc_sync(); __asm __volatile("mtdbatu 0,%0" :: "r"(battable[0].batu)); __asm __volatile("mtdbatl 0,%0" :: "r"(battable[0].batl)); isync(); __asm __volatile("mtibatu 0,%0" :: "r"(battable[0].batu)); __asm __volatile("mtibatl 0,%0" :: "r"(battable[0].batl)); isync(); } #ifdef WII /* * Special case for the Wii: don't install the PCI BAT. */ if (strcmp(installed_platform(), "wii") != 0) { #endif __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); #ifdef WII } #endif isync(); __asm __volatile("mtibatu 1,%0" :: "r"(0)); __asm __volatile("mtdbatu 2,%0" :: "r"(0)); __asm __volatile("mtibatu 2,%0" :: "r"(0)); __asm __volatile("mtdbatu 3,%0" :: "r"(0)); __asm __volatile("mtibatu 3,%0" :: "r"(0)); isync(); for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); powerpc_sync(); sdr = (u_int)moea_pteg_table | (moea_pteg_mask >> 10); __asm __volatile("mtsdr1 %0" :: "r"(sdr)); isync(); tlbia(); } void moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen, mmu; int sz; int i, j; vm_size_t size, physsz, hwphyssz; vm_offset_t pa, va, off; void *dpcpu; register_t msr; /* * Set up BAT0 to map the lowest 256 MB area */ battable[0x0].batl = BATL(0x00000000, BAT_M, BAT_PP_RW); battable[0x0].batu = BATU(0x00000000, BAT_BL_256M, BAT_Vs); /* * Map PCI memory space. */ battable[0x8].batl = BATL(0x80000000, BAT_I|BAT_G, BAT_PP_RW); battable[0x8].batu = BATU(0x80000000, BAT_BL_256M, BAT_Vs); battable[0x9].batl = BATL(0x90000000, BAT_I|BAT_G, BAT_PP_RW); battable[0x9].batu = BATU(0x90000000, BAT_BL_256M, BAT_Vs); battable[0xa].batl = BATL(0xa0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xa].batu = BATU(0xa0000000, BAT_BL_256M, BAT_Vs); battable[0xb].batl = BATL(0xb0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xb].batu = BATU(0xb0000000, BAT_BL_256M, BAT_Vs); /* * Map obio devices. */ battable[0xf].batl = BATL(0xf0000000, BAT_I|BAT_G, BAT_PP_RW); battable[0xf].batu = BATU(0xf0000000, BAT_BL_256M, BAT_Vs); /* * Use an IBAT and a DBAT to map the bottom segment of memory * where we are. Turn off instruction relocation temporarily * to prevent faults while reprogramming the IBAT. */ msr = mfmsr(); mtmsr(msr & ~PSL_IR); __asm (".balign 32; \n" "mtibatu 0,%0; mtibatl 0,%1; isync; \n" "mtdbatu 0,%0; mtdbatl 0,%1; isync" :: "r"(battable[0].batu), "r"(battable[0].batl)); mtmsr(msr); #ifdef WII if (strcmp(installed_platform(), "wii") != 0) { #endif /* map pci space */ __asm __volatile("mtdbatu 1,%0" :: "r"(battable[8].batu)); __asm __volatile("mtdbatl 1,%0" :: "r"(battable[8].batl)); #ifdef WII } #endif isync(); /* set global direct map flag */ hw_direct_map = 1; mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea_bootstrap: physical memory"); for (i = 0; i < pregions_sz; i++) { vm_offset_t pa; vm_offset_t end; CTR3(KTR_PMAP, "physregion: %#x - %#x (%#x)", pregions[i].mr_start, pregions[i].mr_start + pregions[i].mr_size, pregions[i].mr_size); /* * Install entries into the BAT table to allow all * of physmem to be convered by on-demand BAT entries. 
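 * Each BAT pair maps one naturally aligned 256MB block (BAT_BL_256M),
 * which is why the loop rounds mr_start down to a 256MB boundary and
 * then steps through the region in SEGMENT_LENGTH increments, filling
 * one battable slot per block.  As a sketch, a region starting at
 * physical 0x10000000 ends up installing:
 *
 *	battable[1].batl = BATL(0x10000000, BAT_M, BAT_PP_RW);
 *	battable[1].batu = BATU(0x10000000, BAT_BL_256M, BAT_Vs);
 *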
* The loop will sometimes set the same battable element * twice, but that's fine since they won't be used for * a while yet. */ pa = pregions[i].mr_start & 0xf0000000; end = pregions[i].mr_start + pregions[i].mr_size; do { u_int n = pa >> ADDR_SR_SHFT; battable[n].batl = BATL(pa, BAT_M, BAT_PP_RW); battable[n].batu = BATU(pa, BAT_BL_256M, BAT_Vs); pa += SEGMENT_LENGTH; } while (pa < end); } if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); /* * Allocate PTEG table. */ #ifdef PTEGCOUNT moea_pteg_count = PTEGCOUNT; #else moea_pteg_count = 0x1000; while (moea_pteg_count < physmem) moea_pteg_count <<= 1; moea_pteg_count >>= 1; #endif /* PTEGCOUNT */ size = moea_pteg_count * sizeof(struct pteg); CTR2(KTR_PMAP, "moea_bootstrap: %d PTEGs, %d bytes", moea_pteg_count, size); moea_pteg_table = (struct pteg *)moea_bootstrap_alloc(size, size); CTR1(KTR_PMAP, "moea_bootstrap: PTEG table at %p", moea_pteg_table); bzero((void *)moea_pteg_table, moea_pteg_count * sizeof(struct pteg)); moea_pteg_mask = moea_pteg_count - 1; /* * Allocate pv/overflow lists. */ size = sizeof(struct pvo_head) * moea_pteg_count; moea_pvo_table = (struct pvo_head *)moea_bootstrap_alloc(size, PAGE_SIZE); CTR1(KTR_PMAP, "moea_bootstrap: PVO table at %p", moea_pvo_table); for (i = 0; i < moea_pteg_count; i++) LIST_INIT(&moea_pvo_table[i]); /* * Initialize the lock that synchronizes access to the pteg and pvo * tables. */ mtx_init(&moea_table_mutex, "pmap table", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&moea_vsid_mutex, "VSID table", NULL, MTX_DEF); mtx_init(&tlbie_mtx, "tlbie", NULL, MTX_SPIN); /* * Initialise the unmanaged pvo pool. */ moea_bpvo_pool = (struct pvo_entry *)moea_bootstrap_alloc( BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); moea_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ moea_vsid_bitmap[(KERNEL_VSIDBITS & (NPMAPS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea_vsid_bitmap[0] |= 1; /* * Initialize the kernel pmap (which is statically allocated). 
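 *
 * All 16 segment registers get fixed, well-known VSIDs
 * (EMPTY_SEGMENT + i), the pmap is marked active on every CPU and its
 * PVO tree starts out empty.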
*/ PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* * Set up the Open Firmware mappings */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1 && (mmu = OF_instance_to_package(mmui)) != -1 && (sz = OF_getproplen(mmu, "translations")) != -1) { translations = NULL; for (i = 0; phys_avail[i] != 0; i += 2) { if (phys_avail[i + 1] >= sz) { translations = (struct ofw_map *)phys_avail[i]; break; } } if (translations == NULL) panic("moea_bootstrap: no space to copy translations"); bzero(translations, sz); if (OF_getprop(mmu, "translations", translations, sz) == -1) panic("moea_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea_bootstrap: translations"); sz /= sizeof(*translations); qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", translations[i].om_pa, translations[i].om_va, translations[i].om_len); /* * If the mapping is 1:1, let the RAM and device * on-demand BAT tables take care of the translation. */ if (translations[i].om_va == translations[i].om_pa) continue; /* Enter the pages */ for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) moea_kenter(mmup, translations[i].om_va + off, translations[i].om_pa + off); } } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); moea_cpu_bootstrap(mmup,0); pmap_bootstrapped++; /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ pa = moea_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; CTR2(KTR_PMAP, "moea_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = KSTACK_PAGES; for (i = 0; i < KSTACK_PAGES; i++) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); } /* * Activate a user pmap. The pmap must be activated before it's address * space can be accessed in any way. */ void moea_activate(mmu_t mmu, struct thread *td) { pmap_t pm, pmr; /* * Load all the data we need up front to encourage the compiler to * not issue any loads while we have interrupts disabled below. 
*/ pm = &td->td_proc->p_vmspace->vm_pmap; pmr = pm->pmap_phys; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); PCPU_SET(curpmap, pmr); } void moea_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); PCPU_SET(curpmap, NULL); } void moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) { struct pvo_entry *pvo; PMAP_LOCK(pm); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo != NULL) { if (wired) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) pm->pm_stats.wired_count++; pvo->pvo_vaddr |= PVO_WIRED; } else { if ((pvo->pvo_vaddr & PVO_WIRED) != 0) pm->pm_stats.wired_count--; pvo->pvo_vaddr &= ~PVO_WIRED; } } PMAP_UNLOCK(pm); } void moea_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); bcopy((void *)src, (void *)dst, PAGE_SIZE); } /* * Zero a page of physical memory by temporarily mapping it into the tlb. */ void moea_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)pa; bzero(va, PAGE_SIZE); } void moea_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)(pa + off); bzero(va, size); } void moea_zero_page_idle(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); void *va = (void *)pa; bzero(va, PAGE_SIZE); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. */ void moea_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); moea_enter_locked(pmap, va, m, prot, wired); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. * * The page queues and pmap must be locked. */ static void moea_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { struct pvo_head *pvo_head; uma_zone_t zone; vm_page_t pg; u_int pte_lo, pvo_flags; int error; if (!moea_initialized) { pvo_head = &moea_pvo_kunmanaged; zone = moea_upvo_zone; pvo_flags = 0; pg = NULL; } else { pvo_head = vm_page_to_pvoh(m); pg = m; zone = moea_mpvo_zone; pvo_flags = PVO_MANAGED; } if (pmap_bootstrapped) rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || VM_OBJECT_LOCKED(m->object), ("moea_enter_locked: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->oflags & VPO_UNMANAGED) != 0) { pvo_flags &= ~PVO_MANAGED; pvo_head = &moea_pvo_kunmanaged; zone = moea_upvo_zone; } pte_lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); if (prot & VM_PROT_WRITE) { pte_lo |= PTE_BW; if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else pte_lo |= PTE_BR; if (prot & VM_PROT_EXECUTE) pvo_flags |= PVO_EXECUTABLE; if (wired) pvo_flags |= PVO_WIRED; error = moea_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); /* * Flush the real page from the instruction cache. This has be done * for all user mappings to prevent information leakage via the * instruction cache. moea_pvo_enter() returns ENOENT for the first * mapping for a page. 
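 *
 * The check below therefore flushes only for brand-new (ENOENT)
 * mappings in user pmaps, and only when the mapping is cacheable,
 * i.e. when neither PTE_I nor PTE_G is set.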
*/ if (pmap != kernel_pmap && error == ENOENT && (pte_lo & (PTE_I | PTE_G)) == 0) moea_syncicache(VM_PAGE_TO_PHYS(m), PAGE_SIZE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void moea_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; psize = atop(end - start); m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea_enter_locked(pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } void moea_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); moea_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } vm_paddr_t moea_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } void moea_init(mmu_t mmu) { moea_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea_initialized = TRUE; } boolean_t moea_is_referenced(mmu_t mmu, vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); return (rv); } boolean_t moea_is_modified(mmu_t mmu, vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PTE_CHG set. 
*/ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = moea_query_bit(m, PTE_CHG); rw_wunlock(&pvh_global_lock); return (rv); } boolean_t moea_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv; PMAP_LOCK(pmap); pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); rv = pvo == NULL || (pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0; PMAP_UNLOCK(pmap); return (rv); } void moea_clear_reference(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_clear_reference: page %p is not managed", m)); rw_wlock(&pvh_global_lock); moea_clear_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); } void moea_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("moea_clear_modify: page %p is busy", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_CHG * set. If the object containing the page is locked and the page is * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); moea_clear_bit(m, PTE_CHG); rw_wunlock(&pvh_global_lock); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; struct pte *pt; pmap_t pmap; u_int lo; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PGA_WRITEABLE * is clear, no page table entries need updating. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); lo = moea_attr_fetch(m); powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if ((pvo->pvo_pte.pte.pte_lo & PTE_PP) != PTE_BR) { pt = moea_pvo_to_pte(pvo, -1); pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; pvo->pvo_pte.pte.pte_lo |= PTE_BR; if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); lo |= pvo->pvo_pte.pte.pte_lo; pvo->pvo_pte.pte.pte_lo &= ~PTE_CHG; moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); } } PMAP_UNLOCK(pmap); } if ((lo & PTE_CHG) != 0) { moea_attr_clear(m, PTE_CHG); vm_page_dirty(m); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * moea_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea_ts_referenced(mmu_t mmu, vm_page_t m) { int count; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_ts_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); count = moea_clear_bit(m, PTE_REF); rw_wunlock(&pvh_global_lock); return (count); } /* * Modify the WIMG settings of all mappings for a page. 
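 *
 * For managed pages every PVO is visited: the cached WIMG bits are
 * replaced with the ones derived from the new memory attribute, and
 * if the mapping currently occupies a hardware PTE slot that PTE is
 * re-entered as well (followed by an isync for kernel mappings).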
*/ void moea_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; struct pvo_head *pvo_head; struct pte *pt; pmap_t pmap; u_int lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } rw_wlock(&pvh_global_lock); pvo_head = vm_page_to_pvoh(m); lo = moea_calc_wimg(VM_PAGE_TO_PHYS(m), ma); LIST_FOREACH(pvo, pvo_head, pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); pt = moea_pvo_to_pte(pvo, -1); pvo->pvo_pte.pte.pte_lo &= ~PTE_WIMG; pvo->pvo_pte.pte.pte_lo |= lo; if (pt != NULL) { moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); if (pvo->pvo_pmap == kernel_pmap) isync(); } mtx_unlock(&moea_table_mutex); PMAP_UNLOCK(pmap); } m->md.mdpg_cache_attrs = ma; rw_wunlock(&pvh_global_lock); } /* * Map a wired page into kernel virtual address space. */ void moea_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } void moea_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) { u_int pte_lo; int error; #if 0 if (va < VM_MIN_KERNEL_ADDRESS) panic("moea_kenter: attempt to enter non-kernel address %#x", va); #endif pte_lo = moea_calc_wimg(pa, ma); PMAP_LOCK(kernel_pmap); error = moea_pvo_enter(kernel_pmap, moea_upvo_zone, &moea_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED); if (error != 0 && error != ENOENT) panic("moea_kenter: failed to enter va %#x pa %#x: %d", va, pa, error); PMAP_UNLOCK(kernel_pmap); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Allow direct mappings on 32-bit OEA */ if (va < VM_MIN_KERNEL_ADDRESS) { return (va); } PMAP_LOCK(kernel_pmap); pvo = moea_pvo_find_va(kernel_pmap, va & ~ADDR_POFF, NULL); KASSERT(pvo != NULL, ("moea_kextract: no addr found")); pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. */ void moea_kremove(mmu_t mmu, vm_offset_t va) { moea_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. We cannot and therefore do not; *virt is updated with the * first usable address after the mapped region. */ vm_offset_t moea_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; sva = *virt; va = sva; for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
*/ boolean_t moea_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; rw_wlock(&pvh_global_lock); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } rw_wunlock(&pvh_global_lock); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; rw_wunlock(&pvh_global_lock); return (count); } static u_int moea_vsidcontext; void moea_pinit(mmu_t mmu, pmap_t pmap) { int i, mask; u_int entropy; KASSERT((int)pmap < VM_MIN_KERNEL_ADDRESS, ("moea_pinit: virt pmap")); PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); if ((pmap->pmap_phys = (pmap_t)moea_kextract(mmu, (vm_offset_t)pmap)) == NULL) { pmap->pmap_phys = pmap; } mtx_lock(&moea_vsid_mutex); /* * Allocate some segment registers for this pmap. */ for (i = 0; i < NPMAPS; i += VSID_NBPW) { u_int hash, n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea_vsidcontext = (moea_vsidcontext * 0x1105) + entropy; hash = moea_vsidcontext & (NPMAPS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea_vsidcontext & 0xfffff); if (moea_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? */ if (moea_vsid_bitmap[n] == 0xffffffff) { entropy = (moea_vsidcontext >> 20); continue; } i = ffs(~moea_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= 0xfffff & ~(VSID_NBPW - 1); hash |= i; } KASSERT(!(moea_vsid_bitmap[n] & mask), ("Allocating in-use VSID group %#x\n", hash)); moea_vsid_bitmap[n] |= mask; for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); mtx_unlock(&moea_vsid_mutex); return; } mtx_unlock(&moea_vsid_mutex); panic("moea_pinit: out of segments"); } /* * Initialize the pmap associated with process 0. */ void moea_pinit0(mmu_t mmu, pmap_t pm) { moea_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ void moea_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo, *tpvo, key; struct pte *pt; KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap, ("moea_protect: non current pmap")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { moea_remove(mmu, pm, sva, eva); return; } rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); if ((prot & VM_PROT_EXECUTE) == 0) pvo->pvo_vaddr &= ~PVO_EXECUTABLE; /* * Grab the PTE pointer before we diddle with the cached PTE * copy. */ pt = moea_pvo_to_pte(pvo, -1); /* * Change the protection of the page. 
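 *
 * Note that the only transition performed here is a downgrade to read-only
 * (PTE_BR); execute permission is tracked separately via PVO_EXECUTABLE
 * above.  A hedged example of how this path is typically reached from the
 * MI layer (addresses and process pointer are placeholders):
 *
 *	// e.g. mprotect(addr, len, PROT_READ) eventually performs:
 *	pmap_protect(&p->p_vmspace->vm_pmap, sva, eva, VM_PROT_READ);
 *
 * which lands in moea_protect() and walks the PVOs covering [sva, eva).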
*/ pvo->pvo_pte.pte.pte_lo &= ~PTE_PP; pvo->pvo_pte.pte.pte_lo |= PTE_BR; /* * If the PVO is in the page table, update that pte as well. */ if (pt != NULL) { moea_pte_change(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); } } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pm); } /* * Map a list of wired pages into kernel virtual address space. This is * intended for temporary mappings which do not need page modification or * references recorded. Existing mappings in the region are overwritten. */ void moea_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { moea_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } } /* * Remove page mappings from kernel virtual address space. Intended for * temporary mappings entered by moea_qenter. */ void moea_qremove(mmu_t mmu, vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { moea_kremove(mmu, va); va += PAGE_SIZE; } } void moea_release(mmu_t mmu, pmap_t pmap) { int idx, mask; /* * Free segment register's VSID */ if (pmap->pm_sr[0] == 0) panic("moea_release"); mtx_lock(&moea_vsid_mutex); idx = VSID_TO_HASH(pmap->pm_sr[0]) & (NPMAPS-1); mask = 1 << (idx % VSID_NBPW); idx /= VSID_NBPW; moea_vsid_bitmap[idx] &= ~mask; mtx_unlock(&moea_vsid_mutex); PMAP_LOCK_DESTROY(pmap); } /* * Remove the given range of addresses from the specified map. */ void moea_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { struct pvo_entry *pvo, *tpvo, key; rw_wlock(&pvh_global_lock); PMAP_LOCK(pm); key.pvo_vaddr = sva; for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) { tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo); moea_pvo_remove(pvo, -1); } PMAP_UNLOCK(pm); rw_wunlock(&pvh_global_lock); } /* * Remove physical page from all pmaps in which it resides. moea_pvo_remove() * will reflect changes in pte's back to the vm_page. */ void moea_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_head *pvo_head; struct pvo_entry *pvo, *next_pvo; pmap_t pmap; rw_wlock(&pvh_global_lock); pvo_head = vm_page_to_pvoh(m); for (pvo = LIST_FIRST(pvo_head); pvo != NULL; pvo = next_pvo) { next_pvo = LIST_NEXT(pvo, pvo_vlink); pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); moea_pvo_remove(pvo, -1); PMAP_UNLOCK(pmap); } if ((m->aflags & PGA_WRITEABLE) && moea_query_bit(m, PTE_CHG)) { moea_attr_clear(m, PTE_CHG); vm_page_dirty(m); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea_bootstrap before avail start and end are * calculated. 
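 *
 * A worked example of the carving below (numbers are illustrative): with
 * phys_avail[] = { 0x00003000, 0x00800000, 0, ... }, a request for
 * size = 0x4000 and align = 0x1000 gives s = 0x00003000, e = 0x00007000.
 * Since s equals phys_avail[i], the region is taken from the front and the
 * entry simply shrinks to { 0x00007000, 0x00800000 }.  An allocation from
 * the middle of an entry instead splits it in two and bumps
 * phys_avail_count.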
*/ static vm_offset_t moea_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea_bootstrap_alloc: could not allocate memory"); } static void moea_syncicache(vm_offset_t pa, vm_size_t len) { __syncicache((void *)pa, len); } static int moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa, u_int pte_lo, int flags) { struct pvo_entry *pvo; u_int sr; int first; u_int ptegidx; int i; int bootstrap; moea_pvo_enter_calls++; first = 0; bootstrap = 0; /* * Compute the PTE Group index. */ va &= ~ADDR_POFF; sr = va_to_sr(pm->pm_sr, va); ptegidx = va_to_pteg(sr, va); /* * Remove any existing mapping for this page. Reuse the pvo entry if * there is a mapping. */ mtx_lock(&moea_table_mutex); LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa && (pvo->pvo_pte.pte.pte_lo & PTE_PP) == (pte_lo & PTE_PP)) { mtx_unlock(&moea_table_mutex); return (0); } moea_pvo_remove(pvo, -1); break; } } /* * If we aren't overwriting a mapping, try to allocate. */ if (moea_initialized) { pvo = uma_zalloc(zone, M_NOWAIT); } else { if (moea_bpvo_pool_index >= BPVO_POOL_SIZE) { panic("moea_enter: bpvo pool exhausted, %d, %d, %d", moea_bpvo_pool_index, BPVO_POOL_SIZE, BPVO_POOL_SIZE * sizeof(struct pvo_entry)); } pvo = &moea_bpvo_pool[moea_bpvo_pool_index]; moea_bpvo_pool_index++; bootstrap = 1; } if (pvo == NULL) { mtx_unlock(&moea_table_mutex); return (ENOMEM); } moea_pvo_entries++; pvo->pvo_vaddr = va; pvo->pvo_pmap = pm; LIST_INSERT_HEAD(&moea_pvo_table[ptegidx], pvo, pvo_olink); pvo->pvo_vaddr &= ~ADDR_POFF; if (flags & VM_PROT_EXECUTE) pvo->pvo_vaddr |= PVO_EXECUTABLE; if (flags & PVO_WIRED) pvo->pvo_vaddr |= PVO_WIRED; if (pvo_head != &moea_pvo_kunmanaged) pvo->pvo_vaddr |= PVO_MANAGED; if (bootstrap) pvo->pvo_vaddr |= PVO_BOOTSTRAP; moea_pte_create(&pvo->pvo_pte.pte, sr, va, pa | pte_lo); /* * Add to pmap list */ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) pm->pm_stats.wired_count++; pm->pm_stats.resident_count++; /* * We hope this succeeds but it isn't required. */ i = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); if (i >= 0) { PVO_PTEGIDX_SET(pvo, i); } else { panic("moea_pvo_enter: overflow"); moea_pte_overflow++; } mtx_unlock(&moea_table_mutex); return (first ? ENOENT : 0); } static void moea_pvo_remove(struct pvo_entry *pvo, int pteidx) { struct pte *pt; /* * If there is an active pte entry, we need to deactivate it (and * save the ref & cfg bits). 
*/ pt = moea_pvo_to_pte(pvo, pteidx); if (pt != NULL) { moea_pte_unset(pt, &pvo->pvo_pte.pte, pvo->pvo_vaddr); mtx_unlock(&moea_table_mutex); PVO_PTEGIDX_CLR(pvo); } else { moea_pte_overflow--; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_pte.pte.pte_lo & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Save the REF/CHG bits into their cache if the page is managed. */ if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED) { struct vm_page *pg; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); if (pg != NULL) { moea_attr_save(pg, pvo->pvo_pte.pte.pte_lo & (PTE_REF | PTE_CHG)); } } /* * Remove this PVO from the PV and pmap lists. */ LIST_REMOVE(pvo, pvo_vlink); RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remove this from the overflow list and return it to the pool * if we aren't going to reuse it. */ LIST_REMOVE(pvo, pvo_olink); if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree(pvo->pvo_vaddr & PVO_MANAGED ? moea_mpvo_zone : moea_upvo_zone, pvo); moea_pvo_entries--; moea_pvo_remove_calls++; } static __inline int moea_pvo_pte_index(const struct pvo_entry *pvo, int ptegidx) { int pteidx; /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pte_lo[11:9] and by * noticing the HID bit. */ pteidx = ptegidx * 8 + PVO_PTEGIDX_GET(pvo); if (pvo->pvo_pte.pte.pte_hi & PTE_HID) pteidx ^= moea_pteg_mask * 8; return (pteidx); } static struct pvo_entry * moea_pvo_find_va(pmap_t pm, vm_offset_t va, int *pteidx_p) { struct pvo_entry *pvo; int ptegidx; u_int sr; va &= ~ADDR_POFF; sr = va_to_sr(pm->pm_sr, va); ptegidx = va_to_pteg(sr, va); mtx_lock(&moea_table_mutex); LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if (pteidx_p) *pteidx_p = moea_pvo_pte_index(pvo, ptegidx); break; } } mtx_unlock(&moea_table_mutex); return (pvo); } static struct pte * moea_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) { struct pte *pt; /* * If we haven't been supplied the ptegidx, calculate it. */ if (pteidx == -1) { int ptegidx; u_int sr; sr = va_to_sr(pvo->pvo_pmap->pm_sr, pvo->pvo_vaddr); ptegidx = va_to_pteg(sr, pvo->pvo_vaddr); pteidx = moea_pvo_pte_index(pvo, ptegidx); } pt = &moea_pteg_table[pteidx >> 3].pt[pteidx & 7]; mtx_lock(&moea_table_mutex); if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { panic("moea_pvo_to_pte: pvo %p has valid pte in pvo but no " "valid pte index", pvo); } if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { panic("moea_pvo_to_pte: pvo %p has valid pte index in pvo " "pvo but no valid pte", pvo); } if ((pt->pte_hi ^ (pvo->pvo_pte.pte.pte_hi & ~PTE_VALID)) == PTE_VALID) { if ((pvo->pvo_pte.pte.pte_hi & PTE_VALID) == 0) { panic("moea_pvo_to_pte: pvo %p has valid pte in " "moea_pteg_table %p but invalid in pvo", pvo, pt); } if (((pt->pte_lo ^ pvo->pvo_pte.pte.pte_lo) & ~(PTE_CHG|PTE_REF)) != 0) { panic("moea_pvo_to_pte: pvo %p pte does not match " "pte %p in moea_pteg_table", pvo, pt); } mtx_assert(&moea_table_mutex, MA_OWNED); return (pt); } if (pvo->pvo_pte.pte.pte_hi & PTE_VALID) { panic("moea_pvo_to_pte: pvo %p has invalid pte %p in " "moea_pteg_table but valid in pvo", pvo, pt); } mtx_unlock(&moea_table_mutex); return (NULL); } /* * XXX: THIS STUFF SHOULD BE IN pte.c? 
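 *
 * moea_pte_spill() below reloads an evicted mapping straight from its PVO
 * when the hardware hash lookup misses.  A hedged sketch of how a low-level
 * fault handler could use it (the caller shown is hypothetical, not the
 * actual trap code):
 *
 *	// On a DSI/ISI caused purely by hash-table pressure:
 *	if (moea_pte_spill(fault_addr))
 *		return;		// PTE reinstalled, just retry the access
 *	// Otherwise fall through to the full vm_fault() path.
 *
 * The return value is 1 when a source PVO was found and its PTE was written
 * into the group (possibly evicting a victim), and 0 otherwise.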
*/ int moea_pte_spill(vm_offset_t addr) { struct pvo_entry *source_pvo, *victim_pvo; struct pvo_entry *pvo; int ptegidx, i, j; u_int sr; struct pteg *pteg; struct pte *pt; moea_pte_spills++; sr = mfsrin(addr); ptegidx = va_to_pteg(sr, addr); /* * Have to substitute some entry. Use the primary hash for this. * Use low bits of timebase as random generator. */ pteg = &moea_pteg_table[ptegidx]; mtx_lock(&moea_table_mutex); __asm __volatile("mftb %0" : "=r"(i)); i &= 7; pt = &pteg->pt[i]; source_pvo = NULL; victim_pvo = NULL; LIST_FOREACH(pvo, &moea_pvo_table[ptegidx], pvo_olink) { /* * We need to find a pvo entry for this address. */ if (source_pvo == NULL && moea_pte_match(&pvo->pvo_pte.pte, sr, addr, pvo->pvo_pte.pte.pte_hi & PTE_HID)) { /* * Now found an entry to be spilled into the pteg. * The PTE is now valid, so we know it's active. */ j = moea_pte_insert(ptegidx, &pvo->pvo_pte.pte); if (j >= 0) { PVO_PTEGIDX_SET(pvo, j); moea_pte_overflow--; mtx_unlock(&moea_table_mutex); return (1); } source_pvo = pvo; if (victim_pvo != NULL) break; } /* * We also need the pvo entry of the victim we are replacing * so save the R & C bits of the PTE. */ if ((pt->pte_hi & PTE_HID) == 0 && victim_pvo == NULL && moea_pte_compare(pt, &pvo->pvo_pte.pte)) { victim_pvo = pvo; if (source_pvo != NULL) break; } } if (source_pvo == NULL) { mtx_unlock(&moea_table_mutex); return (0); } if (victim_pvo == NULL) { if ((pt->pte_hi & PTE_HID) == 0) panic("moea_pte_spill: victim p-pte (%p) has no pvo" "entry", pt); /* * If this is a secondary PTE, we need to search it's primary * pvo bucket for the matching PVO. */ LIST_FOREACH(pvo, &moea_pvo_table[ptegidx ^ moea_pteg_mask], pvo_olink) { /* * We also need the pvo entry of the victim we are * replacing so save the R & C bits of the PTE. */ if (moea_pte_compare(pt, &pvo->pvo_pte.pte)) { victim_pvo = pvo; break; } } if (victim_pvo == NULL) panic("moea_pte_spill: victim s-pte (%p) has no pvo" "entry", pt); } /* * We are invalidating the TLB entry for the EA we are replacing even * though it's valid. If we don't, we lose any ref/chg bit changes * contained in the TLB entry. */ source_pvo->pvo_pte.pte.pte_hi &= ~PTE_HID; moea_pte_unset(pt, &victim_pvo->pvo_pte.pte, victim_pvo->pvo_vaddr); moea_pte_set(pt, &source_pvo->pvo_pte.pte); PVO_PTEGIDX_CLR(victim_pvo); PVO_PTEGIDX_SET(source_pvo, i); moea_pte_replacements++; mtx_unlock(&moea_table_mutex); return (1); } static int moea_pte_insert(u_int ptegidx, struct pte *pvo_pt) { struct pte *pt; int i; mtx_assert(&moea_table_mutex, MA_OWNED); /* * First try primary hash. */ for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & PTE_VALID) == 0) { pvo_pt->pte_hi &= ~PTE_HID; moea_pte_set(pt, pvo_pt); return (i); } } /* * Now try secondary hash. */ ptegidx ^= moea_pteg_mask; for (pt = moea_pteg_table[ptegidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & PTE_VALID) == 0) { pvo_pt->pte_hi |= PTE_HID; moea_pte_set(pt, pvo_pt); return (i); } } panic("moea_pte_insert: overflow"); return (-1); } static boolean_t moea_query_bit(vm_page_t m, int ptebit) { struct pvo_entry *pvo; struct pte *pt; rw_assert(&pvh_global_lock, RA_WLOCKED); if (moea_attr_fetch(m) & ptebit) return (TRUE); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if we saved the bit off. If so, cache it and return * success. */ if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); return (TRUE); } } /* * No luck, now go through the hard part of looking at the PTEs * themselves. 
Sync so that any pending REF/CHG bits are flushed to * the PTEs. */ powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, cache it and return success. */ pt = moea_pvo_to_pte(pvo, -1); if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); mtx_unlock(&moea_table_mutex); if (pvo->pvo_pte.pte.pte_lo & ptebit) { moea_attr_save(m, ptebit); return (TRUE); } } } return (FALSE); } static u_int moea_clear_bit(vm_page_t m, int ptebit) { u_int count; struct pvo_entry *pvo; struct pte *pt; rw_assert(&pvh_global_lock, RA_WLOCKED); /* * Clear the cached value. */ moea_attr_clear(m, ptebit); /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). note that since the pvo entries and * list heads are accessed via BAT0 and are never placed in the page * table, we don't have to worry about further accesses setting the * REF/CHG bits. */ powerpc_sync(); /* * For each pvo entry, clear the pvo's ptebit. If this pvo has a * valid pte clear the ptebit from the valid pte. */ count = 0; LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pt = moea_pvo_to_pte(pvo, -1); if (pt != NULL) { moea_pte_synch(pt, &pvo->pvo_pte.pte); if (pvo->pvo_pte.pte.pte_lo & ptebit) { count++; moea_pte_clear(pt, PVO_VADDR(pvo), ptebit); } mtx_unlock(&moea_table_mutex); } pvo->pvo_pte.pte.pte_lo &= ~ptebit; } return (count); } /* * Return true if the physical range is encompassed by the battable[idx] */ static int moea_bat_mapped(int idx, vm_offset_t pa, vm_size_t size) { u_int prot; u_int32_t start; u_int32_t end; u_int32_t bat_ble; /* * Return immediately if not a valid mapping */ if (!(battable[idx].batu & BAT_Vs)) return (EINVAL); /* * The BAT entry must be cache-inhibited, guarded, and r/w * so it can function as an i/o page */ prot = battable[idx].batl & (BAT_I|BAT_G|BAT_PP_RW); if (prot != (BAT_I|BAT_G|BAT_PP_RW)) return (EPERM); /* * The address should be within the BAT range. Assume that the * start address in the BAT has the correct alignment (thus * not requiring masking) */ start = battable[idx].batl & BAT_PBS; bat_ble = (battable[idx].batu & ~(BAT_EBS)) | 0x03; end = start | (bat_ble << 15) | 0x7fff; if ((pa < start) || ((pa + size) > end)) return (ERANGE); return (0); } boolean_t moea_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { int i; /* * This currently does not work for entries that * overlap 256M BAT segments. */ for(i = 0; i < 16; i++) if (moea_bat_mapped(i, pa, size) == 0) return (0); return (EFAULT); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * moea_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return (moea_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT)); } void * moea_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; int i; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); /* * If the physical address lies within a valid BAT table entry, * return the 1:1 mapping. This currently doesn't work * for regions that overlap 256M BAT segments. 
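 *
 * For illustration (values assumed, not taken from a real board): a
 * battable[] slot covering 256MB of I/O at 0x80000000 with BAT_I|BAT_G|
 * BAT_PP_RW has its BL field all ones, so moea_bat_mapped() computes
 * bat_ble = 0x1fff and end = 0x80000000 | (0x1fff << 15) | 0x7fff
 * = 0x8fffffff.  A request such as moea_mapdev(mmu, 0x80400000, 0x1000)
 * then falls inside that block, moea_bat_mapped() returns 0, and the
 * physical address is handed back directly instead of consuming KVA.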
*/ for (i = 0; i < 16; i++) { if (moea_bat_mapped(i, pa, size) == 0) return ((void *) pa); } va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("moea_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea_kenter_attr(mmu, tmpva, ppa, ma); tlbie(tmpva); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void moea_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; /* * If this is outside kernel virtual space, then it's a * battable entry and doesn't require unmapping */ if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= virtual_end)) { base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } } static void moea_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); if (pvo != NULL) { pa = (pvo->pvo_pte.pte.pte_lo & PTE_RPGN) | (va & ADDR_POFF); moea_syncicache(pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 00dab9bd883b..0d77b576cffd 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1,2526 +1,2525 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is also stored by the * logical address mapping module, this module may throw away valid virtual * to physical mappings at almost any time. However, invalidations of * mappings must be done as requested. * * In order to cope with hardware architectures which make virtual to * physical map invalidates expensive, this module may delay invalidate * reduced protection operations until such time as they are actually * necessary. 
This module is given full information as to which processors * are currently using which maps, and to when physical maps must be made * correct. */ #include "opt_compat.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4)) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL /* * Locking semantics: * -- Read lock: if no modifications are being made to either the PVO lists * or page table or if any modifications being made result in internal * changes (e.g. wiring, protection) such that the existence of the PVOs * is unchanged and they remain associated with the same pmap (in which * case the changes should be protected by the pmap lock) * -- Write lock: required if PTEs/PVOs are being inserted or removed. */ #define LOCK_TABLE_RD() rw_rlock(&moea64_table_lock) #define UNLOCK_TABLE_RD() rw_runlock(&moea64_table_lock) #define LOCK_TABLE_WR() rw_wlock(&moea64_table_lock) #define UNLOCK_TABLE_WR() rw_wunlock(&moea64_table_lock) struct ofw_map { cell_t om_va; cell_t om_len; cell_t om_pa_hi; cell_t om_pa_lo; cell_t om_mode; }; /* * Map of physical memory regions. */ static struct mem_region *regions; static struct mem_region *pregions; static u_int phys_avail_count; static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); /* * Lock for the pteg and pvo tables. */ struct rwlock moea64_table_lock; struct mtx moea64_slb_mutex; /* * PTEG data. */ u_int moea64_pteg_count; u_int moea64_pteg_mask; /* * PVO data. */ struct pvo_head *moea64_pvo_table; /* pvo entries by pteg index */ uma_zone_t moea64_upvo_zone; /* zone for pvo entries for unmanaged pages */ uma_zone_t moea64_mpvo_zone; /* zone for pvo entries for managed pages */ #define BPVO_POOL_SIZE 327680 static struct pvo_entry *moea64_bpvo_pool; static int moea64_bpvo_pool_index = 0; #define VSID_NBPW (sizeof(u_int32_t) * 8) #ifdef __powerpc64__ #define NVSIDS (NPMAPS * 16) #define VSID_HASHMASK 0xffffffffUL #else #define NVSIDS NPMAPS #define VSID_HASHMASK 0xfffffUL #endif static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW]; static boolean_t moea64_initialized = FALSE; /* * Statistics. 
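 *
 * These counters are exported read-only under the machdep sysctl tree by
 * the SYSCTL_INT() lines that follow.  A minimal userland sketch for
 * watching PVO churn (a hypothetical monitoring snippet, not part of the
 * kernel; the OID name comes from the SYSCTL_INT declarations below):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		u_int pvos;
 *		size_t len = sizeof(pvos);
 *
 *		// machdep.moea64_pvo_entries counts live PVO entries.
 *		if (sysctlbyname("machdep.moea64_pvo_entries", &pvos, &len,
 *		    NULL, 0) == 0)
 *			printf("active PVOs: %u\n", pvos);
 *		return (0);
 *	}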
*/ u_int moea64_pte_valid = 0; u_int moea64_pte_overflow = 0; u_int moea64_pvo_entries = 0; u_int moea64_pvo_enter_calls = 0; u_int moea64_pvo_remove_calls = 0; SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD, &moea64_pte_valid, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD, &moea64_pte_overflow, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD, &moea64_pvo_entries, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD, &moea64_pvo_enter_calls, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; struct pvo_entry *moea64_scratchpage_pvo[2]; uintptr_t moea64_scratchpage_pte[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; int moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* * PVO calls. */ static int moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *, vm_offset_t, vm_offset_t, uint64_t, int); static void moea64_pvo_remove(mmu_t, struct pvo_entry *); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); /* * Utility routines. */ static boolean_t moea64_query_bit(mmu_t, vm_page_t, u_int64_t); static u_int moea64_clear_bit(mmu_t, vm_page_t, u_int64_t); static void moea64_kremove(mmu_t, vm_offset_t); static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz); /* * Kernel MMU interface */ void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea64_clear_modify(mmu_t, vm_page_t); void moea64_clear_reference(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t, boolean_t); void moea64_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_page_t, vm_prot_t); void moea64_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, vm_prot_t); vm_paddr_t moea64_extract(mmu_t, pmap_t, vm_offset_t); vm_page_t moea64_extract_and_hold(mmu_t, pmap_t, vm_offset_t, vm_prot_t); void moea64_init(mmu_t); boolean_t moea64_is_modified(mmu_t, vm_page_t); boolean_t moea64_is_prefaultable(mmu_t, pmap_t, vm_offset_t); boolean_t moea64_is_referenced(mmu_t, vm_page_t); int moea64_ts_referenced(mmu_t, vm_page_t); vm_offset_t moea64_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t, int); boolean_t moea64_page_exists_quick(mmu_t, pmap_t, vm_page_t); int moea64_page_wired_mappings(mmu_t, vm_page_t); void moea64_pinit(mmu_t, pmap_t); void moea64_pinit0(mmu_t, pmap_t); void moea64_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); void moea64_qenter(mmu_t, vm_offset_t, vm_page_t *, int); void moea64_qremove(mmu_t, vm_offset_t, int); void moea64_release(mmu_t, pmap_t); void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_zero_page_idle(mmu_t, vm_page_t); void moea64_activate(mmu_t, struct thread *); void moea64_deactivate(mmu_t, struct thread *); void *moea64_mapdev(mmu_t, vm_paddr_t, vm_size_t); void *moea64_mapdev_attr(mmu_t, vm_offset_t, vm_size_t, vm_memattr_t); void moea64_unmapdev(mmu_t, vm_offset_t, vm_size_t); vm_paddr_t moea64_kextract(mmu_t, vm_offset_t); void moea64_page_set_memattr(mmu_t, vm_page_t m, vm_memattr_t ma); void moea64_kenter_attr(mmu_t, vm_offset_t, vm_offset_t, vm_memattr_t ma); void 
moea64_kenter(mmu_t, vm_offset_t, vm_paddr_t); boolean_t moea64_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t); static void moea64_sync_icache(mmu_t, pmap_t, vm_offset_t, vm_size_t); static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_change_wiring, moea64_change_wiring), MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_clear_reference, moea64_clear_reference), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_enter, moea64_enter), MMUMETHOD(mmu_enter_object, moea64_enter_object), MMUMETHOD(mmu_enter_quick, moea64_enter_quick), MMUMETHOD(mmu_extract, moea64_extract), MMUMETHOD(mmu_extract_and_hold, moea64_extract_and_hold), MMUMETHOD(mmu_init, moea64_init), MMUMETHOD(mmu_is_modified, moea64_is_modified), MMUMETHOD(mmu_is_prefaultable, moea64_is_prefaultable), MMUMETHOD(mmu_is_referenced, moea64_is_referenced), MMUMETHOD(mmu_ts_referenced, moea64_ts_referenced), MMUMETHOD(mmu_map, moea64_map), MMUMETHOD(mmu_page_exists_quick,moea64_page_exists_quick), MMUMETHOD(mmu_page_wired_mappings,moea64_page_wired_mappings), MMUMETHOD(mmu_pinit, moea64_pinit), MMUMETHOD(mmu_pinit0, moea64_pinit0), MMUMETHOD(mmu_protect, moea64_protect), MMUMETHOD(mmu_qenter, moea64_qenter), MMUMETHOD(mmu_qremove, moea64_qremove), MMUMETHOD(mmu_release, moea64_release), MMUMETHOD(mmu_remove, moea64_remove), MMUMETHOD(mmu_remove_pages, moea64_remove_pages), MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), MMUMETHOD(mmu_activate, moea64_activate), MMUMETHOD(mmu_deactivate, moea64_deactivate), MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), /* Internal interfaces */ MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), MMUMETHOD(mmu_kextract, moea64_kextract), MMUMETHOD(mmu_kenter, moea64_kenter), MMUMETHOD(mmu_kenter_attr, moea64_kenter_attr), MMUMETHOD(mmu_dev_direct_mapped,moea64_dev_direct_mapped), { 0, 0 } }; MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } static __inline struct pvo_head * vm_page_to_pvoh(vm_page_t m) { return (&m->md.mdpg_pvoh); } static __inline void moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va, uint64_t pte_lo, int flags) { /* * Construct a PTE. Default to IMB initially. Valid bit only gets * set when the real pte is set in memory. * * Note: Don't set the valid bit for correct operation of tlb update. */ pt->pte_hi = (vsid << LPTE_VSID_SHIFT) | (((uint64_t)(va & ADDR_PIDX) >> ADDR_API_SHFT64) & LPTE_API); if (flags & PVO_LARGE) pt->pte_hi |= LPTE_BIG; pt->pte_lo = pte_lo; } static __inline uint64_t moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) { uint64_t pte_lo; int i; if (ma != VM_MEMATTR_DEFAULT) { switch (ma) { case VM_MEMATTR_UNCACHEABLE: return (LPTE_I | LPTE_G); case VM_MEMATTR_WRITE_COMBINING: case VM_MEMATTR_WRITE_BACK: case VM_MEMATTR_PREFETCHABLE: return (LPTE_I); case VM_MEMATTR_WRITE_THROUGH: return (LPTE_W | LPTE_M); } } /* * Assume the page is cache inhibited and access is guarded unless * it's in our available memory array. 
*/ pte_lo = LPTE_I | LPTE_G; for (i = 0; i < pregions_sz; i++) { if ((pa >= pregions[i].mr_start) && (pa < (pregions[i].mr_start + pregions[i].mr_size))) { pte_lo &= ~(LPTE_I | LPTE_G); pte_lo |= LPTE_M; break; } } return pte_lo; } /* * Quick sort callout for comparing memory regions. */ static int om_cmp(const void *a, const void *b); static int om_cmp(const void *a, const void *b) { const struct ofw_map *mapa; const struct ofw_map *mapb; mapa = a; mapb = b; if (mapa->om_pa_hi < mapb->om_pa_hi) return (-1); else if (mapa->om_pa_hi > mapb->om_pa_hi) return (1); else if (mapa->om_pa_lo < mapb->om_pa_lo) return (-1); else if (mapa->om_pa_lo > mapb->om_pa_lo) return (1); else return (0); } static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/sizeof(struct ofw_map)]; register_t msr; vm_offset_t off; vm_paddr_t pa_base; int i; bzero(translations, sz); if (OF_getprop(mmu, "translations", translations, sz) == -1) panic("moea64_bootstrap: can't get ofw translations"); CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations"); sz /= sizeof(*translations); qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { CTR3(KTR_PMAP, "translation: pa=%#x va=%#x len=%#x", (uint32_t)(translations[i].om_pa_lo), translations[i].om_va, translations[i].om_len); if (translations[i].om_pa_lo % PAGE_SIZE) panic("OFW translation not page-aligned!"); pa_base = translations[i].om_pa_lo; #ifdef __powerpc64__ pa_base += (vm_offset_t)translations[i].om_pa_hi << 32; #else if (translations[i].om_pa_hi) panic("OFW translations above 32-bit boundary!"); #endif /* Now enter the pages for this mapping */ DISABLE_TRANS(msr); for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) { if (moea64_pvo_find_va(kernel_pmap, translations[i].om_va + off) != NULL) continue; moea64_kenter(mmup, translations[i].om_va + off, pa_base + off); } ENABLE_TRANS(msr); } } #ifdef __powerpc64__ static void moea64_probe_large_page(void) { uint16_t pvr = mfpvr() >> 16; switch (pvr) { case IBM970: case IBM970FX: case IBM970MP: powerpc_sync(); isync(); mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG); powerpc_sync(); isync(); /* FALLTHROUGH */ case IBMCELLBE: moea64_large_page_size = 0x1000000; /* 16 MB */ moea64_large_page_shift = 24; break; default: moea64_large_page_size = 0; } moea64_large_page_mask = moea64_large_page_size - 1; } static void moea64_bootstrap_slb_prefault(vm_offset_t va, int large) { struct slb *cache; struct slb entry; uint64_t esid, slbe; uint64_t i; cache = PCPU_GET(slb); esid = va >> ADDR_SR_SHFT; slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; for (i = 0; i < 64; i++) { if (cache[i].slbe == (slbe | i)) return; } entry.slbe = slbe; entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT; if (large) entry.slbv |= SLBV_L; slb_insert_kernel(entry.slbe, entry.slbv); } #endif static void moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { register_t msr; vm_paddr_t pa; vm_offset_t size, off; uint64_t pte_lo; int i; if (moea64_large_page_size == 0) hw_direct_map = 0; DISABLE_TRANS(msr); if (hw_direct_map) { LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); for (i = 0; i < pregions_sz; i++) { for (pa = pregions[i].mr_start; pa < pregions[i].mr_start + pregions[i].mr_size; pa += moea64_large_page_size) { pte_lo = LPTE_M; /* * Set memory access as guarded if prefetch within * the page could exit the available physmem area. 
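 *
 * A worked example with assumed numbers: for a 120MB region starting at 0
 * and 16MB (0x1000000) large pages, the loop maps pages at 0, 0x1000000,
 * ..., 0x7000000.  The last page ends at 0x7ffffff while the region ends
 * at 0x7800000, so pa + moea64_large_page_size runs past
 * mr_start + mr_size and that page gets LPTE_G.  Guarding it keeps
 * speculative loads issued through the mapping from touching addresses
 * beyond the end of RAM.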
*/ if (pa & moea64_large_page_mask) { pa &= moea64_large_page_mask; pte_lo |= LPTE_G; } if (pa + moea64_large_page_size > pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone, NULL, pa, pa, pte_lo, PVO_WIRED | PVO_LARGE); } } PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); } else { size = sizeof(struct pvo_head) * moea64_pteg_count; off = (vm_offset_t)(moea64_pvo_table); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); size = BPVO_POOL_SIZE*sizeof(struct pvo_entry); off = (vm_offset_t)(moea64_bpvo_pool); for (pa = off; pa < off + size; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); /* * Map certain important things, like ourselves. * * NOTE: We do not map the exception vector space. That code is * used only in real mode, and leaving it unmapped allows us to * catch NULL pointer deferences, instead of making NULL a valid * address. */ for (pa = kernelstart & ~PAGE_MASK; pa < kernelend; pa += PAGE_SIZE) moea64_kenter(mmup, pa, pa); } ENABLE_TRANS(msr); } void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { int i, j; vm_size_t physsz, hwphyssz; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ hw_direct_map = 0; /* Make sure battable is zero, since we have no BAT */ for (i = 0; i < 16; i++) { battable[i].batu = 0; battable[i].batl = 0; } #else moea64_probe_large_page(); /* Use a direct map if we have large page support */ if (moea64_large_page_size > 0) hw_direct_map = 1; else hw_direct_map = 0; #endif /* Get physical memory regions from firmware */ mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz); CTR0(KTR_PMAP, "moea64_bootstrap: physical memory"); if (sizeof(phys_avail)/sizeof(phys_avail[0]) < regions_sz) panic("moea64_bootstrap: phys_avail too small"); phys_avail_count = 0; physsz = 0; hwphyssz = 0; TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); for (i = 0, j = 0; i < regions_sz; i++, j += 2) { CTR3(KTR_PMAP, "region: %#x - %#x (%#x)", regions[i].mr_start, regions[i].mr_start + regions[i].mr_size, regions[i].mr_size); if (hwphyssz != 0 && (physsz + regions[i].mr_size) >= hwphyssz) { if (physsz < hwphyssz) { phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + hwphyssz - physsz; physsz = hwphyssz; phys_avail_count++; } break; } phys_avail[j] = regions[i].mr_start; phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size; phys_avail_count++; physsz += regions[i].mr_size; } /* Check for overlap with the kernel and exception vectors */ for (j = 0; j < 2*phys_avail_count; j+=2) { if (phys_avail[j] < EXC_LAST) phys_avail[j] += EXC_LAST; if (kernelstart >= phys_avail[j] && kernelstart < phys_avail[j+1]) { if (kernelend < phys_avail[j+1]) { phys_avail[2*phys_avail_count] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; phys_avail[2*phys_avail_count + 1] = phys_avail[j+1]; phys_avail_count++; } phys_avail[j+1] = kernelstart & ~PAGE_MASK; } if (kernelend >= phys_avail[j] && kernelend < phys_avail[j+1]) { if (kernelstart > phys_avail[j]) { phys_avail[2*phys_avail_count] = phys_avail[j]; phys_avail[2*phys_avail_count + 1] = kernelstart & ~PAGE_MASK; phys_avail_count++; } phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE; } } physmem = btoc(physsz); #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else moea64_pteg_count = 0x1000; while (moea64_pteg_count < physmem) moea64_pteg_count <<= 1; moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ } void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t 
kernelend) { vm_size_t size; register_t msr; int i; /* * Set PTEG mask */ moea64_pteg_mask = moea64_pteg_count - 1; /* * Allocate pv/overflow lists. */ size = sizeof(struct pvo_head) * moea64_pteg_count; moea64_pvo_table = (struct pvo_head *)moea64_bootstrap_alloc(size, PAGE_SIZE); CTR1(KTR_PMAP, "moea64_bootstrap: PVO table at %p", moea64_pvo_table); DISABLE_TRANS(msr); for (i = 0; i < moea64_pteg_count; i++) LIST_INIT(&moea64_pvo_table[i]); ENABLE_TRANS(msr); /* * Initialize the lock that synchronizes access to the pteg and pvo * tables. */ rw_init_flags(&moea64_table_lock, "pmap tables", RW_RECURSE); mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); /* * Initialise the unmanaged pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( BPVO_POOL_SIZE*sizeof(struct pvo_entry), 0); moea64_bpvo_pool_index = 0; /* * Make sure kernel vsid is allocated as well as VSID 0. */ #ifndef __powerpc64__ moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW] |= 1 << (KERNEL_VSIDBITS % VSID_NBPW); moea64_vsid_bitmap[0] |= 1; #endif /* * Initialize the kernel pmap (which is statically allocated). */ #ifdef __powerpc64__ for (i = 0; i < 64; i++) { pcpup->pc_slb[i].slbv = 0; pcpup->pc_slb[i].slbe = 0; } #else for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; #endif kernel_pmap->pmap_phys = kernel_pmap; CPU_FILL(&kernel_pmap->pm_active); RB_INIT(&kernel_pmap->pmap_pvo); PMAP_LOCK_INIT(kernel_pmap); /* * Now map in all the other buffers we allocated earlier */ moea64_setup_direct_map(mmup, kernelstart, kernelend); } void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { ihandle_t mmui; phandle_t chosen; phandle_t mmu; size_t sz; int i; vm_offset_t pa, va; void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real * mode. */ chosen = OF_finddevice("/chosen"); if (chosen != -1 && OF_getprop(chosen, "mmu", &mmui, 4) != -1) { mmu = OF_instance_to_package(mmui); if (mmu == -1 || (sz = OF_getproplen(mmu, "translations")) == -1) sz = 0; if (sz > 6144 /* tmpstksz - 2 KB headroom */) panic("moea64_bootstrap: too many ofw translations"); if (sz > 0) moea64_add_ofw_mappings(mmup, mmu, sz); } /* * Calculate the last available physical address. */ for (i = 0; phys_avail[i + 2] != 0; i += 2) ; Maxmem = powerpc_btop(phys_avail[i + 1]); /* * Initialize MMU and remap early physical mappings */ MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); pmap_bootstrapped++; bs_remap_earlyboot(); /* * Set the start and end of kva. */ virtual_avail = VM_MIN_KERNEL_ADDRESS; virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; /* * Map the entire KVA range into the SLB. We must not fault there. */ #ifdef __powerpc64__ for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH) moea64_bootstrap_slb_prefault(va, 0); #endif /* * Figure out how far we can extend virtual_end into segment 16 * without running into existing mappings. Segment 16 is guaranteed * to contain neither RAM nor devices (at least on Apple hardware), * but will generally contain some OFW mappings we should not * step on. */ #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */ PMAP_LOCK(kernel_pmap); while (virtual_end < VM_MAX_KERNEL_ADDRESS && moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL) virtual_end += PAGE_SIZE; PMAP_UNLOCK(kernel_pmap); #endif /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. 
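 *
 * Layout sketch with illustrative values (KSTACK_GUARD_PAGES = 1,
 * KSTACK_PAGES = 4, 4KB pages; the real constants come from the kernel
 * configuration): if virtual_avail is V on entry, the guard page
 * [V, V + 0x1000) is left unmapped, td_kstack becomes V + 0x1000, the four
 * stack pages are kentered at V + 0x1000 .. V + 0x4fff, and virtual_avail
 * advances to V + 0x5000.  A stack overflow therefore faults on the
 * unmapped guard page instead of silently corrupting whatever sits below.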
*/ pa = moea64_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE); va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE; virtual_avail = va + KSTACK_PAGES * PAGE_SIZE; CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va); thread0.td_kstack = va; thread0.td_kstack_pages = KSTACK_PAGES; for (i = 0; i < KSTACK_PAGES; i++) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the message buffer. */ pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE); msgbufp = (struct msgbuf *)virtual_avail; va = virtual_avail; virtual_avail += round_page(msgbufsize); while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } /* * Allocate virtual address space for the dynamic percpu area. */ pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE); dpcpu = (void *)virtual_avail; va = virtual_avail; virtual_avail += DPCPU_SIZE; while (va < virtual_avail) { moea64_kenter(mmup, va, pa); pa += PAGE_SIZE; va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); /* * Allocate some things for page zeroing. We put this directly * in the page table, marked with LPTE_LOCKED, to avoid any * of the PVO book-keeping or other parts of the VM system * from even knowing that this hack exists. */ if (!hw_direct_map) { mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, MTX_DEF); for (i = 0; i < 2; i++) { moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; virtual_end -= PAGE_SIZE; moea64_kenter(mmup, moea64_scratchpage_va[i], 0); moea64_scratchpage_pvo[i] = moea64_pvo_find_va( kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); LOCK_TABLE_RD(); moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE( mmup, moea64_scratchpage_pvo[i]); moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi |= LPTE_LOCKED; MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i], &moea64_scratchpage_pvo[i]->pvo_pte.lpte, moea64_scratchpage_pvo[i]->pvo_vpn); UNLOCK_TABLE_RD(); } } } /* * Activate a user pmap. The pmap must be activated before its address * space can be accessed in any way. */ void moea64_activate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_SET(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); #else PCPU_SET(curpmap, pm->pmap_phys); #endif } void moea64_deactivate(mmu_t mmu, struct thread *td) { pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; CPU_CLR(PCPU_GET(cpuid), &pm->pm_active); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else PCPU_SET(curpmap, NULL); #endif } void moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) { struct pvo_entry *pvo; uintptr_t pt; uint64_t vsid; int i, ptegidx; LOCK_TABLE_WR(); PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL) { pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (wired) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) pm->pm_stats.wired_count++; pvo->pvo_vaddr |= PVO_WIRED; pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; } else { if ((pvo->pvo_vaddr & PVO_WIRED) != 0) pm->pm_stats.wired_count--; pvo->pvo_vaddr &= ~PVO_WIRED; pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; } if (pt != -1) { /* Update wiring flag in page table. */ MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); } else if (wired) { /* * If we are wiring the page, and it wasn't in the * page table before, add it. 
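 *
 * The group index used below comes from va_to_pteg():
 *
 *	ptegidx = ((vsid & VSID_HASH_MASK) ^
 *	    ((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)) & moea64_pteg_mask;
 *
 * (for 4KB mappings; large pages shift by moea64_large_page_shift instead).
 * MOEA64_PTE_INSERT() then tries to place the PTE in that group, with the
 * same primary-then-secondary probing the 32-bit moea_pte_insert() in
 * mmu_oea.c shows.  If the insert fails, the mapping simply stays cached in
 * the PVO: only a successful slot (i >= 0) is recorded with
 * PVO_PTEGIDX_SET() below.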
*/ vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) { PVO_PTEGIDX_CLR(pvo); PVO_PTEGIDX_SET(pvo, i); } } } UNLOCK_TABLE_WR(); PMAP_UNLOCK(pm); } /* * This goes through and sets the physical address of our * special scratch PTE to the PA we want to zero or copy. Because * of locking issues (this can get called in pvo_enter() by * the UMA allocator), we can't use most other utility functions here */ static __inline void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= ~(LPTE_WIMG | LPTE_RPGN); moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], &moea64_scratchpage_pvo[which]->pvo_pte.lpte, moea64_scratchpage_pvo[which]->pvo_vpn); isync(); } void moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) { vm_offset_t dst; vm_offset_t src; dst = VM_PAGE_TO_PHYS(mdst); src = VM_PAGE_TO_PHYS(msrc); if (hw_direct_map) { bcopy((void *)src, (void *)dst, PAGE_SIZE); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, src); moea64_set_scratchpage_pa(mmu, 1, dst); bcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); mtx_unlock(&moea64_scratchpage_mtx); } } void moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); if (size + off > PAGE_SIZE) panic("moea64_zero_page: size + off > PAGE_SIZE"); if (hw_direct_map) { bzero((caddr_t)pa + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Zero a page of physical memory by temporarily mapping it */ void moea64_zero_page(mmu_t mmu, vm_page_t m) { vm_offset_t pa = VM_PAGE_TO_PHYS(m); vm_offset_t va, off; if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = pa; } for (off = 0; off < PAGE_SIZE; off += cacheline_size) __asm __volatile("dcbz 0,%0" :: "r"(va + off)); if (!hw_direct_map) mtx_unlock(&moea64_scratchpage_mtx); } void moea64_zero_page_idle(mmu_t mmu, vm_page_t m) { moea64_zero_page(mmu, m); } /* * Map the given physical page at the specified virtual address in the * target pmap with the protection requested. If specified the page * will be wired down. 
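 *
 * A hedged usage sketch using the signature in this file (va, m and the
 * process pointer are placeholders supplied by the caller):
 *
 *	// Resident text page for a user process, not wired:
 *	moea64_enter(mmu, &p->p_vmspace->vm_pmap, va, m,
 *	    VM_PROT_READ | VM_PROT_EXECUTE, FALSE);
 *
 *	// Kernel wiring a page so the mapping is never dropped:
 *	moea64_enter(mmu, kernel_pmap, va, m,
 *	    VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * The read/execute case also triggers the icache flush at the bottom of the
 * function the first time such a mapping is created for the page.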
*/ void moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { struct pvo_head *pvo_head; uma_zone_t zone; vm_page_t pg; uint64_t pte_lo; u_int pvo_flags; int error; if (!moea64_initialized) { pvo_head = NULL; pg = NULL; zone = moea64_upvo_zone; pvo_flags = 0; } else { pvo_head = vm_page_to_pvoh(m); pg = m; zone = moea64_mpvo_zone; pvo_flags = PVO_MANAGED; } KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || VM_OBJECT_LOCKED(m->object), ("moea64_enter: page %p is not busy", m)); /* XXX change the pvo head for fake pages */ if ((m->oflags & VPO_UNMANAGED) != 0) { pvo_flags &= ~PVO_MANAGED; pvo_head = NULL; zone = moea64_upvo_zone; } pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m)); if (prot & VM_PROT_WRITE) { pte_lo |= LPTE_BW; if (pmap_bootstrapped && (m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_WRITEABLE); } else pte_lo |= LPTE_BR; if ((prot & VM_PROT_EXECUTE) == 0) pte_lo |= LPTE_NOEXEC; if (wired) pvo_flags |= PVO_WIRED; LOCK_TABLE_WR(); PMAP_LOCK(pmap); error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); PMAP_UNLOCK(pmap); UNLOCK_TABLE_WR(); /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ if (pmap != kernel_pmap && !(m->aflags & PGA_EXECUTABLE) && (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { vm_page_aflag_set(m, PGA_EXECUTABLE); moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } } static void moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz) { /* * This is much trickier than on older systems because * we can't sync the icache on physical addresses directly * without a direct map. Instead we check a couple of cases * where the memory is already mapped in and, failing that, * use the same trick we use for page zeroing to create * a temporary mapping for this physical address. */ if (!pmap_bootstrapped) { /* * If PMAP is not bootstrapped, we are likely to be * in real mode. */ __syncicache((void *)pa, sz); } else if (pmap == kernel_pmap) { __syncicache((void *)va, sz); } else if (hw_direct_map) { __syncicache((void *)pa, sz); } else { /* Use the scratch page to set up a temp mapping */ mtx_lock(&moea64_scratchpage_mtx); moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); mtx_unlock(&moea64_scratchpage_mtx); } } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. 
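 *
 * A small worked example (illustrative numbers): with start = 0x20000000,
 * m_start->pindex = 8 and 4KB pages, a resident page whose pindex is 10 has
 * diff = 2 and is entered at 0x20000000 + ptoa(2) = 0x20002000, while pages
 * of the object that are not resident are simply skipped rather than
 * faulted in.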
*/ void moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_page_t m; vm_pindex_t diff, psize; psize = atop(end - start); m = m_start; while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { moea64_enter(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } } void moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot) { moea64_enter(mmu, pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); } vm_paddr_t moea64_extract(mmu_t mmu, pmap_t pm, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; PMAP_LOCK(pm); pvo = moea64_pvo_find_va(pm, va); if (pvo == NULL) pa = 0; else pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(pm); return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. */ vm_page_t moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; vm_paddr_t pa; m = NULL; pa = 0; PMAP_LOCK(pmap); retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } static mmu_t installed_mmu; static void * moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { /* * This entire routine is a horrible hack to avoid bothering kmem * for new KVA addresses. Because this can get called from inside * kmem allocation routines, calling kmem for a new address here * can lead to multiply locking non-recursive mutexes. 
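 *
 * Instead, the allocator below returns the page's physical address as the
 * "virtual" address and wires a 1:1 mapping for it, so no KVA has to be
 * reserved at all.  Hooked up in moea64_init() via uma_zone_set_allocf(), a
 * page-sized zone allocation then behaves roughly like this hedged sketch:
 *
 *	// pvo = uma_zalloc(moea64_upvo_zone, M_NOWAIT) ends up calling
 *	// moea64_uma_page_alloc(), which vm_page_alloc()s a wired page,
 *	// maps it at va == VM_PAGE_TO_PHYS(page), and returns that va.
 *
 * Note that the PVO zones are created UMA_ZONE_VM | UMA_ZONE_NOFREE in
 * moea64_init(), so these pages are never returned and no matching free
 * routine is needed.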
*/ vm_offset_t va; vm_page_t m; int pflags, needed_lock; *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED; for (;;) { m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ); if (m == NULL) { if (wait & M_NOWAIT) return (NULL); VM_WAIT; } else break; } va = VM_PAGE_TO_PHYS(m); LOCK_TABLE_WR(); if (needed_lock) PMAP_LOCK(kernel_pmap); moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, NULL, va, VM_PAGE_TO_PHYS(m), LPTE_M, PVO_WIRED | PVO_BOOTSTRAP); if (needed_lock) PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero((void *)va, PAGE_SIZE); return (void *)va; } extern int elf32_nxstack; void moea64_init(mmu_t mmu) { CTR0(KTR_PMAP, "moea64_init"); moea64_upvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); moea64_mpvo_zone = uma_zcreate("MPVO entry", sizeof(struct pvo_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { installed_mmu = mmu; uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); } #ifdef COMPAT_FREEBSD32 elf32_nxstack = 1; #endif moea64_initialized = TRUE; } boolean_t moea64_is_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_referenced: page %p is not managed", m)); return (moea64_query_bit(mmu, m, PTE_REF)); } boolean_t moea64_is_modified(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_is_modified: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have LPTE_CHG set. */ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t moea64_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t va) { struct pvo_entry *pvo; boolean_t rv; PMAP_LOCK(pmap); pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); rv = pvo == NULL || (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0; PMAP_UNLOCK(pmap); return (rv); } void moea64_clear_reference(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_reference: page %p is not managed", m)); moea64_clear_bit(mmu, m, LPTE_REF); } void moea64_clear_modify(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_clear_modify: page %p is not managed", m)); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); KASSERT((m->oflags & VPO_BUSY) == 0, ("moea64_clear_modify: page %p is busy", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have LPTE_CHG * set. If the object containing the page is locked and the page is * not VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; moea64_clear_bit(mmu, m, LPTE_CHG); } /* * Clear the write and modified bits in each of the given page's mappings. */ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; uintptr_t pt; pmap_t pmap; uint64_t lo = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_remove_write: page %p is not managed", m)); /* * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by * another thread while the object is locked. Thus, if PGA_WRITEABLE * is clear, no page table entries need updating. 
*/ VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return; powerpc_sync(); LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); lo |= pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG; MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); } } if ((lo & LPTE_CHG) != 0) vm_page_dirty(m); PMAP_UNLOCK(pmap); } UNLOCK_TABLE_RD(); vm_page_aflag_clear(m, PGA_WRITEABLE); } /* * moea64_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int moea64_ts_referenced(mmu_t mmu, vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_ts_referenced: page %p is not managed", m)); return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* * Modify the WIMG settings of all mappings for a page. */ void moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; struct pvo_head *pvo_head; uintptr_t pt; pmap_t pmap; uint64_t lo; if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; } pvo_head = vm_page_to_pvoh(m); lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma); LOCK_TABLE_RD(); LIST_FOREACH(pvo, pvo_head, pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG; pvo->pvo_pte.lpte.pte_lo |= lo; if (pt != -1) { MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); } PMAP_UNLOCK(pmap); } UNLOCK_TABLE_RD(); m->md.mdpg_cache_attrs = ma; } /* * Map a wired page into kernel virtual address space. */ void moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) { uint64_t pte_lo; int error; pte_lo = moea64_calc_wimg(pa, ma); LOCK_TABLE_WR(); PMAP_LOCK(kernel_pmap); error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, NULL, va, pa, pte_lo, PVO_WIRED); PMAP_UNLOCK(kernel_pmap); UNLOCK_TABLE_WR(); if (error != 0 && error != ENOENT) panic("moea64_kenter: failed to enter va %#zx pa %#zx: %d", va, pa, error); } void moea64_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa) { moea64_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT); } /* * Extract the physical page address associated with the given kernel virtual * address. */ vm_paddr_t moea64_kextract(mmu_t mmu, vm_offset_t va) { struct pvo_entry *pvo; vm_paddr_t pa; /* * Shortcut the direct-mapped case when applicable. We never put * anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS. */ if (va < VM_MIN_KERNEL_ADDRESS) return (va); PMAP_LOCK(kernel_pmap); pvo = moea64_pvo_find_va(kernel_pmap, va); KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR, va)); pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va - PVO_VADDR(pvo)); PMAP_UNLOCK(kernel_pmap); return (pa); } /* * Remove a wired page from kernel virtual address space. 
*/ void moea64_kremove(mmu_t mmu, vm_offset_t va) { moea64_remove(mmu, kernel_pmap, va, va + PAGE_SIZE); } /* * Map a range of physical addresses into kernel virtual address space. * * The value passed in *virt is a suggested virtual address for the mapping. * Architectures which can support a direct-mapped physical to virtual region * can return the appropriate address within that region, leaving '*virt' * unchanged. We cannot and therefore do not; *virt is updated with the * first usable address after the mapped region. */ vm_offset_t moea64_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start, vm_paddr_t pa_end, int prot) { vm_offset_t sva, va; sva = *virt; va = sva; for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE) moea64_kenter(mmu, va, pa_start); *virt = va; return (sva); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t moea64_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) { int loops; struct pvo_entry *pvo; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("moea64_page_exists_quick: page %p is not managed", m)); loops = 0; rv = FALSE; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { if (pvo->pvo_pmap == pmap) { rv = TRUE; break; } if (++loops >= 16) break; } UNLOCK_TABLE_RD(); return (rv); } /* * Return the number of managed mappings to the given physical page * that are wired. */ int moea64_page_wired_mappings(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) if ((pvo->pvo_vaddr & PVO_WIRED) != 0) count++; UNLOCK_TABLE_RD(); return (count); } static uintptr_t moea64_vsidcontext; uintptr_t moea64_get_unique_vsid(void) { u_int entropy; register_t hash; uint32_t mask; int i; entropy = 0; __asm __volatile("mftb %0" : "=r"(entropy)); mtx_lock(&moea64_slb_mutex); for (i = 0; i < NVSIDS; i += VSID_NBPW) { u_int n; /* * Create a new value by mutiplying by a prime and adding in * entropy from the timebase register. This is to make the * VSID more random so that the PT hash function collides * less often. (Note that the prime casues gcc to do shifts * instead of a multiply.) */ moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy; hash = moea64_vsidcontext & (NVSIDS - 1); if (hash == 0) /* 0 is special, avoid it */ continue; n = hash >> 5; mask = 1 << (hash & (VSID_NBPW - 1)); hash = (moea64_vsidcontext & VSID_HASHMASK); if (moea64_vsid_bitmap[n] & mask) { /* collision? */ /* anything free in this bucket? 
*/ if (moea64_vsid_bitmap[n] == 0xffffffff) { entropy = (moea64_vsidcontext >> 20); continue; } i = ffs(~moea64_vsid_bitmap[n]) - 1; mask = 1 << i; hash &= VSID_HASHMASK & ~(VSID_NBPW - 1); hash |= i; } KASSERT(!(moea64_vsid_bitmap[n] & mask), ("Allocating in-use VSID %#zx\n", hash)); moea64_vsid_bitmap[n] |= mask; mtx_unlock(&moea64_slb_mutex); return (hash); } mtx_unlock(&moea64_slb_mutex); panic("%s: out of segments",__func__); } #ifdef __powerpc64__ void moea64_pinit(mmu_t mmu, pmap_t pmap) { PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); pmap->pm_slb_tree_root = slb_alloc_tree(); pmap->pm_slb = slb_alloc_user_cache(); pmap->pm_slb_len = 0; } #else void moea64_pinit(mmu_t mmu, pmap_t pmap) { int i; uint32_t hash; PMAP_LOCK_INIT(pmap); RB_INIT(&pmap->pmap_pvo); if (pmap_bootstrapped) pmap->pmap_phys = (pmap_t)moea64_kextract(mmu, (vm_offset_t)pmap); else pmap->pmap_phys = pmap; /* * Allocate some segment registers for this pmap. */ hash = moea64_get_unique_vsid(); for (i = 0; i < 16; i++) pmap->pm_sr[i] = VSID_MAKE(i, hash); KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0")); } #endif /* * Initialize the pmap associated with process 0. */ void moea64_pinit0(mmu_t mmu, pmap_t pm) { moea64_pinit(mmu, pm); bzero(&pm->pm_stats, sizeof(pm->pm_stats)); } /* * Set the physical protection on the specified range of this map as requested. */ static void moea64_pvo_protect(mmu_t mmu, pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot) { uintptr_t pt; struct vm_page *pg; uint64_t oldlo; PMAP_LOCK_ASSERT(pm, MA_OWNED); /* * Grab the PTE pointer before we diddle with the cached PTE * copy. */ pt = MOEA64_PVO_TO_PTE(mmu, pvo); /* * Change the protection of the page. */ oldlo = pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_NOEXEC; if ((prot & VM_PROT_EXECUTE) == 0) pvo->pvo_pte.lpte.pte_lo |= LPTE_NOEXEC; if (prot & VM_PROT_WRITE) pvo->pvo_pte.lpte.pte_lo |= LPTE_BW; else pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); /* * If the PVO is in the page table, update that pte as well. */ if (pt != -1) MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pm != kernel_pmap && pg != NULL && !(pg->aflags & PGA_EXECUTABLE) && (pvo->pvo_pte.lpte.pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { if ((pg->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(pg, PGA_EXECUTABLE); moea64_syncicache(mmu, pm, PVO_VADDR(pvo), pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, PAGE_SIZE); } /* * Update vm about the REF/CHG bits if the page is managed and we have * removed write access. 
 */
	if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED &&
	    (oldlo & LPTE_PP) != LPTE_BR && !(prot & VM_PROT_WRITE)) {
		if (pg != NULL) {
			if (pvo->pvo_pte.lpte.pte_lo & LPTE_CHG)
				vm_page_dirty(pg);
			if (pvo->pvo_pte.lpte.pte_lo & LPTE_REF)
				vm_page_aflag_set(pg, PGA_REFERENCED);
		}
	}
}

void
moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva,
    vm_prot_t prot)
{
	struct	pvo_entry *pvo, *tpvo, key;

	CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
	    sva, eva, prot);

	KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
	    ("moea64_protect: non current pmap"));

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		moea64_remove(mmu, pm, sva, eva);
		return;
	}

	LOCK_TABLE_RD();
	PMAP_LOCK(pm);
	key.pvo_vaddr = sva;
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
		moea64_pvo_protect(mmu, pm, pvo, prot);
	}
	UNLOCK_TABLE_RD();
	PMAP_UNLOCK(pm);
}

/*
 * Map a list of wired pages into kernel virtual address space.  This is
 * intended for temporary mappings which do not need page modification or
 * references recorded.  Existing mappings in the region are overwritten.
 */
void
moea64_qenter(mmu_t mmu, vm_offset_t va, vm_page_t *m, int count)
{
	while (count-- > 0) {
		moea64_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
}

/*
 * Remove page mappings from kernel virtual address space.  Intended for
 * temporary mappings entered by moea64_qenter.
 */
void
moea64_qremove(mmu_t mmu, vm_offset_t va, int count)
{
	while (count-- > 0) {
		moea64_kremove(mmu, va);
		va += PAGE_SIZE;
	}
}

void
moea64_release_vsid(uint64_t vsid)
{
	int idx, mask;

	mtx_lock(&moea64_slb_mutex);
	idx = vsid & (NVSIDS-1);
	mask = 1 << (idx % VSID_NBPW);
	idx /= VSID_NBPW;
	KASSERT(moea64_vsid_bitmap[idx] & mask,
	    ("Freeing unallocated VSID %#jx", vsid));
	moea64_vsid_bitmap[idx] &= ~mask;
	mtx_unlock(&moea64_slb_mutex);
}

void
moea64_release(mmu_t mmu, pmap_t pmap)
{

	/*
	 * Free segment registers' VSIDs
	 */
#ifdef __powerpc64__
	slb_free_tree(pmap);
	slb_free_user_cache(pmap->pm_slb);
#else
	KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));

	moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
#endif

	PMAP_LOCK_DESTROY(pmap);
}

/*
 * Remove all pages mapped by the specified pmap
 */
void
moea64_remove_pages(mmu_t mmu, pmap_t pm)
{
	struct	pvo_entry *pvo, *tpvo;

	LOCK_TABLE_WR();
	PMAP_LOCK(pm);
	RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
		if (!(pvo->pvo_vaddr & PVO_WIRED))
			moea64_pvo_remove(mmu, pvo);
	}
	UNLOCK_TABLE_WR();
	PMAP_UNLOCK(pm);
}

/*
 * Remove the given range of addresses from the specified map.
 */
void
moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
	struct	pvo_entry *pvo, *tpvo, key;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pm->pm_stats.resident_count == 0)
		return;

	LOCK_TABLE_WR();
	PMAP_LOCK(pm);
	key.pvo_vaddr = sva;
	for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
	    pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
		tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
		moea64_pvo_remove(mmu, pvo);
	}
	UNLOCK_TABLE_WR();
	PMAP_UNLOCK(pm);
}

/*
 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
 * will reflect changes in pte's back to the vm_page.
*/ void moea64_remove_all(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo, *next_pvo; pmap_t pmap; LOCK_TABLE_WR(); LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); moea64_pvo_remove(mmu, pvo); PMAP_UNLOCK(pmap); } UNLOCK_TABLE_WR(); if ((m->aflags & PGA_WRITEABLE) && moea64_is_modified(mmu, m)) vm_page_dirty(m); vm_page_aflag_clear(m, PGA_WRITEABLE); vm_page_aflag_clear(m, PGA_EXECUTABLE); } /* * Allocate a physical page of memory directly from the phys_avail map. * Can only be called from moea64_bootstrap before avail start and end are * calculated. */ vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; int i, j; size = round_page(size); for (i = 0; phys_avail[i + 1] != 0; i += 2) { if (align != 0) s = (phys_avail[i] + align - 1) & ~(align - 1); else s = phys_avail[i]; e = s + size; if (s < phys_avail[i] || e > phys_avail[i + 1]) continue; if (s + size > platform_real_maxaddr()) continue; if (s == phys_avail[i]) { phys_avail[i] += size; } else if (e == phys_avail[i + 1]) { phys_avail[i + 1] -= size; } else { for (j = phys_avail_count * 2; j > i; j -= 2) { phys_avail[j] = phys_avail[j - 2]; phys_avail[j + 1] = phys_avail[j - 1]; } phys_avail[i + 3] = phys_avail[i + 1]; phys_avail[i + 1] = s; phys_avail[i + 2] = e; phys_avail_count++; } return (s); } panic("moea64_bootstrap_alloc: could not allocate memory"); } static int moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa, uint64_t pte_lo, int flags) { struct pvo_entry *pvo; uint64_t vsid; int first; u_int ptegidx; int i; int bootstrap; /* * One nasty thing that can happen here is that the UMA calls to * allocate new PVOs need to map more memory, which calls pvo_enter(), * which calls UMA... * * We break the loop by detecting recursion and allocating out of * the bootstrap pool. */ first = 0; bootstrap = (flags & PVO_BOOTSTRAP); if (!moea64_initialized) bootstrap = 1; PMAP_LOCK_ASSERT(pm, MA_OWNED); rw_assert(&moea64_table_lock, RA_WLOCKED); /* * Compute the PTE Group index. */ va &= ~ADDR_POFF; vsid = va_to_vsid(pm, va); ptegidx = va_to_pteg(vsid, va, flags & PVO_LARGE); /* * Remove any existing mapping for this page. Reuse the pvo entry if * there is a mapping. */ moea64_pvo_enter_calls++; LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) { if (pvo->pvo_pmap == pm && PVO_VADDR(pvo) == va) { if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa && (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC | LPTE_PP)) == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) { if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) { /* Re-insert if spilled */ i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) PVO_PTEGIDX_SET(pvo, i); moea64_pte_overflow--; } return (0); } moea64_pvo_remove(mmu, pvo); break; } } /* * If we aren't overwriting a mapping, try to allocate. 
*/ if (bootstrap) { if (moea64_bpvo_pool_index >= BPVO_POOL_SIZE) { panic("moea64_enter: bpvo pool exhausted, %d, %d, %zd", moea64_bpvo_pool_index, BPVO_POOL_SIZE, BPVO_POOL_SIZE * sizeof(struct pvo_entry)); } pvo = &moea64_bpvo_pool[moea64_bpvo_pool_index]; moea64_bpvo_pool_index++; bootstrap = 1; } else { pvo = uma_zalloc(zone, M_NOWAIT); } if (pvo == NULL) return (ENOMEM); moea64_pvo_entries++; pvo->pvo_vaddr = va; pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT) | (vsid << 16); pvo->pvo_pmap = pm; LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink); pvo->pvo_vaddr &= ~ADDR_POFF; if (flags & PVO_WIRED) pvo->pvo_vaddr |= PVO_WIRED; if (pvo_head != NULL) pvo->pvo_vaddr |= PVO_MANAGED; if (bootstrap) pvo->pvo_vaddr |= PVO_BOOTSTRAP; if (flags & PVO_LARGE) pvo->pvo_vaddr |= PVO_LARGE; moea64_pte_create(&pvo->pvo_pte.lpte, vsid, va, (uint64_t)(pa) | pte_lo, flags); /* * Add to pmap list */ RB_INSERT(pvo_tree, &pm->pmap_pvo, pvo); /* * Remember if the list was empty and therefore will be the first * item. */ if (pvo_head != NULL) { if (LIST_FIRST(pvo_head) == NULL) first = 1; LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink); } if (pvo->pvo_vaddr & PVO_WIRED) { pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; pm->pm_stats.wired_count++; } pm->pm_stats.resident_count++; /* * We hope this succeeds but it isn't required. */ i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) { PVO_PTEGIDX_SET(pvo, i); } else { panic("moea64_pvo_enter: overflow"); moea64_pte_overflow++; } if (pm == kernel_pmap) isync(); #ifdef __powerpc64__ /* * Make sure all our bootstrap mappings are in the SLB as soon * as virtual memory is switched on. */ if (!pmap_bootstrapped) moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE); #endif return (first ? ENOENT : 0); } static void moea64_pvo_remove(mmu_t mmu, struct pvo_entry *pvo) { struct vm_page *pg; uintptr_t pt; PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED); rw_assert(&moea64_table_lock, RA_WLOCKED); /* * If there is an active pte entry, we need to deactivate it (and * save the ref & cfg bits). */ pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_UNSET(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); PVO_PTEGIDX_CLR(pvo); } else { moea64_pte_overflow--; } /* * Update our statistics. */ pvo->pvo_pmap->pm_stats.resident_count--; if (pvo->pvo_vaddr & PVO_WIRED) pvo->pvo_pmap->pm_stats.wired_count--; /* * Remove this PVO from the pmap list. */ RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo); /* * Remove this from the overflow list and return it to the pool * if we aren't going to reuse it. */ LIST_REMOVE(pvo, pvo_olink); /* * Update vm about the REF/CHG bits if the page is managed. */ pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); if ((pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED && pg != NULL) { LIST_REMOVE(pvo, pvo_vlink); if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { if (pvo->pvo_pte.lpte.pte_lo & LPTE_CHG) vm_page_dirty(pg); if (pvo->pvo_pte.lpte.pte_lo & LPTE_REF) vm_page_aflag_set(pg, PGA_REFERENCED); if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_WRITEABLE); } if (LIST_EMPTY(vm_page_to_pvoh(pg))) vm_page_aflag_clear(pg, PGA_EXECUTABLE); } moea64_pvo_entries--; moea64_pvo_remove_calls++; if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP)) uma_zfree((pvo->pvo_vaddr & PVO_MANAGED) ? 
moea64_mpvo_zone : moea64_upvo_zone, pvo); } static struct pvo_entry * moea64_pvo_find_va(pmap_t pm, vm_offset_t va) { struct pvo_entry key; key.pvo_vaddr = va & ~ADDR_POFF; return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key)); } static boolean_t moea64_query_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { struct pvo_entry *pvo; uintptr_t pt; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if we saved the bit off. If so, return success. */ if (pvo->pvo_pte.lpte.pte_lo & ptebit) { UNLOCK_TABLE_RD(); return (TRUE); } } /* * No luck, now go through the hard part of looking at the PTEs * themselves. Sync so that any pending REF/CHG bits are flushed to * the PTEs. */ powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { /* * See if this pvo has a valid PTE. if so, fetch the * REF/CHG bits from the valid PTE. If the appropriate * ptebit is set, return success. */ PMAP_LOCK(pvo->pvo_pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { PMAP_UNLOCK(pvo->pvo_pmap); UNLOCK_TABLE_RD(); return (TRUE); } } PMAP_UNLOCK(pvo->pvo_pmap); } UNLOCK_TABLE_RD(); return (FALSE); } static u_int moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; uintptr_t pt; /* * Sync so that any pending REF/CHG bits are flushed to the PTEs (so * we can reset the right ones). note that since the pvo entries and * list heads are accessed via BAT0 and are never placed in the page * table, we don't have to worry about further accesses setting the * REF/CHG bits. */ powerpc_sync(); /* * For each pvo entry, clear the pvo's ptebit. If this pvo has a * valid pte clear the ptebit from the valid pte. */ count = 0; LOCK_TABLE_RD(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { PMAP_LOCK(pvo->pvo_pmap); pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (pt != -1) { MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { count++; MOEA64_PTE_CLEAR(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn, ptebit); } } pvo->pvo_pte.lpte.pte_lo &= ~ptebit; PMAP_UNLOCK(pvo->pvo_pmap); } UNLOCK_TABLE_RD(); return (count); } boolean_t moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { struct pvo_entry *pvo, key; vm_offset_t ppa; int error = 0; PMAP_LOCK(kernel_pmap); key.pvo_vaddr = ppa = pa & ~ADDR_POFF; for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key); ppa < pa + size; ppa += PAGE_SIZE, pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) { if (pvo == NULL || (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) != ppa) { error = EFAULT; break; } } PMAP_UNLOCK(kernel_pmap); return (error); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
*/ void * moea64_mapdev_attr(mmu_t mmu, vm_offset_t pa, vm_size_t size, vm_memattr_t ma) { vm_offset_t va, tmpva, ppa, offset; ppa = trunc_page(pa); offset = pa & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); va = kmem_alloc_nofault(kernel_map, size); if (!va) panic("moea64_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { moea64_kenter_attr(mmu, tmpva, ppa, ma); size -= PAGE_SIZE; tmpva += PAGE_SIZE; ppa += PAGE_SIZE; } return ((void *)(va + offset)); } void * moea64_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) { return moea64_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT); } void moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) { vm_offset_t base, offset; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup2(offset + size, PAGE_SIZE); kmem_free(kernel_map, base, size); } void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; vm_offset_t lim; vm_paddr_t pa; vm_size_t len; PMAP_LOCK(pm); while (sz > 0) { lim = round_page(va); len = MIN(lim - va, sz); pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); if (pvo != NULL && !(pvo->pvo_pte.lpte.pte_lo & LPTE_I)) { pa = (pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) | (va & ADDR_POFF); moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; } PMAP_UNLOCK(pm); } diff --git a/sys/powerpc/aim/moea64_native.c b/sys/powerpc/aim/moea64_native.c index 3da2f5ad3f9e..de0d4ba250fe 100644 --- a/sys/powerpc/aim/moea64_native.c +++ b/sys/powerpc/aim/moea64_native.c @@ -1,634 +1,633 @@ /*- * Copyright (c) 2001 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Matt Thomas of Allegro Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ */ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Native 64-bit page table operations for running without a hypervisor. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include "mmu_oea64.h" #include "mmu_if.h" #include "moea64_if.h" #define PTESYNC() __asm __volatile("ptesync"); #define TLBSYNC() __asm __volatile("tlbsync; ptesync"); #define SYNC() __asm __volatile("sync"); #define EIEIO() __asm __volatile("eieio"); #define VSID_HASH_MASK 0x0000007fffffffffULL static __inline void TLBIE(uint64_t vpn) { #ifndef __powerpc64__ register_t vpn_hi, vpn_lo; register_t msr; register_t scratch, intr; #endif static volatile u_int tlbie_lock = 0; vpn <<= ADDR_PIDX_SHFT; vpn &= ~(0xffffULL << 48); /* Hobo spinlock: we need stronger guarantees than mutexes provide */ while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); isync(); /* Flush instruction queue once lock acquired */ #ifdef __powerpc64__ __asm __volatile("tlbie %0" :: "r"(vpn) : "memory"); __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); #else vpn_hi = (uint32_t)(vpn >> 32); vpn_lo = (uint32_t)vpn; intr = intr_disable(); __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%5,1,0; \ mtmsrd %1; isync; \ \ sld %1,%2,%4; \ or %1,%1,%3; \ tlbie %1; \ \ mtmsrd %0; isync; \ eieio; \ tlbsync; \ ptesync;" : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) : "memory"); intr_restore(intr); #endif /* No barriers or special ops -- taken care of by ptesync above */ tlbie_lock = 0; } #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) #define ENABLE_TRANS(msr) mtmsr(msr) /* * PTEG data. */ static struct lpteg *moea64_pteg_table; /* * PTE calls. */ static int moea64_pte_insert_native(mmu_t, u_int, struct lpte *); static uintptr_t moea64_pvo_to_pte_native(mmu_t, const struct pvo_entry *); static void moea64_pte_synch_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt); static void moea64_pte_clear_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit); static void moea64_pte_change_native(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static void moea64_pte_unset_native(mmu_t mmu, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); /* * Utility routines. */ static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void moea64_cpu_bootstrap_native(mmu_t, int ap); static void tlbia(void); static mmu_method_t moea64_native_methods[] = { /* Internal interfaces */ MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), MMUMETHOD(moea64_pte_change, moea64_pte_change_native), MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), MMUMETHOD(moea64_pvo_to_pte, moea64_pvo_to_pte_native), { 0, 0 } }; MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, 0, oea64_mmu); static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } static void moea64_pte_synch_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt) { struct lpte *pt = (struct lpte *)pt_cookie; pvo_pt->pte_lo |= pt->pte_lo & (LPTE_REF | LPTE_CHG); } static void moea64_pte_clear_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit) { struct lpte *pt = (struct lpte *)pt_cookie; /* * As shown in Section 7.6.3.2.3 */ pt->pte_lo &= ~ptebit; critical_enter(); TLBIE(vpn); critical_exit(); } static void moea64_pte_set_native(struct lpte *pt, struct lpte *pvo_pt) { pvo_pt->pte_hi |= LPTE_VALID; /* * Update the PTE as defined in section 7.6.3.1. * Note that the REF/CHG bits are from pvo_pt and thus should have * been saved so this routine can restore them (if desired). */ pt->pte_lo = pvo_pt->pte_lo; EIEIO(); pt->pte_hi = pvo_pt->pte_hi; PTESYNC(); /* Keep statistics for unlocked pages */ if (!(pvo_pt->pte_hi & LPTE_LOCKED)) moea64_pte_valid++; } static void moea64_pte_unset_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, uint64_t vpn) { struct lpte *pt = (struct lpte *)pt_cookie; /* * Invalidate the pte. */ isync(); critical_enter(); pvo_pt->pte_hi &= ~LPTE_VALID; pt->pte_hi &= ~LPTE_VALID; PTESYNC(); TLBIE(vpn); critical_exit(); /* * Save the reg & chg bits. */ moea64_pte_synch_native(mmu, pt_cookie, pvo_pt); /* Keep statistics for unlocked pages */ if (!(pvo_pt->pte_hi & LPTE_LOCKED)) moea64_pte_valid--; } static void moea64_pte_change_native(mmu_t mmu, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn) { /* * Invalidate the PTE */ moea64_pte_unset_native(mmu, pt, pvo_pt, vpn); moea64_pte_set_native((struct lpte *)pt, pvo_pt); } static void moea64_cpu_bootstrap_native(mmu_t mmup, int ap) { int i = 0; #ifdef __powerpc64__ struct slb *slb = PCPU_GET(slb); register_t seg0; #endif /* * Initialize segment registers and MMU */ mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Install kernel SLB entries */ #ifdef __powerpc64__ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } #else for (i = 0; i < 16; i++) mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); #endif /* * Install page table */ __asm __volatile ("ptesync; mtsdr1 %0; isync" :: "r"((uintptr_t)moea64_pteg_table | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); tlbia(); } static void moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { vm_size_t size; vm_offset_t off; vm_paddr_t pa; register_t msr; moea64_early_bootstrap(mmup, kernelstart, kernelend); /* * Allocate PTEG table. */ size = moea64_pteg_count * sizeof(struct lpteg); CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", moea64_pteg_count, size); /* * We now need to allocate memory. This memory, to be allocated, * has to reside in a page table. The page table we are about to * allocate. We don't have BAT. So drop to data real mode for a minute * as a measure of last resort. We do this a couple times. 
*/ moea64_pteg_table = (struct lpteg *)moea64_bootstrap_alloc(size, size); DISABLE_TRANS(msr); bzero((void *)moea64_pteg_table, moea64_pteg_count * sizeof(struct lpteg)); ENABLE_TRANS(msr); CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); moea64_mid_bootstrap(mmup, kernelstart, kernelend); /* * Add a mapping for the page table itself if there is no direct map. */ if (!hw_direct_map) { size = moea64_pteg_count * sizeof(struct lpteg); off = (vm_offset_t)(moea64_pteg_table); DISABLE_TRANS(msr); for (pa = off; pa < off + size; pa += PAGE_SIZE) pmap_kenter(pa, pa); ENABLE_TRANS(msr); } /* Bring up virtual memory */ moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void tlbia(void) { vm_offset_t i; #ifndef __powerpc64__ register_t msr, scratch; #endif TLBSYNC(); for (i = 0; i < 0xFF000; i += 0x00001000) { #ifdef __powerpc64__ __asm __volatile("tlbiel %0" :: "r"(i)); #else __asm __volatile("\ mfmsr %0; \ mr %1, %0; \ insrdi %1,%3,1,0; \ mtmsrd %1; \ isync; \ \ tlbiel %2; \ \ mtmsrd %0; \ isync;" : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); #endif } EIEIO(); TLBSYNC(); } static uintptr_t moea64_pvo_to_pte_native(mmu_t mmu, const struct pvo_entry *pvo) { struct lpte *pt; int pteidx, ptegidx; uint64_t vsid; /* If the PTEG index is not set, then there is no page table entry */ if (!PVO_PTEGIDX_ISSET(pvo)) return (-1); /* * Calculate the ptegidx */ vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pvo_vaddr and by * noticing the HID bit. */ if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; pteidx = (ptegidx << 3) | PVO_PTEGIDX_GET(pvo); if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && !PVO_PTEGIDX_ISSET(pvo)) { panic("moea64_pvo_to_pte: pvo %p has valid pte in pvo but no " "valid pte index", pvo); } if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0 && PVO_PTEGIDX_ISSET(pvo)) { panic("moea64_pvo_to_pte: pvo %p has valid pte index in pvo " "pvo but no valid pte", pvo); } pt = &moea64_pteg_table[pteidx >> 3].pt[pteidx & 7]; if ((pt->pte_hi ^ (pvo->pvo_pte.lpte.pte_hi & ~LPTE_VALID)) == LPTE_VALID) { if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0) { panic("moea64_pvo_to_pte: pvo %p has valid pte in " "moea64_pteg_table %p but invalid in pvo", pvo, pt); } if (((pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo) & ~(LPTE_M|LPTE_CHG|LPTE_REF)) != 0) { panic("moea64_pvo_to_pte: pvo %p pte does not match " "pte %p in moea64_pteg_table difference is %#x", pvo, pt, (uint32_t)(pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo)); } return ((uintptr_t)pt); } if (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) { panic("moea64_pvo_to_pte: pvo %p has invalid pte %p in " "moea64_pteg_table but valid in pvo", pvo, pt); } return (-1); } static __inline int moea64_pte_spillable_ident(u_int ptegidx) { struct lpte *pt; int i, j, k; /* Start at a random slot */ i = mftb() % 8; k = -1; for (j = 0; j < 8; j++) { pt = &moea64_pteg_table[ptegidx].pt[(i + j) % 8]; if (pt->pte_hi & (LPTE_LOCKED | LPTE_WIRED)) continue; /* This is a candidate, so remember it */ k = (i + j) % 8; /* Try to get a page that has not been used lately */ if (!(pt->pte_lo & LPTE_REF)) return (k); } return (k); } static int moea64_pte_insert_native(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt) { struct lpte *pt; struct pvo_entry *pvo; u_int pteg_bktidx; int i; /* * First try primary hash. 
*/ pteg_bktidx = ptegidx; for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { pvo_pt->pte_hi &= ~LPTE_HID; moea64_pte_set_native(pt, pvo_pt); return (i); } } /* * Now try secondary hash. */ pteg_bktidx ^= moea64_pteg_mask; for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { pvo_pt->pte_hi |= LPTE_HID; moea64_pte_set_native(pt, pvo_pt); return (i); } } /* * Out of luck. Find a PTE to sacrifice. */ pteg_bktidx = ptegidx; i = moea64_pte_spillable_ident(pteg_bktidx); if (i < 0) { pteg_bktidx ^= moea64_pteg_mask; i = moea64_pte_spillable_ident(pteg_bktidx); } if (i < 0) { /* No freeable slots in either PTEG? We're hosed. */ panic("moea64_pte_insert: overflow"); return (-1); } if (pteg_bktidx == ptegidx) pvo_pt->pte_hi &= ~LPTE_HID; else pvo_pt->pte_hi |= LPTE_HID; /* * Synchronize the sacrifice PTE with its PVO, then mark both * invalid. The PVO will be reused when/if the VM system comes * here after a fault. */ pt = &moea64_pteg_table[pteg_bktidx].pt[i]; if (pt->pte_hi & LPTE_HID) pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) { if (pvo->pvo_pte.lpte.pte_hi == pt->pte_hi) { KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, ("Invalid PVO for valid PTE!")); moea64_pte_unset_native(mmu, (uintptr_t)pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); PVO_PTEGIDX_CLR(pvo); moea64_pte_overflow++; break; } } KASSERT(pvo->pvo_pte.lpte.pte_hi == pt->pte_hi, ("Unable to find PVO for spilled PTE")); /* * Set the new PTE. */ moea64_pte_set_native(pt, pvo_pt); return (i); } diff --git a/sys/powerpc/ps3/mmu_ps3.c b/sys/powerpc/ps3/mmu_ps3.c index 5a7fe93af3bc..dfca9a7d7725 100644 --- a/sys/powerpc/ps3/mmu_ps3.c +++ b/sys/powerpc/ps3/mmu_ps3.c @@ -1,311 +1,310 @@ /*- * Copyright (C) 2010 Nathan Whitehorn * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include "mmu_if.h" #include "moea64_if.h" #include "ps3-hvcall.h" #define VSID_HASH_MASK 0x0000007fffffffffUL #define PTESYNC() __asm __volatile("ptesync") extern int ps3fb_remap(void); static uint64_t mps3_vas_id; /* * Kernel MMU interface */ static void mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend); static void mps3_cpu_bootstrap(mmu_t mmup, int ap); static void mps3_pte_synch(mmu_t, uintptr_t pt, struct lpte *pvo_pt); static void mps3_pte_clear(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit); static void mps3_pte_unset(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static void mps3_pte_change(mmu_t, uintptr_t pt, struct lpte *pvo_pt, uint64_t vpn); static int mps3_pte_insert(mmu_t, u_int ptegidx, struct lpte *pvo_pt); static uintptr_t mps3_pvo_to_pte(mmu_t, const struct pvo_entry *pvo); static mmu_method_t mps3_methods[] = { MMUMETHOD(mmu_bootstrap, mps3_bootstrap), MMUMETHOD(mmu_cpu_bootstrap, mps3_cpu_bootstrap), MMUMETHOD(moea64_pte_synch, mps3_pte_synch), MMUMETHOD(moea64_pte_clear, mps3_pte_clear), MMUMETHOD(moea64_pte_unset, mps3_pte_unset), MMUMETHOD(moea64_pte_change, mps3_pte_change), MMUMETHOD(moea64_pte_insert, mps3_pte_insert), MMUMETHOD(moea64_pvo_to_pte, mps3_pvo_to_pte), { 0, 0 } }; MMU_DEF_INHERIT(ps3_mmu, "mmu_ps3", mps3_methods, 0, oea64_mmu); static void mps3_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { uint64_t final_pteg_count; moea64_early_bootstrap(mmup, kernelstart, kernelend); lv1_construct_virtual_address_space( 20 /* log_2(moea64_pteg_count) */, 2 /* n page sizes */, (24UL << 56) | (16UL << 48) /* page sizes 16 MB + 64 KB */, &mps3_vas_id, &final_pteg_count ); moea64_pteg_count = final_pteg_count / sizeof(struct lpteg); moea64_mid_bootstrap(mmup, kernelstart, kernelend); moea64_late_bootstrap(mmup, kernelstart, kernelend); } static void mps3_cpu_bootstrap(mmu_t mmup, int ap) { struct slb *slb = PCPU_GET(slb); register_t seg0; int i; mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); /* * Destroy the loader's address space if we are coming up for * the first time, and redo the FB mapping so we can continue * having a console. 
*/ if (!ap) lv1_destruct_virtual_address_space(0); lv1_select_virtual_address_space(mps3_vas_id); if (!ap) ps3fb_remap(); /* * Install kernel SLB entries */ __asm __volatile ("slbia"); __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); for (i = 0; i < 64; i++) { if (!(slb[i].slbe & SLBE_VALID)) continue; __asm __volatile ("slbmte %0, %1" :: "r"(slb[i].slbv), "r"(slb[i].slbe)); } } static void mps3_pte_synch(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt) { uint64_t halfbucket[4], rcbits; PTESYNC(); lv1_read_htab_entries(mps3_vas_id, slot & ~0x3UL, &halfbucket[0], &halfbucket[1], &halfbucket[2], &halfbucket[3], &rcbits); /* * rcbits contains the low 12 bits of each PTEs 2nd part, * spaced at 16-bit intervals */ KASSERT((halfbucket[slot & 0x3] & LPTE_AVPN_MASK) == (pvo_pt->pte_hi & LPTE_AVPN_MASK), ("PTE upper word %#lx != %#lx\n", halfbucket[slot & 0x3], pvo_pt->pte_hi)); pvo_pt->pte_lo |= (rcbits >> ((3 - (slot & 0x3))*16)) & (LPTE_CHG | LPTE_REF); } static void mps3_pte_clear(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn, u_int64_t ptebit) { lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi, pvo_pt->pte_lo & ~ptebit); } static void mps3_pte_unset(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn) { mps3_pte_synch(mmu, slot, pvo_pt); pvo_pt->pte_hi &= ~LPTE_VALID; lv1_write_htab_entry(mps3_vas_id, slot, 0, 0); moea64_pte_valid--; } static void mps3_pte_change(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn) { mps3_pte_synch(mmu, slot, pvo_pt); lv1_write_htab_entry(mps3_vas_id, slot, pvo_pt->pte_hi, pvo_pt->pte_lo); } static int mps3_pte_insert(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt) { int result; struct lpte evicted; struct pvo_entry *pvo; uint64_t index; pvo_pt->pte_hi |= LPTE_VALID; pvo_pt->pte_hi &= ~LPTE_HID; evicted.pte_hi = 0; PTESYNC(); result = lv1_insert_htab_entry(mps3_vas_id, ptegidx << 3, pvo_pt->pte_hi, pvo_pt->pte_lo, LPTE_LOCKED | LPTE_WIRED, 0, &index, &evicted.pte_hi, &evicted.pte_lo); if (result != 0) { /* No freeable slots in either PTEG? We're hosed. */ panic("mps3_pte_insert: overflow (%d)", result); return (-1); } /* * See where we ended up. */ if (index >> 3 != ptegidx) pvo_pt->pte_hi |= LPTE_HID; moea64_pte_valid++; if (!evicted.pte_hi) return (index & 0x7); /* * Synchronize the sacrifice PTE with its PVO, then mark both * invalid. The PVO will be reused when/if the VM system comes * here after a fault. */ ptegidx = index >> 3; /* Where the sacrifice PTE was found */ if (evicted.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ KASSERT((evicted.pte_hi & (LPTE_WIRED | LPTE_LOCKED)) == 0, ("Evicted a wired PTE")); result = 0; LIST_FOREACH(pvo, &moea64_pvo_table[ptegidx], pvo_olink) { if (!PVO_PTEGIDX_ISSET(pvo)) continue; if (pvo->pvo_pte.lpte.pte_hi == (evicted.pte_hi | LPTE_VALID)) { KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, ("Invalid PVO for valid PTE!")); pvo->pvo_pte.lpte.pte_hi &= ~LPTE_VALID; pvo->pvo_pte.lpte.pte_lo |= evicted.pte_lo & (LPTE_REF | LPTE_CHG); PVO_PTEGIDX_CLR(pvo); moea64_pte_valid--; moea64_pte_overflow++; result = 1; break; } } KASSERT(result == 1, ("PVO for sacrifice PTE not found")); return (index & 0x7); } static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) { uint64_t hash; int shift; shift = large ? 
moea64_large_page_shift : ADDR_PIDX_SHFT; hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> shift); return (hash & moea64_pteg_mask); } uintptr_t mps3_pvo_to_pte(mmu_t mmu, const struct pvo_entry *pvo) { uint64_t vsid; u_int ptegidx; /* If the PTEG index is not set, then there is no page table entry */ if (!PVO_PTEGIDX_ISSET(pvo)) return (-1); vsid = PVO_VSID(pvo); ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); /* * We can find the actual pte entry without searching by grabbing * the PTEG index from 3 unused bits in pvo_vaddr and by * noticing the HID bit. */ if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) ptegidx ^= moea64_pteg_mask; return ((ptegidx << 3) | PVO_PTEGIDX_GET(pvo)); } diff --git a/sys/sparc64/sparc64/tsb.c b/sys/sparc64/sparc64/tsb.c index 403043c64036..c38d50e2b6b7 100644 --- a/sys/sparc64/sparc64/tsb.c +++ b/sys/sparc64/sparc64/tsb.c @@ -1,229 +1,228 @@ /*- * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from BSDI: pmap.c,v 1.28.2.15 2000/04/27 03:10:31 cp Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include CTASSERT((1 << TTE_SHIFT) == sizeof(struct tte)); CTASSERT(TSB_BUCKET_MASK < (1 << 12)); PMAP_STATS_VAR(tsb_nrepl); PMAP_STATS_VAR(tsb_nlookup_k); PMAP_STATS_VAR(tsb_nlookup_u); PMAP_STATS_VAR(tsb_nenter_k); PMAP_STATS_VAR(tsb_nenter_k_oc); PMAP_STATS_VAR(tsb_nenter_u); PMAP_STATS_VAR(tsb_nenter_u_oc); PMAP_STATS_VAR(tsb_nforeach); struct tte *tsb_kernel; vm_size_t tsb_kernel_mask; vm_size_t tsb_kernel_size; vm_paddr_t tsb_kernel_phys; u_int tsb_kernel_ldd_phys; struct tte * tsb_tte_lookup(pmap_t pm, vm_offset_t va) { struct tte *bucket; struct tte *tp; u_long sz; u_int i; if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nlookup_k); tp = tsb_kvtotte(va); if (tte_match(tp, va)) return (tp); } else { PMAP_LOCK_ASSERT(pm, MA_OWNED); PMAP_STATS_INC(tsb_nlookup_u); for (sz = TS_MIN; sz <= TS_MAX; sz++) { bucket = tsb_vtobucket(pm, sz, va); for (i = 0; i < TSB_BUCKET_SIZE; i++) { tp = &bucket[i]; if (tte_match(tp, va)) return (tp); } } } return (NULL); } struct tte * tsb_tte_enter(pmap_t pm, vm_page_t m, vm_offset_t va, u_long sz, u_long data) { struct tte *bucket; struct tte *rtp; struct tte *tp; vm_offset_t ova; int b0; int i; if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) { CTR5(KTR_SPARE2, "tsb_tte_enter: off colour va=%#lx pa=%#lx o=%p ot=%d pi=%#lx", va, VM_PAGE_TO_PHYS(m), m->object, m->object ? m->object->type : -1, m->pindex); if (pm == kernel_pmap) PMAP_STATS_INC(tsb_nenter_k_oc); else PMAP_STATS_INC(tsb_nenter_u_oc); } rw_assert(&tte_list_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pm, MA_OWNED); if (pm == kernel_pmap) { PMAP_STATS_INC(tsb_nenter_k); tp = tsb_kvtotte(va); KASSERT((tp->tte_data & TD_V) == 0, ("tsb_tte_enter: replacing valid kernel mapping")); goto enter; } PMAP_STATS_INC(tsb_nenter_u); bucket = tsb_vtobucket(pm, sz, va); tp = NULL; rtp = NULL; b0 = rd(tick) & (TSB_BUCKET_SIZE - 1); i = b0; do { if ((bucket[i].tte_data & TD_V) == 0) { tp = &bucket[i]; break; } if (tp == NULL) { if ((bucket[i].tte_data & TD_REF) == 0) tp = &bucket[i]; else if (rtp == NULL) rtp = &bucket[i]; } } while ((i = (i + 1) & (TSB_BUCKET_SIZE - 1)) != b0); if (tp == NULL) tp = rtp; if ((tp->tte_data & TD_V) != 0) { PMAP_STATS_INC(tsb_nrepl); ova = TTE_GET_VA(tp); pmap_remove_tte(pm, NULL, tp, ova); tlb_page_demap(pm, ova); } enter: if ((m->flags & PG_FICTITIOUS) == 0) { data |= TD_CP; if ((m->oflags & VPO_UNMANAGED) == 0) { pm->pm_stats.resident_count++; data |= TD_PV; } if (pmap_cache_enter(m, va) != 0) data |= TD_CV; TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link); } else data |= TD_FAKE | TD_E; tp->tte_vpn = TV_VPN(va, sz); tp->tte_data = data; return (tp); } /* * Traverse the tsb of a pmap, calling the callback function for any tte entry * that has a virtual address between start and end. If this function returns 0, * tsb_foreach() terminates. * This is used by pmap_remove(), pmap_protect(), and pmap_copy() in the case * that the number of pages in the range given to them reaches the * dimensions of the tsb size as an optimization. 
*/ void tsb_foreach(pmap_t pm1, pmap_t pm2, vm_offset_t start, vm_offset_t end, tsb_callback_t *callback) { vm_offset_t va; struct tte *tp; struct tte *tsbp; uintptr_t i; uintptr_t n; PMAP_STATS_INC(tsb_nforeach); if (pm1 == kernel_pmap) { tsbp = tsb_kernel; n = tsb_kernel_size / sizeof(struct tte); } else { tsbp = pm1->pm_tsb; n = TSB_SIZE; } for (i = 0; i < n; i++) { tp = &tsbp[i]; if ((tp->tte_data & TD_V) != 0) { va = TTE_GET_VA(tp); if (va >= start && va < end) { if (!callback(pm1, pm2, tp, va)) break; } } } }
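
The PTEG indexing shared by the native and PS3 back ends above (va_to_pteg(), and the ptegidx ^= moea64_pteg_mask secondary-hash step in moea64_pte_insert_native() and moea64_pvo_to_pte_native()) is easier to see outside the kernel. The sketch below restates only that arithmetic; the page-index field, shift, and PTEG mask are assumed example values rather than the kernel's, and the code is illustrative, not part of the change.

/*
 * Illustrative sketch (not part of the diff): how a (VSID, VA) pair hashes
 * to a primary PTEG index, and how the secondary PTEG is the primary index
 * XORed with the table mask.  EX_* constants are assumed example values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_VSID_HASH_MASK	0x0000007fffffffffULL	/* as in the diff */
#define EX_ADDR_PIDX		0x000000000ffff000ULL	/* page-index bits (assumed) */
#define EX_PAGE_SHIFT		12			/* 4 KB pages (assumed) */
#define EX_PTEG_MASK		0xfffff			/* 2^20 PTEGs - 1 (assumed) */

static uint64_t
ex_va_to_pteg(uint64_t vsid, uint64_t va)
{
	uint64_t hash;

	/* Fold the VSID with the virtual page index, as va_to_pteg() does. */
	hash = (vsid & EX_VSID_HASH_MASK) ^ ((va & EX_ADDR_PIDX) >> EX_PAGE_SHIFT);
	return (hash & EX_PTEG_MASK);
}

int
main(void)
{
	uint64_t vsid = 0x123456789ULL, va = 0xdeadb000ULL;
	uint64_t primary, secondary;

	primary = ex_va_to_pteg(vsid, va);
	/* The secondary bucket is the primary index complemented by the mask. */
	secondary = primary ^ EX_PTEG_MASK;
	printf("primary PTEG %#jx, secondary PTEG %#jx\n",
	    (uintmax_t)primary, (uintmax_t)secondary);
	return (0);
}

Because the secondary index is the primary XORed with the full table mask, the two buckets never coincide, which is why the insert path can simply toggle LPTE_HID and retry the other bucket when the first one is full.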
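
moea64_get_unique_vsid() and moea64_release_vsid() above use matching word/bit arithmetic on moea64_vsid_bitmap: the low five bits of the hash pick a bit and the remaining bits pick a 32-bit word. The user-space restatement below covers only that bookkeeping, with NVSIDS assumed small for the example; it is a sketch of the index math, not the kernel allocator (no locking, no entropy mixing, no rehash loop).

/*
 * Illustrative sketch (not part of the diff): the word/bit bookkeeping used
 * by the VSID bitmap.  EX_NVSIDS and EX_VSID_NBPW are assumed values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_NVSIDS	(1 << 16)	/* assumed number of VSIDs */
#define EX_VSID_NBPW	32		/* bits per bitmap word */

static uint32_t ex_vsid_bitmap[EX_NVSIDS / EX_VSID_NBPW];

/* Mark the VSID selected by 'hash' as allocated; return 0 on collision. */
static int
ex_vsid_alloc(uint32_t hash)
{
	uint32_t n, mask;

	hash &= EX_NVSIDS - 1;
	n = hash >> 5;				/* which 32-bit word */
	mask = 1u << (hash & (EX_VSID_NBPW - 1));	/* which bit within it */
	if (ex_vsid_bitmap[n] & mask)
		return (0);			/* already in use */
	ex_vsid_bitmap[n] |= mask;
	return (1);
}

/* Inverse operation, mirroring moea64_release_vsid(). */
static void
ex_vsid_release(uint32_t vsid)
{
	uint32_t idx, mask;

	idx = vsid & (EX_NVSIDS - 1);
	mask = 1u << (idx % EX_VSID_NBPW);
	idx /= EX_VSID_NBPW;
	ex_vsid_bitmap[idx] &= ~mask;
}

int
main(void)
{
	uint32_t hash = 0x1234;

	printf("alloc %#x: %d (bitmap word %u)\n", hash, ex_vsid_alloc(hash), hash >> 5);
	ex_vsid_release(hash);
	printf("alloc again after release: %d\n", ex_vsid_alloc(hash));
	return (0);
}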
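
moea64_bootstrap_alloc() above satisfies a request in one of three ways: trim the front of a phys_avail region, trim the back, or split the region in two and shuffle the rest of the array upward. The toy version below carves a range out of an invented region table to make the split case concrete; it is a sketch only and omits the real-memory limit check and the panic path.

/*
 * Illustrative sketch (not part of the diff): carving a [s, e) range out of
 * a phys_avail[]-style array of {start, end} pairs.  Region values invented.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_REGIONS	8
static uint64_t ex_avail[2 * EX_REGIONS + 2] = {
	0x00100000, 0x00800000,		/* one free region, 1 MB .. 8 MB */
	0, 0				/* terminator */
};
static int ex_avail_count = 1;

static uint64_t
ex_carve(uint64_t size, uint64_t align)
{
	uint64_t s, e;
	int i, j;

	for (i = 0; ex_avail[i + 1] != 0; i += 2) {
		s = align ? (ex_avail[i] + align - 1) & ~(align - 1) : ex_avail[i];
		e = s + size;
		if (s < ex_avail[i] || e > ex_avail[i + 1])
			continue;
		if (s == ex_avail[i])			/* trim the front */
			ex_avail[i] += size;
		else if (e == ex_avail[i + 1])		/* trim the back */
			ex_avail[i + 1] -= size;
		else {					/* split: shift array up */
			for (j = ex_avail_count * 2; j > i; j -= 2) {
				ex_avail[j] = ex_avail[j - 2];
				ex_avail[j + 1] = ex_avail[j - 1];
			}
			ex_avail[i + 3] = ex_avail[i + 1];
			ex_avail[i + 1] = s;
			ex_avail[i + 2] = e;
			ex_avail_count++;
		}
		return (s);
	}
	return (0);	/* the kernel panics here instead */
}

int
main(void)
{
	/* An aligned allocation in the middle splits the region in two. */
	uint64_t s = ex_carve(0x10000, 0x200000);

	printf("carved %#jx; regions now [%#jx,%#jx) and [%#jx,%#jx)\n",
	    (uintmax_t)s, (uintmax_t)ex_avail[0], (uintmax_t)ex_avail[1],
	    (uintmax_t)ex_avail[2], (uintmax_t)ex_avail[3]);
	return (0);
}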
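
tsb_tte_enter() above picks a bucket slot with a three-level preference: an invalid entry if any, otherwise a valid entry whose TD_REF bit is clear, otherwise the first referenced entry seen, with the scan starting at a pseudo-random offset so evictions spread across the bucket. The sketch below restates just that selection walk over a plain array of flag words; the bucket size and flag values are assumed for illustration and do not come from the sparc64 headers.

/*
 * Illustrative sketch (not part of the diff): the slot-selection policy of
 * tsb_tte_enter() over a plain array of flag words.  Bucket size and flag
 * bits are assumed example values.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_BUCKET_SIZE	4		/* assumed bucket size */
#define EX_TD_V		0x1		/* stand-in for TD_V (valid) */
#define EX_TD_REF	0x2		/* stand-in for TD_REF (referenced) */

static int
ex_pick_slot(const uint64_t bucket[EX_BUCKET_SIZE], int start)
{
	int i, slot, fallback;

	slot = -1;
	fallback = -1;
	i = start & (EX_BUCKET_SIZE - 1);
	do {
		if ((bucket[i] & EX_TD_V) == 0)
			return (i);		/* free slot: use it at once */
		if (slot == -1) {
			if ((bucket[i] & EX_TD_REF) == 0)
				slot = i;	/* valid but not recently used */
			else if (fallback == -1)
				fallback = i;	/* remember something evictable */
		}
		i = (i + 1) & (EX_BUCKET_SIZE - 1);
	} while (i != (start & (EX_BUCKET_SIZE - 1)));

	return (slot != -1 ? slot : fallback);
}

int
main(void)
{
	/* All slots valid; slot 2 has not been referenced recently. */
	uint64_t bucket[EX_BUCKET_SIZE] = {
		EX_TD_V | EX_TD_REF, EX_TD_V | EX_TD_REF,
		EX_TD_V, EX_TD_V | EX_TD_REF
	};

	printf("chosen slot: %d\n", ex_pick_slot(bucket, 1));
	return (0);
}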