Index: head/sys/arm/arm/machdep.c
===================================================================
--- head/sys/arm/arm/machdep.c	(revision 285626)
+++ head/sys/arm/arm/machdep.c	(revision 285627)
@@ -1,1700 +1,1701 @@
 /*	$NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $	*/
 
 /*-
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Mark Brinicombe
  *	for the NetBSD Project.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Machine dependent functions for kernel setup
  *
  * Created      : 17/09/94
  * Updated	: 18/04/01 updated for new wscons
  */
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
+#include "opt_kstack_pages.h"
 #include "opt_platform.h"
 #include "opt_sched.h"
 #include "opt_timer.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/efi.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 #include <machine/acle-compat.h>
 #include <machine/armreg.h>
 #include <machine/atags.h>
 #include <machine/cpu.h>
 #include <machine/cpuinfo.h>
 #include <machine/devmap.h>
 #include <machine/frame.h>
 #include <machine/intr.h>
 #include <machine/machdep.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/pcb.h>
 #include <machine/physmem.h>
 #include <machine/platform.h>
 #include <machine/reg.h>
 #include <machine/trap.h>
 #include <machine/undefined.h>
 #include <machine/vfp.h>
 #include <machine/vmparam.h>
 #include <machine/sysarch.h>
 
 #ifdef FDT
 #include <dev/fdt/fdt_common.h>
 #include <dev/ofw/openfirm.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 #if __ARM_ARCH >= 6
 #include <machine/cpu-v6.h>
 
 /*
  * DDB "show cp15" command: print, one per line, the values returned by
  * the CP15 identification, control, feature and TTB accessors.
  */
 DB_SHOW_COMMAND(cp15, db_show_cp15)
 {
 	u_int val;
 
 	/* Identification registers. */
 	val = cp15_midr_get();
 	db_printf("Cpu ID: 0x%08x\n", val);
 	val = cp15_ctr_get();
 	db_printf("Current Cache Lvl ID: 0x%08x\n", val);
 
 	/* Control registers. */
 	val = cp15_sctlr_get();
 	db_printf("Ctrl: 0x%08x\n", val);
 	val = cp15_actlr_get();
 	db_printf("Aux Ctrl: 0x%08x\n", val);
 
 	/* Processor, debug and auxiliary feature registers. */
 	val = cp15_id_pfr0_get();
 	db_printf("Processor Feat 0: 0x%08x\n", val);
 	val = cp15_id_pfr1_get();
 	db_printf("Processor Feat 1: 0x%08x\n", val);
 	val = cp15_id_dfr0_get();
 	db_printf("Debug Feat 0: 0x%08x\n", val);
 	val = cp15_id_afr0_get();
 	db_printf("Auxiliary Feat 0: 0x%08x\n", val);
 
 	/* Memory model feature registers. */
 	val = cp15_id_mmfr0_get();
 	db_printf("Memory Model Feat 0: 0x%08x\n", val);
 	val = cp15_id_mmfr1_get();
 	db_printf("Memory Model Feat 1: 0x%08x\n", val);
 	val = cp15_id_mmfr2_get();
 	db_printf("Memory Model Feat 2: 0x%08x\n", val);
 	val = cp15_id_mmfr3_get();
 	db_printf("Memory Model Feat 3: 0x%08x\n", val);
 
 	/* Translation table base. */
 	val = cp15_ttbr_get();
 	db_printf("TTB0: 0x%08x\n", val);
 }
 
 /*
  * DDB "show vtop <virt_addr>" command: feed the address to
  * cp15_ats1cpr_set() and print what cp15_par_get() then returns.
  * Prints a usage line when no address was given.
  */
 DB_SHOW_COMMAND(vtop, db_show_vtop)
 {
 	u_int par;
 
 	if (!have_addr) {
 		db_printf("show vtop <virt_addr>\n");
 		return;
 	}
 
 	cp15_ats1cpr_set(addr);
 	par = cp15_par_get();
 	db_printf("Physical address reg: 0x%08x\n", par);
 }
 #endif /* __ARM_ARCH >= 6 */
 #endif /* DDB */
 
 #ifdef DEBUG
 #define	debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define	debugf(fmt, args...)
 #endif
 
 /* Per-CPU data, one slot per possible CPU; pcpup starts at slot 0. */
 struct pcpu __pcpu[MAXCPU];
 struct pcpu *pcpup = &__pcpu[0];
 
 /* Trapframe installed as thread0.td_frame by init_proc0(). */
 static struct trapframe proc0_tf;
 uint32_t cpu_reset_address = 0;
 int cold = 1;
 /* Virtual address of the vector page; recorded by arm_vector_init(). */
 vm_offset_t vector_page;
 
 /*
  * Optional memcpy/bzero hooks and their size thresholds; all unset here.
  * NOTE(review): presumably filled in by platform code -- confirm.
  */
 int (*_arm_memcpy)(void *, void *, int, int) = NULL;
 int (*_arm_bzero)(void *, int, int) = NULL;
 int _min_memcpy_size = 0;
 int _min_bzero_size = 0;
 
 /* Its address is used as the end of the kernel in fake_preload_metadata(). */
 extern int *end;
 
 #ifdef FDT
 vm_paddr_t pmap_pa;
 
 #ifdef ARM_NEW_PMAP
 vm_offset_t systempage;
 vm_offset_t irqstack;
 vm_offset_t undstack;
 vm_offset_t abtstack;
 #else
 /*
  * This is the number of L2 page tables required for covering max
  * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf,
  * stacks etc.), uprounded to be divisible by 4.
  */
 #define KERNEL_PT_MAX	78
 
 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX];
 
 struct pv_addr systempage;
 static struct pv_addr msgbufpv;
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 static struct pv_addr kernelstack;
 #endif
 #endif
 
 #if defined(LINUX_BOOT_ABI)
 #define LBABI_MAX_BANKS	10
 
 /* State captured by linux_parse_boot_param(). */
 uint32_t board_id;			/* board type passed in r1 */
 struct arm_lbabi_tag *atag_list;	/* first ATAG (in place) */
 /* Command line copied out of the ATAG_CMDLINE tag. */
 char linux_command_line[LBABI_MAX_COMMAND_LINE + 1];
 /* Private copy of the ATAG list, saved for later use. */
 char atags[LBABI_MAX_COMMAND_LINE * 2];
 /* NOTE(review): not referenced in the visible part of this file. */
 uint32_t memstart[LBABI_MAX_BANKS];
 uint32_t memsize[LBABI_MAX_BANKS];
 uint32_t membanks;
 #endif
 
 /*
  * Board attributes, set through board_set_revision()/board_set_serial()
  * and exported read-only under the hw.board sysctl node.
  */
 static uint32_t board_revision;
 /* hex representation of uint64_t */
 static char board_serial[32];
 
 SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes");
 SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD,
     &board_revision, 0, "Board revision");
 SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD,
     board_serial, 0, "Board serial");
 
 /* Exported read-only as the hw.floatingpoint sysctl. */
 int vfp_exists;
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     &vfp_exists, 0, "Floating point support enabled");
 
 /*
  * Record the board serial number as a 16-digit, zero-padded hex string
  * in board_serial (exported via the hw.board.serial sysctl).
  */
 void
 board_set_serial(uint64_t serial)
 {
 
 	/*
 	 * snprintf() already reserves room for the terminating NUL, so the
 	 * full buffer size is passed.  %jx takes a uintmax_t, hence the cast.
 	 */
 	snprintf(board_serial, sizeof(board_serial),
 	    "%016jx", (uintmax_t)serial);
 }
 
 /*
  * Record the board revision (exported via the hw.board.revision sysctl).
  */
 void
 board_set_revision(uint32_t revision)
 {
 
 	board_revision = revision;
 }
 
 /*
  * Deliver a caught signal to the current thread.
  *
  * A struct sigframe (siginfo + ucontext) is built and copied out to the
  * user stack, or to the alternate signal stack when one is enabled, is
  * requested for this signal and is not already in use.  The trapframe is
  * then rewritten so the thread resumes in `catcher' with r0 = signal
  * number, r1 = &siginfo, r2 = r5 = &ucontext and lr pointing at the
  * signal trampoline located below PS_STRINGS.
  *
  * Entered with the proc lock and the sigacts mutex held; both are dropped
  * around the copyout() and re-acquired before returning.  If the frame
  * cannot be copied out, sigexit(td, SIGILL) is called.
  */
 void
 sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td;
 	struct proc *p;
 	struct trapframe *tf;
 	struct sigframe *fp, frame;
 	struct sigacts *psp;
 	int onstack;
 	int sig;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	tf = td->td_frame;
 	onstack = sigonstack(tf->tf_usr_sp);
 
 	CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm,
 	    catcher, sig);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct sigframe *)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size);
 #if defined(COMPAT_43)
 		td->td_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct sigframe *)td->td_frame->tf_usr_sp;
 
 	/* make room on the stack */
 	fp--;
 
 	/* make the stack aligned */
 	fp = (struct sigframe *)STACKALIGN(fp);
 
 	/*
 	 * Populate the siginfo frame.  Zero it first: the whole structure is
 	 * copied out to user space and not every field is filled in below.
 	 */
 	bzero(&frame, sizeof(frame));
 	get_mcontext(td, &frame.sf_uc.uc_mcontext, 0);
 	frame.sf_si = ksi->ksi_info;
 	frame.sf_uc.uc_sigmask = *mask;
 	/*
 	 * Copy the signal stack description first and only then override
 	 * ss_flags; doing it the other way round loses the computed flags.
 	 */
 	frame.sf_uc.uc_stack = td->td_sigstk;
 	frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) != 0 ?
 	    (onstack ? SS_ONSTACK : 0) : SS_DISABLE;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(td->td_proc);
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&frame, fp, sizeof(*fp)) != 0) {
 		/* Process has trashed its stack. Kill it. */
 		CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp);
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.  We invoke the handler
 	 * directly, only returning via the trampoline.  Note the
 	 * trampoline version numbers are coordinated with machine-
 	 * dependent code in libc.
 	 */
 
 	tf->tf_r0 = sig;
 	tf->tf_r1 = (register_t)&fp->sf_si;
 	tf->tf_r2 = (register_t)&fp->sf_uc;
 
 	/* the trampoline uses r5 as the uc address */
 	tf->tf_r5 = (register_t)&fp->sf_uc;
 	tf->tf_pc = (register_t)catcher;
 	tf->tf_usr_sp = (register_t)fp;
 	tf->tf_usr_lr = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode));
 	/* Set the mode to enter in the signal handler */
 #if __ARM_ARCH >= 7
 	if ((register_t)catcher & 1)
 		tf->tf_spsr |= PSR_T;
 	else
 		tf->tf_spsr &= ~PSR_T;
 #endif
 
 	CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr,
 	    tf->tf_usr_sp);
 
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 struct kva_md_info kmi;
 
 /*
  * arm_vector_init:
  *
  *	Initialize the vector page, and select whether or not to
  *	relocate the vectors.
  *
  *	NOTE: We expect the vector page to be mapped at its expected
  *	destination.
  */
 
 extern unsigned int page0[], page0_data[];
 /*
  * Install exception vectors into the vector page mapped at `va'.
  *
  * va:    virtual address at which the vector page is already mapped.
  * which: bit mask; bit N set means vector N (instruction word and its
  *        data word) is copied from page0/page0_data.
  *
  * Records `va' in vector_page and, when it equals ARM_VECTORS_HIGH, sets
  * CPU_CONTROL_VECRELOC through cpu_control().
  */
 void
 arm_vector_init(vm_offset_t va, int which)
 {
 	unsigned int *vectors = (unsigned int *)va;
 	/* Data words sit at the same offset from the insns as in page0. */
 	unsigned int *vectors_data = vectors + (page0_data - page0);
 	int vec;
 
 	/*
 	 * Loop through the vectors we're taking over, and copy the
 	 * vector's insn and data word.
 	 */
 	for (vec = 0; vec < ARM_NVEC; vec++) {
 		if ((which & (1 << vec)) == 0) {
 			/* Don't want to take over this vector. */
 			continue;
 		}
 		vectors[vec] = page0[vec];
 		vectors_data[vec] = page0_data[vec];
 	}
 
 	/* Now sync the vectors. */
 	cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int));
 
 	vector_page = va;
 
 	if (va == ARM_VECTORS_HIGH) {
 		/*
 		 * Assume the MD caller knows what it's doing here, and
 		 * really does want the vector page relocated.
 		 *
 		 * Note: This has to be done here (and not just in
 		 * cpu_setup()) because the vector page needs to be
 		 * accessible *before* cpu_startup() is called.
 		 * Think ddb(9) ...
 		 *
 		 * NOTE: If the CPU control register is not readable,
 		 * this will totally fail!  We'll just assume that
 		 * any system that has high vector support has a
 		 * readable CPU control register, for now.  If we
 		 * ever encounter one that does not, we'll have to
 		 * rethink this.
 		 */
 		cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC);
 	}
 }
 
 /*
  * SI_SUB_CPU startup hook (registered with SYSINIT below): identify the
  * CPU, initialise the kernel submaps and buffer subsystem, report the
  * memory sizes, and finish setting up thread0's PCB.  The `dummy'
  * argument is unused.
  */
 static void
 cpu_startup(void *dummy)
 {
 	struct pcb *pcb = thread0.td_pcb;
 	const unsigned int mbyte = 1024 * 1024;
 #ifdef ARM_TP_ADDRESS
 #ifndef ARM_CACHE_LOCK_ENABLE
 	vm_page_t m;
 #endif
 #endif
 
 	identify_arm_cpu();
 
 	vm_ksubmap_init(&kmi);
 
 	/*
 	 * Display the RAM layout.
 	 */
 	printf("real memory  = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(realmem),
 	    (uintmax_t)arm32_ptob(realmem) / mbyte);
 	printf("avail memory = %ju (%ju MB)\n",
 	    (uintmax_t)arm32_ptob(vm_cnt.v_free_count),
 	    (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte);
 	if (bootverbose) {
 		arm_physmem_print_tables();
 		arm_devmap_print_table();
 	}
 
 	bufinit();
 	vm_pager_bufferinit();
 	/* Saved SP for thread0: USPACE_SVC_STACK_TOP bytes into its kstack. */
 	pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack +
 	    USPACE_SVC_STACK_TOP;
 	pmap_set_pcb_pagedir(pmap_kernel(), pcb);
 #ifndef ARM_NEW_PMAP
 	vector_page_setprot(VM_PROT_READ);
 	pmap_postinit();
 #endif
 #ifdef ARM_TP_ADDRESS
 #ifdef ARM_CACHE_LOCK_ENABLE
 	pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS);
 	arm_lock_cache_line(ARM_TP_ADDRESS);
 #else
 	/* Back the ARM_TP_ADDRESS mapping with a freshly allocated page. */
 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO);
 	pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m));
 #endif
 	/* Initial RAS window: start 0, end 0xffffffff. */
 	*(uint32_t *)ARM_RAS_START = 0;
 	*(uint32_t *)ARM_RAS_END = 0xffffffff;
 #endif
 }
 
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
 
 /*
  * Write back the L1 data cache and then the L2 cache for the buffer
  * [ptr, ptr + len), so that the I-cache can be made coherent later
  * (used for non-DMA I/O).  With ARM_L2_PIPT the L2 operation is given
  * the physical address of the buffer, otherwise the virtual one.
  */
 void
 cpu_flush_dcache(void *ptr, size_t len)
 {
 	uintptr_t l2addr;
 
 	cpu_dcache_wb_range((uintptr_t)ptr, len);
 #ifdef ARM_L2_PIPT
 	l2addr = (uintptr_t)vtophys(ptr);
 #else
 	l2addr = (uintptr_t)ptr;
 #endif
 	cpu_l2cache_wb_range(l2addr, len);
 }
 
 /*
  * Get current clock frequency for the given cpu id.
  *
  * Not implemented here: always returns ENXIO and leaves *rate untouched.
  */
 int
 cpu_est_clockrate(int cpu_id, uint64_t *rate)
 {
 
 	return (ENXIO);
 }
 
 /*
  * Idle the current CPU.
  *
  * Runs inside a spinlock_enter()/spinlock_exit() section.  When `busy'
  * is zero the idle/active clock hooks are called around the sleep
  * (unless NO_EVENTTIMERS is defined).  cpu_sleep(0) is only called when
  * sched_runnable() reports nothing to run.
  */
 void
 cpu_idle(int busy)
 {
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu);
 	spinlock_enter();
 #ifndef NO_EVENTTIMERS
 	if (!busy)
 		cpu_idleclock();
 #endif
 	if (!sched_runnable())
 		cpu_sleep(0);
 #ifndef NO_EVENTTIMERS
 	if (!busy)
 		cpu_activeclock();
 #endif
 	spinlock_exit();
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu);
 }
 
 /*
  * Stub: does nothing for `cpu' and always returns 0.
  */
 int
 cpu_idle_wakeup(int cpu)
 {
 
 	return (0);
 }
 
 /*
  * Most ARM platforms don't need to do anything special to init their clocks
  * (they get initialized during normal device attachment), and by not defining a
  * cpu_initclocks() function they get this generic one.  Any platform that needs
  * to do something special can just provide their own implementation, which will
  * override this one due to the weak linkage.
  */
 /*
  * Generic clock initialisation.  Unless NO_EVENTTIMERS is defined, the
  * CPU with cpuid 0 (or the only CPU on non-SMP kernels) runs
  * cpu_initclocks_bsp() and every other CPU runs cpu_initclocks_ap().
  */
 void
 arm_generic_initclocks(void)
 {
 
 #ifndef NO_EVENTTIMERS
 #ifdef SMP
 	if (PCPU_GET(cpuid) != 0) {
 		cpu_initclocks_ap();
 		return;
 	}
 #endif
 	cpu_initclocks_bsp();
 #endif
 }
 __weak_reference(arm_generic_initclocks, cpu_initclocks);
 
 /*
  * Copy the user register state of `td' from its trapframe into `regs'.
  * The general registers are copied as one run starting at tf_r0 and
  * sizeof(regs->r) bytes long.  Always returns 0.
  */
 int
 fill_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	regs->r_cpsr = frame->tf_spsr;
 	regs->r_pc = frame->tf_pc;
 	regs->r_lr = frame->tf_usr_lr;
 	regs->r_sp = frame->tf_usr_sp;
 	bcopy(&frame->tf_r0, regs->r, sizeof(regs->r));
 	return (0);
 }
 /*
  * Stub: no floating point state is reported; `regs' is simply zeroed.
  * Always returns 0.
  */
 int
 fill_fpregs(struct thread *td, struct fpreg *regs)
 {
 	bzero(regs, sizeof(*regs));
 	return (0);
 }
 
 /*
  * Load the user register state of `td' from `regs' into its trapframe.
  * Of the status register only the bits covered by PSR_FLAGS are taken
  * from regs->r_cpsr; all other SPSR bits are preserved.  Always returns 0.
  */
 int
 set_regs(struct thread *td, struct reg *regs)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	bcopy(regs->r, &frame->tf_r0, sizeof(regs->r));
 	frame->tf_usr_sp = regs->r_sp;
 	frame->tf_usr_lr = regs->r_lr;
 	frame->tf_pc = regs->r_pc;
 	frame->tf_spsr = (frame->tf_spsr & ~PSR_FLAGS) |
 	    (regs->r_cpsr & PSR_FLAGS);
 	return (0);
 }
 
 /*
  * Stub: the supplied floating point registers are ignored.  Returns 0.
  */
 int
 set_fpregs(struct thread *td, struct fpreg *regs)
 {
 	return (0);
 }
 
 /*
  * Stub: `regs' is left untouched.  Returns 0.
  */
 int
 fill_dbregs(struct thread *td, struct dbreg *regs)
 {
 	return (0);
 }
 /*
  * Stub: the supplied debug registers are ignored.  Returns 0.
  */
 int
 set_dbregs(struct thread *td, struct dbreg *regs)
 {
 	return (0);
 }
 
 
 /*
  * Read one 32-bit word at address `addr' of td's process into *v using
  * proc_rwmem().  The proc lock must not be held.  Returns the
  * proc_rwmem() result.
  */
 static int
 ptrace_read_int(struct thread *td, vm_offset_t addr, u_int32_t *v)
 {
 	struct uio io;
 	struct iovec vec;
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED);
 
 	/* Single kernel-space segment covering *v. */
 	vec.iov_base = (caddr_t) v;
 	vec.iov_len = sizeof(u_int32_t);
 
 	io.uio_iov = &vec;
 	io.uio_iovcnt = 1;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_offset = (off_t)addr;
 	io.uio_resid = sizeof(u_int32_t);
 	io.uio_td = td;
 
 	return (proc_rwmem(td->td_proc, &io));
 }
 
 /*
  * Write the 32-bit word `v' at address `addr' of td's process using
  * proc_rwmem().  The proc lock must not be held.  Returns the
  * proc_rwmem() result.
  */
 static int
 ptrace_write_int(struct thread *td, vm_offset_t addr, u_int32_t v)
 {
 	struct uio io;
 	struct iovec vec;
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED);
 
 	/* Single kernel-space segment covering the local copy of v. */
 	vec.iov_base = (caddr_t) &v;
 	vec.iov_len = sizeof(u_int32_t);
 
 	io.uio_iov = &vec;
 	io.uio_iovcnt = 1;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_WRITE;
 	io.uio_offset = (off_t)addr;
 	io.uio_resid = sizeof(u_int32_t);
 	io.uio_td = td;
 
 	return (proc_rwmem(td->td_proc, &io));
 }
 
 int
 ptrace_single_step(struct thread *td)
 {
 	struct proc *p;
 	int error;
 
 	/* TODO: This needs to be updated for Thumb-2 */
 	if ((td->td_frame->tf_spsr & PSR_T) != 0)
 		return (EINVAL);
 
 	KASSERT(td->td_md.md_ptrace_instr == 0,
 	 ("Didn't clear single step"));
 	p = td->td_proc;
 	PROC_UNLOCK(p);
 	error = ptrace_read_int(td, td->td_frame->tf_pc + 4,
 	    &td->td_md.md_ptrace_instr);
 	if (error)
 		goto out;
 	error = ptrace_write_int(td, td->td_frame->tf_pc + 4,
 	    PTRACE_BREAKPOINT);
 	if (error)
 		td->td_md.md_ptrace_instr = 0;
 	td->td_md.md_ptrace_addr = td->td_frame->tf_pc + 4;
 out:
 	PROC_LOCK(p);
 	return (error);
 }
 
 /*
  * Undo ptrace_single_step(): if a saved instruction is pending, write it
  * back to md_ptrace_addr (proc lock dropped around the write) and clear
  * md_ptrace_instr.  The result of the write is not checked.  Returns
  * EINVAL for threads with PSR_T set, 0 otherwise.
  */
 int
 ptrace_clear_single_step(struct thread *td)
 {
 	struct proc *p;
 
 	/* TODO: This needs to be updated for Thumb-2 */
 	if ((td->td_frame->tf_spsr & PSR_T) != 0)
 		return (EINVAL);
 
 	/* Nothing armed, nothing to restore. */
 	if (td->td_md.md_ptrace_instr == 0)
 		return (0);
 
 	p = td->td_proc;
 	PROC_UNLOCK(p);
 	ptrace_write_int(td, td->td_md.md_ptrace_addr,
 	    td->td_md.md_ptrace_instr);
 	PROC_LOCK(p);
 	td->td_md.md_ptrace_instr = 0;
 	return (0);
 }
 
 /*
  * Set the saved program counter of `td' to `addr'.  Always returns 0.
  */
 int
 ptrace_set_pc(struct thread *td, unsigned long addr)
 {
 	td->td_frame->tf_pc = addr;
 	return (0);
 }
 
 /*
  * Machine-dependent per-CPU data initialisation hook; nothing to do here.
  */
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 {
 }
 
 /*
  * Enter a spinlock section.  Sections nest: on the outermost entry
  * disable_interrupts(PSR_I | PSR_F) is called and its return value is
  * saved in the thread so spinlock_exit() can restore it; nested entries
  * only bump the per-thread count.  Every entry also calls
  * critical_enter().
  */
 void
 spinlock_enter(void)
 {
 	struct thread *td;
 	register_t cspr;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count == 0) {
 		cspr = disable_interrupts(PSR_I | PSR_F);
 		td->td_md.md_spinlock_count = 1;
 		td->td_md.md_saved_cspr = cspr;
 	} else
 		td->td_md.md_spinlock_count++;
 	critical_enter();
 }
 
 /*
  * Leave a spinlock section: call critical_exit(), drop the per-thread
  * nesting count and, when the outermost section ends, restore the
  * interrupt state saved by spinlock_enter().
  */
 void
 spinlock_exit(void)
 {
 	struct thread *td;
 	register_t cspr;
 
 	td = curthread;
 	critical_exit();
 	/* Read the saved state before the count is dropped. */
 	cspr = td->td_md.md_saved_cspr;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count == 0)
 		restore_interrupts(cspr);
 }
 
 /*
  * Reset the register state of `td' for a freshly exec'ed image: the
  * trapframe is zeroed, then the user stack pointer is set to `stack',
  * pc and user lr to the image entry point, and the saved status register
  * to user mode.
  */
 void
 exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 	memset(frame, 0, sizeof(*frame));
 
 	frame->tf_spsr = PSR_USR32_MODE;
 	frame->tf_pc = imgp->entry_addr;
 	frame->tf_usr_lr = imgp->entry_addr;
 	frame->tf_usr_sp = stack;
 	/* Recognisable filler value for the SVC-mode link register. */
 	frame->tf_svc_lr = 0x77777777;
 }
 
 /*
  * Get machine context: copy r0-r12, sp, lr, pc and cpsr of `td' from its
  * trapframe into mcp->__gregs.  When GET_MC_CLEAR_RET is set in
  * `clear_ret', r0 is reported as 0 and PSR_C is cleared in the reported
  * cpsr.  Always returns 0.
  */
 int
 get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret)
 {
 	struct trapframe *frame = td->td_frame;
 	__greg_t *regs = mcp->__gregs;
 
 	regs[_REG_R0] = frame->tf_r0;
 	regs[_REG_R1] = frame->tf_r1;
 	regs[_REG_R2] = frame->tf_r2;
 	regs[_REG_R3] = frame->tf_r3;
 	regs[_REG_R4] = frame->tf_r4;
 	regs[_REG_R5] = frame->tf_r5;
 	regs[_REG_R6] = frame->tf_r6;
 	regs[_REG_R7] = frame->tf_r7;
 	regs[_REG_R8] = frame->tf_r8;
 	regs[_REG_R9] = frame->tf_r9;
 	regs[_REG_R10] = frame->tf_r10;
 	regs[_REG_R11] = frame->tf_r11;
 	regs[_REG_R12] = frame->tf_r12;
 	regs[_REG_SP] = frame->tf_usr_sp;
 	regs[_REG_LR] = frame->tf_usr_lr;
 	regs[_REG_PC] = frame->tf_pc;
 	regs[_REG_CPSR] = frame->tf_spsr;
 
 	/* Caller asked for a "successful, zero" return value to be shown. */
 	if ((clear_ret & GET_MC_CLEAR_RET) != 0) {
 		regs[_REG_R0] = 0;
 		regs[_REG_CPSR] = frame->tf_spsr & ~PSR_C;
 	}
 
 	return (0);
 }
 
 /*
  * Set machine context: load r0-r12, sp, lr, pc and the saved status
  * register of `td' from mcp->__gregs.
  *
  * No validation is done here; the cpsr value is stored as given, so
  * callers (see sys_sigreturn()) must check it first.  Always returns 0.
  */
 int
 set_mcontext(struct thread *td, mcontext_t *mcp)
 {
 	struct trapframe *frame = td->td_frame;
 	const __greg_t *regs = mcp->__gregs;
 
 	/* General purpose registers. */
 	frame->tf_r0 = regs[_REG_R0];
 	frame->tf_r1 = regs[_REG_R1];
 	frame->tf_r2 = regs[_REG_R2];
 	frame->tf_r3 = regs[_REG_R3];
 	frame->tf_r4 = regs[_REG_R4];
 	frame->tf_r5 = regs[_REG_R5];
 	frame->tf_r6 = regs[_REG_R6];
 	frame->tf_r7 = regs[_REG_R7];
 	frame->tf_r8 = regs[_REG_R8];
 	frame->tf_r9 = regs[_REG_R9];
 	frame->tf_r10 = regs[_REG_R10];
 	frame->tf_r11 = regs[_REG_R11];
 	frame->tf_r12 = regs[_REG_R12];
 
 	/* Stack pointer, link register, program counter and status. */
 	frame->tf_usr_sp = regs[_REG_SP];
 	frame->tf_usr_lr = regs[_REG_LR];
 	frame->tf_pc = regs[_REG_PC];
 	frame->tf_spsr = regs[_REG_CPSR];
 
 	return (0);
 }
 
 /*
  * sigreturn(2): restore the register context and signal mask from the
  * user-supplied ucontext at uap->sigcntxp.
  *
  * Returns EFAULT when uap is NULL or the context cannot be copied in,
  * EINVAL when the saved cpsr is not plain user mode or has PSR_I/PSR_F
  * set, and EJUSTRETURN on success.
  *
  * MPSAFE
  */
 int
 sys_sigreturn(struct thread *td, struct sigreturn_args *uap)
 {
 	ucontext_t uc;
 	int cpsr;
 
 	if (uap == NULL)
 		return (EFAULT);
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)))
 		return (EFAULT);
 
 	/*
 	 * Make sure the processor mode has not been tampered with and
 	 * interrupts have not been disabled.
 	 */
 	cpsr = uc.uc_mcontext.__gregs[_REG_CPSR];
 	if ((cpsr & PSR_MODE) != PSR_USR32_MODE)
 		return (EINVAL);
 	if ((cpsr & (PSR_I | PSR_F)) != 0)
 		return (EINVAL);
 
 	/* Restore register context. */
 	set_mcontext(td, &uc.uc_mcontext);
 
 	/* Restore signal mask. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 	return (EJUSTRETURN);
 }
 
 
 /*
  * Construct a PCB from a trapframe. This is called from kdb_trap() where
  * we want to start a backtrace from the function that caused us to enter
  * the debugger. We have the context in the trapframe, but base the trace
  * on the PCB. The PCB doesn't have to be perfect, as long as it contains
  * enough for a backtrace.
  *
  * Only r4-r12, pc, lr and sp of pcb->pcb_regs are filled in; lr and sp
  * are taken from the trapframe's tf_usr_lr and tf_usr_sp fields.
  */
 void
 makectx(struct trapframe *tf, struct pcb *pcb)
 {
 	pcb->pcb_regs.sf_r4 = tf->tf_r4;
 	pcb->pcb_regs.sf_r5 = tf->tf_r5;
 	pcb->pcb_regs.sf_r6 = tf->tf_r6;
 	pcb->pcb_regs.sf_r7 = tf->tf_r7;
 	pcb->pcb_regs.sf_r8 = tf->tf_r8;
 	pcb->pcb_regs.sf_r9 = tf->tf_r9;
 	pcb->pcb_regs.sf_r10 = tf->tf_r10;
 	pcb->pcb_regs.sf_r11 = tf->tf_r11;
 	pcb->pcb_regs.sf_r12 = tf->tf_r12;
 	pcb->pcb_regs.sf_pc = tf->tf_pc;
 	pcb->pcb_regs.sf_lr = tf->tf_usr_lr;
 	pcb->pcb_regs.sf_sp = tf->tf_usr_sp;
 }
 
 /*
  * Fake up a boot descriptor table
  *
  * Builds, in a static array of 32-bit words, a preload metadata list for
  * the kernel itself and points preload_metadata at it.  Each record is
  * a type word, a size word, and the payload padded to whole words.  The
  * list is terminated by two zero words.
  *
  * Returns the address just past the kernel: &end, or (with DDB, when the
  * word at KERNVIRTADDR is MAGIC_TRAMP_NUMBER) the symbol table end read
  * from KERNVIRTADDR + 8.  The `abp' argument is unused.
  */
 vm_offset_t
 fake_preload_metadata(struct arm_boot_params *abp __unused)
 {
 #ifdef DDB
 	vm_offset_t zstart = 0, zend = 0;
 #endif
 	vm_offset_t lastaddr;
 	int i = 0;
 	static uint32_t fake_preload[35];
 
 	fake_preload[i++] = MODINFO_NAME;
 	fake_preload[i++] = strlen("kernel") + 1;
 	/* "kernel" plus NUL is 7 bytes: two words in total. */
 	strcpy((char*)&fake_preload[i++], "kernel");
 	i += 1;
 	fake_preload[i++] = MODINFO_TYPE;
 	fake_preload[i++] = strlen("elf kernel") + 1;
 	/* "elf kernel" plus NUL is 11 bytes: three words in total. */
 	strcpy((char*)&fake_preload[i++], "elf kernel");
 	i += 2;
 	fake_preload[i++] = MODINFO_ADDR;
 	fake_preload[i++] = sizeof(vm_offset_t);
 	fake_preload[i++] = KERNVIRTADDR;
 	fake_preload[i++] = MODINFO_SIZE;
 	fake_preload[i++] = sizeof(uint32_t);
 	fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR;
 #ifdef DDB
 	if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) {
 		/* Symbol table start/end are stored right after the magic. */
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4);
 		fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM;
 		fake_preload[i++] = sizeof(vm_offset_t);
 		fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8);
 		lastaddr = *(uint32_t *)(KERNVIRTADDR + 8);
 		zend = lastaddr;
 		zstart = *(uint32_t *)(KERNVIRTADDR + 4);
 		db_fetch_ksymtab(zstart, zend);
 	} else
 #endif
 		lastaddr = (vm_offset_t)&end;
 	/* Two zero words terminate the list. */
 	fake_preload[i++] = 0;
 	fake_preload[i] = 0;
 	preload_metadata = (void *)fake_preload;
 
 	return (lastaddr);
 }
 
 /*
  * Initialise the per-CPU data of CPU 0 (the slot pcpup points at) and
  * make thread0 the current thread.
  */
 void
 pcpu0_init(void)
 {
 #if __ARM_ARCH >= 6
 	set_curthread(&thread0);
 #endif
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 }
 
 #if defined(LINUX_BOOT_ABI)
 vm_offset_t
 linux_parse_boot_param(struct arm_boot_params *abp)
 {
 	struct arm_lbabi_tag *walker;
 	uint32_t revision;
 	uint64_t serial;
 
 	/*
 	 * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2
 	 * is atags or dtb pointer.  If all of these aren't satisfied,
 	 * then punt.
 	 */
 	if (!(abp->abp_r0 == 0 && abp->abp_r1 != 0 && abp->abp_r2 != 0))
 		return 0;
 
 	board_id = abp->abp_r1;
 	walker = (struct arm_lbabi_tag *)
 	    (abp->abp_r2 + KERNVIRTADDR - abp->abp_physaddr);
 
 	/* xxx - Need to also look for binary device tree */
 	if (ATAG_TAG(walker) != ATAG_CORE)
 		return 0;
 
 	atag_list = walker;
 	while (ATAG_TAG(walker) != ATAG_NONE) {
 		switch (ATAG_TAG(walker)) {
 		case ATAG_CORE:
 			break;
 		case ATAG_MEM:
 			arm_physmem_hardware_region(walker->u.tag_mem.start,
 			    walker->u.tag_mem.size);
 			break;
 		case ATAG_INITRD2:
 			break;
 		case ATAG_SERIAL:
 			serial = walker->u.tag_sn.low |
 			    ((uint64_t)walker->u.tag_sn.high << 32);
 			board_set_serial(serial);
 			break;
 		case ATAG_REVISION:
 			revision = walker->u.tag_rev.rev;
 			board_set_revision(revision);
 			break;
 		case ATAG_CMDLINE:
 			/* XXX open question: Parse this for boothowto? */
 			bcopy(walker->u.tag_cmd.command, linux_command_line,
 			      ATAG_SIZE(walker));
 			break;
 		default:
 			break;
 		}
 		walker = ATAG_NEXT(walker);
 	}
 
 	/* Save a copy for later */
 	bcopy(atag_list, atags,
 	    (char *)walker - (char *)atag_list + ATAG_SIZE(walker));
 
 	return fake_preload_metadata(abp);
 }
 #endif
 
 #if defined(FREEBSD_BOOT_LOADER)
 /*
  * Parse boot parameters handed over by the FreeBSD loader.
  *
  * r0 is expected to carry the (page aligned) preload metadata pointer.
  * On success boothowto and kern_envp are set from the "elf kernel"
  * metadata, the kernel symbol table is handed to DDB when configured,
  * and the MODINFOMD_KERNEND value is returned.  Returns 0 when the
  * pointer is NULL or no "elf kernel" metadata section is found.
  */
 vm_offset_t
 freebsd_parse_boot_param(struct arm_boot_params *abp)
 {
 	vm_offset_t lastaddr;
 	void *mdp, *kmdp;
 #ifdef DDB
 	vm_offset_t ksym_start, ksym_end;
 #endif
 
 	/*
 	 * Mask metadata pointer: it is supposed to be on page boundary. If
 	 * the first argument (mdp) doesn't point to a valid address the
 	 * bootloader must have passed us something else than the metadata
 	 * ptr, so we give up.  Also give up if we cannot find the metadata
 	 * section the loader creates that we get all this data out of.
 	 */
 	mdp = (void *)(abp->abp_r0 & ~PAGE_MASK);
 	if (mdp == NULL)
 		return (0);
 
 	preload_metadata = mdp;
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp == NULL)
 		return (0);
 
 	boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
 	kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
 	lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t);
 #ifdef DDB
 	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 	ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
 	db_fetch_ksymtab(ksym_start, ksym_end);
 #endif
 	return (lastaddr);
 }
 #endif
 
 /*
  * Default boot parameter parser: try the Linux boot ABI parser, then the
  * FreeBSD loader parser (each only when configured), and return the
  * first non-zero result.  If neither succeeds, fall back to
  * fake_preload_metadata().
  */
 vm_offset_t
 default_parse_boot_param(struct arm_boot_params *abp)
 {
 	vm_offset_t lastaddr;
 
 #if defined(LINUX_BOOT_ABI)
 	lastaddr = linux_parse_boot_param(abp);
 	if (lastaddr != 0)
 		return (lastaddr);
 #endif
 #if defined(FREEBSD_BOOT_LOADER)
 	lastaddr = freebsd_parse_boot_param(abp);
 	if (lastaddr != 0)
 		return (lastaddr);
 #endif
 	/* Fall back to hardcoded metadata. */
 	lastaddr = fake_preload_metadata(abp);
 	return (lastaddr);
 }
 
 /*
  * Stub version of the boot parameter parsing routine.  We are
  * called early in initarm, before even VM has been initialized.
  * This routine needs to preserve any data that the boot loader
  * has passed in before the kernel starts to grow past the end
  * of the BSS, traditionally the place boot-loaders put this data.
  *
  * Since this is called so early, things that depend on the vm system
  * being setup (including access to some SoC's serial ports), about
  * all that can be done in this routine is to copy the arguments.
  *
  * This is the default boot parameter parsing routine.  Individual
  * kernels/boards can override this weak function with one of their
  * own.  We just fake metadata...
  */
 __weak_reference(default_parse_boot_param, parse_boot_param);
 
 /*
  * Initialize proc0/thread0.
  *
  * `kstack' is the base of thread0's kernel stack.  The PCB is placed in
  * the last sizeof(struct pcb) bytes of the KSTACK_PAGES-page stack and
  * becomes both thread0's PCB and the current PCB of the boot CPU.
  */
 void
 init_proc0(vm_offset_t kstack)
 {
 	struct pcb *pcb;
 
 	proc_linkup0(&proc0, &thread0);
 
 	thread0.td_kstack = kstack;
 	pcb = (struct pcb *)(kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
 	thread0.td_pcb = pcb;
 
 	pcb->pcb_flags = 0;
 	pcb->pcb_vfpcpu = -1;
 	pcb->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ;
 
 	thread0.td_frame = &proc0_tf;
 	pcpup->pc_curpcb = pcb;
 }
 
 #ifdef ARM_NEW_PMAP
 /*
  * Set the IRQ, abort and undefined mode stack pointers for CPU `cpu'.
  * Each mode's stack area is carved into per-CPU slices of
  * <MODE>_STACK_SIZE pages; the pointer is set to the end of the slice
  * belonging to `cpu'.
  */
 void
 set_stackptrs(int cpu)
 {
 	int slot;
 
 	slot = cpu + 1;
 	set_stackptr(PSR_IRQ32_MODE,
 	    irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * slot));
 	set_stackptr(PSR_ABT32_MODE,
 	    abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * slot));
 	set_stackptr(PSR_UND32_MODE,
 	    undstack + ((UND_STACK_SIZE * PAGE_SIZE) * slot));
 }
 #else
 /*
  * Set the IRQ, abort and undefined mode stack pointers for CPU `cpu'.
  * Each mode's stack area (pv_va of the matching pv_addr) is carved into
  * per-CPU slices of <MODE>_STACK_SIZE pages; the pointer is set to the
  * end of the slice belonging to `cpu'.
  */
 void
 set_stackptrs(int cpu)
 {
 	int slot;
 
 	slot = cpu + 1;
 	set_stackptr(PSR_IRQ32_MODE,
 	    irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * slot));
 	set_stackptr(PSR_ABT32_MODE,
 	    abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * slot));
 	set_stackptr(PSR_UND32_MODE,
 	    undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * slot));
 }
 #endif
 
 #ifdef EFI
 /* Step to the descriptor `size' bytes after `ptr'. */
 #define efi_next_descriptor(ptr, size) \
 	((struct efi_md *)(((uint8_t *)(ptr)) + (size)))
 
 /*
  * Convert the UEFI memory map that follows `efihdr' into an array of
  * usable memory regions.
  *
  * efihdr:  map header; the descriptors start at the next 16-byte
  *          boundary after it.
  * mr:      output array with room for FDT_MEM_REGIONS entries.
  * mrcnt:   out, number of entries stored in mr[0 .. *mrcnt - 1].
  * memsize: out, sum of the sizes of the stored entries.
  *
  * Only loader code/data, boot services code/data and free memory are
  * reported.  With RB_VERBOSE set in boothowto every descriptor is also
  * printed.  Both outputs are 0 when the descriptor size is 0.
  */
 static void
 add_efi_map_entries(struct efi_map_header *efihdr, struct mem_region *mr,
     int *mrcnt, uint32_t *memsize)
 {
 	struct efi_md *map, *p;
 	const char *type;
 	size_t efisz, memory_size;
 	int ndesc, i, j;
 
 	static const char *types[] = {
 		"Reserved",
 		"LoaderCode",
 		"LoaderData",
 		"BootServicesCode",
 		"BootServicesData",
 		"RuntimeServicesCode",
 		"RuntimeServicesData",
 		"ConventionalMemory",
 		"UnusableMemory",
 		"ACPIReclaimMemory",
 		"ACPIMemoryNVS",
 		"MemoryMappedIO",
 		"MemoryMappedIOPortSpace",
 		"PalCode"
 	};
 
 	*mrcnt = 0;
 	*memsize = 0;
 
 	/*
 	 * Memory map data provided by UEFI via the GetMemoryMap
 	 * Boot Services API.
 	 */
 	efisz = roundup2(sizeof(struct efi_map_header), 0x10);
 	map = (struct efi_md *)((uint8_t *)efihdr + efisz);
 
 	if (efihdr->descriptor_size == 0)
 		return;
 	ndesc = efihdr->memory_size / efihdr->descriptor_size;
 
 	if (boothowto & RB_VERBOSE)
 		printf("%23s %12s %12s %8s %4s\n",
 		    "Type", "Physical", "Virtual", "#Pages", "Attr");
 
 	memory_size = 0;
 	for (i = 0, j = 0, p = map; i < ndesc; i++,
 	    p = efi_next_descriptor(p, efihdr->descriptor_size)) {
 		if (boothowto & RB_VERBOSE) {
 			if (p->md_type <= EFI_MD_TYPE_PALCODE)
 				type = types[p->md_type];
 			else
 				type = "<INVALID>";
 			printf("%23s %012llx %12p %08llx ", type, p->md_phys,
 			    p->md_virt, p->md_pages);
 			if (p->md_attr & EFI_MD_ATTR_UC)
 				printf("UC ");
 			if (p->md_attr & EFI_MD_ATTR_WC)
 				printf("WC ");
 			if (p->md_attr & EFI_MD_ATTR_WT)
 				printf("WT ");
 			if (p->md_attr & EFI_MD_ATTR_WB)
 				printf("WB ");
 			if (p->md_attr & EFI_MD_ATTR_UCE)
 				printf("UCE ");
 			if (p->md_attr & EFI_MD_ATTR_WP)
 				printf("WP ");
 			if (p->md_attr & EFI_MD_ATTR_RP)
 				printf("RP ");
 			if (p->md_attr & EFI_MD_ATTR_XP)
 				printf("XP ");
 			if (p->md_attr & EFI_MD_ATTR_RT)
 				printf("RUNTIME");
 			printf("\n");
 		}
 
 		switch (p->md_type) {
 		case EFI_MD_TYPE_CODE:
 		case EFI_MD_TYPE_DATA:
 		case EFI_MD_TYPE_BS_CODE:
 		case EFI_MD_TYPE_BS_DATA:
 		case EFI_MD_TYPE_FREE:
 			/*
 			 * We're allowed to use any entry with these types.
 			 */
 			break;
 		default:
 			continue;
 		}
 
 		/* Stop once the output array is full. */
 		if (j >= FDT_MEM_REGIONS)
 			break;
 
 		/*
 		 * Store first, then advance: entries must occupy
 		 * mr[0 .. j - 1] so that they match the count reported
 		 * through *mrcnt.
 		 */
 		mr[j].mr_start = p->md_phys;
 		mr[j].mr_size = p->md_pages * PAGE_SIZE;
 		memory_size += mr[j].mr_size;
 		j++;
 	}
 
 	*mrcnt = j;
 	*memsize = memory_size;
 }
 #endif /* EFI */
 
 #ifdef FDT
 /*
  * Return the string that follows `cp' in a packed list of NUL-terminated
  * strings, or NULL when `cp' is NULL or the following string is empty
  * (which marks the end of the list).
  */
 static char *
 kenv_next(char *cp)
 {
 	char *next;
 
 	if (cp == NULL)
 		return (NULL);
 
 	/* Step over the current string and its terminating NUL. */
 	for (next = cp; *next != 0; next++)
 		continue;
 	next++;
 
 	return (*next == 0 ? NULL : next);
 }
 
 /*
  * Debug helper: print (through debugf(), so only when DEBUG is defined)
  * the address of the static kernel environment and every string in it.
  */
 static void
 print_kenv(void)
 {
 	char *var;
 
 	debugf("loader passed (static) kenv:\n");
 	if (kern_envp == NULL) {
 		debugf(" no env, null ptr\n");
 		return;
 	}
 	debugf(" kern_envp = 0x%08x\n", (uint32_t)kern_envp);
 
 	var = kern_envp;
 	while (var != NULL) {
 		debugf(" %x %s\n", (uint32_t)var, var);
 		var = kenv_next(var);
 	}
 }
 
 #ifndef ARM_NEW_PMAP
 /*
  * Early machine-dependent bootstrap for FDT kernels built without
  * ARM_NEW_PMAP.
  *
  * In order, this function: parses the boot parameters, locates and
  * installs the device tree, records the hardware and reserved memory
  * regions, carves the L1/L2 page tables, vector page, DPCPU area, per-mode
  * stacks and message buffer out of the memory that follows the kernel
  * image (tracked by freemempos), builds the bootstrap mappings, switches
  * to the new L1 table, brings up the console, and finishes with
  * pmap_bootstrap() and the physmem bookkeeping.
  *
  * abp: boot parameters from the startup code; abp->abp_physaddr is used as
  * the physical address corresponding to KERNVIRTADDR.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb),
  * i.e. an address inside the kernel stack allocated here (presumably the
  * initial SVC stack pointer -- confirm against the caller in locore).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	struct pv_addr kernel_l1pt;
 	struct pv_addr dpcpu;
 	vm_offset_t dtbp, freemempos, l2_start, lastaddr;
 	uint32_t memsize, l2size;
 	char *env;
 	void *kmdp;
 	u_int l1pagetable;
 	int i, j, err_devmap, mem_regions_sz;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 
 	memsize = 0;
 
 	cpuinfo_init();
 	set_cpufuncs();
 
 	/*
 	 * Find the dtb passed in by the boot loader.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp != NULL)
 		dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 	else
 		dtbp = (vm_offset_t)NULL;
 
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == (vm_offset_t)NULL)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 	/* Grab physical memory regions information from device tree. */
 	if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0)
 		panic("Cannot get physical memory regions");
 	arm_physmem_hardware_regions(mem_regions, mem_regions_sz);
 
 	/* Grab reserved memory regions information from device tree. */
 	/* (mem_regions is reused as scratch; the hardware list was consumed above.) */
 	if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0)
 		arm_physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 
 	/* Platform-specific initialisation */
 	platform_probe_and_attach();
 
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/* Calculate number of L2 tables needed for mapping vm_page_array */
 	l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page);
 	l2size = (l2size >> L1_S_SHIFT) + 1;
 
 	/*
 	 * Add one table for end of kernel map, one for stacks, msgbuf and
 	 * L1 and L2 tables map and one for vectors map.
 	 */
 	l2size += 3;
 
 	/* Make it divisible by 4 */
 	l2size = (l2size + 3) & ~3;
 
 	/* First page boundary at or after the end of the loaded kernel. */
 	freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK;
 
 	/* Define a macro to simplify memory allocation */
 	/*
 	 * Both macros expand to several statements and advance freemempos;
 	 * they must only be used as a full statement (or inside braces).
 	 * valloc_pages() additionally derives pv_pa from pv_va using the
 	 * KERNVIRTADDR <-> abp_physaddr offset.
 	 */
 #define valloc_pages(var, np)						\
 	alloc_pages((var).pv_va, (np));					\
 	(var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR);
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/*
 	 * Skip pages until freemempos is a multiple of L1_TABLE_SIZE
 	 * (assuming L1_TABLE_SIZE is a power of two), then place the L1
 	 * table there.
 	 */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos += PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 
 	/*
 	 * Lay out l2size L2 tables.  Fresh memory is allocated only when i
 	 * is a multiple of PAGE_SIZE / L2_TABLE_SIZE_REAL; the tables in
 	 * between are placed at L2_TABLE_SIZE_REAL strides after the most
 	 * recently allocated one (index j).
 	 */
 	for (i = 0, j = 0; i < l2size; ++i) {
 		if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[i],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 			j = i;
 		} else {
 			kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va +
 			    L2_TABLE_SIZE_REAL * (i - j);
 			kernel_pt_table[i].pv_pa =
 			    kernel_pt_table[i].pv_va - KERNVIRTADDR +
 			    abp->abp_physaddr;
 
 		}
 	}
 	/*
 	 * Allocate a page for the system page mapped to 0x00000000
 	 * or 0xffff0000. This page will just contain the system vectors
 	 * and can be shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU);
 	valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU);
 	valloc_pages(undstack, UND_STACK_SIZE * MAXCPU);
 	valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/*
 	 * Try to map as much as possible of kernel text and data using
 	 * 1MB section mapping and for the rest of initial kernel address
 	 * space use L2 coarse tables.
 	 *
 	 * Link L2 tables for mapping remainder of kernel (modulo 1MB)
 	 * and kernel structures
 	 */
 	l2_start = lastaddr & ~(L1_S_OFFSET);
 	/* The last table (l2size - 1) is kept back for the vector page below. */
 	for (i = 0 ; i < l2size - 1; i++)
 		pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE,
 		    &kernel_pt_table[i]);
 
 	pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE;
 
 	/* Map kernel code and data */
 	pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr,
 	   (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map L1 directory and allocated L2 page tables */
 	pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
 	    L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 
 	pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va,
 	    kernel_pt_table[0].pv_pa,
 	    L2_TABLE_SIZE_REAL * l2size,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 
 	/* Map allocated DPCPU, stacks and msgbuf */
 	/*
 	 * One chunk covers everything allocated from dpcpu up to the current
 	 * freemempos, which relies on those allocations being contiguous.
 	 */
 	pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa,
 	    freemempos - dpcpu.pv_va,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Link and map the vector page */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH,
 	    &kernel_pt_table[l2size - 1]);
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE);
 
 	/* Establish static device mappings. */
 	/* The devmap error is only reported later, once the console is up. */
 	err_devmap = platform_devmap_init();
 	arm_devmap_bootstrap(l1pagetable, NULL);
 	vm_max_kernel_address = platform_lastaddr();
 
 	/* Switch to the freshly built L1 table and flush the TLBs. */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT);
 	pmap_pa = kernel_l1pt.pv_pa;
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2));
 
 	/*
 	 * Now that proper page tables are installed, call cpu_setup() to enable
 	 * instruction and data caches and other chip-specific features.
 	 */
 	cpu_setup();
 
 	/*
 	 * Only after the SOC registers block is mapped we can perform device
 	 * tree fixups, as they may attempt to read parameters from hardware.
 	 */
 	OF_interpret("perform-fixup", 0);
 
 	platform_gpio_init();
 
 	cninit();
 
 	debugf("initarm: console initialized\n");
 	debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp);
 	debugf(" boothowto = 0x%08x\n", boothowto);
 	debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
 	print_kenv();
 
 	/* Record the kernel name from the environment, if one was provided. */
 	env = kern_getenv("kernelname");
 	if (env != NULL) {
 		strlcpy(kernelname, env, sizeof(kernelname));
 		freeenv(env);
 	}
 
 	if (err_devmap != 0)
 		printf("WARNING: could not fully configure devmap, error=%d\n",
 		    err_devmap);
 
 	platform_late_init();
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE);
 
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	pmap_bootstrap(freemempos, &kernel_l1pt);
 	msgbufp = (void *)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_exclude_region(abp->abp_physaddr,
 	    (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 #else /* !ARM_NEW_PMAP */
 /*
  * Early machine-dependent bootstrap for FDT kernels built with
  * ARM_NEW_PMAP.
  *
  * Locates and installs the device tree, collects the physical memory
  * layout (from the EFI memory map when EFI is configured and the loader
  * supplied one, otherwise from the device tree), lets pmap build and
  * switch to the kernel page tables, allocates the vector page, DPCPU area,
  * per-mode stacks and message buffer from the pmap preboot allocator,
  * brings up the console, and finishes pmap and physmem initialization.
  *
  * abp: boot parameters from the startup code; abp->abp_physaddr is used as
  * the physical address corresponding to KERNVIRTADDR.
  *
  * Returns the stack-aligned address of thread0's pcb.
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct mem_region mem_regions[FDT_MEM_REGIONS];
 	vm_paddr_t lastaddr;
 	vm_offset_t dtbp, kernelstack, dpcpu;
 	uint32_t memsize;
 	char *env;
 	void *kmdp;
 	int err_devmap, mem_regions_sz;
 #ifdef EFI
 	struct efi_map_header *efihdr;
 #endif
 
 	/* get last allocated physical address */
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr;
 
 	memsize = 0;
 	set_cpufuncs();
 	cpuinfo_init();
 
 	/*
 	 * Find the dtb passed in by the boot loader.
 	 */
 	kmdp = preload_search_by_type("elf kernel");
 	if (kmdp != NULL)
 		dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t);
 	else
 		dtbp = (vm_offset_t)NULL;
 #if defined(FDT_DTB_STATIC)
 	/*
 	 * In case the device tree blob was not retrieved (from metadata) try
 	 * to use the statically embedded one.
 	 */
 	if (dtbp == (vm_offset_t)NULL)
 		dtbp = (vm_offset_t)&fdt_static_dtb;
 #endif
 
 	if (OF_install(OFW_FDT, 0) == FALSE)
 		panic("Cannot install FDT");
 
 	if (OF_init((void *)dtbp) != 0)
 		panic("OF_init failed with the found device tree");
 
 #ifdef EFI
 	/* Prefer the EFI memory map when the loader passed one. */
 	efihdr = (struct efi_map_header *)preload_search_info(kmdp,
 	    MODINFO_METADATA | MODINFOMD_EFI_MAP);
 	if (efihdr != NULL) {
 		add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz,
 		    &memsize);
 	} else
 #endif
 	{
 		/* Grab physical memory regions information from device tree. */
 		if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,
 		    &memsize) != 0)
 			panic("Cannot get physical memory regions");
 	}
 	arm_physmem_hardware_regions(mem_regions, mem_regions_sz);
 
 	/* Grab reserved memory regions information from device tree. */
 	if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0)
 		arm_physmem_exclude_regions(mem_regions, mem_regions_sz,
 		    EXFLAG_NODUMP | EXFLAG_NOALLOC);
 
 	/*
 	 * Set TEX remapping registers.
 	 * Setup kernel page tables and switch to kernel L1 page table.
 	 */
 	pmap_set_tex();
 	pmap_bootstrap_prepare(lastaddr);
 
 	/*
 	 * Now that proper page tables are installed, call cpu_setup() to enable
 	 * instruction and data caches and other chip-specific features.
 	 */
 	cpu_setup();
 
 	/* Platform-specific initialisation */
 	platform_probe_and_attach();
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/*
 	 * Allocate a page for the system page mapped to 0xffff0000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	systempage = pmap_preboot_get_pages(1);
 
 	/* Map the vector page. */
 	pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1);
 	/* Keep the usable KVA range below the vector page. */
 	if (virtual_end >= ARM_VECTORS_HIGH)
 		virtual_end = ARM_VECTORS_HIGH - 1;
 
 	/* Allocate dynamic per-cpu area. */
 	dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu, 0);
 
 	/* Allocate stacks for all modes */
 	irqstack    = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU);
 	abtstack    = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU);
 	undstack    = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU);
 	kernelstack = pmap_preboot_get_vpages(KSTACK_PAGES * MAXCPU);
 
 	/* Allocate message buffer. */
 	msgbufp = (void *)pmap_preboot_get_vpages(
 	    round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 	mutex_init();
 
 	/* Establish static device mappings. */
 	/* The devmap error is only reported later, once the console is up. */
 	err_devmap = platform_devmap_init();
 	arm_devmap_bootstrap(0, NULL);
 	vm_max_kernel_address = platform_lastaddr();
 
 	/*
 	 * Only after the SOC registers block is mapped we can perform device
 	 * tree fixups, as they may attempt to read parameters from hardware.
 	 */
 	OF_interpret("perform-fixup", 0);
 	platform_gpio_init();
 	cninit();
 
 	debugf("initarm: console initialized\n");
 	debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp);
 	debugf(" boothowto = 0x%08x\n", boothowto);
 	debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp);
 	debugf(" lastaddr1: 0x%08x\n", lastaddr);
 	print_kenv();
 
 	/*
 	 * Record the kernel name from the environment, then hand the string
 	 * back with freeenv(), matching the !ARM_NEW_PMAP initarm().
 	 */
 	env = kern_getenv("kernelname");
 	if (env != NULL) {
 		strlcpy(kernelname, env, sizeof(kernelname));
 		freeenv(env);
 	}
 
 	if (err_devmap != 0)
 		printf("WARNING: could not fully configure devmap, error=%d\n",
 		    err_devmap);
 
 	platform_late_init();
 
 	/*
 	 * Unlike the !ARM_NEW_PMAP initarm(), no explicit cache
 	 * write-back/invalidate is issued at this point.
 	 */
 	/* Set stack for exception handlers */
 	undefined_init();
 	init_proc0(kernelstack);
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	enable_interrupts(PSR_A);
 	pmap_bootstrap(0);
 
 	/*
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  pmap_preboot_get_pages(0) is used here to obtain the end of
 	 * the physical range consumed so far.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_exclude_region(abp->abp_physaddr,
 	    pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	/* Init message buffer. */
 	msgbufinit(msgbufp, msgbufsize);
 	kdb_init();
 	return ((void *)STACKALIGN(thread0.td_pcb));
 }
 
 #endif /* !ARM_NEW_PMAP */
 #endif /* FDT */
Index: head/sys/arm/at91/at91_machdep.c
===================================================================
--- head/sys/arm/at91/at91_machdep.c	(revision 285626)
+++ head/sys/arm/at91/at91_machdep.c	(revision 285627)
@@ -1,698 +1,699 @@
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
+#include "opt_kstack_pages.h"
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/physmem.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 #include <machine/board.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <sys/reboot.h>
 
 #include <arm/at91/at91board.h>
 #include <arm/at91/at91var.h>
 #include <arm/at91/at91soc.h>
 #include <arm/at91/at91_usartreg.h>
 #include <arm/at91/at91rm92reg.h>
 #include <arm/at91/at91sam9g20reg.h>
 #include <arm/at91/at91sam9g45reg.h>
 
 #ifndef MAXCPU
 #define MAXCPU 1
 #endif
 
 /*
  * Indices into kernel_pt_table[] for the bootstrap L2 page tables.
  */
 /* Page table for mapping proc0 zero page */
 #define KERNEL_PT_SYS		0
 #define KERNEL_PT_KERN		1
 #define KERNEL_PT_KERN_NUM	22
 /*
  * L2 table for mapping after kernel.  The expansion is parenthesized so the
  * macro stays a single operand in any surrounding expression.
  */
 #define KERNEL_PT_AFKERNEL	(KERNEL_PT_KERN + KERNEL_PT_KERN_NUM)
 #define	KERNEL_PT_AFKERNEL_NUM	5
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 extern struct bus_space at91_bs_tag;
 
 /*
  * Bootstrap L2 page tables, indexed with the KERNEL_PT_* constants above;
  * the va/pa of each entry is filled in by initarm().
  */
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Static device mappings. */
 /*
  * Table handed to arm_devmap_bootstrap() by initarm().  Entries are
  * positional; judging by the first entry and its comment the fields are
  * virtual address, physical address, size, protection and cache attribute
  * (confirm against struct arm_devmap_entry).  An all-zero entry ends the
  * table.
  */
 const struct arm_devmap_entry at91_devmap[] = {
 	/*
 	 * Map the critical on-board devices. The interrupt vector at
 	 * 0xffff0000 makes it impossible to map them PA == VA, so we map all
 	 * 0xfffxxxxx addresses to 0xdffxxxxx. This covers all critical devices
 	 * on all members of the AT91SAM9 and AT91RM9200 families.
 	 */
 	{
 		0xdff00000,
 		0xfff00000,
 		0x00100000,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* There's a notion that we should do the rest of these lazily. */
 	/*
 	 * We can't just map the OHCI registers VA == PA, because
 	 * AT91xx_xxx_BASE belongs to the userland address space.
 	 * We could just choose a different virtual address, but a better
 	 * solution would probably be to just use pmap_mapdev() to allocate
 	 * KVA, as we don't need the OHCI controller before the vm
 	 * initialization is done. However, the AT91 resource allocation
 	 * system doesn't know how to use pmap_mapdev() yet.
 	 * Care must be taken to ensure PA and VM address do not overlap
 	 * between entries.
 	 */
 	{
 		/*
 		 * Add the ohci controller, and anything else that might be
 		 * on this chip select for a VA/PA mapping.
 		 */
 		/* Internal Memory 1MB  */
 		AT91RM92_OHCI_VA_BASE,
 		AT91RM92_OHCI_BASE,
 		0x00100000,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		/* CompactFlash controller. Portion of EBI CS4 1MB */
 		AT91RM92_CF_VA_BASE,
 		AT91RM92_CF_BASE,
 		0x00100000,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/*
 	 * The next two should be good for the 9260, 9261 and 9G20 since
 	 * addresses mapping is the same.
 	 */
 	{
 		/* Internal Memory 1MB  */
 		AT91SAM9G20_OHCI_VA_BASE,
 		AT91SAM9G20_OHCI_BASE,
 		0x00100000,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		/* EBI CS3 256MB */
 		AT91SAM9G20_NAND_VA_BASE,
 		AT91SAM9G20_NAND_BASE,
 		AT91SAM9G20_NAND_SIZE,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/*
 	 * The next should be good for the 9G45.
 	 */
 	{
 		/* Internal Memory 1MB  */
 		AT91SAM9G45_OHCI_VA_BASE,
 		AT91SAM9G45_OHCI_BASE,
 		0x00100000,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* Terminator. */
 	{ 0, 0, 0, 0, 0, }
 };
 
 #ifdef LINUX_BOOT_ABI
 /*
  * Memory bank description defined elsewhere; at91_ramsize() returns
  * memsize[0] whenever membanks is positive.
  */
 extern int membanks;
 extern int memstart[];
 extern int memsize[];
 #endif
 
 /*
  * Work out the amount of RAM, in bytes.
  *
  * With LINUX_BOOT_ABI and at least one recorded memory bank, the size of
  * the first bank is returned.  Otherwise the SDRAM/DDR controller
  * configuration registers of the detected SoC are read and the size is
  * computed as 2^(cols + rows + banks + bw), where each term is the number
  * of address bits contributed by columns, rows, internal banks and bus
  * width respectively.
  */
 long
 at91_ramsize(void)
 {
 	uint32_t cr, mdr, mr, *SDRAMC;
 	int banks, rows, cols, bw;
 #ifdef LINUX_BOOT_ABI
 	/*
 	 * If we found any ATAGs that were for memory, return the first bank.
 	 */
 	if (membanks > 0)
 		return (memsize[0]);
 #endif
 
 	if (at91_is_rm92()) {
 		SDRAMC = (uint32_t *)(AT91_BASE + AT91RM92_SDRAMC_BASE);
 		/* Register offsets are in bytes; SDRAMC is indexed in words. */
 		cr = SDRAMC[AT91RM92_SDRAMC_CR / 4];
 		mr = SDRAMC[AT91RM92_SDRAMC_MR / 4];
 		banks = (cr & AT91RM92_SDRAMC_CR_NB_4) ? 2 : 1;
 		rows = ((cr & AT91RM92_SDRAMC_CR_NR_MASK) >> 2) + 11;
 		cols = (cr & AT91RM92_SDRAMC_CR_NC_MASK) + 8;
 		bw = (mr & AT91RM92_SDRAMC_MR_DBW_16) ? 1 : 2;
 	} else if (at91_cpu_is(AT91_T_SAM9G45)) {
 		SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G45_DDRSDRC0_BASE);
 		cr = SDRAMC[AT91SAM9G45_DDRSDRC_CR / 4];
 		mdr = SDRAMC[AT91SAM9G45_DDRSDRC_MDR / 4];
 		/* banks stays 0 unless a DDR device type is detected below. */
 		banks = 0;
 		rows = ((cr & AT91SAM9G45_DDRSDRC_CR_NR_MASK) >> 2) + 11;
 		cols = (cr & AT91SAM9G45_DDRSDRC_CR_NC_MASK) + 8;
 		bw = (mdr & AT91SAM9G45_DDRSDRC_MDR_DBW_16) ? 1 : 2;
 
 		/* Fix the calculation for DDR memory */
 		/*
 		 * NOTE(review): the device type is tested with bitwise AND
 		 * rather than compared for equality; if the MDR type field
 		 * is an enumerated value this could also match other types
 		 * -- confirm against the register definition.
 		 */
 		mdr &= AT91SAM9G45_DDRSDRC_MDR_MASK;
 		if (mdr & AT91SAM9G45_DDRSDRC_MDR_LPDDR1 ||
 		    mdr & AT91SAM9G45_DDRSDRC_MDR_DDR2) {
 			/* The cols value is 1 higher for DDR */
 			cols += 1;
 			/* DDR has 4 internal banks. */
 			banks = 2;
 		}
 	} else {
 		/*
 		 * This should be good for the 9260, 9261, 9G20, 9G35 and 9X25
 		 * as addresses and registers are the same.
 		 */
 		SDRAMC = (uint32_t *)(AT91_BASE + AT91SAM9G20_SDRAMC_BASE);
 		cr = SDRAMC[AT91SAM9G20_SDRAMC_CR / 4];
 		/* mr is read here but not used; bw comes from cr on these parts. */
 		mr = SDRAMC[AT91SAM9G20_SDRAMC_MR / 4];
 		banks = (cr & AT91SAM9G20_SDRAMC_CR_NB_4) ? 2 : 1;
 		rows = ((cr & AT91SAM9G20_SDRAMC_CR_NR_MASK) >> 2) + 11;
 		cols = (cr & AT91SAM9G20_SDRAMC_CR_NC_MASK) + 8;
 		bw = (cr & AT91SAM9G20_SDRAMC_CR_DBW_16) ? 1 : 2;
 	}
 
 	/* The shift is done in int, so the exponent must stay below 31. */
 	return (1 << (cols + rows + banks + bw));
 }
 
 /*
  * Printable name for each AT91_T_* SoC type; indexed by soc_info.type when
  * at91_try_id() builds soc_info.name.
  */
 static const char *soc_type_name[] = {
 	[AT91_T_CAP9] = "at91cap9",
 	[AT91_T_RM9200] = "at91rm9200",
 	[AT91_T_SAM9260] = "at91sam9260",
 	[AT91_T_SAM9261] = "at91sam9261",
 	[AT91_T_SAM9263] = "at91sam9263",
 	[AT91_T_SAM9G10] = "at91sam9g10",
 	[AT91_T_SAM9G20] = "at91sam9g20",
 	[AT91_T_SAM9G45] = "at91sam9g45",
 	[AT91_T_SAM9N12] = "at91sam9n12",
 	[AT91_T_SAM9RL] = "at91sam9rl",
 	[AT91_T_SAM9X5] = "at91sam9x5",
 	[AT91_T_NONE] = "UNKNOWN"
 };
 
 /*
  * Printable name for each AT91_ST_* SoC subtype; indexed by
  * soc_info.subtype when at91_try_id() builds soc_info.name (the
  * AT91_ST_NONE entry is never printed there).
  */
 static const char *soc_subtype_name[] = {
 	[AT91_ST_NONE] = "UNKNOWN",
 	[AT91_ST_RM9200_BGA] = "at91rm9200_bga",
 	[AT91_ST_RM9200_PQFP] = "at91rm9200_pqfp",
 	[AT91_ST_SAM9XE] = "at91sam9xe",
 	[AT91_ST_SAM9G45] = "at91sam9g45",
 	[AT91_ST_SAM9M10] = "at91sam9m10",
 	[AT91_ST_SAM9G46] = "at91sam9g46",
 	[AT91_ST_SAM9M11] = "at91sam9m11",
 	[AT91_ST_SAM9G15] = "at91sam9g15",
 	[AT91_ST_SAM9G25] = "at91sam9g25",
 	[AT91_ST_SAM9G35] = "at91sam9g35",
 	[AT91_ST_SAM9X25] = "at91sam9x25",
 	[AT91_ST_SAM9X35] = "at91sam9x35",
 };
 
 struct at91_soc_info soc_info;
 
 /*
  * Read the SoC ID from the CIDR register and try to match it against the
  * values we know.  If we find a good one, we return true.  If not, we
  * return false.  When we find a good one, we also find the subtype
  * and CPU family.
  *
  * dbgu_base is the offset of the DBGU unit from AT91_BASE.  All results
  * are stored in the global soc_info.  On a failed match the function
  * returns before touching the DBGU interrupt mask, soc_info.name,
  * soc_info.soc_data or soc_info.dbgu_base.
  */
 static int
 at91_try_id(uint32_t dbgu_base)
 {
 	uint32_t socid;
 
 	soc_info.cidr = *(volatile uint32_t *)(AT91_BASE + dbgu_base +
 	    DBGU_C1R);
 	/* Ignore the version bits when matching the chip ID. */
 	socid = soc_info.cidr & ~AT91_CPU_VERSION_MASK;
 
 	soc_info.type = AT91_T_NONE;
 	soc_info.subtype = AT91_ST_NONE;
 	soc_info.family = (soc_info.cidr & AT91_CPU_FAMILY_MASK) >> 20;
 	soc_info.exid = *(volatile uint32_t *)(AT91_BASE + dbgu_base +
 	    DBGU_C2R);
 
 	/* First pass: map the chip ID to a SoC type. */
 	switch (socid) {
 	case AT91_CPU_CAP9:
 		soc_info.type = AT91_T_CAP9;
 		break;
 	case AT91_CPU_RM9200:
 		soc_info.type = AT91_T_RM9200;
 		break;
 	case AT91_CPU_SAM9XE128:
 	case AT91_CPU_SAM9XE256:
 	case AT91_CPU_SAM9XE512:
 	case AT91_CPU_SAM9260:
 		/* The SAM9XE parts are handled as a subtype of the SAM9260. */
 		soc_info.type = AT91_T_SAM9260;
 		if (soc_info.family == AT91_FAMILY_SAM9XE)
 			soc_info.subtype = AT91_ST_SAM9XE;
 		break;
 	case AT91_CPU_SAM9261:
 		soc_info.type = AT91_T_SAM9261;
 		break;
 	case AT91_CPU_SAM9263:
 		soc_info.type = AT91_T_SAM9263;
 		break;
 	case AT91_CPU_SAM9G10:
 		soc_info.type = AT91_T_SAM9G10;
 		break;
 	case AT91_CPU_SAM9G20:
 		soc_info.type = AT91_T_SAM9G20;
 		break;
 	case AT91_CPU_SAM9G45:
 		soc_info.type = AT91_T_SAM9G45;
 		break;
 	case AT91_CPU_SAM9N12:
 		soc_info.type = AT91_T_SAM9N12;
 		break;
 	case AT91_CPU_SAM9RL64:
 		soc_info.type = AT91_T_SAM9RL;
 		break;
 	case AT91_CPU_SAM9X5:
 		soc_info.type = AT91_T_SAM9X5;
 		break;
 	default:
 		/* Unknown chip ID: report failure to the caller. */
 		return (0);
 	}
 
 	/*
 	 * Second pass: for the types that have variants, use the extension
 	 * ID to pick the subtype.  An unrecognized exid leaves the subtype
 	 * at AT91_ST_NONE.
 	 */
 	switch (soc_info.type) {
 	case AT91_T_SAM9G45:
 		switch (soc_info.exid) {
 		case AT91_EXID_SAM9G45:
 			soc_info.subtype = AT91_ST_SAM9G45;
 			break;
 		case AT91_EXID_SAM9G46:
 			soc_info.subtype = AT91_ST_SAM9G46;
 			break;
 		case AT91_EXID_SAM9M10:
 			soc_info.subtype = AT91_ST_SAM9M10;
 			break;
 		case AT91_EXID_SAM9M11:
 			soc_info.subtype = AT91_ST_SAM9M11;
 			break;
 		}
 		break;
 	case AT91_T_SAM9X5:
 		switch (soc_info.exid) {
 		case AT91_EXID_SAM9G15:
 			soc_info.subtype = AT91_ST_SAM9G15;
 			break;
 		case AT91_EXID_SAM9G25:
 			soc_info.subtype = AT91_ST_SAM9G25;
 			break;
 		case AT91_EXID_SAM9G35:
 			soc_info.subtype = AT91_ST_SAM9G35;
 			break;
 		case AT91_EXID_SAM9X25:
 			soc_info.subtype = AT91_ST_SAM9X25;
 			break;
 		case AT91_EXID_SAM9X35:
 			soc_info.subtype = AT91_ST_SAM9X35;
 			break;
 		}
 		break;
 	default:
 		break;
 	}
 	/*
 	 * Disable interrupts in the DBGU unit...
 	 */
 	*(volatile uint32_t *)(AT91_BASE + dbgu_base + USART_IDR) = 0xffffffff;
 
 	/*
 	 * Save the name for later...
 	 * The result is "<type>" or "<type> subtype <subtype>".
 	 */
 	snprintf(soc_info.name, sizeof(soc_info.name), "%s%s%s",
 	    soc_type_name[soc_info.type],
 	    soc_info.subtype == AT91_ST_NONE ? "" : " subtype ",
 	    soc_info.subtype == AT91_ST_NONE ? "" :
 	    soc_subtype_name[soc_info.subtype]);
 
         /*
          * try to get the matching CPU support.
          */
         soc_info.soc_data = at91_match_soc(soc_info.type, soc_info.subtype);
         soc_info.dbgu_base = AT91_BASE + dbgu_base;
 
 	return (1);
 }
 
 /*
  * Identify the SoC and fill in soc_info: probe the DBGU at AT91_DBGU0
  * first and fall back to AT91_DBGU1 only if that probe does not match.
  * The outcome of the second probe is not reported to the caller.
  */
 void
 at91_soc_id(void)
 {
 
 	if (at91_try_id(AT91_DBGU0) != 0)
 		return;
 	(void)at91_try_id(AT91_DBGU1);
 }
 
 #ifdef ARM_MANY_BOARD
 /* likely belongs in arm/arm/machdep.c, but since board_init is still at91 only... */
 SET_DECLARE(arm_board_set, const struct arm_board);
 
 /*
  * Placeholder board initialization used with ARM_MANY_BOARD: not yet
  * fully functional, but enough to build the ATMEL config.  Always returns
  * -1, i.e. no RAM size could be determined.
  */
 static long
 board_init(void)
 {
 
 	return (-1L);
 }
 #endif
 
 #ifndef FDT
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr msgbufpv;
 struct pv_addr kernelstack;
 struct pv_addr systempage;
 struct pv_addr irqstack;
 struct pv_addr abtstack;
 struct pv_addr undstack;
 
 /*
  * Early machine-dependent bootstrap for non-FDT AT91 kernels.
  *
  * Carves the L1/L2 page tables, vector page, DPCPU area, per-mode stacks
  * and message buffer out of the memory following the kernel image
  * (tracked by freemempos), builds the bootstrap mappings including the
  * static at91_devmap entries, switches to the new L1 table, identifies
  * the SoC, initializes clocks and console, determines the RAM size via
  * board_init(), and finishes with pmap_bootstrap() and the physmem
  * bookkeeping.
  *
  * abp: boot parameters from the startup code; abp->abp_physaddr is used as
  * the physical address corresponding to KERNVIRTADDR.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb),
  * i.e. an address inside the kernel stack allocated here (presumably the
  * initial SVC stack pointer -- confirm against the caller in locore).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t afterkern;
 	uint32_t memsize;
 	vm_offset_t lastaddr;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/* First page boundary at or after the end of the loaded kernel. */
 	freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK;
 	/* Define a macro to simplify memory allocation */
 	/*
 	 * Both macros expand to several statements and advance freemempos;
 	 * they must only be used as a full statement (or inside braces).
 	 * valloc_pages() additionally derives pv_pa from pv_va using the
 	 * KERNVIRTADDR <-> abp_physaddr offset.
 	 */
 #define valloc_pages(var, np)						\
 	alloc_pages((var).pv_va, (np));					\
 	(var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR);
 
 #define alloc_pages(var, np)						\
 	(var) = freemempos;						\
 	freemempos += (np * PAGE_SIZE);					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/*
 	 * Skip pages until freemempos is a multiple of L1_TABLE_SIZE
 	 * (assuming L1_TABLE_SIZE is a power of two), then place the L1
 	 * table there.
 	 */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos += PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * Lay out NUM_KERNEL_PTS L2 tables.  Fresh memory is allocated only
 	 * when i is a multiple of PAGE_SIZE / L2_TABLE_SIZE_REAL; the other
 	 * tables get addresses computed backwards from the current
 	 * freemempos in L2_TABLE_SIZE_REAL steps.
 	 */
 	for (i = 0; i < NUM_KERNEL_PTS; ++i) {
 		if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[i],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[i].pv_va = freemempos -
 			    (i % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[i].pv_pa =
 			    kernel_pt_table[i].pv_va - KERNVIRTADDR +
 			    abp->abp_physaddr;
 		}
 	}
 	/*
 	 * Allocate a page for the system page mapped to 0x00000000
 	 * or 0xffff0000. This page will just contain the system vectors
 	 * and can be shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU);
 	valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU);
 	valloc_pages(undstack, UND_STACK_SIZE * MAXCPU);
 	valloc_pages(kernelstack, KSTACK_PAGES * MAXCPU);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH,
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	for (i = 0; i < KERNEL_PT_KERN_NUM; i++)
 		pmap_link_l2pt(l1pagetable, KERNBASE + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_KERN + i]);
 	/* Map the kernel image itself. */
 	pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR,
 	   (((uint32_t)lastaddr - KERNBASE) + PAGE_SIZE) & ~(PAGE_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* afterkern: lastaddr + L1_S_SIZE rounded down to an L1_S_SIZE boundary. */
 	afterkern = round_page((lastaddr + L1_S_SIZE) & ~(L1_S_SIZE - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map the DPCPU pages */
 	pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, DPCPU_SIZE,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map the stack pages */
 	/*
 	 * NOTE(review): the stacks were allocated with a "* MAXCPU" factor
 	 * but are mapped here without it; the two agree when MAXCPU is 1,
 	 * which is the fallback value this file defines.
 	 */
 	pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa,
 	    IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa,
 	    ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa,
 	    UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa,
 	    KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map the L1 table and the message buffer. */
 	pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
 	    L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa,
 	    msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* Map every bootstrap L2 table so it stays accessible after the switch. */
 	for (i = 0; i < NUM_KERNEL_PTS; ++i) {
 		pmap_map_chunk(l1pagetable, kernel_pt_table[i].pv_va,
 		    kernel_pt_table[i].pv_pa, L2_TABLE_SIZE,
 		    VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	}
 
 	/* Install the static device mappings, then switch to the new L1 table. */
 	arm_devmap_bootstrap(l1pagetable, at91_devmap);
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2));
 
 	at91_soc_id();
 
 	/*
 	 * Initialize all the clocks, so that the console can work.  We can only
 	 * do this if at91_soc_id() was able to fill in the support data.  Even
 	 * if we can't init the clocks, still try to do a console init so we can
 	 * try to print the error message about missing soc support.  There's a
 	 * chance the printf will work if the bootloader set up the DBGU.
 	 */
 	if (soc_info.soc_data != NULL) {
 		soc_info.soc_data->soc_clock_init();
 		at91_pmc_init_clock();
 	}
 
 	cninit();
 
 	if (soc_info.soc_data == NULL)
 		printf("Warning: No soc support for %s found.\n", soc_info.name);
 
 	/*
 	 * board_init() returns a long; -1 means failure.  The comparison
 	 * below works on the uint32_t copy because -1 is converted to the
 	 * same unsigned value.
 	 */
 	memsize = board_init();
 	if (memsize == -1) {
 		printf("board_init() failed, cannot determine ram size; "
 		    "assuming 16MB\n");
 		memsize = 16 * 1024 * 1024;
 	}
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE);
 	cpu_setup();
 
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	/*
 	 * NOTE(review): this uses KERNEL_PT_KERN_NUM although the tables
 	 * linked after the kernel number KERNEL_PT_AFKERNEL_NUM -- confirm
 	 * which count is intended.
 	 */
 	pmap_curmaxkvaddr = afterkern + L1_S_SIZE * (KERNEL_PT_KERN_NUM - 1);
 	/* Always use the 256MB of KVA we have available between the kernel and devices */
 	vm_max_kernel_address = KERNVIRTADDR + (256 << 20);
 	pmap_bootstrap(freemempos, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel, and all the things we allocated which immediately
 	 * follow the kernel, from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_hardware_region(PHYSADDR, memsize);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 #endif
 
 /*
  * These functions are handled elsewhere, so make them nops here.
  */
 /* Intentionally empty: nothing is done here to start a profiling clock. */
 void
 cpu_startprofclock(void)
 {
 	/* Nothing to do. */
 }
 
 /* Intentionally empty: nothing is done here to stop a profiling clock. */
 void
 cpu_stopprofclock(void)
 {
 	/* Nothing to do. */
 }
 
 /* Intentionally empty: no clock initialization is performed here. */
 void
 cpu_initclocks(void)
 {
 	/* Nothing to do. */
 }
 
 /*
  * Delay by forwarding n to the soc_delay hook of the identified SoC.
  * Returns immediately, without delaying, when no SoC support data has been
  * registered in soc_info.
  */
 void
 DELAY(int n)
 {
 
 	if (soc_info.soc_data == NULL)
 		return;
 	soc_info.soc_data->soc_delay(n);
 }
 
 /*
  * Reset the machine through the soc_reset hook of the identified SoC, if
  * SoC support data is available, and then spin forever; this function
  * does not return.
  */
 void
 cpu_reset(void)
 {
 
 	if (soc_info.soc_data != NULL)
 		soc_info.soc_data->soc_reset();
 	for (;;)
 		continue;
 }
Index: head/sys/arm/cavium/cns11xx/econa_machdep.c
===================================================================
--- head/sys/arm/cavium/cns11xx/econa_machdep.c	(revision 285626)
+++ head/sys/arm/cavium/cns11xx/econa_machdep.c	(revision 285627)
@@ -1,343 +1,345 @@
 /*-
  * Copyright (c) 2009 Yohanes Nugroho <yohanes@gmail.com>
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #define	_ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/physmem.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <sys/reboot.h>
 #include "econa_reg.h"
 
 /*
  * Indices into kernel_pt_table[]: slot 0 is the L2 table for the vector
  * page, followed by KERNEL_PT_KERN_NUM tables for the kernel itself and
  * KERNEL_PT_AFKERNEL_NUM tables for the region after the kernel.
  */
 /* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_SYS		0
 #define	KERNEL_PT_KERN		1
 #define	KERNEL_PT_KERN_NUM	22
 /*
  * L2 table for mapping after kernel.  The sum is parenthesized so the
  * macro expands safely inside larger expressions.
  */
 #define	KERNEL_PT_AFKERNEL	(KERNEL_PT_KERN + KERNEL_PT_KERN_NUM)
 #define	KERNEL_PT_AFKERNEL_NUM	5
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define	NUM_KERNEL_PTS	(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr systempage;
 struct pv_addr msgbufpv;
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 struct pv_addr kernelstack;
 
 /*
  * Static device mappings.
  *
  * Each entry lists, in order: virtual base, physical base, size,
  * protection and PTE cache type.  The table is handed to
  * arm_devmap_bootstrap() from initarm(); the trailing all-zero entry is
  * presumably the end-of-table marker (no length is passed alongside it).
  */
 static const struct arm_devmap_entry econa_devmap[] = {
 	{
 		/*
 		 * This maps DDR SDRAM
 		 */
 		ECONA_SDRAM_BASE, /*virtual*/
 		ECONA_SDRAM_BASE, /*physical*/
 		ECONA_SDRAM_SIZE, /*size*/
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/*
 	 * Map the on-board devices VA == PA so that we can access them
 	 * with the MMU on or off.
 	 */
 	{
 		/*
 		 * This maps the interrupt controller, the UART
 		 * and the timer.
 		 */
 		ECONA_IO_BASE, /*virtual*/
 		ECONA_IO_BASE, /*physical*/
 		ECONA_IO_SIZE, /*size*/
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		/*
 		 * OHCI + EHCI
 		 */
 		ECONA_OHCI_VBASE, /*virtual*/
 		ECONA_OHCI_PBASE, /*physical*/
 		ECONA_USB_SIZE, /*size*/
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		/*
 		 * CFI
 		 */
 		ECONA_CFI_VBASE, /*virtual*/
 		ECONA_CFI_PBASE, /*physical*/
 		ECONA_CFI_SIZE,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* All-zero final entry. */
 	{
 		0,
 		0,
 		0,
 		0,
 		0,
 	}
 };
 
 
 /*
  * Early machine-dependent bootstrap for the Econa (CNS11xx) platform.
  *
  * Starting at the first page boundary past the address returned by
  * parse_boot_param(), this carves out the L1 table, the L2 tables, the
  * vector page, the IRQ/ABT/UND/kernel stacks and the message buffer,
  * builds the kernel mappings in the new L1 table, switches to it, and
  * then registers physical memory with the VM.
  *
  * abp: boot parameters; abp_physaddr is used as the kernel's physical
  *      load address when converting between VA and PA.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb),
  * cast to a pointer.
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	/* Word at 0x4000000C; bits 5:4 are decoded into memsize further down. */
 	volatile uint32_t * ddr = (uint32_t *)0x4000000C;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t afterkern;
 	vm_offset_t freemempos;
 	vm_offset_t lastaddr;
 	uint32_t memsize;
 	int mem_info;
 
 	boothowto = RB_VERBOSE;
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
       	init_param1();
 		
 
 	/* Round lastaddr up to a page boundary; allocations grow upward. */
 	freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK;
 	/*
 	 * Define a macro to simplify memory allocation.
 	 *
 	 * Both macros expand to several statements, so they must only be
 	 * used as complete statements (inside braces under if/else).
 	 * valloc_pages() fills in pv_va from freemempos and derives pv_pa
 	 * from the kernel's physical/virtual offset; alloc_pages() bumps
 	 * freemempos and zeroes the pages.
 	 */
 #define	valloc_pages(var, np)                   \
 	alloc_pages((var).pv_va, (np));         \
 	(var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR);
 
 #define	alloc_pages(var, np)			\
 	(var) = freemempos;		\
 	freemempos += (np * PAGE_SIZE);		\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/* Step forward until freemempos is a multiple of L1_TABLE_SIZE. */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos += PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * PAGE_SIZE / L2_TABLE_SIZE_REAL L2 tables share one allocation:
 	 * the first table of each group triggers a new allocation, the
 	 * others are placed at L2_TABLE_SIZE_REAL steps back from the
 	 * current end of allocated memory.
 	 */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_va = freemempos -
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_pa =
 			    kernel_pt_table[loop].pv_va - KERNVIRTADDR +
 			    abp->abp_physaddr;
 		}
 	}
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH,
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	for (i = 0; i < KERNEL_PT_KERN_NUM; i++)
 		pmap_link_l2pt(l1pagetable, KERNBASE + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_KERN + i]);
 	/* Map KERNBASE..lastaddr (rounded to pages) onto PHYSADDR, cached. */
 	pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR,
 	   (((uint32_t)lastaddr - KERNBASE) + PAGE_SIZE) & ~(PAGE_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* First L1_S_SIZE boundary above lastaddr. */
 	afterkern = round_page((lastaddr + L1_S_SIZE) & ~(L1_S_SIZE - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 
 	/* Map the stack pages */
 	pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa,
 	    IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa,
 	    ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa,
 	    UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa,
 	    KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* The page tables themselves are mapped with PTE_PAGETABLE. */
 	pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
 	    L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa,
 	    msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va,
 		    kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE,
 		    VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	}
 
 	arm_devmap_bootstrap(l1pagetable, econa_devmap);
 	/*
 	 * Install the new L1 table by physical address and flush the TLBs.
 	 * The domain register additionally carries DOMAIN_CLIENT in its low
 	 * bits during the switch; that term is dropped afterwards.
 	 */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 	cninit();
 	/*
 	 * memsize = 8MB << (bits 5:4 of *ddr), i.e. 8, 16, 32 or 64MB.
 	 * NOTE(review): presumably a DDR controller size field -- confirm.
 	 */
 	mem_info = ((*ddr) >> 4) & 0x3;
 	memsize = (8<<mem_info)*1024*1024;
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE);
 
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	pmap_curmaxkvaddr = afterkern + L1_S_SIZE * (KERNEL_PT_KERN_NUM - 1);
 	/* Kernel VA ceiling is set to three times the detected RAM size. */
 	vm_max_kernel_address = KERNVIRTADDR + 3 * memsize;
 	pmap_bootstrap(freemempos, &kernel_l1pt);
 
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel, and all the things we allocated which immediately
 	 * follow the kernel, from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_hardware_region(PHYSADDR, memsize);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 
 	/* Offset USPACE_SVC_STACK_TOP into the kernel stack, less a pcb. */
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
Index: head/sys/arm/samsung/s3c2xx0/s3c24x0_machdep.c
===================================================================
--- head/sys/arm/samsung/s3c2xx0/s3c24x0_machdep.c	(revision 285626)
+++ head/sys/arm/samsung/s3c2xx0/s3c24x0_machdep.c	(revision 285627)
@@ -1,404 +1,405 @@
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include "opt_ddb.h"
+#include "opt_kstack_pages.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/physmem.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <sys/reboot.h>
 
 #include <arm/samsung/s3c2xx0/s3c24x0var.h>
 #include <arm/samsung/s3c2xx0/s3c2410reg.h>
 #include <arm/samsung/s3c2xx0/s3c2xx0board.h>
 
 /*
  * Indices into kernel_pt_table[]: slot 0 is the L2 table for the vector
  * page, followed by KERNEL_PT_KERN_NUM tables for the kernel itself and
  * KERNEL_PT_AFKERNEL_NUM tables for the region after the kernel.
  */
 /* Page table for mapping proc0 zero page */
 #define KERNEL_PT_SYS		0
 #define KERNEL_PT_KERN		1
 #define KERNEL_PT_KERN_NUM	44
 /*
  * L2 table for mapping after kernel.  The sum is parenthesized so the
  * macro expands safely inside larger expressions.
  */
 #define KERNEL_PT_AFKERNEL	(KERNEL_PT_KERN + KERNEL_PT_KERN_NUM)
 #define	KERNEL_PT_AFKERNEL_NUM	5
 
 /*
  * this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4)
  *
  * NOTE(review): with the values above this evaluates to 50, which is not
  * a multiple of 4 -- confirm whether that is intended.
  */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 extern int s3c2410_pclk;
 
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr systempage;
 struct pv_addr msgbufpv;
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 struct pv_addr kernelstack;
 
 /*
  * Helpers used only by the table below (both are #undef'd after it):
  * _A(a) clears the L1_S_OFFSET bits of an address, and _S(s) rounds a
  * size up to a multiple of L1_S_SIZE.
  */
 #define	_A(a)	((a) & ~L1_S_OFFSET)
 #define	_S(s)	(((s) + L1_S_SIZE - 1) & ~(L1_S_SIZE-1))
 
 /*
  * Static device mappings.
  *
  * The table is handed to arm_devmap_bootstrap() from initarm().  Every
  * entry is READ|WRITE, PTE_DEVICE; the trailing all-zero entry is
  * presumably the end-of-table marker (no length is passed alongside it).
  */
 static const struct arm_devmap_entry s3c24x0_devmap[] = {
 	/*
 	 * Map the devices we need early on.
 	 */
 	{
 		_A(S3C24X0_CLKMAN_BASE),
 		_A(S3C24X0_CLKMAN_PA_BASE),
 		_S(S3C24X0_CLKMAN_SIZE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		_A(S3C24X0_GPIO_BASE),
 		_A(S3C24X0_GPIO_PA_BASE),
 		_S(S3C2410_GPIO_SIZE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		_A(S3C24X0_INTCTL_BASE),
 		_A(S3C24X0_INTCTL_PA_BASE),
 		_S(S3C24X0_INTCTL_SIZE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		_A(S3C24X0_TIMER_BASE),
 		_A(S3C24X0_TIMER_PA_BASE),
 		_S(S3C24X0_TIMER_SIZE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		/* Size spans from UART0's base up to the base of UART index 3. */
 		_A(S3C24X0_UART0_BASE),
 		_A(S3C24X0_UART0_PA_BASE),
 		_S(S3C24X0_UART_PA_BASE(3) - S3C24X0_UART0_PA_BASE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{
 		_A(S3C24X0_WDT_BASE),
 		_A(S3C24X0_WDT_PA_BASE),
 		_S(S3C24X0_WDT_SIZE),
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* All-zero final entry. */
 	{
 		0,
 		0,
 		0,
 		0,
 		0,
 	}
 };
 
 #undef	_A
 #undef	_S
 
 #define	ioreg_read32(a)  	(*(volatile uint32_t *)(a))
 #define	ioreg_write32(a,v)	(*(volatile uint32_t *)(a)=(v))
 
 struct arm32_dma_range s3c24x0_range = {
 	.dr_sysbase = 0,
 	.dr_busbase = 0,
 	.dr_len = 0,
 };
 
 /*
  * Return the DMA range descriptor for this platform.  While dr_len is
  * still zero the descriptor is filled in from dump_avail[0] and
  * dump_avail[1], using the same base for system and bus addresses.
  */
 struct arm32_dma_range *
 bus_dma_get_range(void)
 {
 
 	/* Non-zero length: the descriptor has been filled in already. */
 	if (s3c24x0_range.dr_len != 0)
 		return (&s3c24x0_range);
 
 	s3c24x0_range.dr_len = dump_avail[1] - dump_avail[0];
 	s3c24x0_range.dr_busbase = dump_avail[0];
 	s3c24x0_range.dr_sysbase = dump_avail[0];
 	return (&s3c24x0_range);
 }
 
 /*
  * Always returns 1.
  * NOTE(review): presumably the number of entries bus_dma_get_range()
  * provides -- confirm against the bus_dma consumer.
  */
 int
 bus_dma_get_range_nb(void)
 {
 	return (1);
 }
 
 /*
  * Early machine-dependent bootstrap for S3C24x0 boards.
  *
  * Starting at the first page boundary past the address returned by
  * parse_boot_param(), this carves out the L1 table, the L2 tables, the
  * vector page, the IRQ/ABT/UND/kernel stacks and the message buffer,
  * builds the kernel mappings in the new L1 table, switches to it, masks
  * peripheral interrupts, obtains the memory size from board_init(), and
  * then registers physical memory with the VM.
  *
  * abp: boot parameters; abp_physaddr is used as the kernel's physical
  *      load address when converting between VA and PA.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb),
  * cast to a pointer.
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr	kernel_l1pt;
 	int loop;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t afterkern;
 	vm_offset_t lastaddr;
 
 	int i;
 	uint32_t memsize;
 
 	boothowto = 0;  /* Likely not needed */
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	/*
 	 * NOTE(review): i is re-initialized by the for loops below before it
 	 * is ever read, so this store looks redundant.
 	 */
 	i = 0;
 	set_cpufuncs();
 	/* Point the CPU sleep hook at the S3C24x0 routine. */
 	cpufuncs.cf_sleep = s3c24x0_sleep;
 
 	pcpu0_init();
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/* NOTE(review): KERNEL_TEXT_BASE is not referenced in this function. */
 #define KERNEL_TEXT_BASE (KERNBASE)
 	/* Round lastaddr up to a page boundary; allocations grow upward. */
 	freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK;
 	/*
 	 * Define a macro to simplify memory allocation.
 	 *
 	 * Both macros expand to several statements, so they must only be
 	 * used as complete statements (inside braces under if/else).
 	 * valloc_pages() fills in pv_va from freemempos and derives pv_pa
 	 * from the kernel's physical/virtual offset; alloc_pages() bumps
 	 * freemempos and zeroes the pages.
 	 */
 #define valloc_pages(var, np)			\
 	alloc_pages((var).pv_va, (np));		\
 	(var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR);
 
 #define alloc_pages(var, np)			\
 	(var) = freemempos;			\
 	freemempos += (np * PAGE_SIZE);		\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/* Step forward until freemempos is a multiple of L1_TABLE_SIZE. */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos += PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * PAGE_SIZE / L2_TABLE_SIZE_REAL L2 tables share one allocation:
 	 * the first table of each group triggers a new allocation, the
 	 * others are placed at L2_TABLE_SIZE_REAL steps back from the
 	 * current end of allocated memory.
 	 */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_va = freemempos -
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_pa =
 			    kernel_pt_table[loop].pv_va - KERNVIRTADDR +
 			    abp->abp_physaddr;
 		}
 	}
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH,
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	for (i = 0; i < KERNEL_PT_KERN_NUM; i++)
 		pmap_link_l2pt(l1pagetable, KERNBASE + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_KERN + i]);
 	/* Map KERNBASE..lastaddr (rounded to pages) onto PHYSADDR, cached. */
 	pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR,
 	   (((uint32_t)(lastaddr) - KERNBASE) + PAGE_SIZE) & ~(PAGE_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* First L1_S_SIZE boundary above lastaddr. */
 	afterkern = round_page((lastaddr + L1_S_SIZE) & ~(L1_S_SIZE
 	    - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * L1_S_SIZE,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* Map the stack pages */
 	pmap_map_chunk(l1pagetable, irqstack.pv_va, irqstack.pv_pa,
 	    IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, abtstack.pv_va, abtstack.pv_pa,
 	    ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, undstack.pv_va, undstack.pv_pa,
 	    UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, kernelstack.pv_va, kernelstack.pv_pa,
 	    KSTACK_PAGES * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	/* The page tables themselves are mapped with PTE_PAGETABLE. */
 	pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
 	    L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, msgbufpv.pv_va, msgbufpv.pv_pa,
 	    msgbufsize, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		pmap_map_chunk(l1pagetable, kernel_pt_table[loop].pv_va,
 		    kernel_pt_table[loop].pv_pa, L2_TABLE_SIZE,
 		    VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	}
 
 	arm_devmap_bootstrap(l1pagetable, s3c24x0_devmap);
 
 	/*
 	 * Install the new L1 table by physical address and flush the TLBs.
 	 * The domain register additionally carries DOMAIN_CLIENT in its low
 	 * bits during the switch; that term is dropped afterwards.
 	 */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 
 	cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE);
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/* Disable all peripheral interrupts */
 	ioreg_write32(S3C24X0_INTCTL_BASE + INTCTL_INTMSK, ~0);
 	/* The value returned here is later registered as the RAM size. */
 	memsize = board_init();
 	/*
 	 * Find pclk for uart.  The upper 16 bits of GSTATUS1 choose the
 	 * clock routine; any other value leaves s3c2410_pclk untouched.
 	 */
 	switch(ioreg_read32(S3C24X0_GPIO_BASE + GPIO_GSTATUS1) >> 16) {
 	case 0x3241:
 		s3c2410_clock_freq2(S3C24X0_CLKMAN_BASE, NULL, NULL,
 		    &s3c2410_pclk);
 		break;
 	case 0x3244:
 		s3c2440_clock_freq2(S3C24X0_CLKMAN_BASE, NULL, NULL,
 		    &s3c2410_pclk);
 		break;
 	}
 	cninit();
 
 	undefined_init();
 	
 	init_proc0(kernelstack.pv_va);			
 	
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	pmap_curmaxkvaddr = afterkern + 0x100000 * (KERNEL_PT_KERN_NUM - 1);
 	/* Kernel VA ceiling is set to three times the reported RAM size. */
 	vm_max_kernel_address = KERNVIRTADDR + 3 * memsize;
 	pmap_bootstrap(freemempos, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel, and all the things we allocated which immediately
 	 * follow the kernel, from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_hardware_region(PHYSADDR, memsize);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 
 	/* Offset USPACE_SVC_STACK_TOP into the kernel stack, less a pcb. */
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
Index: head/sys/arm/xscale/i80321/ep80219_machdep.c
===================================================================
--- head/sys/arm/xscale/i80321/ep80219_machdep.c	(revision 285626)
+++ head/sys/arm/xscale/i80321/ep80219_machdep.c	(revision 285627)
@@ -1,400 +1,402 @@
 /*	$NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $	*/
 
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <machine/physmem.h>
 #include <sys/reboot.h>
 
 #include <arm/xscale/i80321/i80321reg.h>
 #include <arm/xscale/i80321/i80321var.h>
 #include <arm/xscale/i80321/iq80321reg.h>
 #include <arm/xscale/i80321/obiovar.h>
 
 #define	KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_IOPXS		1
 #define	KERNEL_PT_BEFOREKERN	2
 #define	KERNEL_PT_AFKERNEL	3	/* L2 table for mapping after kernel */
 #define	KERNEL_PT_AFKERNEL_NUM	9
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr systempage;
 struct pv_addr msgbufpv;
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 struct pv_addr kernelstack;
 struct pv_addr minidataclean;
 
 
 /* #define IQ80321_OBIO_BASE 0xfe800000UL */
 /* #define IQ80321_OBIO_SIZE 0x00100000UL */
 
 /*
  * Static device mappings.
  *
  * The table is handed to arm_devmap_bootstrap() from initarm().  Every
  * entry is READ|WRITE, PTE_DEVICE; the trailing all-zero entry is
  * presumably the end-of-table marker (no length is passed alongside it).
  */
 static const struct arm_devmap_entry ep80219_devmap[] = {
 	/*
 	 * Map the on-board devices VA == PA so that we can access them
 	 * with the MMU on or off.
 	 */
 	{
 		IQ80321_OBIO_BASE,
 		IQ80321_OBIO_BASE,
 		IQ80321_OBIO_SIZE,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* Outbound translation I/O window 0, placed at IQ80321_IOW_VBASE. */
 	{
 		IQ80321_IOW_VBASE,
 		VERDE_OUT_XLATE_IO_WIN0_BASE,
 		VERDE_OUT_XLATE_IO_WIN_SIZE,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},	
 	/* VERDE_PMMR region, placed at IQ80321_80321_VBASE. */
 	{
 		IQ80321_80321_VBASE,
 		VERDE_PMMR_BASE,
 		VERDE_PMMR_SIZE,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	/* All-zero final entry. */
 	{
 		0,
 		0,
 		0,
 		0,
 		0,
 	}
 };
 
 extern vm_offset_t xscale_cache_clean_addr;
 
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t freemem_pt;
 	vm_offset_t afterkern;
 	vm_offset_t freemem_after;
 	vm_offset_t lastaddr;
 	uint32_t memsize, memstart;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	freemempos = 0xa0200000;
 	/* Define a macro to simplify memory allocation */
 #define	valloc_pages(var, np)			\
 	alloc_pages((var).pv_pa, (np));		\
 	(var).pv_va = (var).pv_pa + 0x20000000;
 
 #define alloc_pages(var, np)			\
 	freemempos -= (np * PAGE_SIZE);		\
 	(var) = freemempos;		\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos -= PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_pa = freemempos +
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_va =
 			    kernel_pt_table[loop].pv_pa + 0x20000000;
 		}
 	}
 	freemem_pt = freemempos;
 	freemempos = 0xa0100000;
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	alloc_pages(minidataclean.pv_pa, 1);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	/*
 	 * Allocate memory for the l1 and l2 page tables. The scheme to avoid
 	 * wasting memory by allocating the l1pt on the first 16k memory was
 	 * taken from NetBSD rpc_machdep.c. NKPT should be greater than 12 for
 	 * this to work (which is supposed to be the case).
 	 */
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	pmap_link_l2pt(l1pagetable, IQ80321_IOPXS_VBASE,
 	    &kernel_pt_table[KERNEL_PT_IOPXS]);
 	pmap_link_l2pt(l1pagetable, KERNBASE,
 	    &kernel_pt_table[KERNEL_PT_BEFOREKERN]);
 	pmap_map_chunk(l1pagetable, KERNBASE, IQ80321_SDRAM_START, 0x100000,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, IQ80321_SDRAM_START + 0x100000,
 	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x200000, IQ80321_SDRAM_START + 0x200000,
 	    (((uint32_t)(lastaddr) - KERNBASE - 0x200000) + L1_S_SIZE) & ~(L1_S_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	freemem_after = ((int)lastaddr + PAGE_SIZE) & ~(PAGE_SIZE - 1);
 	afterkern = round_page(((vm_offset_t)lastaddr + L1_S_SIZE) & ~(L1_S_SIZE
 	    - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 	pmap_map_entry(l1pagetable, afterkern, minidataclean.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	
 
 	/* Map the Mini-Data cache clean area. */
 	xscale_setup_minidata(l1pagetable, afterkern,
 	    minidataclean.pv_pa);
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	arm_devmap_bootstrap(l1pagetable, ep80219_devmap);
 	/*
 	 * Give the XScale global cache clean code an appropriately
 	 * sized chunk of unmapped VA space starting at 0xff000000
 	 * (our device mappings end before this address).
 	 */
 	xscale_cache_clean_addr = 0xff000000U;
 
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/*
 	 * Fetch the SDRAM start/size from the i80321 SDRAM configuration
 	 * registers.
 	 */
 	i80321_calibrate_delay();
 	i80321_sdram_bounds(obio_bs_tag, IQ80321_80321_VBASE + VERDE_MCU_BASE,
 	    &memstart, &memsize);
 	physmem = memsize / PAGE_SIZE;
 	cninit();
 
 	undefined_init();
 				
 	init_proc0(kernelstack.pv_va);
 	
 	/* Enable MMU, I-cache, D-cache, write buffer. */
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	vm_max_kernel_address = 0xe0000000;
 	pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 	
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_hardware_region(IQ80321_SDRAM_START, memsize);
 	arm_physmem_exclude_region(freemem_pt, KERNPHYSADDR -
 	    freemem_pt, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(freemempos, KERNPHYSADDR - 0x100000 -
 	    freemempos, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 
 /*
  * Pick the ICU interrupt for a PCI device on the EP80219.
  *
  * pcib: PCI bridge device; its softc provides the bus-space tag/handle
  *       used to read ATU_PCIXSR.
  * dev:  device being routed; its bus, slot and function are looked up.
  * pin:  interrupt pin of the device.  It is only used in the diagnostic,
  *       where 1..4 are shown as 'A'..'D' (the usual PCI INTA#..INTD#
  *       numbering).
  *
  * Returns ICU_INT_XINT(0..3) for slots 1..4 when the device sits on the
  * bus number reported by ATU_PCIXSR (a reported 0xff is treated as bus 0).
  * Otherwise prints a "No mapping" line and returns 0.
  */
 extern int
 machdep_pci_route_interrupt(device_t pcib, device_t dev, int pin)
 {
 	struct i80321_pci_softc *sc = device_get_softc(pcib);
 	uint32_t busno;
 	int bus;
 	int device;
 	int func;
 	int irq;
 
 	bus = pci_get_bus(dev);
 	device = pci_get_slot(dev);
 	func = pci_get_function(dev);
 
 	busno = bus_space_read_4(sc->sc_st, sc->sc_atu_sh, ATU_PCIXSR);
 	busno = PCIXSR_BUSNO(busno);
 	if (busno == 0xff)
 		busno = 0;
 	if ((uint32_t)bus != busno)
 		goto no_mapping;
 
 	switch (device) {
 		/* EP80219 PCI */
 	case 1: /* Ethernet i82555 10/100 */
 		irq = ICU_INT_XINT(0);
 		break;
 	case 2: /* UART */
 		irq = ICU_INT_XINT(1);
 		break;
 	case 3:
 		/*
 		 * The S-ATA chips are behind the bridge, and all of
 		 * the S-ATA interrupts are wired together.
 		 */
 		irq = ICU_INT_XINT(2);
 		break;
 	case 4: /* MINI-PIC_INT */
 		irq = ICU_INT_XINT(3);
 		break;
 	default:
 		goto no_mapping;
 	}
 	printf("Device %d routed to irq %d\n", device, irq);
 	return (irq);
 
 no_mapping:
 	/*
 	 * Print the pin as a letter; passing the raw pin number to %c
 	 * would emit an unprintable control character.
 	 */
 	printf("No mapping for %d/%d/%d/%c\n", bus, device, func,
 	    (pin >= 1 && pin <= 4) ? '@' + pin : '?');
 	return (0);
 }
Index: head/sys/arm/xscale/i80321/iq31244_machdep.c
===================================================================
--- head/sys/arm/xscale/i80321/iq31244_machdep.c	(revision 285626)
+++ head/sys/arm/xscale/i80321/iq31244_machdep.c	(revision 285627)
@@ -1,414 +1,416 @@
 /*	$NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $	*/
 
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 #include <machine/physmem.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <sys/reboot.h>
 
 #include <arm/xscale/i80321/i80321reg.h>
 #include <arm/xscale/i80321/i80321var.h>
 #include <arm/xscale/i80321/iq80321reg.h>
 #include <arm/xscale/i80321/obiovar.h>
 
 /*
  * Indices into kernel_pt_table[] for the L2 page tables that initarm()
  * sets up at boot.  KERNEL_PT_AFKERNEL is the first of
  * KERNEL_PT_AFKERNEL_NUM consecutive tables linked after the kernel.
  */
 #define	KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_IOPXS		1
 #define	KERNEL_PT_BEFOREKERN	2
 #define	KERNEL_PT_AFKERNEL	3	/* L2 table for mapping after kernel */
 #define	KERNEL_PT_AFKERNEL_NUM	9
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 /* Physical/virtual address pairs of the boot-time L2 tables. */
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr systempage;	/* vector page, mapped at ARM_VECTORS_HIGH */
 struct pv_addr msgbufpv;	/* kernel message buffer */
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 struct pv_addr kernelstack;	/* stack passed to init_proc0() */
 struct pv_addr minidataclean;	/* only pv_pa is filled in by initarm() */
 
 #define IQ80321_OBIO_BASE 0xfe800000UL
 #define IQ80321_OBIO_SIZE 0x00100000UL
 
 /*
  * Static device mappings, passed to arm_devmap_bootstrap() by initarm().
  * Going by the initializer values, each row is: virtual base, physical
  * base, size, protection, cache attribute.  An all-zero row ends the table.
  */
 static const struct arm_devmap_entry iq80321_devmap[] = {
 	/*
 	 * On-board devices: mapped VA == PA so that they can be accessed
 	 * with the MMU on or off.
 	 */
 	{ IQ80321_OBIO_BASE, IQ80321_OBIO_BASE, IQ80321_OBIO_SIZE,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	{ IQ80321_IOW_VBASE, VERDE_OUT_XLATE_IO_WIN0_BASE,
 	  VERDE_OUT_XLATE_IO_WIN_SIZE,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	{ IQ80321_80321_VBASE, VERDE_PMMR_BASE, VERDE_PMMR_SIZE,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	/* Terminator. */
 	{ 0, 0, 0, 0, 0 }
 };
 
 /*
  * Physical address initarm() uses as the start of SDRAM, both when mapping
  * KERNBASE and when registering the hardware memory region.
  */
 #define SDRAM_START 0xa0000000
 
 /* Defined elsewhere; initarm() sets it to 0xff000000U. */
 extern vm_offset_t xscale_cache_clean_addr;
 
 /*
  * Early machine-dependent bootstrap for this board.
  *
  * In the order the code below performs them: parse the boot parameters,
  * carve the L1 table, the kernel L2 tables, the vector page, the dynamic
  * per-CPU area, the mode stacks and the message buffer out of fixed
  * physical addresses below 0xa0200000, build the kernel mappings, install
  * the new translation table, size SDRAM, bootstrap pmap and register the
  * physical memory regions.
  *
  * abp: boot parameters; passed to parse_boot_param() and read for
  *      abp_physaddr.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t freemem_pt;
 	vm_offset_t afterkern;
 	vm_offset_t freemem_after;
 	vm_offset_t lastaddr;
 	uint32_t memsize, memstart;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/*
 	 * Allocation cursor: the macros below hand out pages downward
 	 * from this physical address.
 	 */
 	freemempos = 0xa0200000;
 	/*
 	 * Define a macro to simplify memory allocation
 	 *
 	 * valloc_pages() fills in both pv_pa and pv_va (pv_pa + 0x20000000);
 	 * alloc_pages() moves freemempos down by np pages, stores the new
 	 * address in var and zeroes the pages.
 	 *
 	 * NOTE(review): both macros expand to several statements and
 	 * alloc_pages() uses np unparenthesized in its first statement, so
 	 * they are only safe as stand-alone statements with the simple
 	 * arguments used in this function.
 	 */
 #define	valloc_pages(var, np)			\
 	alloc_pages((var).pv_pa, (np));		\
 	(var).pv_va = (var).pv_pa + 0x20000000;
 
 #define alloc_pages(var, np)			\
 	freemempos -= (np * PAGE_SIZE);		\
 	(var) = freemempos;		\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/*
 	 * Step the cursor down a page at a time until the L1 table
 	 * allocated next will start on an L1_TABLE_SIZE boundary.
 	 */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos -= PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * Every (PAGE_SIZE / L2_TABLE_SIZE_REAL)-th L2 table triggers a
 	 * fresh allocation; the tables in between are placed at successive
 	 * L2_TABLE_SIZE_REAL offsets from freemempos, i.e. inside the most
 	 * recent allocation.
 	 */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_pa = freemempos +
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_va =
 			    kernel_pt_table[loop].pv_pa + 0x20000000;
 		}
 	}
 	/*
 	 * Remember the lowest address used by the page tables, then restart
 	 * the cursor at 0xa0100000 for the remaining boot-time pages.
 	 */
 	freemem_pt = freemempos;
 	freemempos = 0xa0100000;
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 *
 	 * NOTE(review): below, this page is mapped at ARM_VECTORS_HIGH,
 	 * not at virtual address 0.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	/* Only the physical address is recorded for the mini-data page. */
 	alloc_pages(minidataclean.pv_pa, 1);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	/*
 	 * Allocate memory for the l1 and l2 page tables. The scheme to avoid
 	 * wasting memory by allocating the l1pt on the first 16k memory was
 	 * taken from NetBSD rpc_machdep.c. NKPT should be greater than 12 for
 	 * this to work (which is supposed to be the case).
 	 *
 	 * NOTE(review): the allocation this comment describes is done
 	 * above (kernel_l1pt / kernel_pt_table); nothing is allocated here.
 	 */
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	pmap_link_l2pt(l1pagetable, IQ80321_IOPXS_VBASE,
 	    &kernel_pt_table[KERNEL_PT_IOPXS]);
 	pmap_link_l2pt(l1pagetable, KERNBASE,
 	    &kernel_pt_table[KERNEL_PT_BEFOREKERN]);
 	/*
 	 * Map SDRAM at KERNBASE in three pieces: the first 1MB with
 	 * PTE_CACHE, the second 1MB (where the page tables were allocated
 	 * above) with PTE_PAGETABLE, and the rest, extended to the next
 	 * L1_S_SIZE boundary past lastaddr, with PTE_CACHE.
 	 */
 	pmap_map_chunk(l1pagetable, KERNBASE, SDRAM_START, 0x100000,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, SDRAM_START + 0x100000,
 	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x200000, SDRAM_START + 0x200000,
 	    (((uint32_t)(lastaddr) - KERNBASE - 0x200000) + L1_S_SIZE) & ~(L1_S_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* NOTE(review): freemem_after is not read again in this function. */
 	freemem_after = ((int)lastaddr + PAGE_SIZE) & ~(PAGE_SIZE - 1);
 	/* First L1_S_SIZE boundary past lastaddr; the AFKERNEL tables start here. */
 	afterkern = round_page(((vm_offset_t)lastaddr + L1_S_SIZE) & ~(L1_S_SIZE
 	    - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 	pmap_map_entry(l1pagetable, afterkern, minidataclean.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	
 
 	/* Map the Mini-Data cache clean area. */
 	xscale_setup_minidata(l1pagetable, afterkern,
 	    minidataclean.pv_pa);
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	arm_devmap_bootstrap(l1pagetable, iq80321_devmap);
 	/*
 	 * Give the XScale global cache clean code an appropriately
 	 * sized chunk of unmapped VA space starting at 0xff000000
 	 * (our device mappings end before this address).
 	 */
 	xscale_cache_clean_addr = 0xff000000U;
 
 	/*
 	 * Install the new L1 table.  Domain 0 is set to client alongside
 	 * the kernel domain while the table is switched and the TLB is
 	 * flushed; afterwards only the kernel domain remains client.
 	 */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/*
 	 * Fetch the SDRAM start/size from the i80321 SDRAM configuration
 	 * registers.
 	 */
 	i80321_calibrate_delay();
 	i80321_sdram_bounds(obio_bs_tag, IQ80321_80321_VBASE + VERDE_MCU_BASE,
 	    &memstart, &memsize);
 	physmem = memsize / PAGE_SIZE;
 	cninit();
 
 	undefined_init();
 				
 	init_proc0(kernelstack.pv_va);
 	
 	/* Enable MMU, I-cache, D-cache, write buffer. */
 	/* NOTE(review): the comment above does not describe the calls that follow. */
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 	/* pmap_bootstrap() is given the page after afterkern as its first argument. */
 	pmap_curmaxkvaddr = afterkern + PAGE_SIZE;
 	vm_max_kernel_address = 0xe0000000;
 	pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 	
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 *
 	 * The three exclusions start at freemem_pt (page tables), at
 	 * freemempos (the other boot-time pages) and at abp->abp_physaddr
 	 * (the kernel itself).
 	 */
 	arm_physmem_hardware_region(SDRAM_START, memsize);
 	arm_physmem_exclude_region(freemem_pt, KERNPHYSADDR -
 	    freemem_pt, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(freemempos, KERNPHYSADDR - 0x100000 -
 	    freemempos, EXFLAG_NOALLOC);			
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 
 /*
  * Pick the ICU interrupt for a PCI device on the IQ31244 / IQ80321.
  *
  * pcib: PCI bridge device; its softc provides the bus-space tag/handle
  *       used to read ATU_PCIXSR.
  * dev:  device being routed; its bus, slot and function are looked up.
  * pin:  interrupt pin of the device (1 = INTA ... 4 = INTD); some slots
  *       only have a mapping for particular pins.
  *
  * Returns an ICU_INT_XINT() number when the device sits on the bus number
  * reported by ATU_PCIXSR (a reported 0xff is treated as bus 0) and the
  * slot/pin pair is known.  Otherwise prints a "No mapping" line and
  * returns 0.
  */
 extern int
 machdep_pci_route_interrupt(device_t pcib, device_t dev, int pin)
 {
 	struct i80321_pci_softc *sc = device_get_softc(pcib);
 	uint32_t busno;
 	int bus;
 	int device;
 	int func;
 
 	bus = pci_get_bus(dev);
 	device = pci_get_slot(dev);
 	func = pci_get_function(dev);
 
 	busno = bus_space_read_4(sc->sc_st, sc->sc_atu_sh, ATU_PCIXSR);
 	busno = PCIXSR_BUSNO(busno);
 	if (busno == 0xff)
 		busno = 0;
 	if ((uint32_t)bus != busno)
 		goto no_mapping;
 
 	switch (device) {
 		/* IQ31244 PCI */
 	case 1: /* PCIX-PCIX bridge */
 		/*
 		 * The S-ATA chips are behind the bridge, and all of
 		 * the S-ATA interrupts are wired together.
 		 */
 		return (ICU_INT_XINT(2));
 	case 2: /* PCI slot */
 		/* All pins are wired together. */
 		return (ICU_INT_XINT(3));
 	case 3: /* i82546 dual Gig-E */
 		if (pin == 1 || pin == 2)
 			return (ICU_INT_XINT(0));
 		break;
 		/* IQ80321 PCI */
 	case 4: /* i82544 Gig-E */
 	case 8: /*
 		 * Apparently you can set the device for the ethernet adapter
 		 * to 8 with a jumper, so handle that as well
 		 */
 		if (pin == 1)
 			return (ICU_INT_XINT(0));
 		break;
 	case 6: /* S-PCI-X slot */
 		if (pin == 1)
 			return (ICU_INT_XINT(2));
 		if (pin == 2)
 			return (ICU_INT_XINT(3));
 		break;
 	default:
 		break;
 	}
 
 no_mapping:
 	/*
 	 * Print the pin as a letter; passing the raw pin number to %c
 	 * would emit an unprintable control character.
 	 */
 	printf("No mapping for %d/%d/%d/%c\n", bus, device, func,
 	    (pin >= 1 && pin <= 4) ? '@' + pin : '?');
 	return (0);
 }
Index: head/sys/arm/xscale/i8134x/crb_machdep.c
===================================================================
--- head/sys/arm/xscale/i8134x/crb_machdep.c	(revision 285626)
+++ head/sys/arm/xscale/i8134x/crb_machdep.c	(revision 285627)
@@ -1,338 +1,340 @@
 /*	$NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $	*/
 
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <machine/physmem.h>
 #include <sys/reboot.h>
 
 
 #include <arm/xscale/i80321/i80321var.h> /* For i80321_calibrate_delay() */
 
 #include <arm/xscale/i8134x/i81342reg.h>
 #include <arm/xscale/i8134x/i81342var.h>
 #include <arm/xscale/i8134x/obiovar.h>
 
 
 /*
  * Indices into kernel_pt_table[] for the L2 page tables that initarm()
  * sets up at boot.  KERNEL_PT_AFKERNEL is the first of
  * KERNEL_PT_AFKERNEL_NUM consecutive tables linked after the kernel.
  */
 #define KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_IOPXS		1
 #define KERNEL_PT_BEFOREKERN	2
 #define KERNEL_PT_AFKERNEL	3	/* L2 table for mapping after kernel */
 #define	KERNEL_PT_AFKERNEL_NUM	9
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 /* Physical/virtual address pairs of the boot-time L2 tables. */
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /* Physical and virtual addresses for some global pages */
 
 struct pv_addr systempage;	/* vector page, mapped at ARM_VECTORS_HIGH */
 struct pv_addr msgbufpv;	/* kernel message buffer */
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 struct pv_addr kernelstack;	/* stack passed to init_proc0() */
 
 /*
  * Static device mappings, passed to arm_devmap_bootstrap() by initarm().
  * Going by the initializer values, each row is: virtual base, physical
  * base, size, protection, cache attribute.  An all-zero row ends the table.
  */
 static const struct arm_devmap_entry iq81342_devmap[] = {
 	{ IOP34X_VADDR, IOP34X_HWADDR, IOP34X_SIZE,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	/*
 	 * Cheat and map a whole section, this will bring
 	 * both PCI-X and PCI-E outbound I/O
 	 */
 	{ IOP34X_PCIX_OIOBAR_VADDR & ~(0x100000 - 1),
 	  IOP34X_PCIX_OIOBAR & ~(0x100000 - 1),
 	  0x100000,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	{ IOP34X_PCE1_VADDR, IOP34X_PCE1, IOP34X_PCE1_SIZE,
 	  VM_PROT_READ | VM_PROT_WRITE, PTE_DEVICE },
 
 	/* Terminator. */
 	{ 0, 0, 0, 0, 0 }
 };
 
 /*
  * Physical address initarm() uses as the start of SDRAM, both when mapping
  * KERNBASE and when registering the hardware memory region.
  */
 #define SDRAM_START 0x00000000
 
 /* Defined elsewhere; initarm() sets it to 0xff000000U. */
 extern vm_offset_t xscale_cache_clean_addr;
 
 /*
  * Early machine-dependent bootstrap for this board.
  *
  * In the order the code below performs them: parse the boot parameters,
  * carve the L1 table, the kernel L2 tables, the vector page, the dynamic
  * per-CPU area, the mode stacks and the message buffer out of fixed
  * physical addresses below 0x00200000, build the kernel mappings, install
  * the new translation table, size SDRAM, bootstrap pmap and register the
  * physical memory regions.
  *
  * abp: boot parameters; passed to parse_boot_param() and read for
  *      abp_physaddr.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t freemem_pt;
 	vm_offset_t afterkern;
 	vm_offset_t freemem_after;
 	vm_offset_t lastaddr;
 	uint32_t memsize, memstart;
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/*
 	 * Allocation cursor: the macros below hand out pages downward
 	 * from this physical address.
 	 */
 	freemempos = 0x00200000;
 	/*
 	 * Define a macro to simplify memory allocation
 	 *
 	 * valloc_pages() fills in both pv_pa and pv_va (pv_pa + 0xc0000000);
 	 * alloc_pages() moves freemempos down by np pages, stores the new
 	 * address in var and zeroes the pages.
 	 *
 	 * NOTE(review): both macros expand to several statements and
 	 * alloc_pages() uses np unparenthesized in its first statement, so
 	 * they are only safe as stand-alone statements with the simple
 	 * arguments used in this function.
 	 */
 #define	valloc_pages(var, np)			\
 	alloc_pages((var).pv_pa, (np));		\
 	(var).pv_va = (var).pv_pa + 0xc0000000;
 
 #define alloc_pages(var, np)			\
 	freemempos -= (np * PAGE_SIZE);		\
 	(var) = freemempos;		\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/*
 	 * Step the cursor down a page at a time until the L1 table
 	 * allocated next will start on an L1_TABLE_SIZE boundary.
 	 */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos -= PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * Every (PAGE_SIZE / L2_TABLE_SIZE_REAL)-th L2 table triggers a
 	 * fresh allocation; the tables in between are placed at successive
 	 * L2_TABLE_SIZE_REAL offsets from freemempos, i.e. inside the most
 	 * recent allocation.
 	 */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_pa = freemempos +
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_va =
 			    kernel_pt_table[loop].pv_pa + 0xc0000000;
 		}
 	}
 	/*
 	 * Remember the lowest address used by the page tables, then restart
 	 * the cursor at 0x00100000 for the remaining boot-time pages.
 	 */
 	freemem_pt = freemempos;
 	freemempos = 0x00100000;
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 *
 	 * NOTE(review): below, this page is mapped at ARM_VECTORS_HIGH,
 	 * not at virtual address 0.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	/*
 	 * Map SDRAM at KERNBASE in three pieces: the first 1MB with
 	 * PTE_CACHE, the second 1MB (where the page tables were allocated
 	 * above) with PTE_PAGETABLE, and the rest, extended to the next
 	 * L1_S_SIZE boundary past lastaddr, with PTE_CACHE.
 	 *
 	 * NOTE(review): KERNEL_PT_IOPXS and KERNEL_PT_BEFOREKERN are
 	 * allocated above but never linked into the L1 table in this
 	 * function.
 	 */
 	pmap_map_chunk(l1pagetable, KERNBASE, SDRAM_START, 0x100000,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, SDRAM_START + 0x100000,
 	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x200000, SDRAM_START + 0x200000,
 	   (((uint32_t)(lastaddr) - KERNBASE - 0x200000) + L1_S_SIZE) & ~(L1_S_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	/* NOTE(review): freemem_after is not read again in this function. */
 	freemem_after = ((int)lastaddr + PAGE_SIZE) & ~(PAGE_SIZE - 1);
 	/* First L1_S_SIZE boundary past lastaddr; the AFKERNEL tables start here. */
 	afterkern = round_page(((vm_offset_t)lastaddr + L1_S_SIZE) & ~(L1_S_SIZE
 	    - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 	
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	arm_devmap_bootstrap(l1pagetable, iq81342_devmap);
 	/*
 	 * Give the XScale global cache clean code an appropriately
 	 * sized chunk of unmapped VA space starting at 0xff000000
 	 * (our device mappings end before this address).
 	 */
 	xscale_cache_clean_addr = 0xff000000U;
 
 	/*
 	 * Install the new L1 table.  Domain 0 is set to client alongside
 	 * the kernel domain while the table is switched and the TLB is
 	 * flushed; afterwards only the kernel domain remains client.
 	 */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/* Calibrate the delay loop, then size SDRAM; memsize sets physmem. */
 	i80321_calibrate_delay();
 	i81342_sdram_bounds(obio_bs_tag, IOP34X_VADDR, &memstart, &memsize);
 	physmem = memsize / PAGE_SIZE;
 	cninit();
 	/* Set stack for exception handlers */
 	
 	undefined_init();
 				
 	init_proc0(kernelstack.pv_va);
 	
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	/* pmap_bootstrap() is given the page after afterkern as its first argument. */
 	pmap_curmaxkvaddr = afterkern + PAGE_SIZE;
 
 	vm_max_kernel_address = 0xe0000000;
 	pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 *
 	 * The three exclusions start at freemem_pt (page tables), at
 	 * freemempos (the other boot-time pages) and at abp->abp_physaddr
 	 * (the kernel itself).
 	 */
 	arm_physmem_hardware_region(SDRAM_START, memsize);
 	arm_physmem_exclude_region(freemem_pt, KERNPHYSADDR -
 	    freemem_pt, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(freemempos, KERNPHYSADDR - 0x100000 -
 	    freemempos, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
Index: head/sys/arm/xscale/ixp425/avila_machdep.c
===================================================================
--- head/sys/arm/xscale/ixp425/avila_machdep.c	(revision 285626)
+++ head/sys/arm/xscale/ixp425/avila_machdep.c	(revision 285627)
@@ -1,435 +1,437 @@
 /*	$NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $	*/
 
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/physmem.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <sys/reboot.h>
 
 #include <arm/xscale/ixp425/ixp425reg.h>
 #include <arm/xscale/ixp425/ixp425var.h>
 
 /*
  * Indices into kernel_pt_table[] for the bootstrap L2 page tables that
  * initarm() links into the L1 table:
  *   KERNEL_PT_SYS         table for the section containing ARM_VECTORS_HIGH
  *   KERNEL_PT_IO ..       KERNEL_PT_IO_NUM tables for the IO, MCU and PCI
  *                         memory device windows
  *   KERNEL_PT_BEFOREKERN  table linked at KERNBASE
  *   KERNEL_PT_AFKERNEL .. KERNEL_PT_AFKERNEL_NUM tables for the 1MB sections
  *                         that follow the kernel image
  *
  * The derived values are fully parenthesized so that they can be used
  * safely inside larger expressions.
  */
 #define KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_IO		1
 #define KERNEL_PT_IO_NUM	3
 #define KERNEL_PT_BEFOREKERN	(KERNEL_PT_IO + KERNEL_PT_IO_NUM)
 #define KERNEL_PT_AFKERNEL	(KERNEL_PT_BEFOREKERN + 1)	/* L2 table for mapping after kernel */
 #define	KERNEL_PT_AFKERNEL_NUM	9
 
 /*
  * this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4)
  * NOTE(review): the value is currently 14; the allocation loop in initarm()
  * starts a new page whenever the index is a multiple of that ratio, so a
  * partially used last page appears to be tolerated -- confirm.
  */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 /*
  * Bootstrap L2 page tables, indexed by the KERNEL_PT_* constants; the
  * entries are filled in and linked into the L1 table by initarm().
  */
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /*
  * Physical and virtual addresses for some global pages.  All of them are
  * allocated by initarm() during early boot.
  */
 
 /* Vector page; initarm() maps it at ARM_VECTORS_HIGH. */
 struct pv_addr systempage;
 /* Backing store for the kernel message buffer (msgbufp). */
 struct pv_addr msgbufpv;
 /* Stacks allocated under "Allocate stacks for all modes" in initarm(). */
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 /* thread0 kernel stack (KSTACK_PAGES pages), handed to init_proc0(). */
 struct pv_addr kernelstack;
 /* Mini-Data cache clean area; only its physical address is filled in. */
 struct pv_addr minidataclean;
 
 /*
  * Static device mappings for the IXP425.
  *
  * initarm() passes this table to arm_devmap_bootstrap() when
  * cpu_is_ixp43x() is false.  Each entry lists, in order: virtual base,
  * hardware (physical) base, size, protection and PTE attribute; the table
  * ends with an all-zero entry.
  */
 static const struct arm_devmap_entry ixp425_devmap[] = {
 	/* Physical/Virtual address for I/O space */
     { IXP425_IO_VBASE, IXP425_IO_HWBASE, IXP425_IO_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Expansion Bus */
     { IXP425_EXP_VBASE, IXP425_EXP_HWBASE, IXP425_EXP_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* CFI Flash on the Expansion Bus */
     { IXP425_EXP_BUS_CS0_VBASE, IXP425_EXP_BUS_CS0_HWBASE,
       IXP425_EXP_BUS_CS0_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* IXP425 PCI Configuration */
     { IXP425_PCI_VBASE, IXP425_PCI_HWBASE, IXP425_PCI_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* SDRAM Controller */
     { IXP425_MCU_VBASE, IXP425_MCU_HWBASE, IXP425_MCU_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* PCI Memory Space */
     { IXP425_PCI_MEM_VBASE, IXP425_PCI_MEM_HWBASE, IXP425_PCI_MEM_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Q-Mgr Memory Space */
     { IXP425_QMGR_VBASE, IXP425_QMGR_HWBASE, IXP425_QMGR_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Terminating entry */
     { 0 },
 };
 
 /*
  * Static device mappings for the IXP435.
  *
  * initarm() passes this table to arm_devmap_bootstrap() when
  * cpu_is_ixp43x() is true.  Entry layout matches the IXP425 table: virtual
  * base, hardware (physical) base, size, protection and PTE attribute, with
  * an all-zero terminating entry.  Most windows reuse the IXP425 constants;
  * the memory controller uses the IXP435 hardware base, and the USB, GPS and
  * RS485 windows exist only in this table.
  */
 static const struct arm_devmap_entry ixp435_devmap[] = {
 	/* Physical/Virtual address for I/O space */
     { IXP425_IO_VBASE, IXP425_IO_HWBASE, IXP425_IO_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Expansion Bus */
     { IXP425_EXP_VBASE, IXP425_EXP_HWBASE, IXP425_EXP_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* IXP425 PCI Configuration */
     { IXP425_PCI_VBASE, IXP425_PCI_HWBASE, IXP425_PCI_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* DDRII Controller NB: mapped same place as IXP425 */
     { IXP425_MCU_VBASE, IXP435_MCU_HWBASE, IXP425_MCU_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* PCI Memory Space */
     { IXP425_PCI_MEM_VBASE, IXP425_PCI_MEM_HWBASE, IXP425_PCI_MEM_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Q-Mgr Memory Space */
     { IXP425_QMGR_VBASE, IXP425_QMGR_HWBASE, IXP425_QMGR_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* CFI Flash on the Expansion Bus */
     { IXP425_EXP_BUS_CS0_VBASE, IXP425_EXP_BUS_CS0_HWBASE,
       IXP425_EXP_BUS_CS0_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* USB1 Memory Space */
     { IXP435_USB1_VBASE, IXP435_USB1_HWBASE, IXP435_USB1_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 	/* USB2 Memory Space */
     { IXP435_USB2_VBASE, IXP435_USB2_HWBASE, IXP435_USB2_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* GPS Memory Space */
     { CAMBRIA_GPS_VBASE, CAMBRIA_GPS_HWBASE, CAMBRIA_GPS_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* RS485 Memory Space */
     { CAMBRIA_RS485_VBASE, CAMBRIA_RS485_HWBASE, CAMBRIA_RS485_SIZE,
       VM_PROT_READ|VM_PROT_WRITE, PTE_DEVICE, },
 
 	/* Terminating entry */
     { 0 }
 };
 
 extern vm_offset_t xscale_cache_clean_addr;
 
 void *
 initarm(struct arm_boot_params *abp)
 {
 #define	next_chunk2(a,b)	(((a) + (b)) &~ ((b)-1))
 #define	next_page(a)		next_chunk2(a,PAGE_SIZE)
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int loop, i;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t freemem_pt;
 	vm_offset_t afterkern;
 	vm_offset_t freemem_after;
 	vm_offset_t lastaddr;
 	uint32_t memsize;
 
 	/* kernel text starts where we were loaded at boot */
 #define	KERNEL_TEXT_OFF		(abp->abp_physaddr  - PHYSADDR)
 #define	KERNEL_TEXT_BASE	(KERNBASE + KERNEL_TEXT_OFF)
 #define	KERNEL_TEXT_PHYS	(PHYSADDR + KERNEL_TEXT_OFF)
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();		/* NB: sets cputype */
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 
 	if (envmode == 1)
 		kern_envp = static_env;
 	/* Do basic tuning, hz etc */
       	init_param1();
 		
 	/*
 	 * We allocate memory downwards from where we were loaded
 	 * by RedBoot; first the L1 page table, then NUM_KERNEL_PTS
 	 * entries in the L2 page table.  Past that we re-align the
 	 * allocation boundary so later data structures (stacks, etc)
 	 * can be mapped with different attributes (write-back vs
 	 * write-through).  Note this leaves a gap for expansion
 	 * (or might be repurposed).
 	 */
 	freemempos = abp->abp_physaddr;
 
 	/* macros to simplify initial memory allocation */
 #define alloc_pages(var, np) do {					\
 	freemempos -= (np * PAGE_SIZE);					\
 	(var) = freemempos;						\
 	/* NB: this works because locore maps PA=VA */			\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));			\
 } while (0)
 #define	valloc_pages(var, np) do {					\
 	alloc_pages((var).pv_pa, (np));					\
 	(var).pv_va = (var).pv_pa + (KERNVIRTADDR - abp->abp_physaddr);	\
 } while (0)
 
 	/* force L1 page table alignment */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos -= PAGE_SIZE;
 	/* allocate contiguous L1 page table */
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/* now allocate L2 page tables; they are linked to L1 below */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_pa = freemempos +
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_va =
 			    kernel_pt_table[loop].pv_pa +
 				(KERNVIRTADDR - abp->abp_physaddr);
 		}
 	}
 	freemem_pt = freemempos;		/* base of allocated pt's */
 
 	/*
 	 * Re-align allocation boundary so we can map the area
 	 * write-back instead of write-through for the stacks and
 	 * related structures allocated below.
 	 */
 	freemempos = PHYSADDR + 0x100000;
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	alloc_pages(minidataclean.pv_pa, 1);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 
 	/*
 	 * Now construct the L1 page table.  First map the L2
 	 * page tables into the L1 so we can replace L1 mappings
 	 * later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 	pmap_link_l2pt(l1pagetable, IXP425_IO_VBASE,
 	    &kernel_pt_table[KERNEL_PT_IO]);
 	pmap_link_l2pt(l1pagetable, IXP425_MCU_VBASE,
 	    &kernel_pt_table[KERNEL_PT_IO + 1]);
 	pmap_link_l2pt(l1pagetable, IXP425_PCI_MEM_VBASE,
 	    &kernel_pt_table[KERNEL_PT_IO + 2]);
 	pmap_link_l2pt(l1pagetable, KERNBASE,
 	    &kernel_pt_table[KERNEL_PT_BEFOREKERN]);
 	pmap_map_chunk(l1pagetable, KERNBASE, PHYSADDR, 0x100000,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, PHYSADDR + 0x100000,
 	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, KERNEL_TEXT_BASE, KERNEL_TEXT_PHYS,
 	    next_chunk2(((uint32_t)lastaddr) - KERNEL_TEXT_BASE, L1_S_SIZE),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	freemem_after = next_page((int)lastaddr);
 	afterkern = round_page(next_chunk2((vm_offset_t)lastaddr, L1_S_SIZE));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 	pmap_map_entry(l1pagetable, afterkern, minidataclean.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 
 	/* Map the Mini-Data cache clean area. */
 	xscale_setup_minidata(l1pagetable, afterkern,
 	    minidataclean.pv_pa);
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	if (cpu_is_ixp43x())
 		arm_devmap_bootstrap(l1pagetable, ixp435_devmap);
 	else
 		arm_devmap_bootstrap(l1pagetable, ixp425_devmap);
 	/*
 	 * Give the XScale global cache clean code an appropriately
 	 * sized chunk of unmapped VA space starting at 0xff000000
 	 * (our device mappings end before this address).
 	 */
 	xscale_cache_clean_addr = 0xff000000U;
 
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/* ready to setup the console (XXX move earlier if possible) */
 	cninit();
 	/*
 	 * Fetch the RAM size from the MCU registers.  The
 	 * expansion bus was mapped above so we can now read 'em.
 	 */
 	if (cpu_is_ixp43x())
 		memsize = ixp435_ddram_size();
 	else
 		memsize = ixp425_sdram_size();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	pmap_curmaxkvaddr = afterkern + PAGE_SIZE;
 	vm_max_kernel_address = 0xe0000000;
 	pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel, and all the things we allocated which immediately
 	 * follow the kernel, from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 */
 	arm_physmem_hardware_region(PHYSADDR, memsize);
 	arm_physmem_exclude_region(freemem_pt, KERNPHYSADDR -
 	    freemem_pt, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(freemempos, KERNPHYSADDR - 0x100000 -
 	    freemempos, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(abp->abp_physaddr, 
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 
 	/* use static kernel environment if so configured */
 	if (envmode == 1)
 		kern_envp = static_env;
 
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 #undef next_page
 #undef next_chunk2
 }
Index: head/sys/arm/xscale/pxa/pxa_machdep.c
===================================================================
--- head/sys/arm/xscale/pxa/pxa_machdep.c	(revision 285626)
+++ head/sys/arm/xscale/pxa/pxa_machdep.c	(revision 285627)
@@ -1,439 +1,440 @@
 /*	$NetBSD: hpc_machdep.c,v 1.70 2003/09/16 08:18:22 agc Exp $	*/
 
 /*-
  * Copyright (c) 1994-1998 Mark Brinicombe.
  * Copyright (c) 1994 Brini.
  * All rights reserved.
  *
  * This code is derived from software written for Brini by Mark Brinicombe
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Brini.
  * 4. The name of the company nor the name of the author may be used to
  *    endorse or promote products derived from this software without specific
  *    prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * RiscBSD kernel project
  *
  * machdep.c
  *
  * Machine dependant functions for kernel setup
  *
  * This file needs a lot of work.
  *
  * Created      : 17/09/94
  */
 
 #include "opt_ddb.h"
+#include "opt_kstack_pages.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define _ARM32_BUS_DMA_PRIVATE
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/cons.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/buf.h>
 #include <sys/exec.h>
 #include <sys/kdb.h>
 #include <sys/msgbuf.h>
 #include <machine/reg.h>
 #include <machine/cpu.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <machine/devmap.h>
 #include <machine/vmparam.h>
 #include <machine/pcb.h>
 #include <machine/undefined.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/armreg.h>
 #include <machine/bus.h>
 #include <machine/physmem.h>
 #include <sys/reboot.h>
 
 #include <arm/xscale/pxa/pxareg.h>
 #include <arm/xscale/pxa/pxavar.h>
 
 /*
  * Indices into kernel_pt_table[] for the bootstrap L2 page tables that
  * initarm() links into the L1 table.  KERNEL_PT_SYS covers the section
  * containing ARM_VECTORS_HIGH, KERNEL_PT_BEFOREKERN is linked at KERNBASE,
  * and KERNEL_PT_AFKERNEL onwards (KERNEL_PT_AFKERNEL_NUM tables) cover the
  * 1MB sections after the kernel image.  KERNEL_PT_IOPXS is referenced only
  * from a disabled (#if 0) call in initarm().
  */
 #define KERNEL_PT_SYS		0	/* Page table for mapping proc0 zero page */
 #define	KERNEL_PT_IOPXS		1
 #define KERNEL_PT_BEFOREKERN	2
 #define KERNEL_PT_AFKERNEL	3	/* L2 table for mapping after kernel */
 #define	KERNEL_PT_AFKERNEL_NUM	9
 
 /* this should be evenly divisible by PAGE_SIZE / L2_TABLE_SIZE_REAL (or 4) */
 #define NUM_KERNEL_PTS		(KERNEL_PT_AFKERNEL + KERNEL_PT_AFKERNEL_NUM)
 
 /* Bootstrap L2 page tables; filled in and linked by initarm(). */
 struct pv_addr kernel_pt_table[NUM_KERNEL_PTS];
 
 /*
  * Physical and virtual addresses for some global pages.  All of them are
  * allocated by initarm() during early boot.
  */
 
 /* Vector page; initarm() maps it at ARM_VECTORS_HIGH. */
 struct pv_addr systempage;
 /* Backing store for the kernel message buffer (msgbufp). */
 struct pv_addr msgbufpv;
 /* Stacks allocated under "Allocate stacks for all modes" in initarm(). */
 struct pv_addr irqstack;
 struct pv_addr undstack;
 struct pv_addr abtstack;
 /* thread0 kernel stack (KSTACK_PAGES pages), handed to init_proc0(). */
 struct pv_addr kernelstack;
 /* Mini-Data cache clean area; only its physical address is filled in. */
 struct pv_addr minidataclean;
 
 static void	pxa_probe_sdram(bus_space_tag_t, bus_space_handle_t,
 		    uint32_t *, uint32_t *);
 
 /*
  * Static device mappings, handed to arm_devmap_bootstrap() by initarm().
  *
  * The single real entry maps the peripheral window from
  * PXA2X0_PERIPH_START up to PXA250_PERIPH_END at a virtual address that is
  * PXA2X0_PERIPH_OFFSET above its physical address.  The all-zero entry
  * terminates the table.
  */
 static const struct arm_devmap_entry pxa_devmap[] = {
 	/*
 	 * Map the on-board devices up into the KVA region so we don't muck
 	 * up user-space.
 	 */
 	{
 		PXA2X0_PERIPH_START + PXA2X0_PERIPH_OFFSET,
 		PXA2X0_PERIPH_START,
 		PXA250_PERIPH_END - PXA2X0_PERIPH_START,
 		VM_PROT_READ|VM_PROT_WRITE,
 		PTE_DEVICE,
 	},
 	{ 0, 0, 0, 0, 0, }
 };
 
 #define SDRAM_START 0xa0000000
 
 extern vm_offset_t xscale_cache_clean_addr;
 
 /*
  * Early machine-dependent bootstrap for PXA2x0 boards.
  *
  * Carves the bootstrap page tables, the vector page, the dynamic per-cpu
  * area, the per-mode stacks and the message buffer out of low SDRAM, builds
  * the initial L1/L2 mappings, switches to them, probes the SDRAM banks and
  * then brings up the console, pmap and the physical memory bookkeeping.
  *
  * abp: boot parameters; abp->abp_physaddr is recorded as the kernel's
  *      physical address and used as the start of the kernel exclusion
  *      region.
  *
  * Returns kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb),
  * i.e. an address near the top of the thread0 kernel stack (presumably
  * installed as the initial stack pointer by the caller -- confirm).
  */
 void *
 initarm(struct arm_boot_params *abp)
 {
 	struct pv_addr  kernel_l1pt;
 	struct pv_addr  dpcpu;
 	int loop;
 	u_int l1pagetable;
 	vm_offset_t freemempos;
 	vm_offset_t freemem_pt;
 	vm_offset_t afterkern;
 	vm_offset_t freemem_after;
 	vm_offset_t lastaddr;
 	int i, j;
 	uint32_t memsize[PXA2X0_SDRAM_BANKS], memstart[PXA2X0_SDRAM_BANKS];
 
 	lastaddr = parse_boot_param(abp);
 	arm_physmem_kernaddr = abp->abp_physaddr;
 	set_cpufuncs();
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
 	PCPU_SET(curthread, &thread0);
 
 	/* Do basic tuning, hz etc */
 	init_param1();
 
 	/* Allocation proceeds downwards from this physical address. */
 	freemempos = 0xa0200000;
 	/*
 	 * Define macros to simplify memory allocation.
 	 *
 	 * alloc_pages() moves freemempos down by np pages, stores the new
 	 * position in var and zeroes the pages.  valloc_pages() does the
 	 * same for a struct pv_addr and derives the virtual address by
 	 * adding 0x20000000 (presumably KERNBASE - SDRAM_START -- confirm).
 	 * Both are wrapped in do { } while (0) and parenthesize np so that
 	 * they behave as single statements with any argument expression.
 	 */
 #define alloc_pages(var, np) do {				\
 	freemempos -= ((np) * PAGE_SIZE);			\
 	(var) = freemempos;					\
 	memset((char *)(var), 0, ((np) * PAGE_SIZE));		\
 } while (0)
 
 #define	valloc_pages(var, np) do {				\
 	alloc_pages((var).pv_pa, (np));				\
 	(var).pv_va = (var).pv_pa + 0x20000000;			\
 } while (0)
 
 	/* Force L1 page table alignment. */
 	while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0)
 		freemempos -= PAGE_SIZE;
 	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
 	/*
 	 * Allocate the L2 page tables.  A fresh allocation is made whenever
 	 * the index is a multiple of PAGE_SIZE / L2_TABLE_SIZE_REAL; the
 	 * other tables are placed at L2_TABLE_SIZE_REAL offsets inside the
 	 * most recent allocation.
 	 */
 	for (loop = 0; loop < NUM_KERNEL_PTS; ++loop) {
 		if (!(loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
 			valloc_pages(kernel_pt_table[loop],
 			    L2_TABLE_SIZE / PAGE_SIZE);
 		} else {
 			kernel_pt_table[loop].pv_pa = freemempos +
 			    (loop % (PAGE_SIZE / L2_TABLE_SIZE_REAL)) *
 			    L2_TABLE_SIZE_REAL;
 			kernel_pt_table[loop].pv_va =
 			    kernel_pt_table[loop].pv_pa + 0x20000000;
 		}
 	}
 	freemem_pt = freemempos;	/* base of the allocated page tables */
 	/* Restart allocation lower down for the stacks and related pages. */
 	freemempos = 0xa0100000;
 	/*
 	 * Allocate a page for the system page mapped to V0x00000000
 	 * This page will just contain the system vectors and can be
 	 * shared by all processes.
 	 */
 	valloc_pages(systempage, 1);
 
 	/* Allocate dynamic per-cpu area. */
 	valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
 	dpcpu_init((void *)dpcpu.pv_va, 0);
 
 	/* Allocate stacks for all modes */
 	valloc_pages(irqstack, IRQ_STACK_SIZE);
 	valloc_pages(abtstack, ABT_STACK_SIZE);
 	valloc_pages(undstack, UND_STACK_SIZE);
 	valloc_pages(kernelstack, KSTACK_PAGES);
 	/* Only the physical address is needed; it is mapped at afterkern. */
 	alloc_pages(minidataclean.pv_pa, 1);
 	valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
 	/*
 	 * Allocate memory for the l1 and l2 page tables. The scheme to avoid
 	 * wasting memory by allocating the l1pt on the first 16k memory was
 	 * taken from NetBSD rpc_machdep.c. NKPT should be greater than 12 for
 	 * this to work (which is supposed to be the case).
 	 */
 
 	/*
 	 * Now we start construction of the L1 page table
 	 * We start by mapping the L2 page tables into the L1.
 	 * This means that we can replace L1 mappings later on if necessary
 	 */
 	l1pagetable = kernel_l1pt.pv_va;
 
 	/* Map the L2 pages tables in the L1 page table */
 	pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH & ~(0x00100000 - 1),
 	    &kernel_pt_table[KERNEL_PT_SYS]);
 #if 0 /* XXXBJR: What is this?  Don't know if there's an analogue. */
 	pmap_link_l2pt(l1pagetable, IQ80321_IOPXS_VBASE,
 	                &kernel_pt_table[KERNEL_PT_IOPXS]);
 #endif
 	pmap_link_l2pt(l1pagetable, KERNBASE,
 	    &kernel_pt_table[KERNEL_PT_BEFOREKERN]);
 	/*
 	 * Map the first 1MB of SDRAM cached, the second 1MB with the
 	 * page-table attribute, and everything from 2MB up to the section
 	 * boundary past lastaddr cached.
 	 */
 	pmap_map_chunk(l1pagetable, KERNBASE, SDRAM_START, 0x100000,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x100000, SDRAM_START + 0x100000,
 	    0x100000, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE);
 	pmap_map_chunk(l1pagetable, KERNBASE + 0x200000, SDRAM_START + 0x200000,
 	   (((uint32_t)(lastaddr) - KERNBASE - 0x200000) + L1_S_SIZE) & ~(L1_S_SIZE - 1),
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	freemem_after = ((int)lastaddr + PAGE_SIZE) & ~(PAGE_SIZE - 1);
 	/* First L1 section boundary past the end of the kernel image. */
 	afterkern = round_page(((vm_offset_t)lastaddr + L1_S_SIZE) &
 	    ~(L1_S_SIZE - 1));
 	for (i = 0; i < KERNEL_PT_AFKERNEL_NUM; i++) {
 		pmap_link_l2pt(l1pagetable, afterkern + i * 0x00100000,
 		    &kernel_pt_table[KERNEL_PT_AFKERNEL + i]);
 	}
 	pmap_map_entry(l1pagetable, afterkern, minidataclean.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 
 
 	/* Map the Mini-Data cache clean area. */
 	xscale_setup_minidata(l1pagetable, afterkern,
 	    minidataclean.pv_pa);
 
 	/* Map the vector page. */
 	pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa,
 	    VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
 	arm_devmap_bootstrap(l1pagetable, pxa_devmap);
 
 	/*
 	 * Give the XScale global cache clean code an appropriately
 	 * sized chunk of unmapped VA space starting at 0xff000000
 	 * (our device mappings end before this address).
 	 */
 	xscale_cache_clean_addr = 0xff000000U;
 
 	/* Switch to the page tables built above. */
 	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT);
 	setttb(kernel_l1pt.pv_pa);
 	cpu_tlb_flushID();
 	cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2));
 
 	/*
 	 * Pages were allocated during the secondary bootstrap for the
 	 * stacks for different CPU modes.
 	 * We must now set the r13 registers in the different CPU modes to
 	 * point to these stacks.
 	 * Since the ARM stacks use STMFD etc. we must set r13 to the top end
 	 * of the stack memory.
 	 */
 	set_stackptrs(0);
 
 	/*
 	 * We must now clean the cache again....
 	 * Cleaning may be done by reading new data to displace any
 	 * dirty data in the cache. This will have happened in setttb()
 	 * but since we are boot strapping the addresses used for the read
 	 * may have just been remapped and thus the cache could be out
 	 * of sync. A re-clean after the switch will cure this.
 	 * After booting there are no gross relocations of the kernel thus
 	 * this problem will not occur after initarm().
 	 */
 	cpu_idcache_wbinv_all();
 	cpu_setup();
 
 	/*
 	 * Sort out bus_space for on-board devices.
 	 */
 	pxa_obio_tag_init();
 
 	/*
 	 * Fetch the SDRAM start/size from the PXA2X0 SDRAM configuration
 	 * registers.
 	 */
 	pxa_probe_sdram(obio_tag, PXA2X0_MEMCTL_BASE, memstart, memsize);
 
 	/* Fire up consoles. */
 	cninit();
 
 	undefined_init();
 
 	init_proc0(kernelstack.pv_va);
 
 	/* Enable MMU, I-cache, D-cache, write buffer. */
 	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);
 
 	pmap_curmaxkvaddr = afterkern + PAGE_SIZE;
 	vm_max_kernel_address = 0xe0000000;
 	pmap_bootstrap(pmap_curmaxkvaddr, &kernel_l1pt);
 	msgbufp = (void*)msgbufpv.pv_va;
 	msgbufinit(msgbufp, msgbufsize);
 	mutex_init();
 
 	/*
 	 * Add the physical ram we have available.
 	 *
 	 * Exclude the kernel (and all the things we allocated which immediately
 	 * follow the kernel) from the VM allocation pool but not from crash
 	 * dumps.  virtual_avail is a global variable which tracks the kva we've
 	 * "allocated" while setting up pmaps.
 	 *
 	 * Prepare the list of physical memory available to the vm subsystem.
 	 *
 	 * Only banks that pxa_probe_sdram() reported with a non-zero size
 	 * are registered.  The three excluded regions are, in order: the
 	 * bootstrap page tables (freemem_pt up to KERNPHYSADDR), the pages
 	 * allocated below KERNPHYSADDR - 0x100000 (vector page, dpcpu,
 	 * stacks, msgbuf), and the kernel image plus whatever pmap
 	 * bootstrap consumed after it.
 	 */
 	for (j = 0; j < PXA2X0_SDRAM_BANKS; j++) {
 		if (memsize[j] > 0)
 			arm_physmem_hardware_region(memstart[j], memsize[j]);
 	}
 	arm_physmem_exclude_region(freemem_pt, KERNPHYSADDR -
 	    freemem_pt, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(freemempos, KERNPHYSADDR - 0x100000 -
 	    freemempos, EXFLAG_NOALLOC);
 	arm_physmem_exclude_region(abp->abp_physaddr,
 	    virtual_avail - KERNVIRTADDR, EXFLAG_NOALLOC);
 	arm_physmem_init_kernel_globals();
 
 	init_param2(physmem);
 	kdb_init();
 	return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP -
 	    sizeof(struct pcb)));
 }
 
 /*
  * Decode the SDRAM layout from the memory controller's MDCNFG register.
  *
  * bst, bsh:  bus space tag/handle used to read MEMCTL_MDCNFG.
  * memstart:  out, PXA2X0_SDRAM_BANKS entries; start address of each
  *            partition, or 0 when its enable bit is clear.
  * memsize:   out, PXA2X0_SDRAM_BANKS entries; size of each partition, or 0
  *            when its enable bit is clear.
  */
 static void
 pxa_probe_sdram(bus_space_tag_t bst, bus_space_handle_t bsh,
     uint32_t *memstart, uint32_t *memsize)
 {
 	uint32_t	cfg, enable, width, cols, rows, nbanks;
 	int		part;
 
 	cfg = bus_space_read_4(bst, bsh, MEMCTL_MDCNFG);
 
 	/*
 	 * Scan all 4 SDRAM banks
 	 */
 	for (part = 0; part < PXA2X0_SDRAM_BANKS; part++) {
 		memstart[part] = 0;
 		memsize[part] = 0;
 
 		/* Every partition has its own enable bit. */
 		switch (part) {
 		case 0:
 			enable = MDCNFG_DE0;
 			break;
 		case 1:
 			enable = MDCNFG_DE1;
 			break;
 		case 2:
 			enable = MDCNFG_DE2;
 			break;
 		case 3:
 			enable = MDCNFG_DE3;
 			break;
 		default:
 			panic("pxa_probe_sdram: impossible");
 		}
 		if ((cfg & enable) == 0)
 			continue;
 
 		/* Partitions 0/1 and 2/3 each share one set of fields. */
 		if (part < 2) {
 			width = cfg >> MDCNFD_DWID01_SHIFT;
 			cols = cfg >> MDCNFD_DCAC01_SHIFT;
 			rows = cfg >> MDCNFD_DRAC01_SHIFT;
 			nbanks = cfg >> MDCNFD_DNB01_SHIFT;
 		} else {
 			width = cfg >> MDCNFD_DWID23_SHIFT;
 			cols = cfg >> MDCNFD_DCAC23_SHIFT;
 			rows = cfg >> MDCNFD_DRAC23_SHIFT;
 			nbanks = cfg >> MDCNFD_DNB23_SHIFT;
 		}
 
 		width = 2 << (1 - (width & MDCNFD_DWID_MASK)); /* 16/32 width */
 		cols = 1 << ((cols & MDCNFD_DCAC_MASK) + 8);   /* 8-11 columns */
 		rows = 1 << ((rows & MDCNFD_DRAC_MASK) + 11);  /* 11-13 rows */
 		nbanks = 2 << (nbanks & MDCNFD_DNB_MASK);      /* # of banks */
 
 		memsize[part] = width * cols * rows * nbanks;
 		memstart[part] = PXA2X0_SDRAM0_START +
 		    (part * PXA2X0_SDRAM_BANK_SIZE);
 	}
 }
 
 #define	TIMER_FREQUENCY	3686400
 #define	UNIMPLEMENTED	panic("%s: unimplemented", __func__)
 
 /* XXXBJR: Belongs with DELAY in a timer.c of some sort. */
 /*
  * Not implemented on this platform: UNIMPLEMENTED expands to a panic()
  * that names the calling function.
  */
 void
 cpu_startprofclock(void)
 {
 	UNIMPLEMENTED;
 }
 
 /*
  * Not implemented on this platform: UNIMPLEMENTED expands to a panic()
  * that names the calling function.
  */
 void
 cpu_stopprofclock(void)
 {
 	UNIMPLEMENTED;
 }
 
 /*
  * The DMA range handed out by bus_dma_get_range(): system base and bus
  * base are both 0 and the length is ~0u.
  */
 static struct arm32_dma_range pxa_range = {
 	.dr_sysbase = 0,
 	.dr_busbase = 0,
 	.dr_len = ~0u,
 };
 
 /* Return a pointer to this platform's DMA range description, pxa_range. */
 struct arm32_dma_range *
 bus_dma_get_range(void)
 {
 	struct arm32_dma_range *range = &pxa_range;
 
 	return (range);
 }
 
 /*
  * Always returns 1 (presumably the number of ranges available through
  * bus_dma_get_range() -- confirm against the callers).
  */
 int
 bus_dma_get_range_nb(void)
 {
 	const int nranges = 1;
 
 	return (nranges);
 }
Index: head/sys/arm64/arm64/locore.S
===================================================================
--- head/sys/arm64/arm64/locore.S	(revision 285626)
+++ head/sys/arm64/arm64/locore.S	(revision 285627)
@@ -1,604 +1,603 @@
 /*-
  * Copyright (c) 2012-2014 Andrew Turner
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "assym.s"
+#include "opt_kstack_pages.h"
 #include <sys/syscall.h>
 #include <machine/asm.h>
 #include <machine/armreg.h>
 #include <machine/hypervisor.h>
 #include <machine/param.h>
 #include <machine/pte.h>
 
 /* Virtual address width; used below to build TCR as TCR_TxSZ(64 - VIRT_BITS). */
 #define	VIRT_BITS	39
 
 	/* Export KERNBASE as the global symbol "kernbase". */
 	.globl	kernbase
 	.set	kernbase, KERNBASE
 
 /*
  * Memory types passed in x7 to the page table builders.  The values are
  * the attribute indexes used in the "mair" table below.
  */
 #define	DEVICE_MEM	0
 #define	NORMAL_UNCACHED	1
 #define	NORMAL_MEM	2
 
 /*
  * We assume:
  *  MMU      on with an identity map, or off
  *  D-Cache: off
  *  I-Cache: on or off
  *  We are loaded at a 2MiB aligned address
  */
-
-#define	INIT_STACK_SIZE	(PAGE_SIZE * 4)
 
 /*
  * Kernel entry point for the boot CPU.
  *
  * Drops to EL1, turns the MMU off, computes the physical/virtual delta,
  * builds the initial page tables, turns the MMU on and then branches to
  * virtdone through its link-time address.  x0 is not modified on this
  * path; virtdone later saves it as the module pointer.
  */
 	.text
 	.globl _start
 _start:
 	/* Drop to EL1 */
 	bl	drop_to_el1
 
 	/*
 	 * Disable the MMU. We may have entered the kernel with it on and
 	 * will need to update the tables later. If this has been set up
 	 * with anything other than a VA == PA map then this will fail,
 	 * but in this case the code to find where we are running from
 	 * would have also failed.
 	 */
 	dsb	sy
 	mrs	x2, sctlr_el1
 	bic	x2, x2, SCTLR_M
 	msr	sctlr_el1, x2
 	isb
 
 	/* Set the context id */
 	msr	contextidr_el1, xzr
 
 	/* Get the virt -> phys offset */
 	bl	get_virt_delta
 
 	/*
 	 * At this point:
 	 * x29 = PA - VA
 	 * x28 = Our physical load address
 	 */
 
 	/* Create the page tables */
 	bl	create_pagetables
 
 	/*
 	 * At this point:
 	 * x27 = TTBR0 table
 	 * x26 = TTBR1 table
 	 */
 
 	/* Enable the mmu */
 	bl	start_mmu
 
 	/* Jump to the virtual address space */
 	/* .Lvirtdone holds the link-time address of virtdone. */
 	ldr	x15, .Lvirtdone
 	br	x15
 
 /*
  * Continuation of _start, running from the kernel's virtual addresses.
  * Sets up the initial stack, zeroes the BSS, builds the argument block
  * for initarm() on the stack and calls initarm() and then mi_startup().
  */
 virtdone:
 	/* Set up the stack */
 	/* x25 keeps the top of the init stack; it is stored as kern_stack below. */
 	adr	x25, initstack_end
 	mov	sp, x25
 	/* Leave PCB_SIZE bytes free at the top of the stack. */
 	sub	sp, sp, #PCB_SIZE
 
 	/* Zero the BSS */
 	/* Clears 8 bytes per iteration from __bss_start up to _end. */
 	ldr	x15, .Lbss
 	ldr	x14, .Lend
 1:
 	str	xzr, [x15], #8
 	cmp	x15, x14
 	b.lo	1b
 
 	/* Backup the module pointer */
 	mov	x1, x0
 
 	/* Make the page table base a virtual address */
 	sub	x26, x26, x29
 
 	/* Reserve 256 bytes of stack for the argument block passed in x0. */
 	sub	sp, sp, #(64 * 4)
 	mov	x0, sp
 
 	/* Negate the delta so it is VA -> PA */
 	neg	x29, x29
 
 	str	x1,  [x0]	/* modulep */
 	str	x26, [x0, 8]	/* kern_l1pt */
 	str	x29, [x0, 16]	/* kern_delta */
 	str	x25, [x0, 24]	/* kern_stack */
 
 	/* trace back starts here */
 	mov	fp, #0
 	/* Branch to C code */
 	bl	initarm
 	bl	mi_startup
 
 	/* We should not get here */
 	brk	0
 
 	/* Literal words loaded pc-relative by _start, virtdone and create_pagetables. */
 	.align 3
 .Lvirtdone:
 	.quad	virtdone
 .Lbss:
 	.quad	__bss_start
 .Lend:
 	.quad	_end
 
 #ifdef SMP
 /*
  * mpentry(unsigned long)
  *
  * Called by a core when it is being brought online.
  * The data in x0 is passed straight to init_secondary.
  *
  * x0 is the cpuid (>= 1) and is left untouched all the way to
  * init_secondary: drop_to_el1 and start_mmu only use x1-x3.
  */
 ENTRY(mpentry)
 	/* Disable interrupts */
 	msr	daifset, #2
 
 	/* Drop to EL1 */
 	bl	drop_to_el1
 
 	/* Set the context id */
 	/*
 	 * NOTE(review): x1 is not an argument here and is overwritten by
 	 * drop_to_el1; the boot CPU writes xzr instead -- confirm intent.
 	 */
 	msr	contextidr_el1, x1
 
 	/* Load the kernel page table */
 	adr	x26, pagetable_l1_ttbr1
 	/* Load the identity page table */
 	adr	x27, pagetable_l1_ttbr0
 
 	/* Enable the mmu */
 	bl	start_mmu
 
 	/* Jump to the virtual address space */
 	ldr	x15, =mp_virtdone
 	br	x15
 
 mp_virtdone:
 	/*
 	 * The stack grows down, so sp must start at the top of this CPU's
 	 * stack: secondary_stacks + cpuid * (PAGE_SIZE * KSTACK_PAGES) is
 	 * the end of secondary_stacks[cpuid - 1].  Using (cpuid - 1) as the
 	 * multiplier would point sp at the bottom of the stack and let the
 	 * CPU run into the memory below it.
 	 */
 	ldr	x4, =secondary_stacks
 	mov	x5, #(PAGE_SIZE * KSTACK_PAGES)
 	mul	x5, x0, x5
 	add	sp, x4, x5
 
 	b	init_secondary
 END(mpentry)
 #endif
 
 /*
  * If we are started in EL2, configure the required hypervisor
  * registers and drop to EL1.
  *
  * Returns to the caller (x30) in either case: with a plain ret when not
  * in EL2, or with an eret to EL1 otherwise.  Trashes x1 and x2.
  */
 drop_to_el1:
 	/* CurrentEL >> 2 is the exception level number. */
 	mrs	x1, CurrentEL
 	lsr	x1, x1, #2
 	cmp	x1, #0x2
 	b.eq	1f
 	ret
 1:
 	/* Configure the Hypervisor */
 	mov	x2, #(HCR_RW)
 	msr	hcr_el2, x2
 
 	/* Load the Virtualization Process ID Register */
 	mrs	x2, midr_el1
 	msr	vpidr_el2, x2
 
 	/* Load the Virtualization Multiprocess ID Register */
 	mrs	x2, mpidr_el1
 	msr	vmpidr_el2, x2
 
 	/* Set the bits that need to be 1 in sctlr_el1 */
 	ldr	x2, .Lsctlr_res1
 	msr	sctlr_el1, x2
 
 	/* Don't trap to EL2 for exceptions */
 	mov	x2, #CPTR_RES1
 	msr	cptr_el2, x2
 
 	/* Don't trap to EL2 for CP15 traps */
 	msr	hstr_el2, xzr
 
 	/* Hypervisor trap functions */
 	adr	x2, hyp_vectors
 	msr	vbar_el2, x2
 
 	/* The eret below lands in EL1h with D, A, I and F masked. */
 	mov	x2, #(PSR_F | PSR_I | PSR_A | PSR_D | PSR_M_EL1h)
 	msr	spsr_el2, x2
 
 	/* Configure GICv3 CPU interface */
 	mrs	x2, id_aa64pfr0_el1
 	/* Extract GIC bits from the register */
 	ubfx	x2, x2, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_BITS
 	/* GIC[3:0] == 0001 - GIC CPU interface via special regs. supported */
 	cmp	x2, #(ID_AA64PFR0_GIC_CPUIF_EN >> ID_AA64PFR0_GIC_SHIFT)
 	b.ne	2f
 
 	mrs	x2, icc_sre_el2
 	orr	x2, x2, #ICC_SRE_EL2_EN	/* Enable access from insecure EL1 */
 	msr	icc_sre_el2, x2
 	isb
 2:
 
 	/* Set the address to return to our return address */
 	msr	elr_el2, x30
 
 	eret
 
 	/* Literal word holding the SCTLR_RES1 value loaded above. */
 	.align 3
 .Lsctlr_res1:
 	.quad SCTLR_RES1
 
 /*
  * One empty vector: aligned to 128 bytes (.align 7) and spinning forever
  * on a branch to itself.
  */
 #define	VECT_EMPTY	\
 	.align 7;	\
 	1:	b	1b
 
 /*
  * EL2 exception vector table installed in vbar_el2 by drop_to_el1.
  * Aligned to 2KiB (.align 11); every one of the 16 entries just hangs.
  */
 	.align 11
 hyp_vectors:
 	VECT_EMPTY	/* Synchronous EL2t */
 	VECT_EMPTY	/* IRQ EL2t */
 	VECT_EMPTY	/* FIQ EL2t */
 	VECT_EMPTY	/* Error EL2t */
 
 	VECT_EMPTY	/* Synchronous EL2h */
 	VECT_EMPTY	/* IRQ EL2h */
 	VECT_EMPTY	/* FIQ EL2h */
 	VECT_EMPTY	/* Error EL2h */
 
 	VECT_EMPTY	/* Synchronous 64-bit EL1 */
 	VECT_EMPTY	/* IRQ 64-bit EL1 */
 	VECT_EMPTY	/* FIQ 64-bit EL1 */
 	VECT_EMPTY	/* Error 64-bit EL1 */
 
 	VECT_EMPTY	/* Synchronous 32-bit EL1 */
 	VECT_EMPTY	/* IRQ 32-bit EL1 */
 	VECT_EMPTY	/* FIQ 32-bit EL1 */
 	VECT_EMPTY	/* Error 32-bit EL1 */
 
 /*
  * Get the delta between the physical address we were loaded to and the
  * virtual address we expect to run from. This is used when building the
  * initial page table.
  *
  * Returns:
  *  x29 = PA - VA
  *  x28 = KERNBASE + (PA - VA), the physical load address
  */
 get_virt_delta:
 	/* Load the physical address of virt_map */
 	adr	x29, virt_map
 	/* Load the virtual address of virt_map stored in virt_map */
 	ldr	x28, [x29]
 	/* Find PA - VA as PA' = VA' - VA + PA = VA' + (PA - VA) = VA' + x29 */
 	sub	x29, x29, x28
 	/* Find the load address for the kernel */
 	mov	x28, #(KERNBASE)
 	add	x28, x28, x29
 	ret
 
 	/* Holds its own link-time address, to compare with its run-time address. */
 	.align 3
 virt_map:
 	.quad	virt_map
 
 /*
  * This builds the page tables containing the identity map, and the kernel
  * virtual map.
  *
  * It relies on:
  *  We were loaded to an address that is on a 2MiB boundary
  *  All the memory must not cross a 1GiB boundary
  *  x28 contains the physical address we were loaded from
  *
  * On return:
  *  x26 = L1 table for TTBR1 (pagetable + PAGE_SIZE)
  *  x27 = L1 table for TTBR0 (pagetable + 2 * PAGE_SIZE)
  * x5 holds the saved link register; x6-x13 are used as scratch/arguments.
  *
  * TODO: This is out of date.
  *  There are at least 5 pages before that address for the page tables
  *   The pages used are:
  *    - The identity (PA = VA) table (TTBR0)
  *    - The Kernel L1 table          (TTBR1)(not yet)
  *    -  The PA != VA L2 table to jump into (not yet)
  *    -  The FDT L2 table                   (not yet)
  */
 create_pagetables:
 	/* Save the Link register */
 	mov	x5, x30
 
 	/* Clean the page table */
 	/* Zeroes pagetable..pagetable_end, 64 bytes per iteration. */
 	adr	x6, pagetable
 	mov	x26, x6
 	adr	x27, pagetable_end
 1:
 	stp	xzr, xzr, [x6], #16
 	stp	xzr, xzr, [x6], #16
 	stp	xzr, xzr, [x6], #16
 	stp	xzr, xzr, [x6], #16
 	cmp	x6, x27
 	b.lo	1b
 
 	/*
 	 * Build the TTBR1 maps.
 	 */
 
 	/* Find the size of the kernel */
 	mov	x6, #(KERNBASE)
 	ldr	x7, .Lend
 	/* Find the end - begin */
 	sub	x8, x7, x6
 	/* Get the number of l2 pages to allocate, rounded down */
 	lsr	x10, x8, #(L2_SHIFT)
 	/* Add 4 MiB for any rounding above and the module data */
 	add	x10, x10, #2
 
 	/* Create the kernel space L2 table */
 	/* x26 still points at the first page (the kernel L2 table) here. */
 	mov	x6, x26
 	mov	x7, #NORMAL_MEM
 	mov	x8, #(KERNBASE & L2_BLOCK_MASK)
 	mov	x9, x28
 	bl	build_block_pagetable
 
 	/* Move to the l1 table */
 	add	x26, x26, #PAGE_SIZE
 
 	/* Link the l1 -> l2 table */
 	/* x6 is still the L2 table and x8 the kernel VA from the call above. */
 	mov	x9, x6
 	mov	x6, x26
 	bl	link_l1_pagetable
 
 
 	/*
 	 * Build the TTBR0 maps.
 	 */
 	add	x27, x26, #PAGE_SIZE
 
 #if defined(SOCDEV_PA) && defined(SOCDEV_VA)
 	/* Create a table for the UART */
 	mov	x6, x27		/* The initial page table */
 	mov	x7, #DEVICE_MEM
 	mov	x8, #(SOCDEV_VA)	/* VA start */
 	mov	x9, #(SOCDEV_PA)	/* PA start */
 	bl	build_section_pagetable
 #endif
 
 	/* Create the VA = PA map */
 	/* Maps the 1 GiB region that contains the TTBR0 table itself. */
 	mov	x6, x27		/* The initial page table */
 	mov	x7, #NORMAL_UNCACHED /* Uncached as it's only needed early on */
 	mov	x9, x27
 	mov	x8, x9		/* VA start (== PA start) */
 	bl	build_section_pagetable
 
 	/* Restore the Link register */
 	mov	x30, x5
 	ret
 
 /*
  * Builds a 1 GiB page table entry
  *  x6 = L1 table
  *  x7 = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
  *  x8 = VA start
  *  x9 = PA start (trashed)
  *  x11, x12 and x13 are trashed
  */
 build_section_pagetable:
 	/*
 	 * Build the L1 table entry.
 	 */
 	/* Find the table index */
 	lsr	x11, x8, #L1_SHIFT
 	and	x11, x11, #Ln_ADDR_MASK
 
 	/* Build the L1 block entry */
 	/* The type is placed in the entry starting at bit 2. */
 	lsl	x12, x7, #2
 	orr	x12, x12, #L1_BLOCK
 	orr	x12, x12, #(ATTR_AF)
 
 	/* Only use the output address bits */
 	lsr	x9, x9, #L1_SHIFT
 	orr	x12, x12, x9, lsl #L1_SHIFT
 
 	/* Store the entry */
 	/* Entries are 8 bytes wide, hence the index scaled by 8. */
 	str	x12, [x6, x11, lsl #3]
 
 	ret
 
 /*
  * Builds an L1 -> L2 table descriptor
  *
  * This is a link for a 1GiB block of memory with up to 2MiB regions mapped
  * within it by build_block_pagetable.
  *
  *  x6  = L1 table
  *  x8  = Virtual Address
  *  x9  = L2 PA (trashed)
  *  x11, x12 and x13 are trashed
  */
 link_l1_pagetable:
 	/*
 	 * Link an L1 -> L2 table entry.
 	 */
 	/* Find the table index */
 	lsr	x11, x8, #L1_SHIFT
 	and	x11, x11, #Ln_ADDR_MASK
 
 	/* Build the L1 table descriptor */
 	mov	x12, #L1_TABLE
 
 	/* Only use the output address bits */
 	/* Shifting right then left by 12 drops the low 12 bits of the L2 PA. */
 	lsr	x9, x9, #12
 	orr	x12, x12, x9, lsl #12
 
 	/* Store the entry */
 	str	x12, [x6, x11, lsl #3]
 
 	ret
 
 /*
  * Builds x10 consecutive 2 MiB page table entries
  *  x6  = L2 table
  *  x7  = Type (DEVICE_MEM, NORMAL_UNCACHED or NORMAL_MEM)
  *  x8  = VA start
  *  x9  = PA start (trashed)
  *  x10 = Entry count (trashed; must be non-zero because it is
  *        decremented before it is tested)
  *  x11, x12 and x13 are trashed
  */
 build_block_pagetable:
 	/*
 	 * Build the L2 table entry.
 	 */
 	/* Find the table index */
 	lsr	x11, x8, #L2_SHIFT
 	and	x11, x11, #Ln_ADDR_MASK
 
 	/* Build the L2 block entry */
 	lsl	x12, x7, #2
 	orr	x12, x12, #L2_BLOCK
 	orr	x12, x12, #(ATTR_AF)
 #ifdef SMP
 	orr	x12, x12, ATTR_SH(ATTR_SH_IS)
 #endif
 
 	/* Only use the output address bits */
 	/* From here on x9 counts in 2 MiB units. */
 	lsr	x9, x9, #L2_SHIFT
 
 	/* Set the physical address for this virtual address */
 1:	orr	x12, x12, x9, lsl #L2_SHIFT
 
 	/* Store the entry */
 	str	x12, [x6, x11, lsl #3]
 
 	/* Clear the address bits */
 	and	x12, x12, #ATTR_MASK_L
 
 	/* Advance to the next table slot and the next 2 MiB of physical memory. */
 	sub	x10, x10, #1
 	add	x11, x11, #1
 	add	x9, x9, #1
 	cbnz	x10, 1b
 
 2:	ret
 
 /*
  * Program the EL1 translation registers and turn the MMU on.
  *  x26 = table loaded into ttbr1_el1
  *  x27 = table loaded into ttbr0_el1
  *  x1, x2 and x3 are trashed
  */
 start_mmu:
 	dsb	sy
 
 	/* Load the exception vectors */
 	ldr	x2, =exception_vectors
 	msr	vbar_el1, x2
 
 	/* Load ttbr0 and ttbr1 */
 	msr	ttbr0_el1, x27
 	msr	ttbr1_el1, x26
 	isb
 
 	/* Clear the Monitor Debug System control register */
 	msr	mdscr_el1, xzr
 
 	/* Invalidate the TLB */
 	tlbi	vmalle1is
 
 	ldr	x2, mair
 	msr	mair_el1, x2
 
 	/* Setup TCR according to PARange bits from ID_AA64MMFR0_EL1 */
 	/* The low 3 bits of id_aa64mmfr0_el1 are inserted at TCR bits 34:32. */
 	ldr	x2, tcr
 	mrs	x3, id_aa64mmfr0_el1
 	bfi	x2, x3, #32, #3
 	msr	tcr_el1, x2
 
 	/* Setup SCTLR */
 	ldr	x2, sctlr_set
 	ldr	x3, sctlr_clear
 	mrs	x1, sctlr_el1
 	bic	x1, x1, x3	/* Clear the required bits */
 	orr	x1, x1, x2	/* Set the required bits */
 	msr	sctlr_el1, x1
 	isb
 
 	ret
 
 	/* Constants loaded pc-relative by start_mmu. */
 	.align 3
 mair:
 		/* Device            Normal, no cache     Normal, write-back */
 	.quad	MAIR_ATTR(0x00, 0) | MAIR_ATTR(0x44, 1) | MAIR_ATTR(0xff, 2)
 tcr:
 	.quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \
 	    TCR_CACHE_ATTRS | TCR_SMP_ATTRS)
 sctlr_set:
 	/* Bits to set */
 	.quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \
 	    SCTLR_I | SCTLR_SED | SCTLR_C | SCTLR_M)
 sctlr_clear:
 	/* Bits to clear */
 	.quad (SCTLR_EE | SCTLR_EOE | SCTLR_WXN | SCTLR_UMA | SCTLR_ITD | \
 	    SCTLR_THEE | SCTLR_CP15BEN | SCTLR_SA0 | SCTLR_SA | SCTLR_A)
 
 	/* Global "abort" symbol: spins forever on a branch to itself. */
 	.globl abort
 abort:
 	b abort
 
 	//.section .init_pagetable
 	.align 12 /* 4KiB aligned */
 	/*
 	 * 3 initial tables (in the following order):
 	 *           L2 for kernel (High addresses)
 	 *           L1 for kernel
 	 *           L1 for user   (Low addresses)
 	 */
 	/* create_pagetables zeroes everything from pagetable to pagetable_end. */
 pagetable:
 	.space	PAGE_SIZE
 pagetable_l1_ttbr1:
 	.space	PAGE_SIZE
 pagetable_l1_ttbr0:
 	.space	PAGE_SIZE
 pagetable_end:
 
 	/* One more page, placed after pagetable_end so it is not zeroed above. */
 el2_pagetable:
 	.space	PAGE_SIZE
 
 	.globl init_pt_va
 init_pt_va:
 	.quad pagetable		/* XXX: Keep page tables VA */
 
 	/* Boot CPU stack; virtdone starts sp at initstack_end. */
 	.align	4
 initstack:
 	.space	(PAGE_SIZE * KSTACK_PAGES)
 initstack_end:
 
 
 /*
  * Signal return trampoline.  Passes sp + SF_UC in x0 to the sigreturn
  * system call; if that returns, calls exit and then retries.
  */
 ENTRY(sigcode)
 	mov	x0, sp
 	add	x0, x0, #SF_UC
 
 1:
 	mov	x8, #SYS_sigreturn
 	svc	0
 
 	/* sigreturn failed, exit */
 	mov	x8, #SYS_exit
 	svc	0
 
 	b	1b
 END(sigcode)
 	/* This may be copied to the stack, keep it 16-byte aligned */
 	/*
 	 * NOTE(review): elsewhere ".align 12" is documented as 4KiB, so
 	 * ".align 3" gives 8-byte, not 16-byte, alignment -- confirm.
 	 */
 	.align	3
 esigcode:
 
 	/* szsigcode: size of the trampoline in bytes (esigcode - sigcode). */
 	.data
 	.align	3
 	.global	szsigcode
 szsigcode:
 	.quad	esigcode - sigcode
Index: head/sys/arm64/arm64/mp_machdep.c
===================================================================
--- head/sys/arm64/arm64/mp_machdep.c	(revision 285626)
+++ head/sys/arm64/arm64/mp_machdep.c	(revision 285627)
@@ -1,440 +1,441 @@
 /*-
  * Copyright (c) 2015 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Andrew Turner under
  * sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
+#include "opt_kstack_pages.h"
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/intr.h>
 #include <machine/smp.h>
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 #ifdef FDT
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_cpu.h>
 #endif
 
 #include <dev/psci/psci.h>
 
 boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *);
 
 /* Per-CPU data array, indexed by cpuid. */
 extern struct pcpu __pcpu[];
 
 /* How the CPUs were enumerated; set by cpu_mp_setmaxid(). */
 static enum {
 	CPUS_UNKNOWN,
 #ifdef FDT
 	CPUS_FDT,
 #endif
 } cpu_enum_method;
 
 static device_identify_t arm64_cpu_identify;
 static device_probe_t arm64_cpu_probe;
 static device_attach_t arm64_cpu_attach;
 
 static int ipi_handler(void *arg);
 
 /* Spin mutex taken by each AP while it updates smp_cpus/smp_started. */
 struct mtx ap_boot_mtx;
 /* Context saved by each CPU when it handles IPI_STOP/IPI_STOP_HARD. */
 struct pcb stoppcbs[MAXCPU];
 
 #ifdef INVARIANTS
 /* Copy of each CPU's FDT "reg" cells, recorded by cpu_init_fdt(). */
 static uint32_t cpu_reg[MAXCPU][2];
 #endif
 /* Attached arm64_cpu devices, indexed by unit number. */
 static device_t cpu_list[MAXCPU];
 
 void mpentry(unsigned long cpuid);
 void init_secondary(uint64_t);
 
 /* Boot stacks for the APs; CPU n (n >= 1) uses secondary_stacks[n - 1]. */
 uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16);
 
 /* # of Applications processors */
 volatile int mp_naps;
 /* Set to 1 once we're ready to let the APs out of the pen. */
 volatile int aps_ready = 0;
 
 /* Dynamic per-CPU areas for the APs, allocated in cpu_init_fdt(). */
 void *dpcpu[MAXCPU - 1];
 
 /* Device methods implemented by the arm64_cpu driver. */
 static device_method_t arm64_cpu_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_identify,	arm64_cpu_identify),
 	DEVMETHOD(device_probe,		arm64_cpu_probe),
 	DEVMETHOD(device_attach,	arm64_cpu_attach),
 
 	DEVMETHOD_END
 };
 
 static devclass_t arm64_cpu_devclass;
 /* Driver description: name, method table and a softc size of 0. */
 static driver_t arm64_cpu_driver = {
 	"arm64_cpu",
 	arm64_cpu_methods,
 	0
 };
 
 /* Register the driver as a child of the "cpu" bus. */
 DRIVER_MODULE(arm64_cpu, cpu, arm64_cpu_driver, arm64_cpu_devclass, 0, 0);
 
 /*
  * Identify method: make sure the parent has an "arm64_cpu" child, adding
  * one when none exists yet and reporting a failure to add it.
  */
 static void
 arm64_cpu_identify(driver_t *driver, device_t parent)
 {
 	device_t child;
 
 	child = device_find_child(parent, "arm64_cpu", -1);
 	if (child == NULL) {
 		/* No child yet, so try to create it. */
 		child = BUS_ADD_CHILD(parent, 0, "arm64_cpu", -1);
 		if (child == NULL)
 			device_printf(parent, "add child failed\n");
 	}
 }
 
 /*
  * Probe method: accept the device only when its unit number is a valid
  * CPU id, i.e. below MAXCPU and not above mp_maxid.
  *
  * Returns 0 on success and EINVAL otherwise.
  */
 static int
 arm64_cpu_probe(device_t dev)
 {
 	u_int unit;
 
 	unit = device_get_unit(dev);
 	if (unit < MAXCPU && unit <= mp_maxid)
 		return (0);
 
 	return (EINVAL);
 }
 
 /*
  * Attach method: validate the unit number, print the CPU's register
  * cells as returned by cpu_get_cpuid() and remember the device in
  * cpu_list[].
  *
  * Returns 0 on success, or EINVAL for an invalid unit or when no
  * register data is available.
  */
 static int
 arm64_cpu_attach(device_t dev)
 {
 	const uint32_t *cells;
 	size_t ncells;
 	u_int unit;
 	int idx;
 
 	unit = device_get_unit(dev);
 	if (unit >= MAXCPU || unit > mp_maxid)
 		return (EINVAL);
 	KASSERT(cpu_list[unit] == NULL, ("Already have cpu %u", unit));
 
 	cells = cpu_get_cpuid(dev, &ncells);
 	if (cells == NULL)
 		return (EINVAL);
 
 	device_printf(dev, "Found register:");
 	idx = 0;
 	while (idx < ncells) {
 		printf(" %x", cells[idx]);
 		idx++;
 	}
 	printf("\n");
 
 	/* Set the device to start it later */
 	cpu_list[unit] = dev;
 
 	return (0);
 }
 
 /*
  * SYSINIT hook: install the IPI handler for every IPI, set aps_ready and
  * wake the APs with "sev", then poll smp_started for up to 2000 rounds of
  * DELAY(1000) before reporting that the APs did not start.
  */
 static void
 release_aps(void *dummy __unused)
 {
 	int ipi, tries;
 
 	/* Setup the IPI handler */
 	for (ipi = 0; ipi < COUNT_IPI; ipi++)
 		arm_setup_ipihandler(ipi_handler, ipi);
 
 	atomic_store_rel_int(&aps_ready, 1);
 	/* Wake up the other CPUs */
 	__asm __volatile("sev");
 
 	printf("Release APs\n");
 
 	tries = 2000;
 	while (tries-- > 0) {
 		if (smp_started)
 			return;
 		DELAY(1000);
 	}
 
 	printf("AP's not started\n");
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
 /*
  * C entry point for an application processor, reached from mpentry with
  * the cpuid in "cpu".
  *
  * Installs the pcpu pointer, checks in with the boot CPU through mp_naps,
  * waits for aps_ready, then initialises this CPU (curthread, interrupt
  * controller, IPIs, clocks, VFP), counts itself in smp_cpus and enters
  * the scheduler.  Does not return.
  */
 void
 init_secondary(uint64_t cpu)
 {
 	struct pcpu *pcpup;
 	int i;
 
 	pcpup = &__pcpu[cpu];
 	/*
 	 * Set the pcpu pointer with a backup in tpidr_el1 to be
 	 * loaded when entering the kernel from userland.
 	 */
 	__asm __volatile(
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
 	/*
 	 * pcpu_init() updates queue, so it should not be executed in parallel
 	 * on several cores
 	 */
 	/* Wait until every lower-numbered AP has checked in. */
 	while(mp_naps < (cpu - 1))
 		;
 
 	/* Signal our startup to BSP */
 	atomic_add_rel_32(&mp_naps, 1);
 
 	/* Spin until the BSP releases the APs */
 	/* release_aps() sets aps_ready and issues "sev" to end the wfe. */
 	while (!aps_ready)
 		__asm __volatile("wfe");
 
 	/* Initialize curthread */
 	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
 	pcpup->pc_curthread = pcpup->pc_idlethread;
 	pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb;
 
 	/*
 	 * Identify current CPU. This is necessary to setup
 	 * affinity registers and to provide support for
 	 * runtime chip identification.
 	 */
 	identify_cpu();
 
 	/* Configure the interrupt controller */
 	arm_init_secondary();
 
 	for (i = 0; i < COUNT_IPI; i++)
 		arm_unmask_ipi(i);
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 #ifdef VFP
 	vfp_init();
 #endif
 
 	/* Enable interrupts */
 	intr_enable();
 
 	mtx_lock_spin(&ap_boot_mtx);
 
 	atomic_add_rel_32(&smp_cpus, 1);
 
 	/* The last CPU to arrive marks SMP as started. */
 	if (smp_cpus == mp_ncpus) {
 		/* enable IPI's, tlb shootdown, freezes etc */
 		atomic_store_rel_int(&smp_started, 1);
 	}
 
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Enter the scheduler */
 	sched_throw(NULL);
 
 	panic("scheduler returned us to init_secondary");
 	/* NOTREACHED */
 }
 
 static int
 ipi_handler(void *arg)
 {
 	u_int cpu, ipi;
 
 	arg = (void *)((uintptr_t)arg & ~(1 << 16));
 	KASSERT((uintptr_t)arg < COUNT_IPI,
 	    ("Invalid IPI %ju", (uintptr_t)arg));
 
 	cpu = PCPU_GET(cpuid);
 	ipi = (uintptr_t)arg;
 
 	switch(ipi) {
 	case IPI_AST:
 		CTR0(KTR_SMP, "IPI_AST");
 		break;
 	case IPI_PREEMPT:
 		CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
 		sched_preempt(curthread);
 		break;
 	case IPI_RENDEZVOUS:
 		CTR0(KTR_SMP, "IPI_RENDEZVOUS");
 		smp_rendezvous_action();
 		break;
 	case IPI_STOP:
 	case IPI_STOP_HARD:
 		CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD");
 		savectx(&stoppcbs[cpu]);
 
 		/* Indicate we are stopped */
 		CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
 		/* Wait for restart */
 		while (!CPU_ISSET(cpu, &started_cpus))
 			cpu_spinwait();
 
 		CPU_CLR_ATOMIC(cpu, &started_cpus);
 		CPU_CLR_ATOMIC(cpu, &stopped_cpus);
 		CTR0(KTR_SMP, "IPI_STOP (restart)");
 		break;
 	case IPI_HARDCLOCK:
 		CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
 		hardclockintr();
 		break;
 	default:
 		panic("Unknown IPI %#0x on cpu %d", ipi, curcpu);
 	}
 
 	return (FILTER_HANDLED);
 }
 
 /*
  * Return the CPU topology.  No topology information is provided here:
  * the result is whatever smp_topo_none() returns.
  */
 struct cpu_group *
 cpu_topo(void)
 {
 
 	return (smp_topo_none());
 }
 
 /*
  * Determine if we are running on an MP machine.
  * Currently always reports that we are (returns 1).
  */
 int
 cpu_mp_probe(void)
 {
 
 	/* ARM64TODO: Read the u bit of mpidr_el1 to determine this */
 	return (1);
 }
 
 #ifdef FDT
 /*
  * Callback for ofw_cpu_early_foreach(): prepare and start one CPU found
  * in the device tree.
  *
  *  id        - logical CPU id
  *  node      - device tree node of the CPU (unused here)
  *  addr_size - number of cells in "reg" (1 or 2)
  *  reg       - the CPU's "reg" cells, most significant cell first
  *
  * Returns 0 when the id is above mp_maxid and 1 otherwise, including when
  * the PSCI call to start the CPU fails (which is only reported).
  */
 static boolean_t
 cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg)
 {
 	struct pcpu *pc;
 	uint64_t mpidr;
 	vm_paddr_t entry_pa;
 	void *dpcpu_mem;
 
 	/* Check we are able to start this cpu */
 	if (id > mp_maxid)
 		return (0);
 
 	KASSERT(id < MAXCPU, ("Too mant CPUs"));
 
 	KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size"));
 #ifdef INVARIANTS
 	cpu_reg[id][0] = reg[0];
 	if (addr_size == 2)
 		cpu_reg[id][1] = reg[1];
 #endif
 
 	/* We are already running on cpu 0 */
 	if (id == 0)
 		return (1);
 
 	CPU_SET(id, &all_cpus);
 
 	/* Set up the static and dynamic per-CPU areas for this CPU. */
 	pc = &__pcpu[id];
 	pcpu_init(pc, id, sizeof(struct pcpu));
 
 	dpcpu_mem = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
 	    M_WAITOK | M_ZERO);
 	dpcpu[id - 1] = dpcpu_mem;
 	dpcpu_init(dpcpu_mem, id);
 
 	/* Combine the one or two "reg" cells into the 64-bit target id. */
 	if (addr_size == 2)
 		mpidr = ((uint64_t)reg[0] << 32) | reg[1];
 	else
 		mpidr = reg[0];
 
 	printf("Starting CPU %u (%lx)\n", id, mpidr);
 	entry_pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry);
 
 	/* The CPU starts at mpentry with its id as the context argument. */
 	if (psci_cpu_on(mpidr, entry_pa, id) != PSCI_RETVAL_SUCCESS)
 		printf("Failed to start CPU %u\n", id);
 
 	return (1);
 }
 #endif
 
 /*
  * Initialize and fire up non-boot processors.
  *
  * Sets up the AP boot mutex, marks CPU 0 as present and, when the CPUs
  * were enumerated from the device tree, starts each of them through
  * cpu_init_fdt().  Nothing more is done for CPUS_UNKNOWN.
  */
 void
 cpu_mp_start(void)
 {
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	CPU_SET(0, &all_cpus);
 
 #ifdef FDT
 	if (cpu_enum_method == CPUS_FDT)
 		ofw_cpu_early_foreach(cpu_init_fdt, true);
 #endif
 }
 
 /* Introduce rest of cores to the world */
 /* Intentionally empty: nothing is announced here. */
 void
 cpu_mp_announce(void)
 {
 }
 
 /*
  * Set mp_ncpus and mp_maxid.
  *
  * With FDT, the CPUs in the device tree are counted (capped at MAXCPU)
  * and the enumeration method is recorded as CPUS_FDT.  When no CPUs are
  * found, or without FDT, the system is limited to a single core.
  */
 void
 cpu_mp_setmaxid(void)
 {
 #ifdef FDT
 	int ncpu;
 
 	ncpu = ofw_cpu_early_foreach(NULL, false);
 	if (ncpu > 0) {
 		if (ncpu > MAXCPU)
 			ncpu = MAXCPU;
 		if (bootverbose)
 			printf("Found %d CPUs in the device tree\n", ncpu);
 		cpu_enum_method = CPUS_FDT;
 		mp_maxid = ncpu - 1;
 		mp_ncpus = ncpu;
 		return;
 	}
 #endif
 
 	if (bootverbose)
 		printf("No CPU data, limiting to 1 core\n");
 	mp_maxid = 0;
 	mp_ncpus = 1;
 }
Index: head/sys/arm64/arm64/swtch.S
===================================================================
--- head/sys/arm64/arm64/swtch.S	(revision 285626)
+++ head/sys/arm64/arm64/swtch.S	(revision 285627)
@@ -1,277 +1,278 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * Copyright (c) 2014 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Andrew Turner under sponsorship from
  * the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include "assym.s"
+#include "opt_kstack_pages.h"
 #include "opt_sched.h"
 
 #include <machine/asm.h>
 
 __FBSDID("$FreeBSD$");
 
 /*
  * void cpu_throw(struct thread *old, struct thread *new)
  *
  * Switch to "new" without saving any state for "old".  x18 is used as
  * the base for the per-CPU PC_CURTHREAD/PC_CURPCB fields and is not
  * reloaded from the pcb.  Returns into the new thread through its saved
  * x30.
  */
 ENTRY(cpu_throw)
 #ifdef VFP
 	/* Backup the new thread pointer around a call to C code */
 	mov	x19, x1
 	bl	vfp_discard
 	mov	x1, x19
 #endif
 
 	/* Store the new curthread */
 	str	x1, [x18, #PC_CURTHREAD]
 	/* And the new pcb */
 	ldr	x4, [x1, #TD_PCB]
 	str	x4, [x18, #PC_CURPCB]
 
 	/*
 	 * TODO: We may need to flush the cache here.
 	 */
 
 	/* Switch to the new pmap */
 	ldr	x5, [x4, #PCB_L1ADDR]
 	msr	ttbr0_el1, x5
 	isb
 
 	/* Invalidate the TLB */
 	dsb	sy
 	tlbi	vmalle1is
 	dsb	sy
 	isb
 
 	/* Restore the registers */
 	/* The pair at PCB_SP is the stack pointer followed by tpidr_el0. */
 	ldp	x5, x6, [x4, #PCB_SP]
 	mov	sp, x5
 	msr	tpidr_el0, x6
 	ldp	x8, x9, [x4, #PCB_REGS + 8 * 8]
 	ldp	x10, x11, [x4, #PCB_REGS + 10 * 8]
 	ldp	x12, x13, [x4, #PCB_REGS + 12 * 8]
 	ldp	x14, x15, [x4, #PCB_REGS + 14 * 8]
 	ldp	x16, x17, [x4, #PCB_REGS + 16 * 8]
 	/* x18 is deliberately skipped: only x19 is loaded from this pair. */
 	ldr	     x19, [x4, #PCB_REGS + 19 * 8]
 	ldp	x20, x21, [x4, #PCB_REGS + 20 * 8]
 	ldp	x22, x23, [x4, #PCB_REGS + 22 * 8]
 	ldp	x24, x25, [x4, #PCB_REGS + 24 * 8]
 	ldp	x26, x27, [x4, #PCB_REGS + 26 * 8]
 	ldp	x28, x29, [x4, #PCB_REGS + 28 * 8]
 	ldr	x30, [x4, #PCB_REGS + 30 * 8]
 
 	ret
 END(cpu_throw)
 
 /*
  * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
  *
  * x0 = old
  * x1 = new
  * x2 = mtx
  * x3 to x7, x16 and x17 are caller saved
  *
  * Saves the old thread's registers in its pcb, switches ttbr0 to the new
  * thread's pmap, stores mtx into the old thread's td_lock and resumes the
  * new thread from its pcb.
  */
 ENTRY(cpu_switch)
 	/* Store the new curthread */
 	str	x1, [x18, #PC_CURTHREAD]
 	/* And the new pcb */
 	ldr	x4, [x1, #TD_PCB]
 	str	x4, [x18, #PC_CURPCB]
 
 	/*
 	 * Save the old context.
 	 */
 	ldr	x4, [x0, #TD_PCB]
 
 	/* Store the callee-saved registers */
 	/* x8-x18 are stored as well, not only the callee-saved set. */
 	stp	x8, x9, [x4, #PCB_REGS + 8 * 8]
 	stp	x10, x11, [x4, #PCB_REGS + 10 * 8]
 	stp	x12, x13, [x4, #PCB_REGS + 12 * 8]
 	stp	x14, x15, [x4, #PCB_REGS + 14 * 8]
 	stp	x16, x17, [x4, #PCB_REGS + 16 * 8]
 	stp	x18, x19, [x4, #PCB_REGS + 18 * 8]
 	stp	x20, x21, [x4, #PCB_REGS + 20 * 8]
 	stp	x22, x23, [x4, #PCB_REGS + 22 * 8]
 	stp	x24, x25, [x4, #PCB_REGS + 24 * 8]
 	stp	x26, x27, [x4, #PCB_REGS + 26 * 8]
 	stp	x28, x29, [x4, #PCB_REGS + 28 * 8]
 	str	x30, [x4, #PCB_REGS + 30 * 8]
 	/* And the old stack pointer */
 	mov	x5, sp
 	mrs	x6, tpidr_el0
 	stp	x5, x6, [x4, #PCB_SP]
 
 #ifdef VFP
 	/* Keep the three arguments in x19-x21 across the call to C code. */
 	mov	x19, x0
 	mov	x20, x1
 	mov	x21, x2
 	bl	vfp_save_state
 	mov	x2, x21
 	mov	x1, x20
 	mov	x0, x19
 #endif
 
 	/*
 	 * Restore the saved context.
 	 */
 	ldr	x4, [x1, #TD_PCB]
 
 	/*
 	 * TODO: We may need to flush the cache here if switching
 	 * to a user process.
 	 */
 
 	/* Switch to the new pmap */
 	ldr	x5, [x4, #PCB_L1ADDR]
 	msr	ttbr0_el1, x5
 	isb
 
 	/* Invalidate the TLB */
 	dsb	sy
 	tlbi	vmalle1is
 	dsb	sy
 	isb
 
 	/* Release the old thread */
 	str	x2, [x0, #TD_LOCK]
 #if defined(SCHED_ULE) && defined(SMP)
 	/* Read the value in blocked_lock */
 	/*
 	 * NOTE(review): x1 is the first word stored in blocked_lock, not
 	 * its address, and that is what td_lock is compared with below --
 	 * confirm which one is intended.
 	 */
 	ldr	x0, =_C_LABEL(blocked_lock)
 	ldr	x1, [x0]
 	/* Load curthread */
 	ldr	x2, [x18, #PC_CURTHREAD]
 1:
 	/* Spin for as long as curthread's td_lock equals x1. */
 	ldr	x3, [x2, #TD_LOCK]
 	cmp	x3, x1
 	b.eq	1b
 #endif
 
 	/* Restore the registers */
 	ldp	x5, x6, [x4, #PCB_SP]
 	mov	sp, x5
 	msr	tpidr_el0, x6
 	ldp	x8, x9, [x4, #PCB_REGS + 8 * 8]
 	ldp	x10, x11, [x4, #PCB_REGS + 10 * 8]
 	ldp	x12, x13, [x4, #PCB_REGS + 12 * 8]
 	ldp	x14, x15, [x4, #PCB_REGS + 14 * 8]
 	ldp	x16, x17, [x4, #PCB_REGS + 16 * 8]
 	/* x18 is not reloaded; only x19 is taken from this pair. */
 	ldr	     x19, [x4, #PCB_REGS + 19 * 8]
 	ldp	x20, x21, [x4, #PCB_REGS + 20 * 8]
 	ldp	x22, x23, [x4, #PCB_REGS + 22 * 8]
 	ldp	x24, x25, [x4, #PCB_REGS + 24 * 8]
 	ldp	x26, x27, [x4, #PCB_REGS + 26 * 8]
 	ldp	x28, x29, [x4, #PCB_REGS + 28 * 8]
 	ldr	x30, [x4, #PCB_REGS + 30 * 8]
 
 	/* Clear the saved x18 slot in the pcb we just resumed from. */
 	str	xzr, [x4, #PCB_REGS + 18 * 8]
 	ret
 	/* NOTE(review): not referenced by the code visible here. */
 .Lcpu_switch_panic_str:
 	.asciz "cpu_switch: %p\0"
 END(cpu_switch)
 
 /*
  * First code run by a newly created thread.  Calls
  * fork_exit(x8, x9, sp) and then restores the register frame found at sp
  * and leaves the kernel with eret.
  */
 ENTRY(fork_trampoline)
 	mov	x0, x8
 	mov	x1, x9
 	mov	x2, sp
 	mov	fp, #0	/* Stack traceback stops here. */
 	bl	_C_LABEL(fork_exit)
 
 	/* Restore sp and lr */
 	/* The first two words of the frame go to sp_el0 and lr. */
 	ldp	x0, x1, [sp]
 	msr	sp_el0, x0
 	mov	lr, x1
 
 	/* Restore the registers other than x0 and x1 */
 	ldp	x2, x3, [sp, #TF_X + 2 * 8]
 	ldp	x4, x5, [sp, #TF_X + 4 * 8]
 	ldp	x6, x7, [sp, #TF_X + 6 * 8]
 	ldp	x8, x9, [sp, #TF_X + 8 * 8]
 	ldp	x10, x11, [sp, #TF_X + 10 * 8]
 	ldp	x12, x13, [sp, #TF_X + 12 * 8]
 	ldp	x14, x15, [sp, #TF_X + 14 * 8]
 	ldp	x16, x17, [sp, #TF_X + 16 * 8]
 	/* x18 is restored last, below, after interrupts are disabled. */
 	ldr	     x19, [sp, #TF_X + 19 * 8]
 	ldp	x20, x21, [sp, #TF_X + 20 * 8]
 	ldp	x22, x23, [sp, #TF_X + 22 * 8]
 	ldp	x24, x25, [sp, #TF_X + 24 * 8]
 	ldp	x26, x27, [sp, #TF_X + 26 * 8]
 	ldp	x28, x29, [sp, #TF_X + 28 * 8]
 	/* Skip x30 as it was restored above as lr */
 
 	/*
 	 * Disable interrupts to avoid
 	 * overwriting spsr_el1 by an IRQ exception.
 	 */
 	msr	daifset, #2
 
 	/* Restore elr and spsr */
 	/* They are the two words at offset 16 in the frame. */
 	ldp	x0, x1, [sp, #16]
 	msr	elr_el1, x0
 	msr	spsr_el1, x1
 
 	/* Finally x0 and x1 */
 	ldp	x0, x1, [sp, #TF_X + 0 * 8]
 	ldr	x18, [sp, #TF_X + 18 * 8]
 
 	/*
 	 * No need for interrupts reenabling since PSR
 	 * will be set to the desired value anyway.
 	 */
 	eret
 	
 END(fork_trampoline)
 
 /*
  * Save the current register context into the pcb pointed to by x0:
  * x8-x30, the stack pointer and tpidr_el0, plus the VFP state when VFP
  * is configured.  x5 and x6 are used as scratch.
  */
 ENTRY(savectx)
 	/* Store the callee-saved registers */
 	stp	x8,  x9,  [x0, #PCB_REGS + 8 * 8]
 	stp	x10, x11, [x0, #PCB_REGS + 10 * 8]
 	stp	x12, x13, [x0, #PCB_REGS + 12 * 8]
 	stp	x14, x15, [x0, #PCB_REGS + 14 * 8]
 	stp	x16, x17, [x0, #PCB_REGS + 16 * 8]
 	stp	x18, x19, [x0, #PCB_REGS + 18 * 8]
 	stp	x20, x21, [x0, #PCB_REGS + 20 * 8]
 	stp	x22, x23, [x0, #PCB_REGS + 22 * 8]
 	stp	x24, x25, [x0, #PCB_REGS + 24 * 8]
 	stp	x26, x27, [x0, #PCB_REGS + 26 * 8]
 	stp	x28, x29, [x0, #PCB_REGS + 28 * 8]
 	str	x30, [x0, #PCB_REGS + 30 * 8]
 	/* And the old stack pointer */
 	mov	x5, sp
 	mrs	x6, tpidr_el0
 	stp	x5, x6, [x0, #PCB_SP]
 
 	/* Store the VFP registers */
 #ifdef VFP
 	/*
 	 * lr is parked in x29 across the call.
 	 * NOTE(review): the caller's x29 is overwritten and not put back
 	 * before returning -- confirm callers tolerate this.
 	 */
 	mov	x29, lr
 	bl	vfp_save_state
 	mov	lr, x29
 #endif
 
 	ret
 END(savectx)
 
Index: head/sys/ddb/db_ps.c
===================================================================
--- head/sys/ddb/db_ps.c	(revision 285626)
+++ head/sys/ddb/db_ps.c	(revision 285627)
@@ -1,468 +1,470 @@
 /*-
  * Copyright (c) 1993 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #include <sys/param.h>
 #include <sys/cons.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/_kstack_cache.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 
 #include <ddb/ddb.h>
 
 static void	dumpthread(volatile struct proc *p, volatile struct thread *td,
 		    int all);
 /*
  * At least one non-optional show-command must be implemented using
  * DB_SHOW_ALL_COMMAND() so that db_show_all_cmd_set gets created.
  * Here is one.
  */
 DB_SHOW_ALL_COMMAND(procs, db_procs_cmd)
 {
 	/* Forward the command arguments to db_ps() unchanged. */
 	db_ps(addr, have_addr, count, modif);
 }
 
 /*
  * Layout:
  * - column counts
  * - header
  * - single-threaded process
  * - multi-threaded process
  * - thread in a MT process
  *
  *          1         2         3         4         5         6         7
  * 1234567890123456789012345678901234567890123456789012345678901234567890
  *   pid  ppid  pgrp   uid   state   wmesg     wchan    cmd
  * <pid> <ppi> <pgi> <uid>  <stat> < wmesg > < wchan  > <name>
  * <pid> <ppi> <pgi> <uid>  <stat>  (threaded)          <command>
  * <tid >                   <stat> < wmesg > < wchan  > <name>
  *
  * For machines with 64-bit pointers, we expand the wchan field 8 more
  * characters.
  */
 void
 db_ps(db_expr_t addr, bool hasaddr, db_expr_t count, char *modif)
 {
 	volatile struct proc *p, *pp;
 	volatile struct thread *td;
 	struct ucred *cred;
 	struct pgrp *pgrp;
 	char state[9];
 	int np, rflag, sflag, dflag, lflag, wflag;
 
 	/* np limits how many processes the loop below will visit. */
 	np = nprocs;
 
 	/* Start at the head of allproc, or at proc0 if that list is empty. */
 	if (!LIST_EMPTY(&allproc))
 		p = LIST_FIRST(&allproc);
 	else
 		p = &proc0;
 
 	/* The LP64 header is wider to match the wider wchan column. */
 #ifdef __LP64__
 	db_printf("  pid  ppid  pgrp   uid   state   wmesg         wchan        cmd\n");
 #else
 	db_printf("  pid  ppid  pgrp   uid   state   wmesg     wchan    cmd\n");
 #endif
 	while (--np >= 0 && !db_pager_quit) {
 		if (p == NULL) {
 			db_printf("oops, ran out of processes early!\n");
 			break;
 		}
 		/* A process without a parent is shown as its own parent. */
 		pp = p->p_pptr;
 		if (pp == NULL)
 			pp = p;
 
 		cred = p->p_ucred;
 		pgrp = p->p_pgrp;
 		db_printf("%5d %5d %5d %5d ", p->p_pid, pp->p_pid,
 		    pgrp != NULL ? pgrp->pg_id : 0,
 		    cred != NULL ? cred->cr_ruid : 0);
 
 		/* Determine our primary process state. */
 		switch (p->p_state) {
 		case PRS_NORMAL:
 			if (P_SHOULDSTOP(p))
 				state[0] = 'T';
 			else {
 				/*
 				 * One of D, L, R, S, W.  For a
 				 * multithreaded process we will use
 				 * the state of the thread with the
 				 * highest precedence.  The
 				 * precedence order from high to low
 				 * is R, L, D, S, W.  If no thread is
 				 * in a sane state we use '?' for our
 				 * primary state.
 				 */
 				rflag = sflag = dflag = lflag = wflag = 0;
 				FOREACH_THREAD_IN_PROC(p, td) {
 					if (td->td_state == TDS_RUNNING ||
 					    td->td_state == TDS_RUNQ ||
 					    td->td_state == TDS_CAN_RUN)
 						rflag++;
 					if (TD_ON_LOCK(td))
 						lflag++;
 					if (TD_IS_SLEEPING(td)) {
 						if (!(td->td_flags & TDF_SINTR))
 							dflag++;
 						else
 							sflag++;
 					}
 					if (TD_AWAITING_INTR(td))
 						wflag++;
 				}
 				if (rflag)
 					state[0] = 'R';
 				else if (lflag)
 					state[0] = 'L';
 				else if (dflag)
 					state[0] = 'D';
 				else if (sflag)
 					state[0] = 'S';
 				else if (wflag)
 					state[0] = 'W';
 				else
 					state[0] = '?';
 			}
 			break;
 		case PRS_NEW:
 			state[0] = 'N';
 			break;
 		case PRS_ZOMBIE:
 			state[0] = 'Z';
 			break;
 		default:
 			state[0] = 'U';
 			break;
 		}
 		state[1] = '\0';
 
 		/*
 		 * Additional process state flags.  They are appended with
 		 * strlcat(), so anything beyond sizeof(state) is dropped, and
 		 * the "%-6.6s" format below shows at most six characters.
 		 */
 		if (!(p->p_flag & P_INMEM))
 			strlcat(state, "W", sizeof(state));
 		if (p->p_flag & P_TRACED)
 			strlcat(state, "X", sizeof(state));
 		if (p->p_flag & P_WEXIT && p->p_state != PRS_ZOMBIE)
 			strlcat(state, "E", sizeof(state));
 		if (p->p_flag & P_PPWAIT)
 			strlcat(state, "V", sizeof(state));
 		if (p->p_flag & P_SYSTEM || p->p_lock > 0)
 			strlcat(state, "L", sizeof(state));
 		if (p->p_session != NULL && SESS_LEADER(p))
 			strlcat(state, "s", sizeof(state));
 		/* Cheated here and didn't compare pgid's. */
 		if (p->p_flag & P_CONTROLT)
 			strlcat(state, "+", sizeof(state));
 		if (cred != NULL && jailed(cred))
 			strlcat(state, "J", sizeof(state));
 		db_printf(" %-6.6s ", state);
 		/*
 		 * A P_HADTHREADS process gets its own summary line here; its
 		 * threads follow, one per line, from dumpthread().
 		 */
 		if (p->p_flag & P_HADTHREADS) {
 #ifdef __LP64__
 			db_printf(" (threaded)                  ");
 #else
 			db_printf(" (threaded)          ");
 #endif
 			if (p->p_flag & P_SYSTEM)
 				db_printf("[");
 			db_printf("%s", p->p_comm);
 			if (p->p_flag & P_SYSTEM)
 				db_printf("]");
 			db_printf("\n");
 		}
 		FOREACH_THREAD_IN_PROC(p, td) {
 			dumpthread(p, td, p->p_flag & P_HADTHREADS);
 			if (db_pager_quit)
 				break;
 		}
 
 		/* Advance; after the end of this list continue with zombproc. */
 		p = LIST_NEXT(p, p_list);
 		if (p == NULL && np > 0)
 			p = LIST_FIRST(&zombproc);
 	}
 }
 
 /*
  * Print the per-thread part of a ps line: wait message, wait channel and
  * name.  When 'all' is non-zero the thread id and a thread state column
  * are printed first.  System processes (P_SYSTEM) get their name wrapped
  * in square brackets.
  */
 static void
 dumpthread(volatile struct proc *p, volatile struct thread *td, int all)
 {
 	char state[9], wprefix;
 	const char *wmesg;
 	void *wchan;
 	
 	if (all) {
 		db_printf("%6d                  ", td->td_tid);
 		switch (td->td_state) {
 		case TDS_RUNNING:
 			snprintf(state, sizeof(state), "Run");
 			break;
 		case TDS_RUNQ:
 			snprintf(state, sizeof(state), "RunQ");
 			break;
 		case TDS_CAN_RUN:
 			snprintf(state, sizeof(state), "CanRun");
 			break;
 		case TDS_INACTIVE:
 			snprintf(state, sizeof(state), "Inactv");
 			break;
 		case TDS_INHIBITED:
 			/* Build one letter per reason the thread is inhibited. */
 			state[0] = '\0';
 			if (TD_ON_LOCK(td))
 				strlcat(state, "L", sizeof(state));
 			if (TD_IS_SLEEPING(td)) {
 				if (td->td_flags & TDF_SINTR)
 					strlcat(state, "S", sizeof(state));
 				else
 					strlcat(state, "D", sizeof(state));
 			}
 			if (TD_IS_SWAPPED(td))
 				strlcat(state, "W", sizeof(state));
 			if (TD_AWAITING_INTR(td))
 				strlcat(state, "I", sizeof(state));
 			if (TD_IS_SUSPENDED(td))
 				strlcat(state, "s", sizeof(state));
 			if (state[0] != '\0')
 				break;
 			/* FALLTHROUGH: no reason was recognised, print "???" */
 		default:
 			snprintf(state, sizeof(state), "???");
 		}			
 		db_printf(" %-6.6s ", state);
 	}
 	/*
 	 * Pick the wait message and channel.  A thread blocked on a lock is
 	 * flagged with a '*' prefix; a running thread shows "CPU <n>" in
 	 * place of a wait message (the state buffer is reused for it).
 	 */
 	wprefix = ' ';
 	if (TD_ON_LOCK(td)) {
 		wprefix = '*';
 		wmesg = td->td_lockname;
 		wchan = td->td_blocked;
 	} else if (TD_ON_SLEEPQ(td)) {
 		wmesg = td->td_wmesg;
 		wchan = td->td_wchan;
 	} else if (TD_IS_RUNNING(td)) {
 		snprintf(state, sizeof(state), "CPU %d", td->td_oncpu);
 		wmesg = state;
 		wchan = NULL;
 	} else {
 		wmesg = "";
 		wchan = NULL;
 	}
 	db_printf("%c%-8.8s ", wprefix, wmesg);
 	/* With no wait channel, pad with blanks so the name column lines up. */
 	if (wchan == NULL)
 #ifdef __LP64__
 		db_printf("%18s ", "");
 #else
 		db_printf("%10s ", "");
 #endif
 	else
 		db_printf("%p ", wchan);
 	if (p->p_flag & P_SYSTEM)
 		db_printf("[");
 	/* Prefer the thread's own name; fall back to the process name. */
 	if (td->td_name[0] != '\0')
 		db_printf("%s", td->td_name);
 	else
 		db_printf("%s", td->td_proc->p_comm);
 	if (p->p_flag & P_SYSTEM)
 		db_printf("]");
 	db_printf("\n");
 }
 
 /*
  * "show thread" handler: print identity, kernel stack range, flags,
  * scheduling state, wait information, priority and container lock of one
  * thread.  The thread is taken from db_lookup_thread(addr, false) when an
  * address was given, otherwise kdb_thread is used.
  */
 DB_SHOW_COMMAND(thread, db_show_thread)
 {
 	struct thread *td;
 	struct lock_object *lock;
 	bool comma;
 
 	/* Determine which thread to examine. */
 	if (have_addr)
 		td = db_lookup_thread(addr, false);
 	else
 		td = kdb_thread;
 	lock = (struct lock_object *)td->td_lock;
 
 	db_printf("Thread %d at %p:\n", td->td_tid, td);
 	db_printf(" proc (pid %d): %p\n", td->td_proc->p_pid, td->td_proc);
 	if (td->td_name[0] != '\0')
 		db_printf(" name: %s\n", td->td_name);
 	/* The stack range is inclusive: last byte of the last stack page. */
 	db_printf(" stack: %p-%p\n", (void *)td->td_kstack,
 	    (void *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 1));
 	db_printf(" flags: %#x ", td->td_flags);
 	db_printf(" pflags: %#x\n", td->td_pflags);
 	db_printf(" state: ");
 	switch (td->td_state) {
 	case TDS_INACTIVE:
 		db_printf("INACTIVE\n");
 		break;
 	case TDS_CAN_RUN:
 		db_printf("CAN RUN\n");
 		break;
 	case TDS_RUNQ:
 		db_printf("RUNQ\n");
 		break;
 	case TDS_RUNNING:
 		db_printf("RUNNING (CPU %d)\n", td->td_oncpu);
 		break;
 	case TDS_INHIBITED:
 		/*
 		 * List every inhibitor that applies; 'comma' records whether
 		 * an earlier entry was printed so a separator is needed.
 		 */
 		db_printf("INHIBITED: {");
 		comma = false;
 		if (TD_IS_SLEEPING(td)) {
 			db_printf("SLEEPING");
 			comma = true;
 		}
 		if (TD_IS_SUSPENDED(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("SUSPENDED");
 			comma = true;
 		}
 		if (TD_IS_SWAPPED(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("SWAPPED");
 			comma = true;
 		}
 		if (TD_ON_LOCK(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("LOCK");
 			comma = true;
 		}
 		if (TD_AWAITING_INTR(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("IWAIT");
 		}
 		db_printf("}\n");
 		break;
 	default:
 		db_printf("??? (%#x)\n", td->td_state);
 		break;
 	}
 	if (TD_ON_LOCK(td))
 		db_printf(" lock: %s  turnstile: %p\n", td->td_lockname,
 		    td->td_blocked);
 	if (TD_ON_SLEEPQ(td))
 		db_printf(" wmesg: %s  wchan: %p\n", td->td_wmesg,
 		    td->td_wchan);
 	db_printf(" priority: %d\n", td->td_priority);
 	db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
 }
 
 /*
  * "show proc" handler: print state, credentials, parent, leader, ABI,
  * arguments and thread list of one process.  The process is taken from
  * db_lookup_proc(addr) when an address was given, otherwise the process
  * of kdb_thread is used.
  */
 DB_SHOW_COMMAND(proc, db_show_proc)
 {
 	struct thread *td;
 	struct proc *p;
 	int i;
 
 	/* Determine which process to examine. */
 	if (have_addr)
 		p = db_lookup_proc(addr);
 	else
 		p = kdb_thread->td_proc;
 
 	db_printf("Process %d (%s) at %p:\n", p->p_pid, p->p_comm, p);
 	db_printf(" state: ");
 	switch (p->p_state) {
 	case PRS_NEW:
 		db_printf("NEW\n");
 		break;
 	case PRS_NORMAL:
 		db_printf("NORMAL\n");
 		break;
 	case PRS_ZOMBIE:
 		db_printf("ZOMBIE\n");
 		break;
 	default:
 		db_printf("??? (%#x)\n", p->p_state);
 	}
 	if (p->p_ucred != NULL) {
 		/* Group ids are printed as a comma-separated list. */
 		db_printf(" uid: %d  gids: ", p->p_ucred->cr_uid);
 		for (i = 0; i < p->p_ucred->cr_ngroups; i++) {
 			db_printf("%d", p->p_ucred->cr_groups[i]);
 			if (i < (p->p_ucred->cr_ngroups - 1))
 				db_printf(", ");
 		}
 		db_printf("\n");
 	}
 	if (p->p_pptr != NULL)
 		db_printf(" parent: pid %d at %p\n", p->p_pptr->p_pid,
 		    p->p_pptr);
 	/* The leader is only shown when it is a different process. */
 	if (p->p_leader != NULL && p->p_leader != p)
 		db_printf(" leader: pid %d at %p\n", p->p_leader->p_pid,
 		    p->p_leader);
 	if (p->p_sysent != NULL)
 		db_printf(" ABI: %s\n", p->p_sysent->sv_name);
 	/* The precision bounds the output to ar_length bytes of ar_args. */
 	if (p->p_args != NULL)
 		db_printf(" arguments: %.*s\n", (int)p->p_args->ar_length,
 		    p->p_args->ar_args);
 	db_printf(" threads: %d\n", p->p_numthreads);
 	/* One dumpthread() line per thread, stopping if the pager quits. */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		dumpthread(p, td, 1);
 		if (db_pager_quit)
 			break;
 	}
 }
 
 /*
  * findstack <address>: report which kernel stack contains the address.
  * The stacks of all threads in the system are checked first, then the
  * entries on the kstack_cache list.  Only the first match is reported;
  * nothing is printed when no stack contains the address.
  */
 void
 db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused,
     char *dummy4 __unused)
 {
 	struct kstack_cache_entry *entry;
 	struct thread *td;
 	struct proc *p;
 	vm_offset_t target;
 
 	/* The address argument is mandatory. */
 	if (!have_addr) {
 		db_printf("Usage: findstack <address>\n");
 		return;
 	}
 	target = addr;
 
 	/* Stacks that currently belong to a thread. */
 	FOREACH_PROC_IN_SYSTEM(p) {
 		FOREACH_THREAD_IN_PROC(p, td) {
 			if (target >= td->td_kstack &&
 			    target < td->td_kstack +
 			    PAGE_SIZE * td->td_kstack_pages) {
 				db_printf("Thread %p\n", td);
 				return;
 			}
 		}
 	}
 
 	/* Stacks on the cache list; each entry spans KSTACK_PAGES pages. */
 	entry = kstack_cache;
 	while (entry != NULL) {
 		if (target >= (vm_offset_t)entry &&
 		    target < (vm_offset_t)entry + PAGE_SIZE * KSTACK_PAGES) {
 			db_printf("Cached stack %p\n", entry);
 			return;
 		}
 		entry = entry->next_ks_entry;
 	}
 }
Index: head/sys/ddb/db_sym.c
===================================================================
--- head/sys/ddb/db_sym.c	(revision 285626)
+++ head/sys/ddb/db_sym.c	(revision 285627)
@@ -1,472 +1,474 @@
 /*-
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
  * All Rights Reserved.
  *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 /*
  * 	Author: David B. Golub, Carnegie Mellon University
  *	Date:	7/90
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #include <sys/param.h>
 #include <sys/pcpu.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
 #include <net/vnet.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 #include <ddb/db_variables.h>
 
 #include <opt_ddb.h>
 
 /*
  * Multiple symbol tables
  */
 #ifndef MAXNOSYMTABS
 #define	MAXNOSYMTABS	3	/* mach, ux, emulator */
 #endif
 
 static db_symtab_t	db_symtabs[MAXNOSYMTABS] = {{0,},};
 static int db_nsymtab = 0;
 
 static db_symtab_t	*db_last_symtab; /* where last symbol was found */
 
 static c_db_sym_t	db_lookup( const char *symstr);
 static char		*db_qualify(c_db_sym_t sym, char *symtabname);
 static bool		db_symbol_is_ambiguous(c_db_sym_t sym);
 static bool		db_line_at_pc(c_db_sym_t, char **, int *, db_expr_t);
 
 static int db_cpu = -1;
 
 #ifdef VIMAGE
 static void *db_vnet = NULL;
 #endif
 
 /*
  * Validate the CPU number used to interpret per-CPU variables so we can
  * avoid later confusion if an invalid CPU is requested.
  *
  * DB_VAR_GET stores the current selection in *valuep.  DB_VAR_SET accepts
  * -1 (which db_value_of_name_pcpu() treats as "use curcpu") or a CPU id in
  * the range 0..mp_maxid; any other value is rejected and db_cpu is left
  * unchanged.  Returns 1 on success and 0 on failure.
  */
 int
 db_var_db_cpu(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 	int cpu;
 
 	switch (op) {
 	case DB_VAR_GET:
 		*valuep = db_cpu;
 		return (1);
 
 	case DB_VAR_SET:
 		cpu = *(int *)valuep;
 		/*
 		 * Reject a value that is out of range on either side.  The
 		 * two bounds were previously joined with &&, which let CPU
 		 * numbers above mp_maxid through; db_cpu is later used to
 		 * index dpcpu_off[].  The cpu >= 0 guard keeps -1 valid even
 		 * if mp_maxid has an unsigned type.
 		 */
 		if (cpu < -1 || (cpu >= 0 && cpu > mp_maxid)) {
 			db_printf("Invalid value: %d\n", cpu);
 			return (0);
 		}
 		db_cpu = cpu;
 		return (1);
 
 	default:
 		db_printf("db_var_db_cpu: unknown operation\n");
 		return (0);
 	}
 }
 
 /*
  * Read-only variable exposing curcpu, the CPU used for per-CPU lookups
  * while db_cpu is -1.  A read stores curcpu in *valuep and returns 1;
  * a write or an unknown operation prints a message and returns 0.
  */
 int
 db_var_curcpu(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 
 	if (op == DB_VAR_GET) {
 		*valuep = curcpu;
 		return (1);
 	}
 
 	/* Everything else fails; only the message differs. */
 	if (op == DB_VAR_SET)
 		db_printf("Read-only variable.\n");
 	else
 		db_printf("db_var_curcpu: unknown operation\n");
 	return (0);
 }
 
 #ifdef VIMAGE
 /*
  * Accessor for the virtual network pointer used to expand per-vnet global
  * variables.  The pointer written by DB_VAR_SET is stored as is; it is not
  * checked against the global vnet list (it really should be).
  * Returns 1 for get/set and 0 for an unknown operation.
  */
 int
 db_var_db_vnet(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 
 	if (op == DB_VAR_GET) {
 		*valuep = (db_expr_t)db_vnet;
 		return (1);
 	}
 	if (op == DB_VAR_SET) {
 		db_vnet = *(void **)valuep;
 		return (1);
 	}
 
 	db_printf("db_var_db_vnet: unknown operation\n");
 	return (0);
 }
 
 /*
  * Read-only variable exposing curvnet, the vnet used for per-vnet lookups
  * while db_vnet is NULL.  A read stores curvnet in *valuep and returns 1;
  * a write or an unknown operation prints a message and returns 0.
  */
 int
 db_var_curvnet(struct db_variable *vp, db_expr_t *valuep, int op)
 {
 
 	if (op == DB_VAR_GET) {
 		*valuep = (db_expr_t)curvnet;
 		return (1);
 	}
 
 	/* Everything else fails; only the message differs. */
 	if (op == DB_VAR_SET)
 		db_printf("Read-only variable.\n");
 	else
 		db_printf("db_var_curvnet: unknown operation\n");
 	return (0);
 }
 #endif
 
 /*
  * Add symbol table, with given name, to list of symbol tables.
  */
 void
 db_add_symbol_table(char *start, char *end, char *name, char *ref)
 {
 	if (db_nsymtab >= MAXNOSYMTABS) {
 		printf ("No slots left for %s symbol table", name);
 		panic ("db_sym.c: db_add_symbol_table");
 	}
 
 	db_symtabs[db_nsymtab].start = start;
 	db_symtabs[db_nsymtab].end = end;
 	db_symtabs[db_nsymtab].name = name;
 	db_symtabs[db_nsymtab].private = ref;
 	db_nsymtab++;
 }
 
 /*
  *  db_qualify(sym, "ux") returns "ux:<name of sym>", e.g. "ux:vm_map".
  *  The name is looked up in db_last_symtab.
  *
  *  Note: return value points to static data whose content is
  *  overwritten by each call... but in practice this seems okay.
  */
 static char *
 db_qualify(c_db_sym_t sym, char *symtabname)
 {
 	const char	*symname;
 	db_expr_t	value;
 	static char     tmp[256];
 
 	/*
 	 * Fetch the name with X_db_symbol_values() rather than
 	 * db_symbol_values(): the latter calls db_qualify() again for every
 	 * symbol it considers ambiguous, so going through it from here
 	 * would recurse without end.  A null symbol keeps a null name, as
 	 * db_symbol_values() would have reported it.
 	 */
 	symname = 0;
 	if (sym != DB_SYM_NULL)
 		X_db_symbol_values(db_last_symtab, sym, &symname, &value);
 	snprintf(tmp, sizeof(tmp), "%s:%s", symtabname, symname);
 	return tmp;
 }
 
 
 /*
  * Compare two names.  They match when they are identical, or when src
  * starts with the character c and the rest of src is identical to dst.
  */
 bool
 db_eqname(const char *src, const char *dst, int c)
 {
 	const char	*tail;
 
 	if (strcmp(src, dst) == 0)
 		return (true);
 
 	/* Retry with the leading prefix character skipped, if present. */
 	if (src[0] != c)
 		return (false);
 	tail = src + 1;
 	return (strcmp(tail, dst) == 0);
 }
 
 /*
  * Look a symbol up by name and store its value through valuep.
  * Returns false when db_lookup() finds no such symbol.
  */
 bool
 db_value_of_name(const char *name, db_expr_t *valuep)
 {
 	c_db_sym_t	found;
 
 	found = db_lookup(name);
 	if (found != C_DB_SYM_NULL) {
 		db_symbol_values(found, &name, valuep);
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Resolve the per-CPU variable "name": look up the symbol
  * "pcpu_entry_<name>", require its value to lie in
  * [DPCPU_START, DPCPU_STOP) and add the dpcpu_off[] entry of the selected
  * CPU.  The CPU is db_cpu, or curcpu while db_cpu is -1.
  * Returns false if the symbol is missing or outside that range.
  */
 bool
 db_value_of_name_pcpu(const char *name, db_expr_t *valuep)
 {
 	static char     tmp[256];
 	c_db_sym_t	sym;
 	db_expr_t	value;
 	int		cpu;
 
 	cpu = (db_cpu == -1) ? curcpu : db_cpu;
 
 	snprintf(tmp, sizeof(tmp), "pcpu_entry_%s", name);
 	sym = db_lookup(tmp);
 	if (sym == C_DB_SYM_NULL)
 		return (false);
 
 	db_symbol_values(sym, &name, &value);
 	if (value >= DPCPU_START && value < DPCPU_STOP) {
 		*valuep = (db_expr_t)((uintptr_t)value + dpcpu_off[cpu]);
 		return (true);
 	}
 	return (false);
 }
 
 /*
  * Resolve the per-vnet variable "name": look up the symbol
  * "vnet_entry_<name>", require its value to lie in
  * [VNET_START, VNET_STOP) and add the data base of the selected vnet.
  * The vnet is db_vnet, or curvnet while db_vnet is NULL.
  * Always returns false in kernels built without VIMAGE.
  */
 bool
 db_value_of_name_vnet(const char *name, db_expr_t *valuep)
 {
 #ifdef VIMAGE
 	static char     tmp[256];
 	struct vnet	*vnet;
 	c_db_sym_t	sym;
 	db_expr_t	value;
 
 	vnet = (db_vnet != NULL) ? db_vnet : curvnet;
 
 	snprintf(tmp, sizeof(tmp), "vnet_entry_%s", name);
 	sym = db_lookup(tmp);
 	if (sym == C_DB_SYM_NULL)
 		return (false);
 
 	db_symbol_values(sym, &name, &value);
 	if (value >= VNET_START && value < VNET_STOP) {
 		*valuep = (db_expr_t)((uintptr_t)value + vnet->vnet_data_base);
 		return (true);
 	}
 	return (false);
 #else
 	return (false);
 #endif
 }
 
 /*
  * Lookup a symbol.
  * A name of the form "table:symbol" (e.g., ux:vm_map) restricts the
  * search to the named symbol table; an unqualified name is searched for
  * in every table.  The table that supplied the match is remembered in
  * db_last_symtab.  Returns 0 when the symbol is not found.
  */
 static c_db_sym_t
 db_lookup(const char *symstr)
 {
 	c_db_sym_t	match;
 	const char	*p;
 	int		first, limit, idx, len;
 
 	first = 0;
 	limit = db_nsymtab;
 
 	/*
 	 * Look for, remove, and remember any symbol table specifier:
 	 * at each ':' the text in front of it must name a known table.
 	 */
 	for (p = symstr; *p; p++) {
 		if (*p != ':')
 			continue;
 
 		for (idx = 0; idx < db_nsymtab; idx++) {
 			len = strlen(db_symtabs[idx].name);
 			if (len == (p - symstr) &&
 			    strncmp(symstr, db_symtabs[idx].name, len) == 0) {
 				first = idx;
 				limit = idx + 1;
 				break;
 			}
 		}
 		if (idx == db_nsymtab) {
 			db_error("invalid symbol table name");
 		}
 		/* The symbol proper starts after the colon. */
 		symstr = p + 1;
 	}
 
 	/*
 	 * Look in the selected range of symbol tables.
 	 * Return on first match.
 	 */
 	for (idx = first; idx < limit; idx++) {
 		match = X_db_lookup(&db_symtabs[idx], symstr);
 		if (match) {
 			db_last_symtab = &db_symtabs[idx];
 			return match;
 		}
 	}
 	return 0;
 }
 
 /*
  * If true, check across symbol tables for multiple occurrences
  * of a name.  Might slow things down quite a bit.
  */
 static volatile bool db_qualify_ambiguous_names = false;
 
 /*
  * Does this symbol name appear in more than one symbol table?
  * Used by db_symbol_values to decide whether to qualify a symbol.
  *
  * Always answers false while db_qualify_ambiguous_names is false, and
  * for a null symbol.  The name is looked up in db_last_symtab.
  */
 static bool
 db_symbol_is_ambiguous(c_db_sym_t sym)
 {
 	const char	*sym_name;
 	db_expr_t	value;
 	bool		found_once = false;
 	int		i;
 
 	if (!db_qualify_ambiguous_names)
 		return (false);
 	if (sym == DB_SYM_NULL)
 		return (false);
 
 	/*
 	 * Get the name straight from the symbol table code.
 	 * db_symbol_values() must not be used here: it calls this function
 	 * for every non-null symbol, so the two would call each other
 	 * without end as soon as db_qualify_ambiguous_names is true.
 	 */
 	X_db_symbol_values(db_last_symtab, sym, &sym_name, &value);
 
 	/* Ambiguous means the name resolves in at least two tables. */
 	for (i = 0; i < db_nsymtab; i++) {
 		if (X_db_lookup(&db_symtabs[i], sym_name)) {
 			if (found_once)
 				return (true);
 			found_once = true;
 		}
 	}
 	return (false);
 }
 
 /*
  * Find the closest symbol to val, and return it together with
  * the difference between val and the symbol found.
  *
  * Every registered symbol table is searched.  The table that produced
  * the smallest difference is remembered in db_last_symtab, the winning
  * difference is stored through offp, and the matching symbol is returned
  * (C_DB_SYM_NULL if no table improved on the initial difference).
  */
 c_db_sym_t
 db_search_symbol(db_addr_t val, db_strategy_t strategy, db_expr_t *offp)
 {
 	register
 	unsigned int	diff;
 	size_t		newdiff;
 	register int	i;
 	c_db_sym_t	ret = C_DB_SYM_NULL, sym;
 
 	/*
 	 * Start from the largest unsigned int value.
 	 * NOTE(review): diff is an unsigned int while newdiff is a size_t,
 	 * so both start at UINT_MAX and a difference is narrowed when it is
 	 * copied into diff -- confirm this is intended on 64-bit targets.
 	 */
 	newdiff = diff = ~0;
 	for (i = 0; i < db_nsymtab; i++) {
 	    sym = X_db_search_symbol(&db_symtabs[i], val, strategy, &newdiff);
 	    /* Keep this table's answer only if it is strictly closer. */
 	    if (newdiff < diff) {
 		db_last_symtab = &db_symtabs[i];
 		diff = newdiff;
 		ret = sym;
 	    }
 	}
 	*offp = diff;
 	return ret;
 }
 
 /*
  * Return name and value of a symbol.
  *
  * The symbol is interpreted relative to db_last_symtab.  For a null
  * symbol *namep is cleared and *valuep is left untouched.  When the
  * symbol is judged ambiguous the name is replaced by the qualified
  * form built by db_qualify().  valuep may be null if the value is not
  * wanted.
  */
 void
 db_symbol_values(c_db_sym_t sym, const char **namep, db_expr_t *valuep)
 {
 	db_expr_t	symval;
 
 	if (sym == DB_SYM_NULL) {
 		*namep = 0;
 		return;
 	}
 
 	X_db_symbol_values(db_last_symtab, sym, namep, &symval);
 	if (db_symbol_is_ambiguous(sym))
 		*namep = db_qualify(sym, db_last_symtab->name);
 
 	if (valuep != 0)
 		*valuep = symval;
 }
 
 
 /*
  * Print the closest symbol to value
  *
  * After matching the symbol according to the given strategy
  * we print it in the name+offset format, provided the symbol's
  * value is close enough (eg smaller than db_maxoff).
  * We also attempt to print [filename:linenum] when applicable
  * (eg for procedure names).
  *
  * If we could not find a reasonable name+offset representation,
  * then we just print the value in hex.  Small values might get
  * bogus symbol associations, e.g. 3 might get some absolute
  * value like _INCLUDE_VERSION or something, therefore we do
  * not accept symbols whose value is "small" (and use plain hex).
  */
 
 db_expr_t	db_maxoff = 0x10000;
 
 /*
  * Print 'off' symbolically.  Three output forms are possible:
  *  - a signed number, when the value falls in the "small value" range;
  *  - an unsigned number, when no symbol was found or the nearest symbol
  *    is db_maxoff or more away;
  *  - "name[+offset]", followed by " [file:line]" for DB_STGY_PROC lookups
  *    when line information is available.
  */
 void
 db_printsym(db_expr_t off, db_strategy_t strategy)
 {
 	c_db_sym_t	sym;
 	const char	*symname;
 	char 		*file;
 	db_expr_t	delta, symval;
 	int 		line;
 
 	sym = db_search_symbol(off, strategy, &delta);
 	db_symbol_values(sym, &symname, &symval);
 	/* Without a symbol, judge "smallness" by the raw value itself. */
 	if (symname == 0)
 		symval = off;
 
 	if (symval >= DB_SMALL_VALUE_MIN && symval <= DB_SMALL_VALUE_MAX) {
 		db_printf("%+#lr", (long)off);
 		return;
 	}
 	if (symname == 0 || delta >= (unsigned long)db_maxoff) {
 		db_printf("%#lr", (unsigned long)off);
 		return;
 	}
 
 #ifdef DDB_NUMSYM
 	db_printf("%#lr = %s", (unsigned long)off, symname);
 #else
 	db_printf("%s", symname);
 #endif
 	if (delta != 0)
 		db_printf("+%+#lr", (long)delta);
 	if (strategy == DB_STGY_PROC &&
 	    db_line_at_pc(sym, &file, &line, off))
 		db_printf(" [%s:%d]", file, line);
 }
 
 /*
  * Thin wrapper: ask X_db_line_at_pc() for the file name and line number
  * of pc, using db_last_symtab as the symbol table for sym.  The result
  * of X_db_line_at_pc() is returned unchanged.
  */
 static bool
 db_line_at_pc(c_db_sym_t sym, char **filename, int *linenum, db_expr_t pc)
 {
 	return (X_db_line_at_pc(db_last_symtab, sym, filename, linenum, pc));
 }
 
 /*
  * Thin wrapper: forward to X_db_sym_numargs() with db_last_symtab as the
  * symbol table for sym.  The result of X_db_sym_numargs() is returned
  * unchanged.
  */
 bool
 db_sym_numargs(c_db_sym_t sym, int *nargp, char **argnames)
 {
 	return (X_db_sym_numargs(db_last_symtab, sym, nargp, argnames));
 }
Index: head/sys/i386/i386/locore.s
===================================================================
--- head/sys/i386/i386/locore.s	(revision 285626)
+++ head/sys/i386/i386/locore.s	(revision 285627)
@@ -1,909 +1,910 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)locore.s	7.3 (Berkeley) 5/13/91
  * $FreeBSD$
  *
  *		originally from: locore.s, by William F. Jolitz
  *
  *		Substantially rewritten by David Greenman, Rod Grimes,
  *			Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
  *			and many others.
  */
 
 #include "opt_bootp.h"
 #include "opt_compat.h"
+#include "opt_kstack_pages.h"
 #include "opt_nfsroot.h"
 #include "opt_pmap.h"
 
 #include <sys/syscall.h>
 #include <sys/reboot.h>
 
 #include <machine/asmacros.h>
 #include <machine/cputypes.h>
 #include <machine/psl.h>
 #include <machine/pmap.h>
 #include <machine/specialreg.h>
 
 #include "assym.s"
 
 /*
  *	XXX
  *
  * Note: This version greatly munged to avoid various assembler errors
  * that may be fixed in newer versions of gas. Perhaps newer versions
  * will have more pleasant appearance.
  */
 
 /*
  * PTmap is recursive pagemap at top of virtual address space.
  * Within PTmap, the page directory can be found (third indirection).
  */
 	.globl	PTmap,PTD,PTDpde
 	.set	PTmap,(PTDPTDI << PDRSHIFT)
 	.set	PTD,PTmap + (PTDPTDI * PAGE_SIZE)
 	.set	PTDpde,PTD + (PTDPTDI * PDESIZE)
 
 /*
  * Compiled KERNBASE location and the kernel load address
  */
 	.globl	kernbase
 	.set	kernbase,KERNBASE
 	.globl	kernload
 	.set	kernload,KERNLOAD
 
 /*
  * Globals
  */
 	.data
 	ALIGN_DATA			/* just to be sure */
 
 	.space	0x2000			/* space for tmpstk - temporary stack */
 tmpstk:
 
 	.globl	bootinfo
 bootinfo:	.space	BOOTINFO_SIZE	/* bootinfo that we can handle */
 
 		.globl KERNend
 KERNend:	.long	0		/* phys addr end of kernel (just after bss) */
 physfree:	.long	0		/* phys addr of next free page */
 
 	.globl	IdlePTD
 IdlePTD:	.long	0		/* phys addr of kernel PTD */
 
 #if defined(PAE) || defined(PAE_TABLES)
 	.globl	IdlePDPT
 IdlePDPT:	.long	0		/* phys addr of kernel PDPT */
 #endif
 
 	.globl	KPTmap
 KPTmap:		.long	0		/* address of kernel page tables */
 
 	.globl	KPTphys
 KPTphys:	.long	0		/* phys addr of kernel page tables */
 
 	.globl	proc0kstack
 proc0kstack:	.long	0		/* address of proc 0 kstack space */
 p0kpa:		.long	0		/* phys addr of proc0's STACK */
 
 vm86phystk:	.long	0		/* PA of vm86/bios stack */
 
 	.globl	vm86paddr, vm86pa
 vm86paddr:	.long	0		/* address of vm86 region */
 vm86pa:		.long	0		/* phys addr of vm86 region */
 
 #ifdef PC98
 	.globl	pc98_system_parameter
 pc98_system_parameter:
 	.space	0x240
 #endif
 
 /**********************************************************************
  *
  * Some handy macros
  *
  */
 
 #define R(foo) ((foo)-KERNBASE)
 
 #define ALLOCPAGES(foo) \
 	movl	R(physfree), %esi ; \
 	movl	$((foo)*PAGE_SIZE), %eax ; \
 	addl	%esi, %eax ; \
 	movl	%eax, R(physfree) ; \
 	movl	%esi, %edi ; \
 	movl	$((foo)*PAGE_SIZE),%ecx ; \
 	xorl	%eax,%eax ; \
 	cld ; \
 	rep ; \
 	stosb
 
 /*
  * fillkpt
  *	eax = page frame address
  *	ebx = index into page table
  *	ecx = how many pages to map
  * 	base = base address of page dir/table
  *	prot = protection bits
  */
 #define	fillkpt(base, prot)		  \
 	shll	$PTESHIFT,%ebx		; \
 	addl	base,%ebx		; \
 	orl	$PG_V,%eax		; \
 	orl	prot,%eax		; \
 1:	movl	%eax,(%ebx)		; \
 	addl	$PAGE_SIZE,%eax		; /* increment physical address */ \
 	addl	$PTESIZE,%ebx		; /* next pte */ \
 	loop	1b
 
 /*
  * fillkptphys(prot)
  *	eax = physical address
  *	ecx = how many pages to map
  *	prot = protection bits
  */
 #define	fillkptphys(prot)		  \
 	movl	%eax, %ebx		; \
 	shrl	$PAGE_SHIFT, %ebx	; \
 	fillkpt(R(KPTphys), prot)
 
 	.text
 /**********************************************************************
  *
  * This is where the bootblocks start us, set the ball rolling...
  *
  */
 NON_GPROF_ENTRY(btext)
 
 #ifdef PC98
 	/* save SYSTEM PARAMETER for resume (NS/T or other) */
 	movl	$0xa1400,%esi
 	movl	$R(pc98_system_parameter),%edi
 	movl	$0x0240,%ecx
 	cld
 	rep
 	movsb
 #else	/* IBM-PC */
 /* Tell the bios to warmboot next time */
 	movw	$0x1234,0x472
 #endif	/* PC98 */
 
 /* Set up a real frame in case the double return in newboot is executed. */
 	pushl	%ebp
 	movl	%esp, %ebp
 
 /* Don't trust what the BIOS gives for eflags. */
 	pushl	$PSL_KERNEL
 	popfl
 
 /*
  * Don't trust what the BIOS gives for %fs and %gs.  Trust the bootstrap
  * to set %cs, %ds, %es and %ss.
  */
 	mov	%ds, %ax
 	mov	%ax, %fs
 	mov	%ax, %gs
 
 /*
  * Clear the bss.  Not all boot programs do it, and it is our job anyway.
  *
  * XXX we don't check that there is memory for our bss and page tables
  * before using it.
  *
  * Note: we must be careful to not overwrite an active gdt or idt.  They are
  * inactive from now until we switch to new ones, since we don't load any
  * more segment registers or permit interrupts until after the switch.
  */
 	movl	$R(end),%ecx
 	movl	$R(edata),%edi
 	subl	%edi,%ecx
 	xorl	%eax,%eax
 	cld
 	rep
 	stosb
 
 	call	recover_bootinfo
 
 /* Get onto a stack that we can trust. */
 /*
  * XXX this step is delayed in case recover_bootinfo needs to return via
  * the old stack, but it need not be, since recover_bootinfo actually
  * returns via the old frame.
  */
 	movl	$R(tmpstk),%esp
 
 #ifdef PC98
 	/* pc98_machine_type & M_EPSON_PC98 */
 	testb	$0x02,R(pc98_system_parameter)+220
 	jz	3f
 	/* epson_machine_id <= 0x0b */
 	cmpb	$0x0b,R(pc98_system_parameter)+224
 	ja	3f
 
 	/* count up memory */
 	movl	$0x100000,%eax		/* next, talley remaining memory */
 	movl	$0xFFF-0x100,%ecx
 1:	movl	0(%eax),%ebx		/* save location to check */
 	movl	$0xa55a5aa5,0(%eax)	/* write test pattern */
 	cmpl	$0xa55a5aa5,0(%eax)	/* does not check yet for rollover */
 	jne	2f
 	movl	%ebx,0(%eax)		/* restore memory */
 	addl	$PAGE_SIZE,%eax
 	loop	1b
 2:	subl	$0x100000,%eax
 	shrl	$17,%eax
 	movb	%al,R(pc98_system_parameter)+1
 3:
 
 	movw	R(pc98_system_parameter+0x86),%ax
 	movw	%ax,R(cpu_id)
 #endif
 
 	call	identify_cpu
 	call	create_pagetables
 
 /*
  * If the CPU has support for VME, turn it on.
  */ 
 	testl	$CPUID_VME, R(cpu_feature)
 	jz	1f
 	movl	%cr4, %eax
 	orl	$CR4_VME, %eax
 	movl	%eax, %cr4
 1:
 
 /* Now enable paging */
 #if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	%eax, %cr3
 	movl	%cr4, %eax
 	orl	$CR4_PAE, %eax
 	movl	%eax, %cr4
 #else
 	movl	R(IdlePTD), %eax
 	movl	%eax,%cr3		/* load ptd addr into mmu */
 #endif
 	movl	%cr0,%eax		/* get control word */
 	orl	$CR0_PE|CR0_PG,%eax	/* enable paging */
 	movl	%eax,%cr0		/* and let's page NOW! */
 
 	pushl	$begin			/* jump to high virtualized address */
 	ret
 
 /* now running relocated at KERNBASE where the system is linked to run */
 begin:
 	/*
 	 * Entry point once paging is on and we run at the linked address:
 	 * switch to proc0's kernel stack, call init386(physfree), move to
 	 * the stack top that init386() returns in %eax, then enter
 	 * mi_startup().
 	 */
 	/* set up bootstrap stack */
 	movl	proc0kstack,%eax	/* location of in-kernel stack */
 
 	/*
 	 * Only use bottom page for init386().  init386() calculates the
 	 * PCB + FPU save area size and returns the true top of stack.
 	 */
 	leal	PAGE_SIZE(%eax),%esp
 
 	xorl	%ebp,%ebp		/* mark end of frames */
 
 	pushl	physfree		/* value of first for init386(first) */
 	call	init386			/* wire 386 chip for unix operation */
 
 	/*
 	 * Clean up the stack in a way that db_numargs() understands, so
 	 * that backtraces in ddb don't underrun the stack.  Traps for
 	 * inaccessible memory are more fatal than usual this early.
 	 */
 	addl	$4,%esp			/* pop the single init386() argument */
 
 	/* Switch to true top of stack. */
 	movl	%eax,%esp		/* %eax still holds init386()'s result */
 
 	call	mi_startup		/* autoconfiguration, mountroot etc */
 	/* NOTREACHED */
 	addl	$0,%esp			/* for db_numargs() again */
 
 /*
  * Signal trampoline, copied to top of user stack
  */
 NON_GPROF_ENTRY(sigcode)
 	/* Call the handler whose address is stored in the signal frame. */
 	calll	*SIGF_HANDLER(%esp)
 	leal	SIGF_UC(%esp),%eax	/* get ucontext */
 	pushl	%eax
 	/* Skip the %gs reload when the saved eflags have PSL_VM set. */
 	testl	$PSL_VM,UC_EFLAGS(%eax)
 	jne	1f
 	mov	UC_GS(%eax),%gs		/* restore %gs */
 1:
 	movl	$SYS_sigreturn,%eax
 	pushl	%eax			/* junk to fake return addr. */
 	int	$0x80			/* enter kernel with args */
 					/* on stack */
 	/* Spin forever if the sigreturn system call ever comes back. */
 1:
 	jmp	1b
 
 #ifdef COMPAT_FREEBSD4
 	ALIGN_TEXT
 /*
  * Same sequence as sigcode, but using the SIGF_UC4/UC4_* frame offsets
  * and system call number 344.
  */
 freebsd4_sigcode:
 	calll	*SIGF_HANDLER(%esp)
 	leal	SIGF_UC4(%esp),%eax	/* get ucontext */
 	pushl	%eax
 	/* Skip the %gs reload when the saved eflags have PSL_VM set. */
 	testl	$PSL_VM,UC4_EFLAGS(%eax)
 	jne	1f
 	mov	UC4_GS(%eax),%gs	/* restore %gs */
 1:
 	movl	$344,%eax		/* 4.x SYS_sigreturn */
 	pushl	%eax			/* junk to fake return addr. */
 	int	$0x80			/* enter kernel with args */
 					/* on stack */
 	/* Spin forever if the system call ever comes back. */
 1:
 	jmp	1b
 #endif
 
 #ifdef COMPAT_43
 	ALIGN_TEXT
 /*
  * Same sequence as sigcode, but operating on the sigcontext at SIGF_SC
  * (SC_PS/SC_GS offsets) and using system call number 103.
  */
 osigcode:
 	call	*SIGF_HANDLER(%esp)	/* call signal handler */
 	lea	SIGF_SC(%esp),%eax	/* get sigcontext */
 	pushl	%eax
 	/* Skip the %gs reload when the saved flags word has PSL_VM set. */
 	testl	$PSL_VM,SC_PS(%eax)
 	jne	9f
 	mov	SC_GS(%eax),%gs		/* restore %gs */
 9:
 	movl	$103,%eax		/* 3.x SYS_sigreturn */
 	pushl	%eax			/* junk to fake return addr. */
 	int	$0x80			/* enter kernel with args */
 	/* Spin forever if the system call ever comes back. */
 0:	jmp	0b
 #endif /* COMPAT_43 */
 
 	ALIGN_TEXT
 /* End marker for the signal trampolines above. */
 esigcode:
 
 /*
  * Exported size words.  Each one is the distance from the start of the
  * named trampoline to esigcode, so the sizes of the earlier trampolines
  * include every trampoline assembled after them.
  */
 	.data
 	.globl	szsigcode
 szsigcode:
 	.long	esigcode-sigcode
 #ifdef COMPAT_FREEBSD4
 	.globl	szfreebsd4_sigcode
 szfreebsd4_sigcode:
 	.long	esigcode-freebsd4_sigcode
 #endif
 #ifdef COMPAT_43
 	.globl	szosigcode
 szosigcode:
 	.long	esigcode-osigcode
 #endif
 	.text
 
 /**********************************************************************
  *
  * Recover the bootinfo passed to us from the boot program
  *
  */
 recover_bootinfo:
 	/*
 	 * This code is called in different ways depending on what loaded
 	 * and started the kernel.  This is used to detect how we get the
 	 * arguments from the other code and what we do with them.
 	 *
 	 * Old disk boot blocks:
 	 *	(*btext)(howto, bootdev, cyloffset, esym);
 	 *	[return address == 0, and can NOT be returned to]
 	 *	[cyloffset was not supported by the FreeBSD boot code
 	 *	 and always passed in as 0]
 	 *	[esym is also known as total in the boot code, and
 	 *	 was never properly supported by the FreeBSD boot code]
 	 *
 	 * Old diskless netboot code:
 	 *	(*btext)(0,0,0,0,&nfsdiskless,0,0,0);
 	 *	[return address != 0, and can NOT be returned to]
 	 *	If we are being booted by this code it will NOT work,
 	 *	so we are just going to halt if we find this case.
 	 *
 	 * New uniform boot code:
 	 *	(*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
 	 *	[return address != 0, and can be returned to]
 	 *
 	 * There may seem to be a lot of wasted arguments in here, but
 	 * that is so the newer boot code can still load very old kernels
 	 * and old boot code can load new kernels.
 	 *
 	 * The arguments are read relative to the caller's %ebp:
 	 * 4(%ebp) is the return address, 8(%ebp) the first argument,
 	 * 24(%ebp) the fifth and 28(%ebp) the sixth.
 	 */
 
 	/*
 	 * The old style disk boot blocks fake a frame on the stack and
 	 * did an lret to get here.  The frame on the stack has a return
 	 * address of 0.
 	 */
 	cmpl	$0,4(%ebp)
 	je	olddiskboot
 
 	/*
 	 * We have some form of return address, so this is either the
 	 * old diskless netboot code, or the new uniform code.  That can
 	 * be detected by looking at the 5th argument, if it is 0
 	 * we are being booted by the new uniform boot code.
 	 */
 	cmpl	$0,24(%ebp)
 	je	newboot
 
 	/*
 	 * Seems we have been loaded by the old diskless boot code, we
 	 * don't stand a chance of running as the diskless structure
 	 * changed considerably between the two, so just halt.
 	 *
 	 * hlt only stops the CPU until the next wake-up event (an NMI,
 	 * for instance), after which execution continues with the
 	 * following instruction.  Loop so that we can never fall through
 	 * into newboot with arguments that are not a bootinfo pointer.
 	 */
 1:	hlt
 	jmp	1b
 
 	/*
 	 * We have been loaded by the new uniform boot code.
 	 * Let's check the bootinfo version, and if we do not understand
 	 * it we return to the loader with a status of 1 to indicate this error
 	 */
 newboot:
 	movl	28(%ebp),%ebx		/* &bootinfo.version */
 	movl	BI_VERSION(%ebx),%eax
 	cmpl	$1,%eax			/* We only understand version 1 */
 	je	1f
 	movl	$1,%eax			/* Return status */
 	leave
 	/*
 	 * XXX this returns to our caller's caller (as is required) since
 	 * we didn't set up a frame and our caller did.
 	 */
 	ret
 
 1:
 	/*
 	 * If we have a kernelname copy it in
 	 */
 	movl	BI_KERNELNAME(%ebx),%esi
 	cmpl	$0,%esi
 	je	2f			/* No kernelname */
 	movl	$MAXPATHLEN,%ecx	/* Brute force!!! */
 	movl	$R(kernelname),%edi
 	cmpb	$'/',(%esi)		/* Make sure it starts with a slash */
 	je	1f
 	movb	$'/',(%edi)
 	incl	%edi
 	decl	%ecx
 1:
 	cld
 	rep
 	movsb
 
 2:
 	/*
 	 * Determine the size of the boot loader's copy of the bootinfo
 	 * struct.  This is impossible to do properly because old versions
 	 * of the struct don't contain a size field and there are 2 old
 	 * versions with the same version number.
 	 */
 	movl	$BI_ENDCOMMON,%ecx	/* prepare for sizeless version */
 	testl	$RB_BOOTINFO,8(%ebp)	/* bi_size (and bootinfo) valid? */
 	je	got_bi_size		/* no, sizeless version */
 	movl	BI_SIZE(%ebx),%ecx
 got_bi_size:
 
 	/*
 	 * Copy the common part of the bootinfo struct
 	 */
 	movl	%ebx,%esi
 	movl	$R(bootinfo),%edi
 	cmpl	$BOOTINFO_SIZE,%ecx
 	jbe	got_common_bi_size
 	movl	$BOOTINFO_SIZE,%ecx
 got_common_bi_size:
 	cld
 	rep
 	movsb
 
 #ifdef NFS_ROOT
 #ifndef BOOTP_NFSV3
 	/*
 	 * If we have a nfs_diskless structure copy it in
 	 */
 	movl	BI_NFS_DISKLESS(%ebx),%esi
 	cmpl	$0,%esi
 	je	olddiskboot
 	movl	$R(nfs_diskless),%edi
 	movl	$NFSDISKLESS_SIZE,%ecx
 	cld
 	rep
 	movsb
 	movl	$R(nfs_diskless_valid),%edi
 	movl	$1,(%edi)
 #endif
 #endif
 
 	/*
 	 * The old style disk boot.
 	 *	(*btext)(howto, bootdev, cyloffset, esym);
 	 * Note that the newer boot code just falls into here to pick
 	 * up howto and bootdev, cyloffset and esym are no longer used
 	 */
 olddiskboot:
 	movl	8(%ebp),%eax
 	movl	%eax,R(boothowto)
 	movl	12(%ebp),%eax
 	movl	%eax,R(bootdev)
 
 	ret
 
 
 /**********************************************************************
  *
  * Identify the CPU and initialize anything special about it
  *
  */
 identify_cpu:
 
 	/* Try to toggle alignment check flag; does not exist on 386. */
 	pushfl
 	popl	%eax
 	movl	%eax,%ecx		/* %ecx = original eflags */
 	orl	$PSL_AC,%eax
 	pushl	%eax
 	popfl				/* attempt to set PSL_AC */
 	pushfl
 	popl	%eax			/* read eflags back */
 	xorl	%ecx,%eax
 	andl	$PSL_AC,%eax		/* non-zero iff PSL_AC changed */
 	pushl	%ecx
 	popfl				/* restore original eflags */
 
 	testl	%eax,%eax
 	jnz	try486			/* PSL_AC is settable: 486 or later */
 
 	/*
 	 * NexGen CPU does not have alignment check flag.
 	 * Tell it apart from a 386 by the zero flag after a divide:
 	 * the xorl below leaves ZF set, and trynexgen is taken only if
 	 * ZF is still set once divl has executed.
 	 */
 	pushfl
 	movl	$0x5555, %eax
 	xorl	%edx, %edx
 	movl	$2, %ecx
 	clc
 	divl	%ecx
 	jz	trynexgen
 	popfl
 	movl	$CPU_386,R(cpu)
 	jmp	3f
 
 trynexgen:
 	popfl				/* restore eflags saved before the divide test */
 	movl	$CPU_NX586,R(cpu)
 	/* The three constants below spell "NexGenDriven" in little-endian. */
 	movl	$0x4778654e,R(cpu_vendor)	# store vendor string
 	movl	$0x72446e65,R(cpu_vendor+4)
 	movl	$0x6e657669,R(cpu_vendor+8)
 	movl	$0,R(cpu_vendor+12)		/* terminating zero word */
 	jmp	3f
 
 try486:	/* Try to toggle identification flag; does not exist on early 486s. */
 	pushfl
 	popl	%eax
 	movl	%eax,%ecx		/* %ecx = original eflags */
 	xorl	$PSL_ID,%eax
 	pushl	%eax
 	popfl				/* attempt to flip PSL_ID */
 	pushfl
 	popl	%eax			/* read eflags back */
 	xorl	%ecx,%eax
 	andl	$PSL_ID,%eax		/* non-zero iff PSL_ID changed */
 	pushl	%ecx
 	popfl				/* restore original eflags */
 
 	testl	%eax,%eax
 	jnz	trycpuid		/* PSL_ID toggles: cpuid is usable */
 	movl	$CPU_486,R(cpu)
 
 	/*
 	 * Check Cyrix CPU
 	 * Cyrix CPUs do not change the undefined flags following
 	 * execution of the divide instruction.  The test below clears
 	 * the carry flag, divides 0x5555 by 2 and treats a still-clear
 	 * carry flag as a Cyrix CPU.
 	 *
 	 * Note: CPUID is enabled on M2, so it passes another way.
 	 */
 	pushfl
 	movl	$0x5555, %eax
 	xorl	%edx, %edx
 	movl	$2, %ecx
 	clc
 	divl	%ecx
 	jnc	trycyrix
 	popfl
 	jmp	3f		/* You may use Intel CPU. */
 
 trycyrix:
 	popfl				/* restore eflags saved before the divide test */
 	/*
 	 * IBM Blue Lightning CPU also doesn't change the undefined flags.
 	 * Because IBM doesn't disclose the information for Blue Lightning
 	 * CPU, we couldn't distinguish it from Cyrix's (including IBM
 	 * brand of Cyrix CPUs).
 	 *
 	 * The three constants below spell "CyrixInstead" in little-endian.
 	 * No terminator is stored at cpu_vendor+12 here.
 	 */
 	movl	$0x69727943,R(cpu_vendor)	# store vendor string
 	movl	$0x736e4978,R(cpu_vendor+4)
 	movl	$0x64616574,R(cpu_vendor+8)
 	jmp	3f
 
 trycpuid:	/* Use the `cpuid' instruction. */
 	xorl	%eax,%eax
 	cpuid					# cpuid 0
 	movl	%eax,R(cpu_high)		# highest capability
 	/* The vendor string is stored in %ebx, %edx, %ecx order. */
 	movl	%ebx,R(cpu_vendor)		# store vendor string
 	movl	%edx,R(cpu_vendor+4)
 	movl	%ecx,R(cpu_vendor+8)
 	movb	$0,R(cpu_vendor+12)		/* NUL-terminate the string */
 
 	movl	$1,%eax
 	cpuid					# cpuid 1
 	movl	%eax,R(cpu_id)			# store cpu_id
 	movl	%ebx,R(cpu_procinfo)		# store cpu_procinfo
 	movl	%edx,R(cpu_feature)		# store cpu_feature
 	movl	%ecx,R(cpu_feature2)		# store cpu_feature2
 	/* Bits 8-11 of the cpuid 1 %eax value select the CPU class below. */
 	rorl	$8,%eax				# extract family type
 	andl	$15,%eax
 	cmpl	$5,%eax
 	jae	1f
 
 	/* less than Pentium; must be 486 */
 	movl	$CPU_486,R(cpu)
 	jmp	3f
 1:
 	/* a Pentium? */
 	cmpl	$5,%eax
 	jne	2f
 	movl	$CPU_586,R(cpu)
 	jmp	3f
 2:
 	/* Greater than Pentium...call it a Pentium Pro */
 	movl	$CPU_686,R(cpu)
 	/* Common exit for every identify_cpu path. */
 3:
 	ret
 
 
 /**********************************************************************
  *
  * Create the first page directory and its page tables.
  *
  */
 
 create_pagetables:
 
 /* Find end of kernel image (rounded up to a page boundary). */
 	movl	$R(_end),%esi
 
 /* Include symbols, if any. */
 	movl	R(bootinfo+BI_ESYMTAB),%edi
 	testl	%edi,%edi
 	je	over_symalloc
 	movl	%edi,%esi
 	movl	$KERNBASE,%edi
 	addl	%edi,R(bootinfo+BI_SYMTAB)
 	addl	%edi,R(bootinfo+BI_ESYMTAB)
 over_symalloc:
 
 /* If we are told where the end of the kernel space is, believe it. */
 	movl	R(bootinfo+BI_KERNEND),%edi
 	testl	%edi,%edi
 	je	no_kernend
 	movl	%edi,%esi
 no_kernend:
 
 	addl	$PDRMASK,%esi		/* Play conservative for now, and */
 	andl	$~PDRMASK,%esi		/*   ... wrap to next 4M. */
 	movl	%esi,R(KERNend)		/* save end of kernel */
 	movl	%esi,R(physfree)	/* next free page is at end of kernel */
 
 /* Allocate Kernel Page Tables */
 	ALLOCPAGES(NKPT)
 	movl	%esi,R(KPTphys)
 	addl	$(KERNBASE-(KPTDI<<(PDRSHIFT-PAGE_SHIFT+PTESHIFT))),%esi
 	movl	%esi,R(KPTmap)
 
 /* Allocate Page Table Directory */
 #if defined(PAE) || defined(PAE_TABLES)
 	/* XXX only need 32 bytes (easier for now) */
 	ALLOCPAGES(1)
 	movl	%esi,R(IdlePDPT)
 #endif
 	ALLOCPAGES(NPGPTD)
 	movl	%esi,R(IdlePTD)
 
 /* Allocate KSTACK */
 	ALLOCPAGES(KSTACK_PAGES)
 	movl	%esi,R(p0kpa)
 	addl	$KERNBASE, %esi
 	movl	%esi, R(proc0kstack)
 
 	ALLOCPAGES(1)			/* vm86/bios stack */
 	movl	%esi,R(vm86phystk)
 
 	ALLOCPAGES(3)			/* pgtable + ext + IOPAGES */
 	movl	%esi,R(vm86pa)
 	addl	$KERNBASE, %esi
 	movl	%esi, R(vm86paddr)
 
 /*
  * Enable PSE and PGE.
  */
 #ifndef DISABLE_PSE
 	testl	$CPUID_PSE, R(cpu_feature)
 	jz	1f
 	movl	$PG_PS, R(pseflag)
 	movl	%cr4, %eax
 	orl	$CR4_PSE, %eax
 	movl	%eax, %cr4
 1:
 #endif
 #ifndef DISABLE_PG_G
 	testl	$CPUID_PGE, R(cpu_feature)
 	jz	2f
 	movl	$PG_G, R(pgeflag)
 	movl	%cr4, %eax
 	orl	$CR4_PGE, %eax
 	movl	%eax, %cr4
 2:
 #endif
 
 /*
  * Initialize page table pages mapping physical address zero through the
  * end of the kernel.  All of the page table entries allow read and write
  * access.  Write access to the first physical page is required by bios32
  * calls, and write access to the first 1 MB of physical memory is required
  * by ACPI for implementing suspend and resume.  We do this even
  * if we've enabled PSE above, we'll just switch the corresponding kernel
  * PDEs before we turn on paging.
  *
  * XXX: We waste some pages here in the PSE case!
  */
 	xorl	%eax, %eax
 	movl	R(KERNend),%ecx
 	shrl	$PAGE_SHIFT,%ecx
 	fillkptphys($PG_RW)
 
 /* Map page table pages. */
 	movl	R(KPTphys),%eax
 	movl	$NKPT,%ecx
 	fillkptphys($PG_RW)
 
 /* Map page directory. */
 #if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePDPT), %eax
 	movl	$1, %ecx
 	fillkptphys($PG_RW)
 #endif
 
 	movl	R(IdlePTD), %eax
 	movl	$NPGPTD, %ecx
 	fillkptphys($PG_RW)
 
 /* Map proc0's KSTACK in the physical way ... */
 	movl	R(p0kpa), %eax
 	movl	$(KSTACK_PAGES), %ecx
 	fillkptphys($PG_RW)
 
 /* Map ISA hole */
 	movl	$ISA_HOLE_START, %eax
 	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
 	fillkptphys($PG_RW)
 
 /* Map space for the vm86 region */
 	movl	R(vm86phystk), %eax
 	movl	$4, %ecx
 	fillkptphys($PG_RW)
 
 /* Map page 0 into the vm86 page table */
 	movl	$0, %eax
 	movl	$0, %ebx
 	movl	$1, %ecx
 	fillkpt(R(vm86pa), $PG_RW|PG_U)
 
 /* ...likewise for the ISA hole */
 	movl	$ISA_HOLE_START, %eax
 	movl	$ISA_HOLE_START>>PAGE_SHIFT, %ebx
 	movl	$ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
 	fillkpt(R(vm86pa), $PG_RW|PG_U)
 
 /*
  * Create an identity mapping for low physical memory, including the kernel.
  * The part of this mapping that covers the first 1 MB of physical memory
  * becomes a permanent part of the kernel's address space.  The rest of this
  * mapping is destroyed in pmap_bootstrap().  Ordinarily, the same page table
  * pages are shared by the identity mapping and the kernel's native mapping.
  * However, the permanent identity mapping cannot contain PG_G mappings.
  * Thus, if the kernel is loaded within the permanent identity mapping, that
  * page table page must be duplicated and not shared.
  *
  * N.B. Due to errata concerning large pages and physical address zero,
  * a PG_PS mapping is not used.
  */
 	movl	R(KPTphys), %eax
 	xorl	%ebx, %ebx
 	movl	$NKPT, %ecx
 	fillkpt(R(IdlePTD), $PG_RW)
 #if KERNLOAD < (1 << PDRSHIFT)
 	testl	$PG_G, R(pgeflag)
 	jz	1f
 	ALLOCPAGES(1)
 	movl	%esi, %edi
 	movl	R(IdlePTD), %eax
 	movl	(%eax), %esi
 	movl	%edi, (%eax)
 	movl	$PAGE_SIZE, %ecx
 	cld
 	rep
 	movsb
 1:	
 #endif
 
 /*
  * For the non-PSE case, install PDEs for PTs covering the KVA.
  * For the PSE case, do the same, but clobber the ones corresponding
  * to the kernel (from btext to KERNend) with 4M (2M for PAE) ('PS')
  * PDEs immediately after.
  */
 	movl	R(KPTphys), %eax
 	movl	$KPTDI, %ebx
 	movl	$NKPT, %ecx
 	fillkpt(R(IdlePTD), $PG_RW)
 	cmpl	$0,R(pseflag)
 	je	done_pde
 
 	movl	R(KERNend), %ecx
 	movl	$KERNLOAD, %eax
 	subl	%eax, %ecx
 	shrl	$PDRSHIFT, %ecx
 	movl	$(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx
 	shll	$PDESHIFT, %ebx
 	addl	R(IdlePTD), %ebx
 	orl	$(PG_V|PG_RW|PG_PS), %eax
 1:	movl	%eax, (%ebx)
 	addl	$(1 << PDRSHIFT), %eax
 	addl	$PDESIZE, %ebx
 	loop	1b
 
 done_pde:
 /* install a pde recursively mapping page directory as a page table */
 	movl	R(IdlePTD), %eax
 	movl	$PTDPTDI, %ebx
 	movl	$NPGPTD,%ecx
 	fillkpt(R(IdlePTD), $PG_RW)
 
 #if defined(PAE) || defined(PAE_TABLES)
 	movl	R(IdlePTD), %eax
 	xorl	%ebx, %ebx
 	movl	$NPGPTD, %ecx
 	fillkpt(R(IdlePDPT), $0x0)
 #endif
 
 	ret
 
 #ifdef XENHVM
 /* Xen Hypercall page */
 	.text
 .p2align PAGE_SHIFT, 0x90	/* Hypercall_page needs to be PAGE aligned */
 
 NON_GPROF_ENTRY(hypercall_page)
 	.skip	0x1000, 0x90	/* Fill with "nop"s */
 #endif
Index: head/sys/powerpc/booke/pmap.c
===================================================================
--- head/sys/powerpc/booke/pmap.c	(revision 285626)
+++ head/sys/powerpc/booke/pmap.c	(revision 285627)
@@ -1,3322 +1,3324 @@
 /*-
  * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
  * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
  * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Some hw specific parts of this pmap were derived or influenced
  * by NetBSD's ibm4xx pmap module. More generic code is shared with
  * a few other pmap modules from the FreeBSD tree.
  */
 
  /*
   * VM layout notes:
   *
   * Kernel and user threads run within one common virtual address space
   * defined by AS=0.
   *
   * Virtual address space layout:
   * -----------------------------
   * 0x0000_0000 - 0xafff_ffff	: user process
   * 0xb000_0000 - 0xbfff_ffff	: pmap_mapdev()-ed area (PCI/PCIE etc.)
   * 0xc000_0000 - 0xc0ff_ffff	: kernel reserved
   *   0xc000_0000 - data_end	: kernel code+data, env, metadata etc.
   * 0xc100_0000 - 0xfeef_ffff	: KVA
   *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
   *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
   *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
   *   0xc200_9000 - 0xfeef_ffff	: actual free KVA space
   * 0xfef0_0000 - 0xffff_ffff	: I/O devices region
   */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/malloc.h>
 #include <sys/ktr.h>
 #include <sys/proc.h>
 #include <sys/user.h>
 #include <sys/queue.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
 #include <sys/linker.h>
 #include <sys/msgbuf.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_param.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/uma.h>
 
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/platform.h>
 
 #include <machine/tlb.h>
 #include <machine/spr.h>
 #include <machine/md_var.h>
 #include <machine/mmuvar.h>
 #include <machine/pmap.h>
 #include <machine/pte.h>
 
 #include "mmu_if.h"
 
 #ifdef  DEBUG
 #define debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define debugf(fmt, args...)
 #endif
 
 #define TODO			panic("%s: not implemented", __func__);
 
 extern unsigned char _etext[];
 extern unsigned char _end[];
 
 extern uint32_t *bootinfo;
 
 #ifdef SMP
 extern uint32_t bp_ntlb1s;
 #endif
 
 vm_paddr_t kernload;
 vm_offset_t kernstart;
 vm_size_t kernsize;
 
 /* Message buffer and tables. */
 static vm_offset_t data_start;
 static vm_size_t data_end;
 
 /* Phys/avail memory regions. */
 static struct mem_region *availmem_regions;
 static int availmem_regions_sz;
 static struct mem_region *physmem_regions;
 static int physmem_regions_sz;
 
 /* Reserved KVA space and mutex for mmu_booke_zero_page. */
 static vm_offset_t zero_page_va;
 static struct mtx zero_page_mutex;
 
 static struct mtx tlbivax_mutex;
 
 /*
  * Reserved KVA space for mmu_booke_zero_page_idle. This is used
  * by the idle thread only, so no lock is required.
  */
 static vm_offset_t zero_page_idle_va;
 
 /* Reserved KVA space and mutex for mmu_booke_copy_page. */
 static vm_offset_t copy_page_src_va;
 static vm_offset_t copy_page_dst_va;
 static struct mtx copy_page_mutex;
 
 /**************************************************************************/
 /* PMAP */
 /**************************************************************************/
 
 static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
     vm_prot_t, u_int flags, int8_t psind);
 
 unsigned int kptbl_min;		/* Index of the first kernel ptbl. */
 unsigned int kernel_ptbls;	/* Number of KVA ptbls. */
 
 /*
  * If user pmap is processed with mmu_booke_remove and the resident count
  * drops to 0, there are no more pages to remove, so we need not continue.
  */
 #define PMAP_REMOVE_DONE(pmap) \
 	((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
 
 extern void tid_flush(tlbtid_t tid, int tlb0_ways, int tlb0_entries_per_way);
 extern int elf32_nxstack;
 
 /**************************************************************************/
 /* TLB and TID handling */
 /**************************************************************************/
 
 /* Translation ID busy table */
 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];
 
 /*
  * TLB0 capabilities (entry, way numbers etc.). These can vary between e500
  * core revisions and should be read from h/w registers during early config.
  */
 uint32_t tlb0_entries;
 uint32_t tlb0_ways;
 uint32_t tlb0_entries_per_way;
 
 #define TLB0_ENTRIES		(tlb0_entries)
 #define TLB0_WAYS		(tlb0_ways)
 #define TLB0_ENTRIES_PER_WAY	(tlb0_entries_per_way)
 
 #define TLB1_ENTRIES 16
 
 /* In-ram copy of the TLB1 */
 static tlb_entry_t tlb1[TLB1_ENTRIES];
 
 /* Next free entry in the TLB1 */
 static unsigned int tlb1_idx;
 static vm_offset_t tlb1_map_base = VM_MAX_KERNEL_ADDRESS;
 
 static tlbtid_t tid_alloc(struct pmap *);
 
 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
 
 static int tlb1_set_entry(vm_offset_t, vm_paddr_t, vm_size_t, uint32_t);
 static void tlb1_write_entry(unsigned int);
 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t);
 
 static vm_size_t tsize2size(unsigned int);
 static unsigned int size2tsize(vm_size_t);
 static unsigned int ilog2(unsigned int);
 
 static void set_mas4_defaults(void);
 
 static inline void tlb0_flush_entry(vm_offset_t);
 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);
 
 /**************************************************************************/
 /* Page table management */
 /**************************************************************************/
 
 static struct rwlock_padalign pvh_global_lock;
 
 /* Data for the pv entry allocation mechanism */
 static uma_zone_t pvzone;
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 
 #define PV_ENTRY_ZONE_MIN	2048	/* min pv entries in uma zone */
 
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC	200
 #endif
 
 static void ptbl_init(void);
 static struct ptbl_buf *ptbl_buf_alloc(void);
 static void ptbl_buf_free(struct ptbl_buf *);
 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
 
 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t);
 static void ptbl_free(mmu_t, pmap_t, unsigned int);
 static void ptbl_hold(mmu_t, pmap_t, unsigned int);
 static int ptbl_unhold(mmu_t, pmap_t, unsigned int);
 
 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t);
 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t);
 
 static pv_entry_t pv_alloc(void);
 static void pv_free(pv_entry_t);
 static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
 static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
 
 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
 #define PTBL_BUFS		(128 * 16)
 
 struct ptbl_buf {
 	TAILQ_ENTRY(ptbl_buf) link;	/* list link */
 	vm_offset_t kva;		/* va of mapping */
 };
 
 /* ptbl free list and a lock used for access synchronization. */
 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
 static struct mtx ptbl_buf_freelist_lock;
 
 /* Base address of kva space allocated for ptbl bufs. */
 static vm_offset_t ptbl_buf_pool_vabase;
 
 /* Pointer to ptbl_buf structures. */
 static struct ptbl_buf *ptbl_bufs;
 
 #ifdef SMP
 void pmap_bootstrap_ap(volatile uint32_t *);
 #endif
 
 /*
  * Kernel MMU interface
  */
 static void		mmu_booke_clear_modify(mmu_t, vm_page_t);
 static void		mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t,
     vm_size_t, vm_offset_t);
 static void		mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t);
 static void		mmu_booke_copy_pages(mmu_t, vm_page_t *,
     vm_offset_t, vm_page_t *, vm_offset_t, int);
 static int		mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t,
     vm_prot_t, u_int flags, int8_t psind);
 static void		mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
     vm_page_t, vm_prot_t);
 static void		mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t,
     vm_prot_t);
 static vm_paddr_t	mmu_booke_extract(mmu_t, pmap_t, vm_offset_t);
 static vm_page_t	mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t,
     vm_prot_t);
 static void		mmu_booke_init(mmu_t);
 static boolean_t	mmu_booke_is_modified(mmu_t, vm_page_t);
 static boolean_t	mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
 static boolean_t	mmu_booke_is_referenced(mmu_t, vm_page_t);
 static int		mmu_booke_ts_referenced(mmu_t, vm_page_t);
 static vm_offset_t	mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t,
     int);
 static int		mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t,
     vm_paddr_t *);
 static void		mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t,
     vm_object_t, vm_pindex_t, vm_size_t);
 static boolean_t	mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t);
 static void		mmu_booke_page_init(mmu_t, vm_page_t);
 static int		mmu_booke_page_wired_mappings(mmu_t, vm_page_t);
 static void		mmu_booke_pinit(mmu_t, pmap_t);
 static void		mmu_booke_pinit0(mmu_t, pmap_t);
 static void		mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
     vm_prot_t);
 static void		mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
 static void		mmu_booke_qremove(mmu_t, vm_offset_t, int);
 static void		mmu_booke_release(mmu_t, pmap_t);
 static void		mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
 static void		mmu_booke_remove_all(mmu_t, vm_page_t);
 static void		mmu_booke_remove_write(mmu_t, vm_page_t);
 static void		mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
 static void		mmu_booke_zero_page(mmu_t, vm_page_t);
 static void		mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int);
 static void		mmu_booke_zero_page_idle(mmu_t, vm_page_t);
 static void		mmu_booke_activate(mmu_t, struct thread *);
 static void		mmu_booke_deactivate(mmu_t, struct thread *);
 static void		mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
 static void		*mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t);
 static void		*mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t);
 static void		mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t);
 static vm_paddr_t	mmu_booke_kextract(mmu_t, vm_offset_t);
 static void		mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t);
 static void		mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t);
 static void		mmu_booke_kremove(mmu_t, vm_offset_t);
 static boolean_t	mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
 static void		mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t,
     vm_size_t);
 static void		mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t,
     void **);
 static void		mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t,
     void *);
 static void		mmu_booke_scan_init(mmu_t);
 
 static mmu_method_t mmu_booke_methods[] = {
 	/* pmap dispatcher interface */
 	MMUMETHOD(mmu_clear_modify,	mmu_booke_clear_modify),
 	MMUMETHOD(mmu_copy,		mmu_booke_copy),
 	MMUMETHOD(mmu_copy_page,	mmu_booke_copy_page),
 	MMUMETHOD(mmu_copy_pages,	mmu_booke_copy_pages),
 	MMUMETHOD(mmu_enter,		mmu_booke_enter),
 	MMUMETHOD(mmu_enter_object,	mmu_booke_enter_object),
 	MMUMETHOD(mmu_enter_quick,	mmu_booke_enter_quick),
 	MMUMETHOD(mmu_extract,		mmu_booke_extract),
 	MMUMETHOD(mmu_extract_and_hold,	mmu_booke_extract_and_hold),
 	MMUMETHOD(mmu_init,		mmu_booke_init),
 	MMUMETHOD(mmu_is_modified,	mmu_booke_is_modified),
 	MMUMETHOD(mmu_is_prefaultable,	mmu_booke_is_prefaultable),
 	MMUMETHOD(mmu_is_referenced,	mmu_booke_is_referenced),
 	MMUMETHOD(mmu_ts_referenced,	mmu_booke_ts_referenced),
 	MMUMETHOD(mmu_map,		mmu_booke_map),
 	MMUMETHOD(mmu_mincore,		mmu_booke_mincore),
 	MMUMETHOD(mmu_object_init_pt,	mmu_booke_object_init_pt),
 	MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick),
 	MMUMETHOD(mmu_page_init,	mmu_booke_page_init),
 	MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings),
 	MMUMETHOD(mmu_pinit,		mmu_booke_pinit),
 	MMUMETHOD(mmu_pinit0,		mmu_booke_pinit0),
 	MMUMETHOD(mmu_protect,		mmu_booke_protect),
 	MMUMETHOD(mmu_qenter,		mmu_booke_qenter),
 	MMUMETHOD(mmu_qremove,		mmu_booke_qremove),
 	MMUMETHOD(mmu_release,		mmu_booke_release),
 	MMUMETHOD(mmu_remove,		mmu_booke_remove),
 	MMUMETHOD(mmu_remove_all,	mmu_booke_remove_all),
 	MMUMETHOD(mmu_remove_write,	mmu_booke_remove_write),
 	MMUMETHOD(mmu_sync_icache,	mmu_booke_sync_icache),
 	MMUMETHOD(mmu_unwire,		mmu_booke_unwire),
 	MMUMETHOD(mmu_zero_page,	mmu_booke_zero_page),
 	MMUMETHOD(mmu_zero_page_area,	mmu_booke_zero_page_area),
 	MMUMETHOD(mmu_zero_page_idle,	mmu_booke_zero_page_idle),
 	MMUMETHOD(mmu_activate,		mmu_booke_activate),
 	MMUMETHOD(mmu_deactivate,	mmu_booke_deactivate),
 
 	/* Internal interfaces */
 	MMUMETHOD(mmu_bootstrap,	mmu_booke_bootstrap),
 	MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped),
 	MMUMETHOD(mmu_mapdev,		mmu_booke_mapdev),
 	MMUMETHOD(mmu_mapdev_attr,	mmu_booke_mapdev_attr),
 	MMUMETHOD(mmu_kenter,		mmu_booke_kenter),
 	MMUMETHOD(mmu_kenter_attr,	mmu_booke_kenter_attr),
 	MMUMETHOD(mmu_kextract,		mmu_booke_kextract),
 /*	MMUMETHOD(mmu_kremove,		mmu_booke_kremove),	*/
 	MMUMETHOD(mmu_unmapdev,		mmu_booke_unmapdev),
 
 	/* dumpsys() support */
 	MMUMETHOD(mmu_dumpsys_map,	mmu_booke_dumpsys_map),
 	MMUMETHOD(mmu_dumpsys_unmap,	mmu_booke_dumpsys_unmap),
 	MMUMETHOD(mmu_scan_init,	mmu_booke_scan_init),
 
 	{ 0, 0 }
 };
 
 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0);
 
 /*
  * Pick the cache attribute bits for a mapping of physical address 'pa'.
  *
  * A recognised non-default memory attribute 'ma' decides the result
  * directly.  Otherwise the result is _TLB_ENTRY_MEM when 'pa' falls inside
  * one of the physmem_regions[] entries and _TLB_ENTRY_IO when it does not.
  */
 static __inline uint32_t
 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
 {
 	int idx;
 
 	if (ma != VM_MEMATTR_DEFAULT) {
 		if (ma == VM_MEMATTR_UNCACHEABLE)
 			return (PTE_I | PTE_G);
 		if (ma == VM_MEMATTR_WRITE_COMBINING ||
 		    ma == VM_MEMATTR_WRITE_BACK ||
 		    ma == VM_MEMATTR_PREFETCHABLE)
 			return (PTE_I);
 		if (ma == VM_MEMATTR_WRITE_THROUGH)
 			return (PTE_W | PTE_M);
 		/* Any other attribute falls back to the region lookup. */
 	}
 
 	/* Addresses inside a known physical memory region are plain memory. */
 	for (idx = 0; idx < physmem_regions_sz; idx++) {
 		if (pa < physmem_regions[idx].mr_start)
 			continue;
 		if (pa < (physmem_regions[idx].mr_start +
 		    physmem_regions[idx].mr_size))
 			return (_TLB_ENTRY_MEM);
 	}
 
 	/*
 	 * Not in any region: assume the page is cache inhibited and access
 	 * is guarded.
 	 */
 	return (_TLB_ENTRY_IO);
 }
 
 /*
  * On SMP kernels, once smp_started is set, call tlb_lock() on the
  * pc_booke_tlb_lock of every CPU on the cpuhead list except the current
  * one (pcpup).  Compiles to nothing without SMP.
  */
 static inline void
 tlb_miss_lock(void)
 {
 #ifdef SMP
 	struct pcpu *cpu;
 
 	if (smp_started) {
 		STAILQ_FOREACH(cpu, &cpuhead, pc_allcpu) {
 			/* Never take our own lock. */
 			if (cpu == pcpup)
 				continue;
 
 			CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, "
 			    "tlb_lock=%p", __func__, cpu->pc_cpuid,
 			    cpu->pc_booke_tlb_lock);
 
 			KASSERT((cpu->pc_cpuid != PCPU_GET(cpuid)),
 			    ("tlb_miss_lock: tried to lock self"));
 
 			tlb_lock(cpu->pc_booke_tlb_lock);
 
 			CTR1(KTR_PMAP, "%s: locked", __func__);
 		}
 	}
 #endif
 }
 
 /*
  * Counterpart of tlb_miss_lock(): on SMP kernels, once smp_started is set,
  * call tlb_unlock() on the pc_booke_tlb_lock of every CPU on the cpuhead
  * list except the current one (pcpup).  Compiles to nothing without SMP.
  */
 static inline void
 tlb_miss_unlock(void)
 {
 #ifdef SMP
 	struct pcpu *cpu;
 
 	if (smp_started) {
 		STAILQ_FOREACH(cpu, &cpuhead, pc_allcpu) {
 			/* Our own lock was never taken. */
 			if (cpu == pcpup)
 				continue;
 
 			CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d",
 			    __func__, cpu->pc_cpuid);
 
 			tlb_unlock(cpu->pc_booke_tlb_lock);
 
 			CTR1(KTR_PMAP, "%s: unlocked", __func__);
 		}
 	}
 #endif
 }
 
 /*
  * Read the TLB0 configuration register and record the TLB0 geometry in
  * tlb0_entries, tlb0_ways and tlb0_entries_per_way.  Nothing is returned.
  */
 static __inline void
 tlb0_get_tlbconf(void)
 {
 	uint32_t cfg = mfspr(SPR_TLB0CFG);
 
 	tlb0_ways = (cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
 	tlb0_entries = cfg & TLBCFG_NENTRY_MASK;
 	tlb0_entries_per_way = tlb0_entries / tlb0_ways;
 }
 
 /* Initialize pool of kva ptbl buffers. */
 static void
 ptbl_init(void)
 {
 	int i;
 
 	CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__,
 	    (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS);
 	CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)",
 	    __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE);
 
 	mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
 	TAILQ_INIT(&ptbl_buf_freelist);
 
 	for (i = 0; i < PTBL_BUFS; i++) {
 		ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
 		TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
 	}
 }
 
 /*
  * Take the first ptbl_buf off the freelist.
  *
  * Returns the buffer, or NULL if the freelist is empty.
  */
 static struct ptbl_buf *
 ptbl_buf_alloc(void)
 {
 	struct ptbl_buf *head;
 
 	mtx_lock(&ptbl_buf_freelist_lock);
 	if ((head = TAILQ_FIRST(&ptbl_buf_freelist)) != NULL)
 		TAILQ_REMOVE(&ptbl_buf_freelist, head, link);
 	mtx_unlock(&ptbl_buf_freelist_lock);
 
 	CTR2(KTR_PMAP, "%s: buf = %p", __func__, head);
 
 	return (head);
 }
 
 /*
  * Return a ptbl buf to the free pool by appending it to
  * ptbl_buf_freelist while holding ptbl_buf_freelist_lock.
  */
 static void
 ptbl_buf_free(struct ptbl_buf *buf)
 {
 
 	CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
 
 	mtx_lock(&ptbl_buf_freelist_lock);
 	TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
 	mtx_unlock(&ptbl_buf_freelist_lock);
 }
 
 /*
  * Look up the ptbl_buf whose KVA equals 'ptbl' on the pmap's list of
  * allocated ptbl bufs, unlink it from that list and hand it back to the
  * free pool.  If no buffer on the list matches, nothing is done.
  *
  * The pmap lock must be held by the caller.
  */
 static void
 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl)
 {
 	struct ptbl_buf *cur;
 
 	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/* Locate the matching buffer; 'cur' ends up NULL if there is none. */
 	TAILQ_FOREACH(cur, &pmap->pm_ptbl_list, link) {
 		if (cur->kva == (vm_offset_t)ptbl)
 			break;
 	}
 	if (cur == NULL)
 		return;
 
 	/* Remove from pmap ptbl buf list, then free the ptbl buf itself. */
 	TAILQ_REMOVE(&pmap->pm_ptbl_list, cur, link);
 	ptbl_buf_free(cur);
 }
 
 /*
  * Allocate a page table for 'pmap' at page directory index 'pdir_idx'.
  *
  * A ptbl_buf supplies the kernel virtual address range; PTBL_PAGES wired
  * pages are then allocated, mapped at that range and zeroed, and the
  * ptbl_buf is recorded on the pmap's pm_ptbl_list.  The pdir entry itself
  * is NOT written here; that is left to the caller.
  *
  * Called with the pmap lock and pvh_global_lock held.  When a page cannot
  * be allocated and 'nosleep' is false, both locks are dropped around
  * VM_WAIT and re-acquired before retrying.  When 'nosleep' is true the
  * function releases everything it obtained and returns NULL with both
  * locks still held, exactly as they were on entry.
  *
  * Returns the KVA of the new page table, or NULL (nosleep only).
  */
 static pte_t *
 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
 {
 	vm_page_t mtbl[PTBL_PAGES];
 	vm_page_t m;
 	struct ptbl_buf *pbuf;
 	unsigned int pidx;
 	pte_t *ptbl;
 	int i, j;
 
 	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
 	    (pmap == kernel_pmap), pdir_idx);
 
 	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
 	    ("ptbl_alloc: invalid pdir_idx"));
 	KASSERT((pmap->pm_pdir[pdir_idx] == NULL),
 	    ("pte_alloc: valid ptbl entry exists!"));
 
 	pbuf = ptbl_buf_alloc();
 	if (pbuf == NULL)
 		panic("pte_alloc: couldn't alloc kernel virtual memory");
 
 	ptbl = (pte_t *)pbuf->kva;
 
 	CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);
 
 	/* Allocate ptbl pages; this sleeps unless 'nosleep' is set. */
 	for (i = 0; i < PTBL_PAGES; i++) {
 		pidx = (PTBL_PAGES * pdir_idx) + i;
 		while ((m = vm_page_alloc(NULL, pidx,
 		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 			if (nosleep) {
 				/*
 				 * Bail out with the locks still held: the
 				 * caller owns them and releases them itself.
 				 *
 				 * pbuf has not been put on pm_ptbl_list yet,
 				 * so hand it straight back to the free pool
 				 * instead of searching the pmap's list for it.
 				 */
 				ptbl_buf_free(pbuf);
 				for (j = 0; j < i; j++)
 					vm_page_free(mtbl[j]);
 				atomic_subtract_int(&vm_cnt.v_wire_count, i);
 				return (NULL);
 			}
 			PMAP_UNLOCK(pmap);
 			rw_wunlock(&pvh_global_lock);
 			VM_WAIT;
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
 		mtbl[i] = m;
 	}
 
 	/* Map allocated pages into kernel_pmap. */
 	mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES);
 
 	/* Zero whole ptbl. */
 	bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE);
 
 	/* Add pbuf to the pmap ptbl bufs list. */
 	TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
 
 	return (ptbl);
 }
 
 /*
  * Tear down the page table at 'pdir_idx' of 'pmap': clear the pdir entry,
  * free the PTBL_PAGES backing pages, remove their kernel mappings and
  * release the associated ptbl_buf.
  */
 static void
 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
 {
 	pte_t *ptbl;
 	vm_offset_t page_va;
 	vm_page_t pg;
 	int n;
 
 	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
 	    (pmap == kernel_pmap), pdir_idx);
 
 	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
 	    ("ptbl_free: invalid pdir_idx"));
 
 	ptbl = pmap->pm_pdir[pdir_idx];
 
 	CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
 
 	KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));
 
 	/*
 	 * Clear the pdir entry first, under the TLB locks, so that other
 	 * CPUs stop looking up the page table that is about to go away.
 	 */
 	mtx_lock_spin(&tlbivax_mutex);
 	tlb_miss_lock();
 
 	pmap->pm_pdir[pdir_idx] = NULL;
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
 
 	/* Release each backing page and drop its kernel mapping. */
 	page_va = (vm_offset_t)ptbl;
 	for (n = 0; n < PTBL_PAGES; n++, page_va += PAGE_SIZE) {
 		pg = PHYS_TO_VM_PAGE(pte_vatopa(mmu, kernel_pmap, page_va));
 		vm_page_free_zero(pg);
 		atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 		mmu_booke_kremove(mmu, page_va);
 	}
 
 	ptbl_free_pmap_ptbl(pmap, ptbl);
 }
 
 /*
  * Drop one hold (wire_count) on every page of the page table at
  * 'pdir_idx' and free the page table once the count reaches zero.
  * Called when a pte entry is removed from the ptbl.
  *
  * Returns 1 if the ptbl pages were freed, 0 otherwise.
  */
 static int
 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
 {
 	pte_t *ptbl;
 	vm_offset_t page_va;
 	vm_page_t pg;
 	int n;
 
 	CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
 	    (pmap == kernel_pmap), pdir_idx);
 
 	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
 	    ("ptbl_unhold: invalid pdir_idx"));
 	KASSERT((pmap != kernel_pmap),
 	    ("ptbl_unhold: unholding kernel ptbl!"));
 
 	ptbl = pmap->pm_pdir[pdir_idx];
 
 	KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
 	    ("ptbl_unhold: non kva ptbl"));
 
 	/* Decrement the hold count of every ptbl page. */
 	for (n = 0; n < PTBL_PAGES; n++) {
 		page_va = (vm_offset_t)ptbl + (n * PAGE_SIZE);
 		pg = PHYS_TO_VM_PAGE(pte_vatopa(mmu, kernel_pmap, page_va));
 		pg->wire_count--;
 	}
 
 	/*
 	 * All ptbl pages carry the same wire_count, so inspecting the last
 	 * page is enough to tell whether any pte entries remain.
 	 */
 	if (pg->wire_count != 0)
 		return (0);
 
 	ptbl_free(mmu, pmap, pdir_idx);
 	return (1);
 }
 
 /*
  * Add one hold (wire_count) to every page of the page table at
  * 'pdir_idx'.  Used when a new pte entry is inserted into the ptbl.
  */
 static void
 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
 {
 	pte_t *ptbl;
 	vm_offset_t page_va;
 	vm_page_t pg;
 	int n;
 
 	CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap,
 	    pdir_idx);
 
 	KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
 	    ("ptbl_hold: invalid pdir_idx"));
 	KASSERT((pmap != kernel_pmap),
 	    ("ptbl_hold: holding kernel ptbl!"));
 
 	ptbl = pmap->pm_pdir[pdir_idx];
 
 	KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
 
 	for (n = 0; n < PTBL_PAGES; n++) {
 		page_va = (vm_offset_t)ptbl + (n * PAGE_SIZE);
 		pg = PHYS_TO_VM_PAGE(pte_vatopa(mmu, kernel_pmap, page_va));
 		pg->wire_count++;
 	}
 }
 
 /*
  * Allocate a pv_entry structure from pvzone without sleeping.
  *
  * The global pv_entry_count is bumped first, and the page daemon is
  * woken once the count exceeds pv_entry_high_water.  Returns whatever
  * uma_zalloc() returns, which the caller must check for NULL.
  */
 pv_entry_t
 pv_alloc(void)
 {
 
 	if (++pv_entry_count > pv_entry_high_water)
 		pagedaemon_wakeup();
 
 	return (uma_zalloc(pvzone, M_NOWAIT));
 }
 
 /*
  * Free a pv_entry structure: decrement the global pv_entry_count and
  * return the entry to pvzone.
  */
 static __inline void
 pv_free(pv_entry_t pve)
 {
 
 	pv_entry_count--;
 	uma_zfree(pvzone, pve);
 }
 
 
 /*
  * Allocate a pv_entry describing the mapping (pmap, va) and append it to
  * the pv list of page 'm'.  Panics if no pv_entry can be allocated.
  *
  * The pmap lock and the write side of pvh_global_lock must be held.
  */
 static void
 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	if ((entry = pv_alloc()) == NULL)
 		panic("pv_insert: no pv entries!");
 
 	entry->pv_pmap = pmap;
 	entry->pv_va = va;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 
 	/* Link the new entry at the tail of the page's pv list. */
 	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_link);
 }
 
 /*
  * Remove and free the pv_entry for mapping (pmap, va) from the pv list
  * of page 'm'.  When the list becomes empty, PGA_WRITEABLE is cleared on
  * the page.  Nothing happens if no matching entry exists.
  *
  * The pmap lock and the write side of pvh_global_lock must be held.
  */
 static void
 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
 {
 	pv_entry_t entry;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	rw_assert(&pvh_global_lock, RA_WLOCKED);
 
 	/* Locate the entry; 'entry' ends up NULL if there is no match. */
 	TAILQ_FOREACH(entry, &m->md.pv_list, pv_link) {
 		if ((entry->pv_pmap == pmap) && (entry->pv_va == va))
 			break;
 	}
 	if (entry == NULL)
 		return;
 
 	TAILQ_REMOVE(&m->md.pv_list, entry, pv_link);
 	if (TAILQ_EMPTY(&m->md.pv_list))
 		vm_page_aflag_clear(m, PGA_WRITEABLE);
 
 	pv_free(entry);
 }
 
 /*
  * Invalidate the pte mapping 'va' in 'pmap'.
  *
  * Wired and resident statistics are updated; for a managed mapping the
  * modified/referenced state is propagated to the vm_page and its
  * pv_entry is removed.  If 'flags' contains PTBL_UNHOLD, the hold on the
  * page table is dropped as well, which may free the page table.
  *
  * Returns 1 if ptbl pages were freed, otherwise 0 (including the case
  * where the pte was not valid to begin with).
  */
 static int
 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags)
 {
 	unsigned int pdir_idx = PDIR_IDX(va);
 	pte_t *ptbl, *pte;
 	vm_page_t pg;
 
 	ptbl = pmap->pm_pdir[pdir_idx];
 	KASSERT(ptbl, ("pte_remove: null ptbl"));
 
 	pte = &ptbl[PTBL_IDX(va)];
 	if (pte == NULL || !PTE_ISVALID(pte))
 		return (0);
 
 	if (PTE_ISWIRED(pte))
 		pmap->pm_stats.wired_count--;
 
 	if (PTE_ISMANAGED(pte)) {
 		/* Carry the pte state over to the vm_page it maps. */
 		pg = PHYS_TO_VM_PAGE(PTE_PA(pte));
 
 		if (PTE_ISMODIFIED(pte))
 			vm_page_dirty(pg);
 
 		if (PTE_ISREFERENCED(pte))
 			vm_page_aflag_set(pg, PGA_REFERENCED);
 
 		pv_remove(pmap, va, pg);
 	}
 
 	/* Flush the TLB0 entry and wipe the pte under the TLB locks. */
 	mtx_lock_spin(&tlbivax_mutex);
 	tlb_miss_lock();
 
 	tlb0_flush_entry(va);
 	pte->flags = 0;
 	pte->rpn = 0;
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
 
 	pmap->pm_stats.resident_count--;
 
 	if ((flags & PTBL_UNHOLD) == 0)
 		return (0);
 
 	return (ptbl_unhold(mmu, pmap, pdir_idx));
 }
 
 /*
  * Install a pte mapping 'va' to page 'm' in 'pmap' with the given flags.
  *
  * A missing page table is allocated first (this may fail with ENOMEM
  * when 'nosleep' is set).  An existing valid mapping at 'va' is removed;
  * otherwise, for user pmaps, the page table gains one hold.  Managed
  * pages additionally get a pv_entry and the PTE_MANAGED flag.
  *
  * Returns 0 on success or ENOMEM.
  */
 static int
 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags,
     boolean_t nosleep)
 {
 	unsigned int pdir_idx = PDIR_IDX(va);
 	unsigned int ptbl_idx = PTBL_IDX(va);
 	pte_t *ptbl, *pte;
 
 	CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__,
 	    pmap == kernel_pmap, pmap, va);
 
 	ptbl = pmap->pm_pdir[pdir_idx];
 
 	if (ptbl != NULL) {
 		/*
 		 * The page table exists: replace a valid mapping for this
 		 * va, or account for the newly used slot with a hold.
 		 */
 		pte = &pmap->pm_pdir[pdir_idx][ptbl_idx];
 		if (PTE_ISVALID(pte))
 			pte_remove(mmu, pmap, va, PTBL_HOLD);
 		else if (pmap != kernel_pmap)
 			ptbl_hold(mmu, pmap, pdir_idx);
 	} else {
 		/* No page table yet: allocate its pages. */
 		ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep);
 		if (ptbl == NULL) {
 			KASSERT(nosleep, ("nosleep and NULL ptbl"));
 			return (ENOMEM);
 		}
 	}
 
 	/* Managed pages are tracked through a pv_entry. */
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		flags |= PTE_MANAGED;
 		pv_insert(pmap, va, m);
 	}
 
 	pmap->pm_stats.resident_count++;
 
 	mtx_lock_spin(&tlbivax_mutex);
 	tlb_miss_lock();
 
 	tlb0_flush_entry(va);
 
 	/* Hook a freshly allocated page table into the pdir. */
 	if (pmap->pm_pdir[pdir_idx] == NULL)
 		pmap->pm_pdir[pdir_idx] = ptbl;
 
 	pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]);
 	pte->rpn = VM_PAGE_TO_PHYS(m) & ~PTE_PA_MASK;
 	pte->flags |= (PTE_VALID | flags);
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
 	return (0);
 }
 
 /*
  * Translate 'va' in 'pmap' to a physical address.
  *
  * Returns the physical address including the offset within the page, or
  * 0 when there is no page table or no valid pte for the address.
  */
 static vm_paddr_t
 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va)
 {
 	pte_t *pte = pte_find(mmu, pmap, va);
 
 	if ((pte == NULL) || !PTE_ISVALID(pte))
 		return (0);
 
 	return (PTE_PA(pte) | (va & PTE_PA_MASK));
 }
 
 /*
  * Return a pointer to the pte slot for 'va' in 'pmap', or NULL if the
  * covering page table does not exist.  The returned pte is not checked
  * for validity.
  */
 static pte_t *
 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
 {
 	pte_t *ptbl;
 
 	KASSERT((pmap != NULL), ("pte_find: invalid pmap"));
 
 	ptbl = pmap->pm_pdir[PDIR_IDX(va)];
 	if (ptbl == NULL)
 		return (NULL);
 
 	return (&ptbl[PTBL_IDX(va)]);
 }
 
 /**************************************************************************/
 /* PMAP related */
 /**************************************************************************/
 
 /*
  * This is called during booke_init, before the system is really initialized.
  *
  * In order, the function:
  *  - carves the dynamic per-cpu area, message buffer, ptbl_bufs array and
  *    kernel page tables out of the space following the kernel image,
  *    extending the TLB1 mapping if that space is not covered yet;
  *  - reserves KVA for page zero/copy operations and the ptbl buf pool;
  *  - removes the kernel's physical range from the available memory
  *    regions, page-aligns and sorts them, steals kstack0 from the end of
  *    the first region and fills in phys_avail[];
  *  - initializes kernel_pmap (pdir, TIDs, PTEs covering the kernel) and
  *    the global pv list lock;
  *  - maps thread0's kernel stack and marks the pmap as bootstrapped.
  *
  * 'start' and 'kernelend' are the virtual start and end addresses of the
  * kernel image.
  */
 static void
 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
 {
 	vm_offset_t phys_kernelend;
 	struct mem_region *mp, *mp1;
 	int cnt, i, j;
 	u_int s, e, sz;
 	u_int phys_avail_count;
 	vm_size_t physsz, hwphyssz, kstack0_sz;
 	vm_offset_t kernel_pdir, kstack0, va;
 	vm_paddr_t kstack0_phys;
 	void *dpcpu;
 	pte_t *pte;
 
 	debugf("mmu_booke_bootstrap: entered\n");
 
 	/* Set interesting system properties */
 	hw_direct_map = 0;
 	elf32_nxstack = 1;
 
 	/* Initialize invalidation mutex */
 	mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
 
 	/* Read TLB0 size and associativity. */
 	tlb0_get_tlbconf();
 
 	/*
 	 * Align kernel start and end address (kernel image).
 	 * Note that kernel end does not necessarily relate to kernsize.
 	 * kernsize is the size of the kernel that is actually mapped.
 	 */
 	kernstart = trunc_page(start);
 	data_start = round_page(kernelend);
 	data_end = data_start;
 
 	/*
 	 * Addresses of preloaded modules (like file systems) use
 	 * physical addresses. Make sure we relocate those into
 	 * virtual addresses.
 	 */
 	preload_addr_relocate = kernstart - kernload;
 
 	/* Allocate the dynamic per-cpu area. */
 	dpcpu = (void *)data_end;
 	data_end += DPCPU_SIZE;
 
 	/* Allocate space for the message buffer. */
 	msgbufp = (struct msgbuf *)data_end;
 	data_end += msgbufsize;
 	debugf(" msgbufp at 0x%08x end = 0x%08x\n", (uint32_t)msgbufp,
 	    data_end);
 
 	data_end = round_page(data_end);
 
 	/* Allocate space for ptbl_bufs. */
 	ptbl_bufs = (struct ptbl_buf *)data_end;
 	data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
 	debugf(" ptbl_bufs at 0x%08x end = 0x%08x\n", (uint32_t)ptbl_bufs,
 	    data_end);
 
 	data_end = round_page(data_end);
 
 	/* Allocate PTE tables for kernel KVA. */
 	kernel_pdir = data_end;
 	kernel_ptbls = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS +
 	    PDIR_SIZE - 1) / PDIR_SIZE;
 	data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;
 	debugf(" kernel ptbls: %d\n", kernel_ptbls);
 	debugf(" kernel pdir at 0x%08x end = 0x%08x\n", kernel_pdir, data_end);
 
 	debugf(" data_end: 0x%08x\n", data_end);
 	if (data_end - kernstart > kernsize) {
 		kernsize += tlb1_mapin_region(kernstart + kernsize,
 		    kernload + kernsize, (data_end - kernstart) - kernsize);
 	}
 	data_end = kernstart + kernsize;
 	debugf(" updated data_end: 0x%08x\n", data_end);
 
 	/*
 	 * Clear the structures - note we can only do it safely after the
 	 * possible additional TLB1 translations are in place (above) so that
 	 * all range up to the currently calculated 'data_end' is covered.
 	 *
 	 * The ptbl_bufs array is cleared with the same size that was
 	 * reserved for it above (PTBL_BUFS elements).
 	 */
 	dpcpu_init(dpcpu, 0);
 	memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_BUFS);
 	memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
 
 	/*******************************************************/
 	/* Set the start and end of kva. */
 	/*******************************************************/
 	virtual_avail = round_page(data_end);
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 	/* Allocate KVA space for page zero/copy operations. */
 	zero_page_va = virtual_avail;
 	virtual_avail += PAGE_SIZE;
 	zero_page_idle_va = virtual_avail;
 	virtual_avail += PAGE_SIZE;
 	copy_page_src_va = virtual_avail;
 	virtual_avail += PAGE_SIZE;
 	copy_page_dst_va = virtual_avail;
 	virtual_avail += PAGE_SIZE;
 	debugf("zero_page_va = 0x%08x\n", zero_page_va);
 	debugf("zero_page_idle_va = 0x%08x\n", zero_page_idle_va);
 	debugf("copy_page_src_va = 0x%08x\n", copy_page_src_va);
 	debugf("copy_page_dst_va = 0x%08x\n", copy_page_dst_va);
 
 	/* Initialize page zero/copy mutexes. */
 	mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
 	mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
 
 	/* Allocate KVA space for ptbl bufs. */
 	ptbl_buf_pool_vabase = virtual_avail;
 	virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
 	debugf("ptbl_buf_pool_vabase = 0x%08x end = 0x%08x\n",
 	    ptbl_buf_pool_vabase, virtual_avail);
 
 	/* Calculate corresponding physical addresses for the kernel region. */
 	phys_kernelend = kernload + kernsize;
 	debugf("kernel image and allocated data:\n");
 	debugf(" kernload    = 0x%08x\n", kernload);
 	debugf(" kernstart   = 0x%08x\n", kernstart);
 	debugf(" kernsize    = 0x%08x\n", kernsize);
 
 	/*
 	 * Remove kernel physical address range from avail regions list. Page
 	 * align all regions.  Non-page aligned memory isn't very interesting
 	 * to us.  Also, sort the entries for ascending addresses.
 	 */
 
 	/* Retrieve phys/avail mem regions */
 	mem_regions(&physmem_regions, &physmem_regions_sz,
 	    &availmem_regions, &availmem_regions_sz);
 
 	/*
 	 * Only now is availmem_regions_sz known, so this is the earliest
 	 * point at which the capacity of phys_avail[] can be checked.
 	 */
 	if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz)
 		panic("mmu_booke_bootstrap: phys_avail too small");
 
 	sz = 0;
 	cnt = availmem_regions_sz;
 	debugf("processing avail regions:\n");
 	for (mp = availmem_regions; mp->mr_size; mp++) {
 		s = mp->mr_start;
 		e = mp->mr_start + mp->mr_size;
 		debugf(" %08x-%08x -> ", s, e);
 		/* Check whether this region holds all of the kernel. */
 		if (s < kernload && e > phys_kernelend) {
 			availmem_regions[cnt].mr_start = phys_kernelend;
 			availmem_regions[cnt++].mr_size = e - phys_kernelend;
 			e = kernload;
 		}
 		/* Look whether this regions starts within the kernel. */
 		if (s >= kernload && s < phys_kernelend) {
 			if (e <= phys_kernelend)
 				goto empty;
 			s = phys_kernelend;
 		}
 		/* Now look whether this region ends within the kernel. */
 		if (e > kernload && e <= phys_kernelend) {
 			if (s >= kernload)
 				goto empty;
 			e = kernload;
 		}
 		/* Now page align the start and size of the region. */
 		s = round_page(s);
 		e = trunc_page(e);
 		if (e < s)
 			e = s;
 		sz = e - s;
 		debugf("%08x-%08x = %x\n", s, e, sz);
 
 		/* Check whether some memory is left here. */
 		if (sz == 0) {
 		empty:
 			memmove(mp, mp + 1,
 			    (cnt - (mp - availmem_regions)) * sizeof(*mp));
 			cnt--;
 			mp--;
 			continue;
 		}
 
 		/* Do an insertion sort. */
 		for (mp1 = availmem_regions; mp1 < mp; mp1++)
 			if (s < mp1->mr_start)
 				break;
 		if (mp1 < mp) {
 			memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
 			mp1->mr_start = s;
 			mp1->mr_size = sz;
 		} else {
 			mp->mr_start = s;
 			mp->mr_size = sz;
 		}
 	}
 	availmem_regions_sz = cnt;
 
 	/*******************************************************/
 	/* Steal physical memory for kernel stack from the end */
 	/* of the first avail region                           */
 	/*******************************************************/
 	kstack0_sz = KSTACK_PAGES * PAGE_SIZE;
 	kstack0_phys = availmem_regions[0].mr_start +
 	    availmem_regions[0].mr_size;
 	kstack0_phys -= kstack0_sz;
 	availmem_regions[0].mr_size -= kstack0_sz;
 
 	/*******************************************************/
 	/* Fill in phys_avail table, based on availmem_regions */
 	/*******************************************************/
 	phys_avail_count = 0;
 	physsz = 0;
 	hwphyssz = 0;
 	TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
 
 	debugf("fill in phys_avail:\n");
 	for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
 
 		debugf(" region: 0x%08x - 0x%08x (0x%08x)\n",
 		    availmem_regions[i].mr_start,
 		    availmem_regions[i].mr_start +
 		        availmem_regions[i].mr_size,
 		    availmem_regions[i].mr_size);
 
 		/* Honor the hw.physmem limit, truncating the last region. */
 		if (hwphyssz != 0 &&
 		    (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
 			debugf(" hw.physmem adjust\n");
 			if (physsz < hwphyssz) {
 				phys_avail[j] = availmem_regions[i].mr_start;
 				phys_avail[j + 1] =
 				    availmem_regions[i].mr_start +
 				    hwphyssz - physsz;
 				physsz = hwphyssz;
 				phys_avail_count++;
 			}
 			break;
 		}
 
 		phys_avail[j] = availmem_regions[i].mr_start;
 		phys_avail[j + 1] = availmem_regions[i].mr_start +
 		    availmem_regions[i].mr_size;
 		phys_avail_count++;
 		physsz += availmem_regions[i].mr_size;
 	}
 	physmem = btoc(physsz);
 
 	/* Calculate the last available physical address. */
 	for (i = 0; phys_avail[i + 2] != 0; i += 2)
 		;
 	Maxmem = powerpc_btop(phys_avail[i + 1]);
 
 	debugf("Maxmem = 0x%08lx\n", Maxmem);
 	debugf("phys_avail_count = %d\n", phys_avail_count);
 	debugf("physsz = 0x%08x physmem = %ld (0x%08lx)\n", physsz, physmem,
 	    physmem);
 
 	/*******************************************************/
 	/* Initialize (statically allocated) kernel pmap. */
 	/*******************************************************/
 	PMAP_LOCK_INIT(kernel_pmap);
 	kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
 
 	debugf("kernel_pmap = 0x%08x\n", (uint32_t)kernel_pmap);
 	debugf("kptbl_min = %d, kernel_ptbls = %d\n", kptbl_min, kernel_ptbls);
 	debugf("kernel pdir range: 0x%08x - 0x%08x\n",
 	    kptbl_min * PDIR_SIZE, (kptbl_min + kernel_ptbls) * PDIR_SIZE - 1);
 
 	/* Initialize kernel pdir */
 	for (i = 0; i < kernel_ptbls; i++)
 		kernel_pmap->pm_pdir[kptbl_min + i] =
 		    (pte_t *)(kernel_pdir + (i * PAGE_SIZE * PTBL_PAGES));
 
 	for (i = 0; i < MAXCPU; i++) {
 		kernel_pmap->pm_tid[i] = TID_KERNEL;
 
 		/* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
 		tidbusy[i][0] = kernel_pmap;
 	}
 
 	/*
 	 * Fill in PTEs covering kernel code and data. They are not required
 	 * for address translation, as this area is covered by static TLB1
 	 * entries, but for pte_vatopa() to work correctly with kernel area
 	 * addresses.
 	 */
 	for (va = kernstart; va < data_end; va += PAGE_SIZE) {
 		pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
 		pte->rpn = kernload + (va - kernstart);
 		pte->flags = PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
 		    PTE_VALID;
 	}
 	/* Mark kernel_pmap active on all CPUs */
 	CPU_FILL(&kernel_pmap->pm_active);
 
 	/*
 	 * Initialize the global pv list lock.
 	 */
 	rw_init(&pvh_global_lock, "pmap pv global");
 
 	/*******************************************************/
 	/* Final setup */
 	/*******************************************************/
 
 	/* Enter kstack0 into kernel map, provide guard page */
 	kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
 	thread0.td_kstack = kstack0;
 	thread0.td_kstack_pages = KSTACK_PAGES;
 
 	debugf("kstack_sz = 0x%08x\n", kstack0_sz);
 	debugf("kstack0_phys at 0x%08x - 0x%08x\n",
 	    kstack0_phys, kstack0_phys + kstack0_sz);
 	debugf("kstack0 at 0x%08x - 0x%08x\n", kstack0, kstack0 + kstack0_sz);
 
 	virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
 	for (i = 0; i < KSTACK_PAGES; i++) {
 		mmu_booke_kenter(mmu, kstack0, kstack0_phys);
 		kstack0 += PAGE_SIZE;
 		kstack0_phys += PAGE_SIZE;
 	}
 
 	pmap_bootstrapped = 1;
 
 	debugf("virtual_avail = %08x\n", virtual_avail);
 	debugf("virtual_end   = %08x\n", virtual_end);
 
 	debugf("mmu_booke_bootstrap: exit\n");
 }
 
 #ifdef SMP
 /*
  * Per-AP pmap bootstrap: program this CPU's TLB1 from the software
  * tlb1[] table and apply the default MAS4 settings.
  */
 void
 pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
 {
 	int slot;
 
 	/*
 	 * Finish TLB1 configuration: the BSP already set up its TLB1 and we
 	 * have the snapshot of its contents in the s/w tlb1[] table, so use
 	 * these values directly to (re)program AP's TLB1 hardware.  Only
 	 * valid entries from bp_ntlb1s onward are written.
 	 */
 	for (slot = bp_ntlb1s; slot < tlb1_idx; slot++) {
 		if ((tlb1[slot].mas1 & MAS1_VALID) != 0)
 			tlb1_write_entry(slot);
 	}
 
 	set_mas4_defaults();
 }
 #endif
 
 /*
  * Look up the physical address mapped at 'va' in 'pmap', taking the pmap
  * lock around the page table walk.  Returns 0 if there is no valid
  * mapping.
  */
 static vm_paddr_t
 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va)
 {
 	vm_paddr_t phys;
 
 	PMAP_LOCK(pmap);
 	phys = pte_vatopa(mmu, pmap, va);
 	PMAP_UNLOCK(pmap);
 
 	return (phys);
 }
 
 /*
  * Translate a kernel virtual address to its physical address.
  *
  * Valid TLB1 entries are consulted first; if none covers 'va', the
  * kernel pmap's page tables are used instead.
  */
 static vm_paddr_t
 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 {
 	int idx;
 
 	for (idx = 0; idx < tlb1_idx; idx++) {
 		if ((tlb1[idx].mas1 & MAS1_VALID) != 0 &&
 		    va >= tlb1[idx].virt &&
 		    va < tlb1[idx].virt + tlb1[idx].size)
 			return (tlb1[idx].phys + (va - tlb1[idx].virt));
 	}
 
 	return (pte_vatopa(mmu, kernel_pmap, va));
 }
 
 /*
  * Initialize the pmap module.
  * Called by vm_init, to initialize any structures that the pmap
  * system needs to map virtual memory.
  *
  * This creates the pv_entry zone, derives pv_entry_max and the high
  * water mark from tunables, reserves KVA for and pre-fills the zone, and
  * finally sets up ptbl buffer allocation.
  */
 static void
 mmu_booke_init(mmu_t mmu)
 {
 	int shpgperproc;
 
 	/* Zone backing all pv entries. */
 	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 
 	/* Default limit, overridable through the two tunables below. */
 	shpgperproc = PMAP_SHPGPERPROC;
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 
 	/*
 	 * The high water mark lets the system recover from excessive
 	 * numbers of pv entries.
 	 */
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	uma_zone_reserve_kva(pvzone, pv_entry_max);
 
 	/* Pre-fill pvzone with initial number of pv entries. */
 	uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);
 
 	/* Initialize ptbl allocation. */
 	ptbl_init();
 }
 
 /*
  * Map 'count' wired pages from the array 'm' at consecutive page-sized
  * slots of kernel virtual address space starting at 'sva'.  Intended for
  * temporary mappings which do not need page modification or references
  * recorded.  Existing mappings in the region are overwritten.
  */
 static void
 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count)
 {
 	int n;
 
 	for (n = 0; n < count; n++, sva += PAGE_SIZE)
 		mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(m[n]));
 }
 
 /*
  * Remove 'count' consecutive page mappings from kernel virtual address
  * space starting at 'sva'.  Intended for temporary mappings entered by
  * mmu_booke_qenter.
  */
 static void
 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count)
 {
 	int n;
 
 	for (n = 0; n < count; n++, sva += PAGE_SIZE)
 		mmu_booke_kremove(mmu, sva);
 }
 
 /*
  * Map a wired page into kernel virtual address space.
  *
  * Thin wrapper around mmu_booke_kenter_attr() that always passes
  * VM_MEMATTR_DEFAULT as the memory attribute.
  */
 static void
 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
 {
 
 	mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
 }
 
 /*
  * Map physical page 'pa' at kernel virtual address 'va' with memory
  * attribute 'ma'.  The mapping is wired, valid and grants supervisor
  * read/write/execute; the remaining PTE bits come from tlb_calc_wimg().
  * An existing valid mapping at 'va' is flushed from TLB0 and overwritten.
  */
 static void
 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
 {
 	uint32_t pte_flags;
 	pte_t *pte;
 
 	KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
 	    (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));
 
 	pte_flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
 	pte_flags |= tlb_calc_wimg(pa, ma);
 
 	pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
 
 	mtx_lock_spin(&tlbivax_mutex);
 	tlb_miss_lock();
 
 	if (PTE_ISVALID(pte)) {
 		CTR1(KTR_PMAP, "%s: replacing entry!", __func__);
 
 		/* The old translation must not linger in TLB0. */
 		tlb0_flush_entry(va);
 	}
 
 	pte->rpn = pa & ~PTE_PA_MASK;
 	pte->flags = pte_flags;
 
 	/*
 	 * Flush the real memory from the instruction cache, but only when
 	 * neither PTE_I nor PTE_G is set on the mapping.
 	 */
 	if ((pte_flags & (PTE_I | PTE_G)) == 0)
 		__syncicache((void *)va, PAGE_SIZE);
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
 }
 
 /*
  * Remove the mapping for 'va' from the kernel page table.  An entry
  * that is not valid is left untouched.
  */
 static void
 mmu_booke_kremove(mmu_t mmu, vm_offset_t va)
 {
 	pte_t *pte;
 
 	KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
 	    (va <= VM_MAX_KERNEL_ADDRESS)),
 	    ("mmu_booke_kremove: invalid va"));
 
 	pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
 
 	if (!PTE_ISVALID(pte)) {
 		CTR1(KTR_PMAP, "%s: invalid pte", __func__);
 		return;
 	}
 
 	mtx_lock_spin(&tlbivax_mutex);
 	tlb_miss_lock();
 
 	/* Invalidate entry in TLB0, update PTE. */
 	tlb0_flush_entry(va);
 	pte->flags = 0;
 	pte->rpn = 0;
 
 	tlb_miss_unlock();
 	mtx_unlock_spin(&tlbivax_mutex);
 }
 
 /*
  * Initialize pmap associated with process 0.
  *
  * Initializes the pmap lock, performs the regular mmu_booke_pinit()
  * setup and records the pmap as the current CPU's curpmap.
  */
 static void
 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
 	mmu_booke_pinit(mmu, pmap);
 	PCPU_SET(curpmap, pmap);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  *
  * Every per-CPU TID is set to TID_NONE, the pmap's own active CPU set,
  * statistics and page directory are cleared, and its list of allocated
  * ptbl bufs is initialized.  Must not be used on kernel_pmap.
  */
 static void
 mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
 {
 	int i;
 
 	CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
 	    curthread->td_proc->p_pid, curthread->td_proc->p_comm);
 
 	KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));
 
 	for (i = 0; i < MAXCPU; i++)
 		pmap->pm_tid[i] = TID_NONE;
 	/*
 	 * Clear the active set of the pmap being initialized.  kernel_pmap's
 	 * set is filled at bootstrap and must not be touched here.
 	 */
 	CPU_ZERO(&pmap->pm_active);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
 	TAILQ_INIT(&pmap->pm_ptbl_list);
 }
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by mmu_booke_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The only action taken here is asserting that the resident count has
  * dropped to zero.
  */
 static void
 mmu_booke_release(mmu_t mmu, pmap_t pmap)
 {
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
 }
 
 /*
  * Insert the given physical page at the specified virtual address in the
  * target physical map with the protection requested. If specified the page
  * will be wired down.
  *
  * This wrapper takes pvh_global_lock (write) and the pmap lock, delegates
  * to mmu_booke_enter_locked() and returns its result after dropping both
  * locks.
  */
 static int
 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, u_int flags, int8_t psind)
 {
 	int rv;
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 
 	rv = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind);
 
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 
 	return (rv);
 }
 
 /*
  * Enter a mapping for page m at va in pmap.  The caller must already hold
  * the pmap lock (asserted below).
  *
  * If a valid PTE for the same physical page already exists at va, only its
  * protection and wiring bits are recomputed and written back in place.
  * Otherwise a new PTE is installed through pte_enter().
  *
  * prot carries the VM_PROT_* permissions and pmap_flags the PMAP_ENTER_*
  * options (WIRED and NOSLEEP are the ones examined here); psind is unused.
  *
  * Returns KERN_SUCCESS, or KERN_RESOURCE_SHORTAGE when pte_enter() fails.
  */
 static int
 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, u_int pmap_flags, int8_t psind __unused)
 {
 	pte_t *pte;
 	vm_paddr_t pa;
 	uint32_t flags;
 	int error, su, sync;
 
 	pa = VM_PAGE_TO_PHYS(m);
 	su = (pmap == kernel_pmap);
 	sync = 0;
 
 	if (su) {
 		KASSERT(((va >= virtual_avail) &&
 		    (va <= VM_MAX_KERNEL_ADDRESS)),
 		    ("mmu_booke_enter_locked: kernel pmap, non kernel va"));
 	} else {
 		KASSERT((va <= VM_MAXUSER_ADDRESS),
 		    ("mmu_booke_enter_locked: user pmap, non user va"));
 	}
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
 	 * If there is an existing mapping, and the physical address has not
 	 * changed, must be protection or wiring change.
 	 */
 	if (((pte = pte_find(mmu, pmap, va)) != NULL) &&
 	    (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {
 
 		/*
 		 * Before actually updating pte->flags we calculate and
 		 * prepare its new value in a helper var.
 		 */
 		flags = pte->flags;
 		flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);
 
 		/* Wiring change, just update stats. */
 		if ((pmap_flags & PMAP_ENTER_WIRED) != 0) {
 			if (!PTE_ISWIRED(pte)) {
 				flags |= PTE_WIRED;
 				pmap->pm_stats.wired_count++;
 			}
 		} else {
 			if (PTE_ISWIRED(pte)) {
 				flags &= ~PTE_WIRED;
 				pmap->pm_stats.wired_count--;
 			}
 		}
 
 		if (prot & VM_PROT_WRITE) {
 			/* Add write permissions. */
 			flags |= PTE_SW;
 			if (!su)
 				flags |= PTE_UW;
 
 			if ((flags & PTE_MANAGED) != 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 		} else {
 			/* Handle modified pages, sense modify status. */
 
 			/*
 			 * The PTE_MODIFIED flag could be set by underlying
 			 * TLB misses since we last read it (above), possibly
 			 * other CPUs could update it so we check in the PTE
 			 * directly rather than rely on that saved local flags
 			 * copy.
 			 */
 			if (PTE_ISMODIFIED(pte))
 				vm_page_dirty(m);
 		}
 
 		if (prot & VM_PROT_EXECUTE) {
 			flags |= PTE_SX;
 			if (!su)
 				flags |= PTE_UX;
 
 			/*
 			 * Check existing flags for execute permissions: if we
 			 * are turning execute permissions on, icache should
 			 * be flushed.
 			 */
 			if ((pte->flags & (PTE_UX | PTE_SX)) == 0)
 				sync++;
 		}
 
 		flags &= ~PTE_REFERENCED;
 
 		/*
 		 * The new flags value is all calculated -- only now actually
 		 * update the PTE.
 		 */
 		mtx_lock_spin(&tlbivax_mutex);
 		tlb_miss_lock();
 
 		tlb0_flush_entry(va);
 		pte->flags = flags;
 
 		tlb_miss_unlock();
 		mtx_unlock_spin(&tlbivax_mutex);
 
 	} else {
 		/*
 		 * If there is an existing mapping, but it's for a different
 		 * physical address, pte_enter() will delete the old mapping.
 		 */
 
 		/* Now set up the flags and install the new mapping. */
 		flags = (PTE_SR | PTE_VALID);
 		flags |= PTE_M;
 
 		if (!su)
 			flags |= PTE_UR;
 
 		if (prot & VM_PROT_WRITE) {
 			flags |= PTE_SW;
 			if (!su)
 				flags |= PTE_UW;
 
 			if ((m->oflags & VPO_UNMANAGED) == 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 		}
 
 		if (prot & VM_PROT_EXECUTE) {
 			flags |= PTE_SX;
 			if (!su)
 				flags |= PTE_UX;
 		}
 
 		/* Record the wiring request in the PTE flags. */
 		if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
 			flags |= PTE_WIRED;
 
 		error = pte_enter(mmu, pmap, m, va, flags,
 		    (pmap_flags & PMAP_ENTER_NOSLEEP) != 0);
 		if (error != 0)
 			return (KERN_RESOURCE_SHORTAGE);
 
 		/*
 		 * Account for the wiring only once the mapping is in place.
 		 * The test must use the caller's PMAP_ENTER_* flags:
 		 * 'flags' holds PTE_* bits, which live in a different
 		 * namespace than PMAP_ENTER_WIRED.
 		 */
 		if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
 			pmap->pm_stats.wired_count++;
 
 		/* Flush the real memory from the instruction cache. */
 		if (prot & VM_PROT_EXECUTE)
 			sync++;
 	}
 
 	/*
 	 * The icache sync goes through va, so it is only done when va is
 	 * reachable: kernel pmap or the pmap current on this CPU.
 	 */
 	if (sync && (su || pmap == PCPU_GET(curpmap)))
 		__syncicache((void *)va, PAGE_SIZE);
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Map a run of resident pages that all belong to one object.
  *
  * m_start is mapped at 'start'; every following page on the object's list
  * is mapped at start plus its pindex distance from m_start, as long as that
  * distance stays below the number of pages in [start, end).  Holes in the
  * resident set are simply skipped.  Write permission is always stripped and
  * the mappings are entered without sleeping.
  */
 static void
 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start,
     vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
 {
 	vm_page_t pg;
 	vm_pindex_t npages, off;
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 
 	npages = atop(end - start);
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	for (pg = m_start; pg != NULL; pg = TAILQ_NEXT(pg, listq)) {
 		off = pg->pindex - m_start->pindex;
 		if (off >= npages)
 			break;
 		mmu_booke_enter_locked(mmu, pmap, start + ptoa(off), pg,
 		    prot & (VM_PROT_READ | VM_PROT_EXECUTE),
 		    PMAP_ENTER_NOSLEEP, 0);
 	}
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Enter a single mapping without sleeping.  Only the read and execute bits
  * of prot are passed on, so the mapping is never created writable.  The
  * status returned by mmu_booke_enter_locked() is ignored.
  */
 static void
 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
     vm_prot_t prot)
 {
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	mmu_booke_enter_locked(mmu, pmap, va, m,
 	    prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP,
 	    0);
 	rw_wunlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Remove every valid mapping in [va, endva) from the specified map.
  *
  * The bounds are assumed to be page aligned; the range is walked one page
  * at a time.
  */
 static void
 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva)
 {
 	vm_offset_t cur;
 	pte_t *pte;
 	uint8_t hold_flag;
 
 	if (pmap == kernel_pmap) {
 		KASSERT(((va >= virtual_avail) &&
 		    (va <= VM_MAX_KERNEL_ADDRESS)),
 		    ("mmu_booke_remove: kernel pmap, non kernel va"));
 	} else {
 		KASSERT((va <= VM_MAXUSER_ADDRESS),
 		    ("mmu_booke_remove: user pmap, non user va"));
 	}
 
 	/* Bail out early when the pmap reports removal as already done. */
 	if (PMAP_REMOVE_DONE(pmap))
 		return;
 
 	hold_flag = PTBL_HOLD_FLAG(pmap);
 
 	rw_wlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	cur = va;
 	while (cur < endva) {
 		pte = pte_find(mmu, pmap, cur);
 		if (pte != NULL && PTE_ISVALID(pte))
 			pte_remove(mmu, pmap, cur, hold_flag);
 		cur += PAGE_SIZE;
 	}
 	PMAP_UNLOCK(pmap);
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  * Remove physical page from all pmaps in which it resides.
  *
  * Walks the page's pv list under pvh_global_lock, removing each mapping
  * with its owning pmap locked, then clears PGA_WRITEABLE on the page.
  */
 static void
 mmu_booke_remove_all(mmu_t mmu, vm_page_t m)
 {
 	pv_entry_t pv, pvn;
 	pmap_t pmap;
 	uint8_t hold_flag;
 
 	rw_wlock(&pvh_global_lock);
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) {
 		/*
 		 * The successor and the owning pmap are both fetched before
 		 * pte_remove() runs, so nothing is read through pv afterwards.
 		 * NOTE(review): presumably pte_remove() releases the pv
 		 * entry -- confirm against its implementation.
 		 */
 		pvn = TAILQ_NEXT(pv, pv_link);
 		pmap = pv->pv_pmap;
 
 		PMAP_LOCK(pmap);
 		hold_flag = PTBL_HOLD_FLAG(pmap);
 		pte_remove(mmu, pmap, pv->pv_va, hold_flag);
 		PMAP_UNLOCK(pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  * Map the physical range [pa_start, pa_end) into kernel virtual address
  * space, one page at a time, beginning at *virt.
  *
  * On return *virt has been advanced past the last page mapped; the return
  * value is the virtual address at which the range begins.  The prot
  * argument is not consulted.
  */
 static vm_offset_t
 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
     vm_paddr_t pa_end, int prot)
 {
 	vm_offset_t first, cur;
 	vm_paddr_t pa;
 
 	first = *virt;
 	cur = first;
 	for (pa = pa_start; pa < pa_end; pa += PAGE_SIZE) {
 		mmu_booke_kenter(mmu, cur, pa);
 		cur += PAGE_SIZE;
 	}
 	*virt = cur;
 
 	return (first);
 }
 
 /*
  * The pmap must be activated before its address space can be accessed in any
  * way.
  *
  * Activation marks the current CPU in the pmap's active set, records the
  * pmap as this CPU's current pmap, makes sure it has a TID on this CPU, and
  * loads that TID into PID0.  The thread's saved DBCR0 value is restored as
  * well.
  */
 static void
 mmu_booke_activate(mmu_t mmu, struct thread *td)
 {
 	pmap_t pmap;
 	u_int cpuid;
 
 	pmap = &td->td_proc->p_vmspace->vm_pmap;
 
 	CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%08x)",
 	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
 
 	KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));
 
 	/* Stay on this CPU while per-CPU state and registers are updated. */
 	sched_pin();
 
 	cpuid = PCPU_GET(cpuid);
 	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	
 	/* TIDs are tracked per CPU; allocate one on first use here. */
 	if (pmap->pm_tid[cpuid] == TID_NONE)
 		tid_alloc(pmap);
 
 	/* Load PID0 register with pmap tid value. */
 	mtspr(SPR_PID0, pmap->pm_tid[cpuid]);
 	__asm __volatile("isync");
 
 	/* Restore the debug control value saved in the thread's PCB. */
 	mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0);
 
 	sched_unpin();
 
 	CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
 	    pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
 }
 
 /*
  * Deactivate the specified process's address space.
  *
  * Saves the current DBCR0 value into the thread's PCB, removes this CPU
  * from the pmap's active set and clears the per-CPU current pmap pointer.
  */
 static void
 mmu_booke_deactivate(mmu_t mmu, struct thread *td)
 {
 	pmap_t pmap;
 
 	pmap = &td->td_proc->p_vmspace->vm_pmap;
 	
 	CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x",
 	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
 
 	/* Counterpart of the DBCR0 restore done in mmu_booke_activate(). */
 	td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0);
 
 	CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active);
 	PCPU_SET(curpmap, NULL);
 }
 
 /*
  * Copy the range specified by src_addr/len
  * from the source map to the range dst_addr/len
  * in the destination map.
  *
  * This routine is only advisory and need not do anything.
  */
 static void
 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap,
     vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
 {
 
 	/* Intentionally empty: this implementation copies nothing. */
 }
 
 /*
  * Set the physical protection on the range [sva, eva) of this map.
  *
  * Without read permission the range is removed outright.  If write
  * permission is still requested nothing needs to change.  Otherwise every
  * valid mapping in the range loses its write and modified bits.
  */
 static void
 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
     vm_prot_t prot)
 {
 	vm_offset_t va;
 	vm_page_t pg;
 	pte_t *pte;
 
 	/* Dropping read access means the mappings go away entirely. */
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		mmu_booke_remove(mmu, pmap, sva, eva);
 		return;
 	}
 
 	/* Write access is retained, so there is nothing to revoke. */
 	if ((prot & VM_PROT_WRITE) != 0)
 		return;
 
 	PMAP_LOCK(pmap);
 	for (va = sva; va < eva; va += PAGE_SIZE) {
 		pte = pte_find(mmu, pmap, va);
 		if (pte == NULL || !PTE_ISVALID(pte))
 			continue;
 
 		pg = PHYS_TO_VM_PAGE(PTE_PA(pte));
 
 		mtx_lock_spin(&tlbivax_mutex);
 		tlb_miss_lock();
 
 		/* Carry the modified state of managed pages over to the VM. */
 		if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte))
 			vm_page_dirty(pg);
 
 		tlb0_flush_entry(va);
 		pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
 
 		tlb_miss_unlock();
 		mtx_unlock_spin(&tlbivax_mutex);
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Clear the write and modified bits in each of the given page's mappings.
  *
  * The page must be managed and its object write-locked.  For every valid
  * mapping on the page's pv list, a set modified bit is first propagated via
  * vm_page_dirty(), the TLB0 entry is invalidated, and the write/modified
  * bits are cleared in the PTE.  PGA_WRITEABLE is cleared on the page at the
  * end.
  */
 static void
 mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
 {
 	pv_entry_t pv;
 	pte_t *pte;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_remove_write: page %p is not managed", m));
 
 	/*
 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 	 * set by another thread while the object is locked.  Thus,
 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
 			if (PTE_ISVALID(pte)) {
 				m = PHYS_TO_VM_PAGE(PTE_PA(pte));
 
 				mtx_lock_spin(&tlbivax_mutex);
 				tlb_miss_lock();
 
 				/* Handle modified pages. */
 				if (PTE_ISMODIFIED(pte))
 					vm_page_dirty(m);
 
 				/*
 				 * Flush mapping from TLB0 before revoking
 				 * write permission, the same way
 				 * mmu_booke_protect() does, so that no TLB0
 				 * entry keeps the old writable permissions.
 				 */
 				tlb0_flush_entry(pv->pv_va);
 				pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
 
 				tlb_miss_unlock();
 				mtx_unlock_spin(&tlbivax_mutex);
 			}
 		}
 		PMAP_UNLOCK(pv->pv_pmap);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  * Synchronize the instruction cache for the range va/sz of pmap pm.
  *
  * va is truncated and sz rounded to page granularity, then each page with
  * a valid mapping in pm is synced.  When pm is the kernel pmap or the pmap
  * current on this CPU the sync is issued on va directly; otherwise the
  * physical page is temporarily mapped at address 0 in the current pmap,
  * synced through that mapping, and unmapped again.
  */
 static void
 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
 	pte_t *pte;
 	pmap_t pmap;
 	vm_page_t m;
 	vm_offset_t addr;
 	vm_paddr_t pa = 0;
 	int active, valid;
  
 	va = trunc_page(va);
 	sz = round_page(sz);
 
 	rw_wlock(&pvh_global_lock);
 	pmap = PCPU_GET(curpmap);
 	/* "active" means va can be dereferenced from the current context. */
 	active = (pm == kernel_pmap || pm == pmap) ? 1 : 0;
 	while (sz > 0) {
 		/* Look the page up in the target pmap; keep only the PA. */
 		PMAP_LOCK(pm);
 		pte = pte_find(mmu, pm, va);
 		valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0;
 		if (valid)
 			pa = PTE_PA(pte);
 		PMAP_UNLOCK(pm);
 		if (valid) {
 			if (!active) {
 				/* Create a mapping in the active pmap. */
 				addr = 0;
 				m = PHYS_TO_VM_PAGE(pa);
 				PMAP_LOCK(pmap);
 				pte_enter(mmu, pmap, m, addr,
 				    PTE_SR | PTE_VALID | PTE_UR, FALSE);
 				__syncicache((void *)addr, PAGE_SIZE);
 				pte_remove(mmu, pmap, addr, PTBL_UNHOLD);
 				PMAP_UNLOCK(pmap);
 			} else
 				__syncicache((void *)va, PAGE_SIZE);
 		}
 		va += PAGE_SIZE;
 		sz -= PAGE_SIZE;
 	}
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  * Atomically extract and hold the physical page with the given
  * pmap and virtual address pair if that mapping permits the given
  * protection.
  *
  * Returns the held page, or NULL when va has no valid mapping or when
  * write access was requested but the mapping is not writable.
  */
 static vm_page_t
 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va,
     vm_prot_t prot)
 {
 	pte_t *pte;
 	vm_page_t m;
 	uint32_t pte_wbit;
 	vm_paddr_t pa;
 	
 	m = NULL;
 	pa = 0;	
 	PMAP_LOCK(pmap);
 retry:
 	pte = pte_find(mmu, pmap, va);
 	if ((pte != NULL) && PTE_ISVALID(pte)) {
 		/* Kernel mappings use the supervisor write bit, others the user one. */
 		if (pmap == kernel_pmap)
 			pte_wbit = PTE_SW;
 		else
 			pte_wbit = PTE_UW;
 
 		if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) {
 			/*
 			 * A nonzero return from vm_page_pa_tryrelock() sends
 			 * us back to look the PTE up again.
 			 */
 			if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa))
 				goto retry;
 			m = PHYS_TO_VM_PAGE(PTE_PA(pte));
 			vm_page_hold(m);
 		}
 	}
 
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
 
 /*
  * Initialize a vm_page's machine-dependent fields.
  *
  * The only such field set up here is the page's pv list, which starts out
  * empty.
  */
 static void
 mmu_booke_page_init(mmu_t mmu, vm_page_t m)
 {
 
 	TAILQ_INIT(&m->md.pv_list);
 }
 
 /*
  * mmu_booke_zero_page_area zeros part of the specified hardware page by
  * mapping it into virtual memory and using bzero to clear
  * its contents.
  *
  * off and size must reside within a single page.
  */
 static void
 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
 {
 	vm_offset_t va;
 
 	/* XXX KASSERT off and size are within a single page? */
 
 	/* zero_page_mutex is held for as long as zero_page_va is in use. */
 	mtx_lock(&zero_page_mutex);
 	va = zero_page_va;
 
 	/* Map the page, clear [off, off + size), then drop the mapping. */
 	mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 	bzero((caddr_t)va + off, size);
 	mmu_booke_kremove(mmu, va);
 
 	mtx_unlock(&zero_page_mutex);
 }
 
 /*
  * mmu_booke_zero_page zeros the specified hardware page.
  *
  * Implemented as mmu_booke_zero_page_area() over the full PAGE_SIZE bytes.
  */
 static void
 mmu_booke_zero_page(mmu_t mmu, vm_page_t m)
 {
 
 	mmu_booke_zero_page_area(mmu, m, 0, PAGE_SIZE);
 }
 
 /*
  * mmu_booke_copy_page copies one page of physical memory to another.
  *
  * Both pages are mapped at the dedicated copy_page_src_va and
  * copy_page_dst_va addresses while copy_page_mutex is held, PAGE_SIZE bytes
  * are copied with memcpy, and the two mappings are removed again.
  */
 static void
 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
 {
 	vm_offset_t src_va, dst_va;
 
 	src_va = copy_page_src_va;
 	dst_va = copy_page_dst_va;
 
 	mtx_lock(&copy_page_mutex);
 
 	/* Set up the source and destination windows. */
 	mmu_booke_kenter(mmu, src_va, VM_PAGE_TO_PHYS(sm));
 	mmu_booke_kenter(mmu, dst_va, VM_PAGE_TO_PHYS(dm));
 
 	memcpy((caddr_t)dst_va, (caddr_t)src_va, PAGE_SIZE);
 
 	/* Tear the windows down, destination first. */
 	mmu_booke_kremove(mmu, dst_va);
 	mmu_booke_kremove(mmu, src_va);
 
 	mtx_unlock(&copy_page_mutex);
 }
 
 /*
  * Copy xfersize bytes from the page array ma, starting at byte offset
  * a_offset, to the page array mb, starting at byte offset b_offset.
  *
  * The copy proceeds in chunks that never cross a page boundary on either
  * side; for each chunk the relevant source and destination pages are mapped
  * at copy_page_src_va / copy_page_dst_va under copy_page_mutex.
  */
 static inline void
 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
     vm_page_t *mb, vm_offset_t b_offset, int xfersize)
 {
 	void *a_cp, *b_cp;
 	vm_offset_t a_pg_offset, b_pg_offset;
 	int cnt;
 
 	mtx_lock(&copy_page_mutex);
 	while (xfersize > 0) {
 		/* Limit the chunk to what is left in the source page... */
 		a_pg_offset = a_offset & PAGE_MASK;
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		mmu_booke_kenter(mmu, copy_page_src_va,
 		    VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
 		a_cp = (char *)copy_page_src_va + a_pg_offset;
 		/* ...and to what is left in the destination page. */
 		b_pg_offset = b_offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 		mmu_booke_kenter(mmu, copy_page_dst_va,
 		    VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
 		b_cp = (char *)copy_page_dst_va + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
 		mmu_booke_kremove(mmu, copy_page_dst_va);
 		mmu_booke_kremove(mmu, copy_page_src_va);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
 	}
 	mtx_unlock(&copy_page_mutex);
 }
 
 /*
  * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it
  * into virtual memory and using bzero to clear its contents. This is intended
  * to be called from the vm_pagezero process only and outside of Giant. No
  * lock is required.
  */
 static void
 mmu_booke_zero_page_idle(mmu_t mmu, vm_page_t m)
 {
 	vm_offset_t va;
 
 	/* Uses its own window (zero_page_idle_va) and takes no mutex. */
 	va = zero_page_idle_va;
 	mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 	bzero((caddr_t)va, PAGE_SIZE);
 	mmu_booke_kremove(mmu, va);
 }
 
 /*
  * Report whether any mapping of the given managed page has its modified
  * bit set.  The page's object must be write-locked.
  */
 static boolean_t
 mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
 {
 	pv_entry_t pv;
 	pte_t *pte;
 	boolean_t modified;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_is_modified: page %p is not managed", m));
 
 	/*
 	 * With the object locked and the page not exclusive busied,
 	 * PGA_WRITEABLE cannot be set concurrently.  A page without that
 	 * flag has no PTE that could be modified, so the scan is skipped.
 	 */
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return (FALSE);
 
 	modified = FALSE;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		pte = pte_find(mmu, pv->pv_pmap, pv->pv_va);
 		if (pte != NULL && PTE_ISVALID(pte) && PTE_ISMODIFIED(pte))
 			modified = TRUE;
 		PMAP_UNLOCK(pv->pv_pmap);
 
 		/* One modified mapping is enough. */
 		if (modified)
 			break;
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (modified);
 }
 
 /*
  * Return whether or not the specified virtual address is eligible
  * for prefault.
  *
  * This implementation unconditionally answers FALSE.
  */
 static boolean_t
 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr)
 {
 
 	return (FALSE);
 }
 
 /*
  * Report whether any mapping of the given managed page has its referenced
  * bit set.
  */
 static boolean_t
 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m)
 {
 	pv_entry_t pv;
 	pte_t *pte;
 	boolean_t referenced;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_is_referenced: page %p is not managed", m));
 
 	referenced = FALSE;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		pte = pte_find(mmu, pv->pv_pmap, pv->pv_va);
 		if (pte != NULL && PTE_ISVALID(pte) && PTE_ISREFERENCED(pte))
 			referenced = TRUE;
 		PMAP_UNLOCK(pv->pv_pmap);
 
 		/* One referenced mapping is enough. */
 		if (referenced)
 			break;
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (referenced);
 }
 
 /*
  * Clear the modify bits on the specified physical page.
  *
  * For every valid mapping that is writable or modified, the TLB0 entry is
  * flushed and the write, modified and referenced bits are cleared in the
  * PTE.
  */
 static void
 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
 {
 	pte_t *pte;
 	pv_entry_t pv;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT(!vm_page_xbusied(m),
 	    ("mmu_booke_clear_modify: page %p is exclusive busied", m));
 
 	/*
 	 * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
 	 * If the object containing the page is locked and the page is not
 	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 		    PTE_ISVALID(pte)) {
 			mtx_lock_spin(&tlbivax_mutex);
 			tlb_miss_lock();
 			
 			/* Only touch the TLB and PTE when there is something to clear. */
 			if (pte->flags & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
 				tlb0_flush_entry(pv->pv_va);
 				pte->flags &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
 				    PTE_REFERENCED);
 			}
 
 			tlb_miss_unlock();
 			mtx_unlock_spin(&tlbivax_mutex);
 		}
 		PMAP_UNLOCK(pv->pv_pmap);
 	}
 	rw_wunlock(&pvh_global_lock);
 }
 
 /*
  * Return a count of reference bits for a page, clearing those bits.
  * It is not necessary for every reference bit to be cleared, but it
  * is necessary that 0 only be returned when there are truly no
  * reference bits set.
  *
  * The scan stops early once more than four referenced mappings have been
  * found and cleared.
  *
  * XXX: The exact number of bits to check and clear is a matter that
  * should be tested and standardized at some point in the future for
  * optimal aging of shared pages.
  */
 static int
 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m)
 {
 	pte_t *pte;
 	pv_entry_t pv;
 	int count;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_ts_referenced: page %p is not managed", m));
 	count = 0;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 		    PTE_ISVALID(pte)) {
 			if (PTE_ISREFERENCED(pte)) {
 				mtx_lock_spin(&tlbivax_mutex);
 				tlb_miss_lock();
 
 				tlb0_flush_entry(pv->pv_va);
 				pte->flags &= ~PTE_REFERENCED;
 
 				tlb_miss_unlock();
 				mtx_unlock_spin(&tlbivax_mutex);
 
 				/*
 				 * The early exit skips the unlock at the
 				 * bottom of the loop, so drop the pmap lock
 				 * here before breaking out.
 				 */
 				if (++count > 4) {
 					PMAP_UNLOCK(pv->pv_pmap);
 					break;
 				}
 			}
 		}
 		PMAP_UNLOCK(pv->pv_pmap);
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (count);
 }
 
 /*
  * Drop the wired attribute from every valid mapping in [sva, eva) of the
  * given pmap.  A valid mapping in the range that is not wired is treated as
  * a fatal inconsistency; invalid mappings are skipped.
  *
  * Wiring is tracked purely in software in the PTE flags, so no TLB
  * invalidation is needed.
  */
 static void
 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t va;
 	pte_t *pte;
 
 	PMAP_LOCK(pmap);
 	for (va = sva; va < eva; va += PAGE_SIZE) {
 		pte = pte_find(mmu, pmap, va);
 		if (pte == NULL || !PTE_ISVALID(pte))
 			continue;
 
 		if (!PTE_ISWIRED(pte))
 			panic("mmu_booke_unwire: pte %p isn't wired",
 			    pte);
 		pte->flags &= ~PTE_WIRED;
 		pmap->pm_stats.wired_count--;
 	}
 	PMAP_UNLOCK(pmap);
 }
 
 /*
  * Return true if the given pmap owns one of the first 16 pv entries on the
  * page's pv list.  The limit is a heuristic and may change; it only needs
  * to yield true for a small subset of pmaps for page aging to work.
  */
 static boolean_t
 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
 {
 	pv_entry_t pv;
 	int checked;
 	boolean_t found;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("mmu_booke_page_exists_quick: page %p is not managed", m));
 
 	checked = 0;
 	found = FALSE;
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		found = (pv->pv_pmap == pmap) ? TRUE : FALSE;
 		checked++;
 		/* Stop on a hit or once the inspection budget is used up. */
 		if (found || checked >= 16)
 			break;
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (found);
 }
 
 /*
  * Count the wired mappings of the given physical page.  Unmanaged pages
  * always yield 0.
  */
 static int
 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m)
 {
 	pv_entry_t pv;
 	pte_t *pte;
 	int nwired;
 
 	nwired = 0;
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (nwired);
 
 	rw_wlock(&pvh_global_lock);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 		PMAP_LOCK(pv->pv_pmap);
 		pte = pte_find(mmu, pv->pv_pmap, pv->pv_va);
 		if (pte != NULL && PTE_ISVALID(pte) && PTE_ISWIRED(pte))
 			nwired++;
 		PMAP_UNLOCK(pv->pv_pmap);
 	}
 	rw_wunlock(&pvh_global_lock);
 	return (nwired);
 }
 
 /*
  * Check whether the physical range pa/size is covered by one of the TLB1
  * entries in use.  Returns 0 as soon as tlb1_iomapped() reports success
  * for an entry, EFAULT if none does.
  */
 static int
 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
 {
 	vm_offset_t va;
 	int idx;
 
 	/*
 	 * This currently does not work for entries that
 	 * overlap TLB1 entries.
 	 */
 	idx = 0;
 	while (idx < tlb1_idx) {
 		if (tlb1_iomapped(idx, pa, size, &va) == 0)
 			return (0);
 		idx++;
 	}
 
 	return (EFAULT);
 }
 
 /*
  * Provide a virtual address through which the dump code can read the
  * physical range [pa, pa + sz).
  *
  * For minidumps pa already is a virtual address and is handed back
  * unchanged.  For full dumps the 256MB-aligned region containing pa is
  * mapped with a TLB1 entry at virtual address 256MB, plus a second 256MB
  * entry right behind it when the range runs past the first region.  The
  * chosen virtual address is stored in *va.
  */
 void
 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va)
 {
 	vm_paddr_t ppa;
 	vm_offset_t mapva, ofs;
 	vm_size_t gran;
 
 	/* Minidumps are based on virtual memory addresses. */
 	if (do_minidump) {
 		*va = (void *)pa;
 		return;
 	}
 
 	/* Raw physical memory dumps don't have a virtual address. */
 	/* We always map a 256MB page at 256M. */
 	gran = 256 * 1024 * 1024;
 	ppa = pa & ~(gran - 1);
 	ofs = pa - ppa;
 	mapva = gran;
 
 	/*
 	 * NOTE(review): the returned address is the start of the 256MB
 	 * window and does not include 'ofs' -- confirm whether callers add
 	 * the offset themselves.
 	 */
 	*va = (void *)mapva;
 
 	/*
 	 * Map at the chosen virtual address itself.  'va' is only the
 	 * caller's out-pointer and must not be used as the mapping address.
 	 */
 	tlb1_set_entry(mapva, ppa, gran, _TLB_ENTRY_IO);
 
 	/* The range spills into the next 256MB region: map that as well. */
 	if (sz > (gran - ofs))
 		tlb1_set_entry(mapva + gran, ppa + gran, gran,
 		    _TLB_ENTRY_IO);
 }
 
 /*
  * Undo the mapping set up by mmu_booke_dumpsys_map() for the same pa/sz.
  *
  * Nothing is done for minidumps.  Otherwise the most recently added TLB1
  * entry is cleared and written back, and a second one as well when the
  * range crossed into the following 256MB region.  The va argument is not
  * used.
  */
 void
 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va)
 {
 	vm_paddr_t ppa;
 	vm_offset_t ofs;
 	vm_size_t gran;
 
 	/* Minidumps are based on virtual memory addresses. */
 	/* Nothing to do... */
 	if (do_minidump)
 		return;
 
 	/* Raw physical memory dumps don't have a virtual address. */
 	/* Drop the last TLB1 slot: zero its MAS words and write it out. */
 	tlb1_idx--;
 	tlb1[tlb1_idx].mas1 = 0;
 	tlb1[tlb1_idx].mas2 = 0;
 	tlb1[tlb1_idx].mas3 = 0;
 	tlb1_write_entry(tlb1_idx);
 
 	/* Same size test as in the map routine decides on a second slot. */
 	gran = 256 * 1024 * 1024;
 	ppa = pa & ~(gran - 1);
 	ofs = pa - ppa;
 	if (sz > (gran - ofs)) {
 		tlb1_idx--;
 		tlb1[tlb1_idx].mas1 = 0;
 		tlb1[tlb1_idx].mas2 = 0;
 		tlb1[tlb1_idx].mas3 = 0;
 		tlb1_write_entry(tlb1_idx);
 	}
 }
 
 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
 
 /*
  * Fill in dump_map[] with the segments to be dumped.
  *
  * For full dumps the entries are the physical memory regions reported by
  * mem_regions().  For minidumps three virtual segments are recorded:
  * the kernel image from _etext to _end, the data_start..data_end area, and
  * the first contiguous run of valid kernel mappings found after that,
  * never extending into the buffer cache range.
  */
 void
 mmu_booke_scan_init(mmu_t mmu)
 {
 	vm_offset_t va;
 	pte_t *pte;
 	int i;
 
 	if (!do_minidump) {
 		/* Initialize phys. segments for dumpsys(). */
 		memset(&dump_map, 0, sizeof(dump_map));
 		mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions,
 		    &availmem_regions_sz);
 		for (i = 0; i < physmem_regions_sz; i++) {
 			dump_map[i].pa_start = physmem_regions[i].mr_start;
 			dump_map[i].pa_size = physmem_regions[i].mr_size;
 		}
 		return;
 	}
 
 	/* Virtual segments for minidumps: */
 	memset(&dump_map, 0, sizeof(dump_map));
 
 	/* 1st: kernel .data and .bss. */
 	dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
 	dump_map[0].pa_size =
 	    round_page((uintptr_t)_end) - dump_map[0].pa_start;
 
 	/* 2nd: msgbuf and tables (see pmap_bootstrap()). */
 	dump_map[1].pa_start = data_start;
 	dump_map[1].pa_size = data_end - data_start;
 
 	/* 3rd: kernel VM. */
 	va = dump_map[1].pa_start + dump_map[1].pa_size;
 	/* Find start of next chunk (from va). */
 	while (va < virtual_end) {
 		/* Don't dump the buffer cache. */
 		if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
 			va = kmi.buffer_eva;
 			continue;
 		}
 		pte = pte_find(mmu, kernel_pmap, va);
 		if (pte != NULL && PTE_ISVALID(pte))
 			break;
 		va += PAGE_SIZE;
 	}
 	/* dump_map[2] stays zeroed when no valid page was found. */
 	if (va < virtual_end) {
 		dump_map[2].pa_start = va;
 		va += PAGE_SIZE;
 		/* Find last page in chunk. */
 		while (va < virtual_end) {
 			/* Don't run into the buffer cache. */
 			if (va == kmi.buffer_sva)
 				break;
 			pte = pte_find(mmu, kernel_pmap, va);
 			if (pte == NULL || !PTE_ISVALID(pte))
 				break;
 			va += PAGE_SIZE;
 		}
 		dump_map[2].pa_size = va - dump_map[2].pa_start;
 	}
 }
 
 /*
  * Map a set of physical memory pages into the kernel virtual address space.
  * Return a pointer to where it is mapped. This routine is intended to be used
  * for mapping device memory, NOT real memory.
  *
  * Equivalent to mmu_booke_mapdev_attr() with VM_MEMATTR_DEFAULT.
  */
 static void *
 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
 {
 
 	return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT));
 }
 
 /*
  * Map the physical range pa/size for device access with memory attribute
  * ma and return the virtual address of the mapping.
  *
  * With the default attribute, an existing valid TLB1 entry that fully
  * covers the range is reused.  Otherwise the size is rounded up to a page
  * multiple, a virtual address is chosen (identity mapped or taken from
  * tlb1_map_base) and the range is covered with one or more new TLB1
  * entries.
  */
 static void *
 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
 {
 	void *res;
 	uintptr_t va;
 	vm_size_t sz;
 	int i;
 
 	/*
 	 * Check if this is premapped in TLB1. Note: this should probably also
 	 * check whether a sequence of TLB1 entries exist that match the
 	 * requirement, but now only checks the easy case.
 	 */
 	if (ma == VM_MEMATTR_DEFAULT) {
 		for (i = 0; i < tlb1_idx; i++) {
 			if (!(tlb1[i].mas1 & MAS1_VALID))
 				continue;
 			if (pa >= tlb1[i].phys &&
 			    (pa + size) <= (tlb1[i].phys + tlb1[i].size))
 				return (void *)(tlb1[i].virt +
 				    (pa - tlb1[i].phys));
 		}
 	}
 
 	size = roundup(size, PAGE_SIZE);
 
 	/*
 	 * We leave a hole for device direct mapping between the maximum user
 	 * address (0x8000000) and the minimum KVA address (0xc0000000). If
 	 * devices are in there, just map them 1:1. If not, map them to the
 	 * device mapping area about VM_MAX_KERNEL_ADDRESS. These mapped
 	 * addresses should be pulled from an allocator, but since we do not
 	 * ever free TLB1 entries, it is safe just to increment a counter.
 	 * Note that there isn't a lot of address space here (128 MB) and it
 	 * is not at all difficult to imagine running out, since that is a 4:1
 	 * compression from the 0xc0000000 - 0xf0000000 address space that gets
 	 * mapped there.
 	 */
 	if (pa >= (VM_MAXUSER_ADDRESS + PAGE_SIZE) &&
 	    (pa + size - 1) < VM_MIN_KERNEL_ADDRESS) 
 		va = pa;
 	else
 		va = atomic_fetchadd_int(&tlb1_map_base, size);
 	res = (void *)va;
 
 	/*
 	 * Cover the range piecewise: each pass maps the largest chunk whose
 	 * size is a power of two with an even exponent (i.e. a power of
 	 * four) not exceeding what is left.
 	 */
 	do {
 		sz = 1 << (ilog2(size) & ~1);
 		if (bootverbose)
 			printf("Wiring VA=%x to PA=%x (size=%x), "
 			    "using TLB1[%d]\n", va, pa, sz, tlb1_idx);
 		tlb1_set_entry(va, pa, sz, tlb_calc_wimg(pa, ma));
 		size -= sz;
 		pa += sz;
 		va += sz;
 	} while (size > 0);
 
 	return (res);
 }
 
 /*
  * 'Unmap' a range mapped by mmu_booke_mapdev().
  *
  * This is a no-op unless SUPPORTS_SHRINKING_TLB1 is defined, in which case
  * the page-rounded range is handed to kva_free() when va lies within the
  * kernel virtual address bounds.
  */
 static void
 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
 {
 #ifdef SUPPORTS_SHRINKING_TLB1
 	vm_offset_t base, offset;
 
 	/*
 	 * Unmap only if this is inside kernel virtual space.
 	 */
 	if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
 		/* Widen the range to whole pages before freeing it. */
 		base = trunc_page(va);
 		offset = va & PAGE_MASK;
 		size = roundup(offset + size, PAGE_SIZE);
 		kva_free(base, size);
 	}
 #endif
 }
 
 /*
  * mmu_booke_object_init_pt is the hook for preloading the ptes for a given
  * object into the specified pmap, which would eliminate the blast of soft
  * faults on process startup and immediately after an mmap.
  *
  * This implementation enters no mappings: it only asserts that the object
  * is write-locked and is of type OBJT_DEVICE or OBJT_SG.
  */
 static void
 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr,
     vm_object_t object, vm_pindex_t pindex, vm_size_t size)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("mmu_booke_object_init_pt: non-device object"));
 }
 
 /*
  * Perform the pmap work for mincore.
  *
  * Not implemented: always returns 0 and leaves *locked_pa untouched.
  */
 static int
 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr,
     vm_paddr_t *locked_pa)
 {
 
 	/* XXX: this should be implemented at some point */
 	return (0);
 }
 
 /**************************************************************************/
 /* TID handling */
 /**************************************************************************/
 
 /*
  * Allocate a TID for the pmap on the current CPU. If necessary, steal one
  * from someone else; a stolen TID is flushed from TLB0 before being reused.
  *
  * TIDs are handed out round-robin from the per-CPU tid_next counter,
  * wrapping from TID_MAX back to TID_MIN.  Returns the TID now assigned to
  * the pmap on this CPU.
  */
 static tlbtid_t
 tid_alloc(pmap_t pmap)
 {
 	tlbtid_t tid;
 	int thiscpu;
 
 	KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));
 
 	CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);
 
 	thiscpu = PCPU_GET(cpuid);
 
 	/* Pick the next candidate and advance the per-CPU cursor. */
 	tid = PCPU_GET(tid_next);
 	if (tid > TID_MAX)
 		tid = TID_MIN;
 	PCPU_SET(tid_next, tid + 1);
 
 	/* If we are stealing TID then clear the relevant pmap's field */
 	if (tidbusy[thiscpu][tid] != NULL) {
 
 		CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);
 		
 		tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;
 
 		/* Flush all entries from TLB0 matching this TID. */
 		tid_flush(tid, tlb0_ways, tlb0_entries_per_way);
 	}
 
 	/* Record the new owner in both directions. */
 	tidbusy[thiscpu][tid] = pmap;
 	pmap->pm_tid[thiscpu] = tid;
 	__asm __volatile("msync; isync");
 
 	CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
 	    PCPU_GET(tid_next));
 
 	return (tid);
 }
 
 /**************************************************************************/
 /* TLB0 handling */
 /**************************************************************************/
 
 /*
  * Print one TLB entry, given its index and raw MAS register values, through
  * debugf().  The two-character tag shows 'V' for a valid entry and 'P' for
  * a protected (IPROT) one.
  */
 static void
 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
     uint32_t mas7)
 {
 	char desc[3];
 	unsigned int tsize;
 	vm_size_t size;
 	tlbtid_t tid;
 	int as;
 
 	desc[0] = (mas1 & MAS1_VALID) ? 'V' : ' ';
 	desc[1] = (mas1 & MAS1_IPROT) ? 'P' : ' ';
 	desc[2] = '\0';
 
 	as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
 	tid = MAS1_GETTID(mas1);
 
 	/* A zero TSIZE field is reported as size 0. */
 	tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 	if (tsize != 0)
 		size = tsize2size(tsize);
 	else
 		size = 0;
 
 	debugf("%3d: (%s) [AS=%d] "
 	    "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x "
 	    "mas2(va) = 0x%08x mas3(pa) = 0x%08x mas7 = 0x%08x\n",
 	    i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7);
 }
 
 /*
  * Convert a TLB0 virtual address and way number into a tlb0[] table index:
  * the way selects a block of TLB0_ENTRIES_PER_WAY slots and the entry-index
  * bits of va select the slot within it.
  */
 static inline unsigned int
 tlb0_tableidx(vm_offset_t va, unsigned int way)
 {
 	unsigned int entry;
 
 	entry = (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
 	return (way * TLB0_ENTRIES_PER_WAY + entry);
 }
 
 /*
  * Invalidate TLB0 entry.
  *
  * Issues tlbivax for the page containing va, followed by the
  * synchronization instructions.  The caller must hold tlbivax_mutex
  * (asserted).
  */
 static inline void
 tlb0_flush_entry(vm_offset_t va)
 {
 
 	CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);
 
 	mtx_assert(&tlbivax_mutex, MA_OWNED);
 
 	/* Only the effective page number bits of va are passed to tlbivax. */
 	__asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
 	__asm __volatile("isync; msync");
 	__asm __volatile("tlbsync; msync");
 
 	CTR1(KTR_PMAP, "%s: e", __func__);
 }
 
 /*
  * Print out contents of the MAS registers for each TLB0 entry.
  *
  * Every (way, entry index) pair is selected through MAS0/MAS2, read back
  * with tlbre and handed to tlb_print_entry() together with its tlb0[] table
  * index.
  */
 void
 tlb0_print_tlbentries(void)
 {
 	uint32_t mas0, mas1, mas2, mas3, mas7;
 	int way, slot, tblidx;
 
 	debugf("TLB0 entries:\n");
 	for (way = 0; way < TLB0_WAYS; way++) {
 		for (slot = 0; slot < TLB0_ENTRIES_PER_WAY; slot++) {
 			/* Select TLB0 and the way to read. */
 			mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
 			mtspr(SPR_MAS0, mas0);
 			__asm __volatile("isync");
 
 			/* The entry index within the way goes in via MAS2. */
 			mas2 = slot << MAS2_TLB0_ENTRY_IDX_SHIFT;
 			mtspr(SPR_MAS2, mas2);
 
 			__asm __volatile("isync; tlbre");
 
 			/* Collect what tlbre left in the MAS registers. */
 			mas1 = mfspr(SPR_MAS1);
 			mas2 = mfspr(SPR_MAS2);
 			mas3 = mfspr(SPR_MAS3);
 			mas7 = mfspr(SPR_MAS7);
 
 			tblidx = tlb0_tableidx(mas2, way);
 			tlb_print_entry(tblidx, mas1, mas2, mas3, mas7);
 		}
 	}
 }
 
 /**************************************************************************/
 /* TLB1 handling */
 /**************************************************************************/
 
 /*
  * TLB1 mapping notes:
  *
  * TLB1[0]	Kernel text and data.
  * TLB1[1-15]	Additional kernel text and data mappings (if required), PCI
  *		windows, other devices mappings.
  */
 
 /*
  * Write the in-RAM entry tlb1[idx] to TLB1 hardware.
  *
  * MAS0 selects TLB1 and the entry number, MAS1-MAS3 are loaded from
  * tlb1[idx], and MAS7 is written as zero (32 bit pa: the high-order RPN
  * bits are cleared) before tlbwe commits the entry.  Each SPR write is
  * followed by an isync.
  */
 static void
 tlb1_write_entry(unsigned int idx)
 {
 	uint32_t sel, rpn_hi;
 
 	/* Clear high order RPN bits */
 	rpn_hi = 0;
 
 	/* Select entry */
 	sel = MAS0_TLBSEL(1) | MAS0_ESEL(idx);
 
 	mtspr(SPR_MAS0, sel);
 	__asm __volatile("isync");
 	mtspr(SPR_MAS1, tlb1[idx].mas1);
 	__asm __volatile("isync");
 	mtspr(SPR_MAS2, tlb1[idx].mas2);
 	__asm __volatile("isync");
 	mtspr(SPR_MAS3, tlb1[idx].mas3);
 	__asm __volatile("isync");
 	mtspr(SPR_MAS7, rpn_hi);
 	__asm __volatile("isync; tlbwe; isync; msync");
 }
 
 /*
  * Return the largest uint value log such that 2^log <= num.
  *
  * Computed as 31 minus the result of the cntlzw instruction applied to num.
  * NOTE(review): for num == 0 no such log exists; the return value then
  * depends on what cntlzw yields for zero (presumably 32, which makes this
  * return (unsigned int)-1) -- confirm before relying on it.
  */
 static unsigned int
 ilog2(unsigned int num)
 {
 	int lz;
 
 	/* lz receives the cntlzw result for num. */
 	__asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num));
 	return (31 - lz);
 }
 
 /*
  * Convert TLB TSIZE value to mapped region size in bytes.
  *
  *	size = 4^tsize KB = 4^tsize * 2^10 = 2^(2 * tsize + 10)
  */
 static vm_size_t
 tsize2size(unsigned int tsize)
 {
 	const unsigned int kb_shift = 2 * tsize;	/* 4^tsize == 2^(2 * tsize) */
 
 	return ((1 << kb_shift) * 1024);
 }
 
 /*
  * Convert region size (must be power of 4) to TLB TSIZE value.
  *
  * Half of log2(size), minus 5.
  */
 static unsigned int
 size2tsize(vm_size_t size)
 {
 	unsigned int log;
 
 	log = ilog2(size);
 	return (log / 2 - 5);
 }
 
 /*
  * Register permanent kernel mapping in TLB1.
  *
  * Entries are created starting from index 0 (current free entry is
  * kept in tlb1_idx) and are not supposed to be invalidated.
  *
  * The entry maps size bytes at va to pa with the given MAS2 flags, the
  * kernel TID and supervisor read/write/execute permission.  Returns 0 on
  * success, or -1 when the claimed index is not below TLB1_ENTRIES (tlb1_idx
  * has been incremented in that case too).
  */
 static int
 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
     uint32_t flags)
 {
 	uint32_t as_bits, tid_bits;
 	int slot, tsize;
 
 	/* Claim the next free slot. */
 	slot = atomic_fetchadd_int(&tlb1_idx, 1);
 	if (slot >= TLB1_ENTRIES) {
 		printf("tlb1_set_entry: TLB1 full!\n");
 		return (-1);
 	}
 
 	/* Convert size to TSIZE */
 	tsize = size2tsize(size);
 
 	tid_bits = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
 	/* XXX TS is hard coded to 0 for now as we only use single address space */
 	as_bits = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
 
 	/*
 	 * Atomicity is preserved by the atomic increment above since nothing
 	 * is ever removed from tlb1.
 	 */
 
 	tlb1[slot].virt = va;
 	tlb1[slot].phys = pa;
 	tlb1[slot].size = size;
 	tlb1[slot].mas1 = MAS1_VALID | MAS1_IPROT | as_bits | tid_bits |
 	    ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
 	tlb1[slot].mas2 = (va & MAS2_EPN_MASK) | flags;
 
 	/* Set supervisor RWX permission bits */
 	tlb1[slot].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
 
 	/* Push the freshly filled table entry out to the hardware. */
 	tlb1_write_entry(slot);
 
 	/*
 	 * XXX in general TLB1 updates should be propagated between CPUs,
 	 * since current design assumes to have the same TLB1 set-up on all
 	 * cores.
 	 */
 	return (0);
 }
 
 /*
  * Map in contiguous RAM region into the TLB1 using maximum of
  * KERNEL_REGION_MAX_TLB_ENTRIES entries.
  *
  * If necessary round up last entry size and return total size
  * used by all allocated entries.
  *
  * size is first rounded up to the next 1M.  Page sizes are chosen greedily,
  * starting at 64M and shrinking by factors of 4; when the entry budget is
  * exhausted before the region is covered, the trailing entries of the
  * smallest size are replaced by one entry of the next larger size.  va and
  * pa are aligned up to the size of the first entry before mapping.  The
  * return value is the distance from the original va to the end of the last
  * mapping.
  */
 vm_size_t
 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 {
 	vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES];
 	vm_size_t mapped, pgsz, base, mask;
 	int idx, nents;
 
 	/* Round up to the next 1M */
 	size = (size + (1 << 20) - 1) & ~((1 << 20) - 1);
 
 	mapped = 0;
 	idx = 0;
 	base = va;
 	pgsz = 64*1024*1024;
 	while (mapped < size) {
 		while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) {
 			while (pgsz > (size - mapped))
 				pgsz >>= 2;
 			pgs[idx++] = pgsz;
 			mapped += pgsz;
 		}
 
 		/* We under-map. Correct for this. */
 		if (mapped < size) {
 			/*
 			 * Drop the trailing entries of the current size.  The
 			 * idx > 0 guard keeps us from reading pgs[-1] when
 			 * every collected entry has this same size.
 			 */
 			while (idx > 0 && pgs[idx - 1] == pgsz) {
 				idx--;
 				mapped -= pgsz;
 			}
 			/* XXX We may increase beyond our starting point. */
 			pgsz <<= 2;
 			pgs[idx++] = pgsz;
 			mapped += pgsz;
 		}
 	}
 
 	nents = idx;
 	mask = pgs[0] - 1;
 	/* Align address to the boundary */
 	if (va & mask) {
 		va = (va + mask) & ~mask;
 		pa = (pa + mask) & ~mask;
 	}
 
 	for (idx = 0; idx < nents; idx++) {
 		pgsz = pgs[idx];
 		debugf("%u: %x -> %x, size=%x\n", idx, pa, va, pgsz);
 		tlb1_set_entry(va, pa, pgsz, _TLB_ENTRY_MEM);
 		pa += pgsz;
 		va += pgsz;
 	}
 
 	/* Measured from the caller's va, so it includes any alignment shift. */
 	mapped = (va - base);
 	printf("mapped size 0x%08x (wasted space 0x%08x)\n",
 	    mapped, mapped - size);
 	return (mapped);
 }
 
 /*
  * TLB1 initialization routine, to be called after the very first
  * assembler level setup done in locore.S.
  *
  * Reads the entries already present in hardware (their count comes from
  * bootinfo when available, otherwise one entry is assumed) into the tlb1[]
  * table, records kernload from entry 0 and accumulates kernsize, then
  * writes the remaining tlb1[] slots to hardware and sets the MAS4 defaults.
  */
 void
 tlb1_init(void)
 {
 	uint32_t mas0, mas1, mas2, mas3;
 	uint32_t tsz;
 	u_int i;
 
 	/*
 	 * When bootinfo is present and its first element is not 1, the
 	 * number of entries is the 16-bit value at bootinfo + 8.
 	 */
 	if (bootinfo != NULL && bootinfo[0] != 1) {
 		tlb1_idx = *((uint16_t *)(bootinfo + 8));
 	} else
 		tlb1_idx = 1;
 
 	/* The first entry/entries are used to map the kernel. */
 	for (i = 0; i < tlb1_idx; i++) {
 		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
 		mtspr(SPR_MAS0, mas0);
 		__asm __volatile("isync; tlbre");
 
 		mas1 = mfspr(SPR_MAS1);
 		if ((mas1 & MAS1_VALID) == 0)
 			continue;
 
 		mas2 = mfspr(SPR_MAS2);
 		mas3 = mfspr(SPR_MAS3);
 
 		/* Mirror the hardware entry in the in-RAM table. */
 		tlb1[i].mas1 = mas1;
 		tlb1[i].mas2 = mas2;
 		tlb1[i].mas3 = mas3;
 		tlb1[i].virt = mas2 & MAS2_EPN_MASK;
 		tlb1[i].phys = mas3 & MAS3_RPN;
 
 		if (i == 0)
 			kernload = tlb1[i].phys;
 
 		/* A TSIZE of zero counts as size 0. */
 		tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 		tlb1[i].size = (tsz > 0) ? tsize2size(tsz) : 0;
 		kernsize += tlb1[i].size;
 	}
 
 #ifdef SMP
 	bp_ntlb1s = tlb1_idx;
 #endif
 
 	/* Purge the remaining entries */
 	for (i = tlb1_idx; i < TLB1_ENTRIES; i++)
 		tlb1_write_entry(i);
 
 	/* Setup TLB miss defaults */
 	set_mas4_defaults();
 }
 
 /*
  * Map the physical range [pa, pa + size) for early I/O access and return
  * the virtual address corresponding to pa.  Must only be used before pmap
  * is bootstrapped (asserted).
  *
  * If a valid tlb1[] entry below tlb1_idx already covers the whole range,
  * the address inside that mapping is returned.  Otherwise new TLB1 entries
  * are created at tlb1_map_base, which is advanced past them.
  */
 vm_offset_t 
 pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
 {
 	vm_paddr_t pa_base;
 	vm_offset_t va, chunk;
 	int i;
 
 	KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
 
 	/* Reuse an existing mapping when one contains the whole range. */
 	for (i = 0; i < tlb1_idx; i++) {
 		if ((tlb1[i].mas1 & MAS1_VALID) &&
 		    pa >= tlb1[i].phys &&
 		    (pa + size) <= (tlb1[i].phys + tlb1[i].size))
 			return (tlb1[i].virt + (pa - tlb1[i].phys));
 	}
 
 	/* Extend the range to whole pages. */
 	pa_base = trunc_page(pa);
 	size = roundup(size + (pa - pa_base), PAGE_SIZE);
 
 	/* Align the map base to the size of the first mapping made below. */
 	tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
 	va = tlb1_map_base + (pa - pa_base);
 
 	/* Each pass maps the largest power of 4 not exceeding what is left. */
 	do {
 		chunk = 1 << (ilog2(size) & ~1);
 		tlb1_set_entry(tlb1_map_base, pa_base, chunk, _TLB_ENTRY_IO);
 		tlb1_map_base += chunk;
 		pa_base += chunk;
 		size -= chunk;
 	} while (size > 0);
 
 #ifdef SMP
 	bp_ntlb1s = tlb1_idx;
 #endif
 
 	return (va);
 }
 
 /*
  * Setup MAS4 defaults.
  * These values are loaded to MAS0-2 on a TLB miss.
  *
  * The defaults select TLB0 and a 4K TSIZE; MAS4_MD is added on SMP kernels.
  */
 static void
 set_mas4_defaults(void)
 {
 	uint32_t defaults;
 
 	/* Defaults: TLB0, PID0, TSIZED=4K */
 	defaults = MAS4_TLBSELD0 |
 	    ((TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK);
 #ifdef SMP
 	defaults |= MAS4_MD;
 #endif
 	mtspr(SPR_MAS4, defaults);
 	__asm __volatile("isync");
 }
 
 /*
  * Print out contents of the MAS registers for each TLB1 entry.
  *
  * Each of the TLB1_ENTRIES hardware entries is selected through MAS0 and
  * read back with tlbre.
  */
 void
 tlb1_print_tlbentries(void)
 {
 	uint32_t mas0, mas1, mas2, mas3, mas7;
 	int entry;
 
 	debugf("TLB1 entries:\n");
 	for (entry = 0; entry < TLB1_ENTRIES; entry++) {
 		/* Select TLB1 and the entry to read. */
 		mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(entry);
 		mtspr(SPR_MAS0, mas0);
 
 		__asm __volatile("isync; tlbre");
 
 		mas1 = mfspr(SPR_MAS1);
 		mas2 = mfspr(SPR_MAS2);
 		mas3 = mfspr(SPR_MAS3);
 		mas7 = mfspr(SPR_MAS7);
 
 		tlb_print_entry(entry, mas1, mas2, mas3, mas7);
 	}
 }
 
 /*
  * Print out contents of the in-ram tlb1 table.
  *
  * The table keeps no MAS7 value, so 0 is printed in its place.
  */
 void
 tlb1_print_entries(void)
 {
 	int entry;
 
 	debugf("tlb1[] table entries:\n");
 	for (entry = 0; entry < TLB1_ENTRIES; entry++) {
 		tlb_print_entry(entry, tlb1[entry].mas1, tlb1[entry].mas2,
 		    tlb1[entry].mas3, 0);
 	}
 }
 
 /*
  * Return 0 if the physical IO range is encompassed by one of the
  * TLB1 entries, otherwise return related error code.
  *
  * i is the tlb1[] index to check and [pa, pa + size) the physical range.
  * On success *va receives the virtual address that corresponds to pa; in
  * every other case *va is NULL.
  *
  * Errors: EINVAL if the entry is not valid, EPERM if it is not
  * cache-inhibited and guarded with supervisor read/write permission,
  * ERANGE if the range is not fully inside the entry.
  */
 static int
 tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
 {
 	uint32_t prot;
 	vm_paddr_t pa_start;
 	vm_paddr_t pa_end;
 	unsigned int entry_tsize;
 	vm_size_t entry_size;
 
 	*va = (vm_offset_t)NULL;
 
 	/* Skip invalid entries */
 	if (!(tlb1[i].mas1 & MAS1_VALID))
 		return (EINVAL);
 
 	/*
 	 * The entry must be cache-inhibited, guarded, and r/w
 	 * so it can function as an i/o page
 	 */
 	prot = tlb1[i].mas2 & (MAS2_I | MAS2_G);
 	if (prot != (MAS2_I | MAS2_G))
 		return (EPERM);
 
 	prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW);
 	if (prot != (MAS3_SR | MAS3_SW))
 		return (EPERM);
 
 	/* The address should be within the entry range. */
 	entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 	KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
 
 	/*
 	 * pa_end is exclusive (one past the last mapped byte) so that it can
 	 * be compared directly with the exclusive end pa + size; a range
 	 * that ends exactly at the end of the entry is accepted.
 	 */
 	entry_size = tsize2size(entry_tsize);
 	pa_start = tlb1[i].mas3 & MAS3_RPN;
 	pa_end = pa_start + entry_size;
 
 	if ((pa < pa_start) || ((pa + size) > pa_end))
 		return (ERANGE);
 
 	/* Return virtual address of this mapping. */
 	*va = (tlb1[i].mas2 & MAS2_EPN_MASK) + (pa - pa_start);
 	return (0);
 }
Index: head/sys/sparc64/sparc64/exception.S
===================================================================
--- head/sys/sparc64/sparc64/exception.S	(revision 285626)
+++ head/sys/sparc64/sparc64/exception.S	(revision 285627)
@@ -1,3082 +1,3083 @@
 /*-
  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	BSDI $Id: locore.s,v 1.36.2.15 1999/08/23 22:34:41 cp Exp $
  */
 /*-
  * Copyright (c) 2001 Jake Burkholder.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
+#include "opt_kstack_pages.h"
 
 #include <machine/asi.h>
 #include <machine/asmacros.h>
 #include <machine/frame.h>
 #include <machine/fsr.h>
 #include <machine/intr_machdep.h>
 #include <machine/ktr.h>
 #include <machine/pcb.h>
 #include <machine/pstate.h>
 #include <machine/trap.h>
 #include <machine/tsb.h>
 #include <machine/tstate.h>
 #include <machine/utrap.h>
 #include <machine/wstate.h>
 
 #include "assym.s"
 
 /*
  * TSB-related constants.  All of them are defined as 0x0 here.
  * NOTE(review): presumably placeholders whose real values are supplied
  * elsewhere (e.g. patched in at run time) -- confirm.
  */
 #define	TSB_ASI			0x0
 #define	TSB_KERNEL		0x0
 #define	TSB_KERNEL_MASK		0x0
 #define	TSB_KERNEL_PHYS		0x0
 #define	TSB_KERNEL_PHYS_END	0x0
 #define	TSB_QUAD_LDD		0x0
 
 	.register %g2,#ignore
 	.register %g3,#ignore
 	.register %g6,#ignore
 	.register %g7,#ignore
 
 /*
  * Atomically set a bit in a TTE.
  *
  * r1 holds the TTE address on entry and is advanced by TTE_DATA.  r2 is
  * loaded from that location, r3 is r2 with "bit" or'ed in, and the CAS at
  * label 9 is retried for as long as r2 and r3 differ after it.  The mov in
  * the branch delay slot copies r3 to r2 on every pass.  "a" and "asi" are
  * handed through to the LD() and CAS() macros.  r1, r2, r3 and the
  * condition codes are modified.
  * NOTE(review): assuming CAS() expands to a compare-and-swap that returns
  * the previous memory contents in r3, r2 is left holding the TTE data as it
  * was just before the bit was set -- confirm.
  */
 #define	TTE_SET_BIT(r1, r2, r3, bit, a, asi) \
 	add	r1, TTE_DATA, r1 ; \
 	LD(x, a) [r1] asi, r2 ; \
 9:	or	r2, bit, r3 ; \
 	CAS(x, a) [r1] asi, r2, r3 ; \
 	cmp	r2, r3 ; \
 	bne,pn	%xcc, 9b ; \
 	 mov	r3, r2
 
 /* TTE_SET_BIT specialised for the TD_REF and TD_W bits respectively. */
 #define	TTE_SET_REF(r1, r2, r3, a, asi)	TTE_SET_BIT(r1, r2, r3, TD_REF, a, asi)
 #define	TTE_SET_W(r1, r2, r3, a, asi)	TTE_SET_BIT(r1, r2, r3, TD_W, a, asi)
 
 /*
  * Macros for spilling and filling live windows.
  *
  * NOTE: These macros use exactly 16 instructions, and it is assumed that the
  * handler will not use more than 24 instructions total, to leave room for
  * resume vectors which occupy the last 8 instructions.
  *
  * SPILL stores %l0-%l7 followed by %i0-%i7 to 16 consecutive slots of
  * "size" bytes starting at "base", using the store instruction "storer"
  * with the address space operand "asi".
  */
 
 #define	SPILL(storer, base, size, asi) \
 	storer	%l0, [base + (0 * size)] asi ; \
 	storer	%l1, [base + (1 * size)] asi ; \
 	storer	%l2, [base + (2 * size)] asi ; \
 	storer	%l3, [base + (3 * size)] asi ; \
 	storer	%l4, [base + (4 * size)] asi ; \
 	storer	%l5, [base + (5 * size)] asi ; \
 	storer	%l6, [base + (6 * size)] asi ; \
 	storer	%l7, [base + (7 * size)] asi ; \
 	storer	%i0, [base + (8 * size)] asi ; \
 	storer	%i1, [base + (9 * size)] asi ; \
 	storer	%i2, [base + (10 * size)] asi ; \
 	storer	%i3, [base + (11 * size)] asi ; \
 	storer	%i4, [base + (12 * size)] asi ; \
 	storer	%i5, [base + (13 * size)] asi ; \
 	storer	%i6, [base + (14 * size)] asi ; \
 	storer	%i7, [base + (15 * size)] asi
 
 /*
  * Counterpart of SPILL: load %l0-%l7 followed by %i0-%i7 from 16
  * consecutive slots of "size" bytes starting at "base", using the load
  * instruction "loader" with the address space operand "asi".  Exactly 16
  * instructions.
  */
 #define	FILL(loader, base, size, asi) \
 	loader	[base + (0 * size)] asi, %l0 ; \
 	loader	[base + (1 * size)] asi, %l1 ; \
 	loader	[base + (2 * size)] asi, %l2 ; \
 	loader	[base + (3 * size)] asi, %l3 ; \
 	loader	[base + (4 * size)] asi, %l4 ; \
 	loader	[base + (5 * size)] asi, %l5 ; \
 	loader	[base + (6 * size)] asi, %l6 ; \
 	loader	[base + (7 * size)] asi, %l7 ; \
 	loader	[base + (8 * size)] asi, %i0 ; \
 	loader	[base + (9 * size)] asi, %i1 ; \
 	loader	[base + (10 * size)] asi, %i2 ; \
 	loader	[base + (11 * size)] asi, %i3 ; \
 	loader	[base + (12 * size)] asi, %i4 ; \
 	loader	[base + (13 * size)] asi, %i5 ; \
 	loader	[base + (14 * size)] asi, %i6 ; \
 	loader	[base + (15 * size)] asi, %i7
 
 /*
  * Emits a move of "reg" onto itself.
  * NOTE(review): presumably a workaround for a CPU erratum (number 50) that
  * requires the register to be touched after it is written -- confirm.
  */
 #define	ERRATUM50(reg)	mov reg, reg
 
 /*
  * Number of bytes added to the thread's kstack base by KSTACK_CHECK to form
  * the lowest acceptable stack pointer.
  */
 #define	KSTACK_SLOP	1024
 
 /*
  * Sanity check the kernel stack and bail out if it's wrong.
  * XXX: doesn't handle being on the panic stack.
  *
  * %g1 and %g2 are saved in 16 bytes pushed on ASP_REG and restored on the
  * way out.  Control transfers to tl1_kstack_fault when any of these holds:
  *  - %sp + SPOFF has any of the low PTR_SHIFT bits set;
  *  - %sp + SPOFF is not above curthread's TD_KSTACK + KSTACK_SLOP;
  *  - %sp + SPOFF is more than KSTACK_PAGES * PAGE_SIZE above
  *    TD_KSTACK + KSTACK_SLOP.
  * The branches are annulled, so the "inc 16, ASP_REG" in each delay slot
  * (which pops the save area) only runs when the branch is taken; %g1 and
  * %g2 are not reloaded on that path.
  */
 #define	KSTACK_CHECK \
 	dec	16, ASP_REG ; \
 	stx	%g1, [ASP_REG + 0] ; \
 	stx	%g2, [ASP_REG + 8] ; \
 	add	%sp, SPOFF, %g1 ; \
 	andcc	%g1, (1 << PTR_SHIFT) - 1, %g0 ; \
 	bnz,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	ldx	[PCPU(CURTHREAD)], %g2 ; \
 	ldx	[%g2 + TD_KSTACK], %g2 ; \
 	add	%g2, KSTACK_SLOP, %g2 ; \
 	subcc	%g1, %g2, %g1 ; \
 	ble,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	set	KSTACK_PAGES * PAGE_SIZE, %g2 ; \
 	cmp	%g1, %g2 ; \
 	bgt,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG
 
 	/*
 	 * Global label followed by a single nop.
 	 * NOTE(review): presumably marks the start of the trap-level text
 	 * that follows -- confirm against its users.
 	 */
 	.globl	tl_text_begin
 tl_text_begin:
 	nop
 
 /*
  * Kernel stack fault handler; KSTACK_CHECK branches here when the stack
  * pointer fails its sanity checks.
  *
  * The trap level is lowered one step at a time until it is 2 (each step is
  * traced when KTR_TRAP is compiled in).  The register window state is then
  * reset, a stack is set up from ASP_REG, and control passes to tl1_trap
  * with %o0 = T_KSTACK_FAULT | T_KERNEL and %o2 = trap.
  */
 ENTRY(tl1_kstack_fault)
 	rdpr	%tl, %g1
 1:	cmp	%g1, 2
 	be,a	2f
 	 nop
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_kstack_fault: tl=%#lx tpc=%#lx tnpc=%#lx"
 	    , %g2, %g3, %g4, 7, 8, 9)
 	/* One parameter slot per format field: tl, tpc, tnpc. */
 	rdpr	%tl, %g3
 	stx	%g3, [%g2 + KTR_PARM1]
 	rdpr	%tpc, %g3
 	stx	%g3, [%g2 + KTR_PARM2]
 	rdpr	%tnpc, %g3
 	stx	%g3, [%g2 + KTR_PARM3]
 9:
 #endif
 
 	/* Drop one trap level and check again. */
 	sub	%g1, 1, %g1
 	wrpr	%g1, 0, %tl
 	ba,a	%xcc, 1b
 	 nop
 
 2:
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP,
 	    "tl1_kstack_fault: sp=%#lx ks=%#lx cr=%#lx cs=%#lx ow=%#lx ws=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	add	%sp, SPOFF, %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[PCPU(CURTHREAD)], %g2
 	ldx	[%g2 + TD_KSTACK], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%canrestore, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	rdpr	%cansave, %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 	rdpr	%otherwin, %g2
 	stx	%g2, [%g1 + KTR_PARM5]
 	rdpr	%wstate, %g2
 	stx	%g2, [%g1 + KTR_PARM6]
 9:
 #endif
 
 	/* Reset the register window state. */
 	wrpr	%g0, 0, %canrestore
 	wrpr	%g0, 6, %cansave
 	wrpr	%g0, 0, %otherwin
 	wrpr	%g0, WSTATE_KERNEL, %wstate
 
 	/* Build a stack pointer from ASP_REG and clear the frame pointer. */
 	sub	ASP_REG, SPOFF + CCFSZ, %sp
 	clr	%fp
 
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	T_KSTACK_FAULT | T_KERNEL, %o0
 END(tl1_kstack_fault)
 
 /*
  * Magic to resume from a spill or fill trap.  If we get an alignment or an
  * MMU fault during a spill or a fill, this macro will detect the fault and
  * resume at a set instruction offset in the trap handler.
  *
  * To check if the previous trap was a spill/fill we convert the trapped pc
  * to a trap type and verify that it is in the range of spill/fill vectors.
  * The spill/fill vectors are types 0x80-0xff and 0x280-0x2ff, masking off the
  * tl bit allows us to detect both ranges with one test.
  *
  * This is:
  *	0x80 <= (((%tpc - %tba) >> 5) & ~0x200) < 0x100
  *
  * To calculate the new pc we take advantage of the xor feature of wrpr.
  * Forcing all the low bits of the trapped pc on we can produce any offset
  * into the spill/fill vector.  The size of a spill/fill trap vector is 0x80.
  *
  *	0x7f ^ 0x1f == 0x60
  *	0x1f == (0x80 - 0x60) - 1
  *
  * Which are the offset and xor value used to resume from alignment faults.
  */
 
 /*
  * Determine if we have trapped inside of a spill/fill vector, and if so resume
  * at a fixed instruction offset in the trap vector.  Must be called on
  * alternate globals.
  *
  * stxa_g0_sfsr is an instruction sequence (possibly empty) executed just
  * before resuming; xor is the value xor'ed by wrpr with (%tpc | 0x7f) to
  * form the new %tnpc.
  *
  * %g1 and %g2 are saved on ASP_REG and restored on both exits.  When
  * ((%tpc - %tba) >> 5) & ~0x200 lies in [0x80, 0x100) the macro ends with
  * "done"; otherwise execution continues after the macro (label 9).  The
  * "or" in the delay slot of the second branch also runs on the way to
  * label 9, which is harmless because %g1 is reloaded there.
  */
 #define	RESUME_SPILLFILL_MAGIC(stxa_g0_sfsr, xor) \
 	dec	16, ASP_REG ; \
 	stx	%g1, [ASP_REG + 0] ; \
 	stx	%g2, [ASP_REG + 8] ; \
 	rdpr	%tpc, %g1 ; \
 	ERRATUM50(%g1) ; \
 	rdpr	%tba, %g2 ; \
 	sub	%g1, %g2, %g2 ; \
 	srlx	%g2, 5, %g2 ; \
 	andn	%g2, 0x200, %g2 ; \
 	cmp	%g2, 0x80 ; \
 	blu,pt	%xcc, 9f ; \
 	 cmp	%g2, 0x100 ; \
 	bgeu,pt	%xcc, 9f ; \
 	 or	%g1, 0x7f, %g1 ; \
 	wrpr	%g1, xor, %tnpc ; \
 	stxa_g0_sfsr ; \
 	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG ; \
 	done ; \
 9:	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG
 
 /*
  * For certain faults we need to clear the SFSR MMU register before returning.
  *
  * Sets %asi to ASI_DMMU and stores %g0 to AA_DMMU_SFSR; %asi is left
  * changed.
  */
 #define	RSF_CLR_SFSR \
 	wr	%g0, ASI_DMMU, %asi ; \
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 
 /*
  * Xor constant for resuming at byte offset "off" inside a 0x80-byte
  * spill/fill vector: (0x80 - off) - 1.
  */
 #define	RSF_XOR(off)	((0x80 - off) - 1)
 
 /*
  * Instruction offsets in spill and fill trap handlers for handling certain
  * nested traps, and corresponding xor constants for wrpr.
  *
  * RSF_OFF_ALIGN is used for alignment faults and RSF_OFF_MMU for MMU
  * faults by the RESUME_SPILLFILL_* macros.
  */
 #define	RSF_OFF_ALIGN	0x60
 #define	RSF_OFF_MMU	0x70
 
 /*
  * Instances of RESUME_SPILLFILL_MAGIC: resume at the alignment offset with
  * the SFSR cleared, at the MMU offset without touching the SFSR, and at the
  * MMU offset with the SFSR cleared.
  */
 #define	RESUME_SPILLFILL_ALIGN \
 	RESUME_SPILLFILL_MAGIC(RSF_CLR_SFSR, RSF_XOR(RSF_OFF_ALIGN))
 #define	RESUME_SPILLFILL_MMU \
 	RESUME_SPILLFILL_MAGIC(EMPTY, RSF_XOR(RSF_OFF_MMU))
 #define	RESUME_SPILLFILL_MMU_CLR_SFSR \
 	RESUME_SPILLFILL_MAGIC(RSF_CLR_SFSR, RSF_XOR(RSF_OFF_MMU))
 
 /*
  * Constant to add to %tnpc when taking a fill trap just before returning to
  * user mode: the distance between the labels tl0_ret_fill and
  * tl0_ret_fill_end.
  */
 #define	RSF_FILL_INC	tl0_ret_fill_end - tl0_ret_fill
 
 /*
  * Generate a T_SPILL or T_FILL trap if the window operation fails.
  *
  * Branches to tl0_sftrap with "type" in %g2 (set in the delay slot) and
  * pads to a 16-byte boundary.
  */
 #define	RSF_TRAP(type) \
 	ba	%xcc, tl0_sftrap ; \
 	 mov	type, %g2 ; \
 	.align	16
 
 /*
  * Game over if the window operation fails.
  *
  * Branches to rsf_fatal with "type" in %g2 (set in the delay slot) and pads
  * to a 16-byte boundary.
  */
 #define	RSF_FATAL(type) \
 	ba	%xcc, rsf_fatal ; \
 	 mov	type, %g2 ; \
 	.align	16
 
 /*
  * Magic to resume from a failed fill a few instructions after the
  * corresponding restore.  This is used on return from the kernel to usermode.
  *
  * Adds RSF_FILL_INC to %tnpc and executes "done"; clobbers %g1.  Padded to
  * a 16-byte boundary.
  */
 #define	RSF_FILL_MAGIC \
 	rdpr	%tnpc, %g1 ; \
 	add	%g1, RSF_FILL_INC, %g1 ; \
 	wrpr	%g1, 0, %tnpc ; \
 	done ; \
 	.align	16
 
 /*
  * Spill to the pcb if a spill to the user stack in kernel mode fails.
  *
  * Annulled branch to tl1_spill_topcb, padded to a 16-byte boundary.
  */
 #define	RSF_SPILL_TOPCB \
 	ba,a	%xcc, tl1_spill_topcb ; \
 	 nop ; \
 	.align	16
 
 /*
  * Target of RSF_FATAL; %g2 holds the type passed by the macro.
  *
  * Optionally traces %tt and the type, runs KSTACK_CHECK and then executes
  * "sir".
  */
 ENTRY(rsf_fatal)
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "rsf_fatal: bad window trap tt=%#lx type=%#lx"
 	    , %g1, %g3, %g4, 7, 8, 9)
 	rdpr	%tt, %g3
 	stx	%g3, [%g1 + KTR_PARM1]
 	stx	%g2, [%g1 + KTR_PARM2]
 9:
 #endif
 
 	KSTACK_CHECK
 
 	sir
 END(rsf_fatal)
 
 	/*
 	 * Interrupt name and counter tables, sized for IV_MAX + PIL_MAX
 	 * entries: MAXCOMLEN + 1 bytes per name and 8 bytes per counter.
 	 * sintrnames and sintrcnt each hold the byte size of the table that
 	 * precedes them.
 	 */
 	.data
 	_ALIGN_DATA
 	.globl	intrnames, sintrnames
 intrnames:
 	.space	(IV_MAX + PIL_MAX) * (MAXCOMLEN + 1)
 sintrnames:
 	.quad	(IV_MAX + PIL_MAX) * (MAXCOMLEN + 1)
 
 	.globl	intrcnt, sintrcnt
 intrcnt:
 	.space	(IV_MAX + PIL_MAX) * 8
 sintrcnt:
 	.quad	(IV_MAX + PIL_MAX) * 8
 
 	.text
 
 /*
  * Trap table and associated macros
  *
  * Due to its size a trap table is an inherently hard thing to represent in
  * code in a clean way.  There are approximately 1024 vectors, of 8 or 32
  * instructions each, many of which are identical.  The way that this is
  * laid out is the instructions (8 or 32) for the actual trap vector appear
  * as an AS macro.  In general this code branches to tl0_trap or tl1_trap,
  * but if not supporting code can be placed just after the definition of the
  * macro.  The macros are then instantiated in a different section (.trap),
  * which is setup to be placed by the linker at the beginning of .text, and the
  * code around the macros is moved to the end of trap table.  In this way the
  * code that must be sequential in memory can be split up, and located near
  * its supporting code so that it is easier to follow.
  */
 
 	/*
 	 * Clean window traps occur when %cleanwin is zero to ensure that data
 	 * is not leaked between address spaces in registers.
 	 *
 	 * Clears %o0-%o7 and %l0-%l6, uses %l7 as scratch to increment
 	 * %cleanwin, clears %l7 as well and retries the trapped
 	 * instruction.  Padded to 128 bytes.
 	 */
 	.macro	clean_window
 	clr	%o0
 	clr	%o1
 	clr	%o2
 	clr	%o3
 	clr	%o4
 	clr	%o5
 	clr	%o6
 	clr	%o7
 	clr	%l0
 	clr	%l1
 	clr	%l2
 	clr	%l3
 	clr	%l4
 	clr	%l5
 	clr	%l6
 	rdpr	%cleanwin, %l7
 	inc	%l7
 	wrpr	%l7, 0, %cleanwin
 	clr	%l7
 	retry
 	.align	128
 	.endm
 
 	/*
 	 * Stack fixups for entry from user mode.  We are still running on the
 	 * user stack, and with its live registers, so we must save soon.  We
 	 * are on alternate globals so we do have some registers.  Set the
 	 * transitional window state, and do the save.  If this traps we
 	 * attempt to spill a window to the user stack.  If this fails, we
 	 * spill the window to the pcb and continue.  Spilling to the pcb
 	 * must not fail.
 	 *
 	 * NOTE: Must be called with alternate globals and clobbers %g1.
 	 */
 
 	.macro	tl0_split
 	rdpr	%wstate, %g1
 	/* wrpr writes %g1 xor WSTATE_TRANSITION. */
 	wrpr	%g1, WSTATE_TRANSITION, %wstate
 	save
 	.endm
 
 	/*
 	 * Common body of a user-mode trap vector: tl0_split, then branch to
 	 * tl0_utrap with %o0 = type (set in the delay slot), %o1 = 0 and
 	 * %o2 = trap.
 	 */
 	.macro	tl0_setup	type
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl0_utrap
 	 mov	\type, %o0
 	.endm
 
 	/*
 	 * Generic trap type.  Call trap() with the specified type.
 	 * Expands tl0_setup and pads the vector to 32 bytes.
 	 */
 	.macro	tl0_gen		type
 	tl0_setup \type
 	.align	32
 	.endm
 
 	/*
 	 * This is used to suck up the massive swaths of reserved trap types.
 	 * Generates count "reserved" trap vectors, each one a
 	 * "tl0_gen T_RESERVED".
 	 */
 	.macro	tl0_reserved	count
 	.rept	\count
 	tl0_gen	T_RESERVED
 	.endr
 	.endm
 
 	/*
 	 * Kernel-mode counterpart of tl0_split: xor WSTATE_NESTED into
 	 * %wstate and save a new window with CCFSZ + TF_SIZEOF bytes of
 	 * stack.  Clobbers %g1.
 	 */
 	.macro	tl1_split
 	rdpr	%wstate, %g1
 	wrpr	%g1, WSTATE_NESTED, %wstate
 	save	%sp, -(CCFSZ + TF_SIZEOF), %sp
 	.endm
 
 	/*
 	 * Common body of a kernel-mode trap vector: tl1_split, then branch
 	 * to tl1_trap with %o0 = type | T_KERNEL (set in the delay slot),
 	 * %o1 = 0 and %o2 = trap.
 	 */
 	.macro	tl1_setup	type
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	\type | T_KERNEL, %o0
 	.endm
 
 	/* Generic kernel-mode trap vector: tl1_setup padded to 32 bytes. */
 	.macro	tl1_gen		type
 	tl1_setup \type
 	.align	32
 	.endm
 
 	/* Generates count "tl1_gen T_RESERVED" trap vectors. */
 	.macro	tl1_reserved	count
 	.rept	\count
 	tl1_gen	T_RESERVED
 	.endr
 	.endm
 
 	/*
 	 * Instruction exception vector: switch %pstate to PSTATE_ALT,
 	 * capture %tpc in %g3 and the IMMU SFSR in %g4, clear the SFSR and
 	 * branch to tl0_sfsr_trap with T_INSTRUCTION_EXCEPTION in %g2.
 	 * Padded to 32 bytes.
 	 */
 	.macro	tl0_insn_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_IMMU, %asi
 	rdpr	%tpc, %g3
 	ldxa	[%g0 + AA_IMMU_SFSR] %asi, %g4
 	/*
 	 * XXX in theory, a store to AA_IMMU_SFSR must be immediately
 	 * followed by a DONE, FLUSH or RETRY for USIII.  In practice,
 	 * this triggers a RED state exception though.
 	 */
 	stxa	%g0, [%g0 + AA_IMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_INSTRUCTION_EXCEPTION, %g2
 	.align	32
 	.endm
 
 	/*
 	 * Data exception vector: switch %pstate to PSTATE_ALT, capture the
 	 * DMMU SFAR in %g3 and SFSR in %g4, clear the SFSR and branch to
 	 * tl0_sfsr_trap with T_DATA_EXCEPTION in %g2.  Padded to 32 bytes.
 	 */
 	.macro	tl0_data_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_DATA_EXCEPTION, %g2
 	.align	32
 	.endm
 
 	/*
 	 * Alignment fault vector: capture the DMMU SFAR in %g3 and SFSR in
 	 * %g4, clear the SFSR and branch to tl0_sfsr_trap with
 	 * T_MEM_ADDRESS_NOT_ALIGNED in %g2.  Unlike the two exception
 	 * vectors above, %pstate is not written here.  Padded to 32 bytes.
 	 */
 	.macro	tl0_align
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_MEM_ADDRESS_NOT_ALIGNED, %g2
 	.align	32
 	.endm
 
 /*
  * Common tail of the vectors that captured fault status registers.
  * On entry %g2 holds the trap type and %g3/%g4 the values captured by the
  * vector.  After tl0_split they are moved to %o0, %o4 and %o5, %o1 is
  * cleared, %o2 is set to trap, and control passes to tl0_utrap.
  */
 ENTRY(tl0_sfsr_trap)
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl0_utrap
 	 mov	%g2, %o0
 END(tl0_sfsr_trap)
 
 	/*
 	 * Interrupt vector for user mode: tl0_split, then branch to the
 	 * tl0_intr routine with %o0 = level (set in the delay slot) and
 	 * %o1 = mask.  Padded to 32 bytes.
 	 */
 	.macro	tl0_intr level, mask
 	tl0_split
 	set	\mask, %o1
 	ba	%xcc, tl0_intr
 	 mov	\level, %o0
 	.align	32
 	.endm
 
 /*
  * Expand the tl<traplvl>_intr macro for interrupt "level" with the mask
  * 1 << level.
  */
 #define	INTR(level, traplvl)						\
 	tl ## traplvl ## _intr	level, 1 << level
 
 /*
  * Expand the tl<traplvl>_intr macro for PIL_TICK with the fixed mask
  * 0x10001.
  */
 #define	TICK(traplvl) \
 	tl ## traplvl ## _intr	PIL_TICK, 0x10001
 
 /*
  * Emit the interrupt vectors for trap level "tl": INTR for levels 1-13 and
  * 15, with TICK in the position between 13 and 15.
  */
 #define	INTR_LEVEL(tl)							\
 	INTR(1, tl) ;							\
 	INTR(2, tl) ;							\
 	INTR(3, tl) ;							\
 	INTR(4, tl) ;							\
 	INTR(5, tl) ;							\
 	INTR(6, tl) ;							\
 	INTR(7, tl) ;							\
 	INTR(8, tl) ;							\
 	INTR(9, tl) ;							\
 	INTR(10, tl) ;							\
 	INTR(11, tl) ;							\
 	INTR(12, tl) ;							\
 	INTR(13, tl) ;							\
 	TICK(tl) ;							\
 	INTR(15, tl) ;
 
 	/* All interrupt-level vectors for trap level 0. */
 	.macro	tl0_intr_level
 	INTR_LEVEL(0)
 	.endm
 
 	/*
 	 * Interrupt vector trap: read ASI_INTR_RECEIVE into %g1 and branch
 	 * to the intr_vector routine when IRSR_BUSY is set, otherwise to
 	 * intr_vector_stray.  Padded to 32 bytes.
 	 */
 	.macro	intr_vector
 	ldxa	[%g0] ASI_INTR_RECEIVE, %g1
 	andcc	%g1, IRSR_BUSY, %g0
 	bnz,a,pt %xcc, intr_vector
 	 nop
 	ba,a,pt	%xcc, intr_vector_stray
 	 nop
 	.align	32
 	.endm
 
 	/*
 	 * Instruction MMU miss vector for user mode.
 	 *
 	 * Searches the user TSB for a valid, executable TTE whose tag
 	 * matches the faulting address, trying every page size from TS_MIN
 	 * to TS_MAX and every TTE of the selected bucket.  On a match the
 	 * TTE is loaded into the ITLB and the instruction is retried (via
 	 * tl0_immu_miss_set_ref when TD_REF is clear); otherwise control
 	 * passes to tl0_immu_miss_trap.
 	 *
 	 * Register use: %g1 tag access register contents, %g2 current page
 	 * size, %g3 tag target, %g4 TTE pointer, %g5 TSB base, %g6/%g7 the
 	 * loaded TTE (tag/data).  Padded to 128 bytes.
 	 */
 	.macro	tl0_immu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_IMMU, %asi
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 */
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_IMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and executable and that the TTE tags match.
 	 */
 	brgez,pn %g7, 3f
 	 andcc	%g7, TD_EXEC, %g0
 	bz,pn	%xcc, 3f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 3f
 	 EMPTY
 
 	/*
 	 * We matched a TTE, load the TLB.
 	 */
 
 	/*
 	 * Set the reference bit, if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl0_immu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	stxa	%g7, [%g0] ASI_ITLB_DATA_IN_REG
 	retry
 
 	/*
 	 * Advance to the next TTE in this bucket, and check the low bits
 	 * of the bucket pointer to see if we've finished the bucket.
 	 */
 3:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_immu_miss_trap
 	.align	128
 	.endm
 
 /*
  * Continuation of tl0_immu_miss for a matching TTE whose TD_REF bit is
  * clear.  On entry %g4 points at the TTE and %g1 holds the tag access
  * register contents; %g2 and %g3 are used as scratch by TTE_SET_REF.
  */
 ENTRY(tl0_immu_miss_set_ref)
 	/*
 	 * Set the reference bit.
 	 */
 	TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g2, 1f
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_ITLB_DATA_IN_REG
 1:	retry
 END(tl0_immu_miss_set_ref)
 
 /*
  * Slow path of tl0_immu_miss: no matching TTE was found in the user TSB.
  * On entry %g1 holds the tag access register contents.  Ends in tl0_utrap
  * with %o0 = T_INSTRUCTION_MISS, %o1 = 0, %o2 = trap and %o3 = the tag
  * access register value.
  */
 ENTRY(tl0_immu_miss_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	sethi	%hi(KERNBASE), %g2
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	flush	%g2
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g2
 
 	/*
 	 * Save the tag access register, and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl0_utrap
 	 mov	T_INSTRUCTION_MISS, %o0
 END(tl0_immu_miss_trap)
 
 	/*
 	 * Data MMU miss vector for user mode.
 	 *
 	 * Same search as the instruction miss vector, but using the DMMU
 	 * registers and without the executable check: every page size from
 	 * TS_MIN to TS_MAX and every TTE of the selected bucket is tried.
 	 * On a match the TTE is loaded into the DTLB and the instruction is
 	 * retried (via tl0_dmmu_miss_set_ref when TD_REF is clear);
 	 * otherwise control passes to tl0_dmmu_miss_trap.
 	 *
 	 * The label tl1_dmmu_miss_user is defined inside this macro, just
 	 * after the tag access register has been read into %g1.
 	 * Padded to 128 bytes.
 	 */
 	.macro	tl0_dmmu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 */
 tl1_dmmu_miss_user:
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_DMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and that the virtual page numbers match.
 	 */
 	brgez,pn %g7, 3f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 3f
 	 EMPTY
 
 	/*
 	 * We matched a TTE, load the TLB.
 	 */
 
 	/*
 	 * Set the reference bit, if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl0_dmmu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g7, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 
 	/*
 	 * Advance to the next TTE in this bucket, and check the low bits
 	 * of the bucket pointer to see if we've finished the bucket.
 	 */
 3:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_dmmu_miss_trap
 	.align	128
 	.endm
 
 /*
  * Continuation of the tl0_dmmu_miss macro for a matching TTE whose TD_REF
  * bit is clear.  On entry %g1 holds the tag access register contents and
  * %g4 points at the matching TTE.  TTE_SET_REF is defined elsewhere; the
  * code below treats %g2 as the resulting TTE data and %g3 as scratch.
  */
 ENTRY(tl0_dmmu_miss_set_ref)
 	/*
 	 * Set the reference bit.
 	 */
 	TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 * Starting over means retrying the faulting instruction without
 	 * loading the TLB.
 	 */
 	brgez,pn %g2, 1f
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl0_dmmu_miss_set_ref)
 
 /*
  * Slow path of the tl0_dmmu_miss macro: no matching TTE was found.  On
  * entry %g1 holds the original tag access register contents and %asi is
  * ASI_DMMU.  If %tl is 1 the miss is handed to tl0_utrap as T_DATA_MISS;
  * if %tl is greater than 1 it is handed to tl1_trap as
  * T_DATA_MISS | T_KERNEL.  The tag access register is passed in %o3.
  */
 ENTRY(tl0_dmmu_miss_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	membar	#Sync
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Check if we actually came from the kernel.
 	 */
 	rdpr	%tl, %g1
 	cmp	%g1, 1
 	bgt,a,pn %xcc, 1f
 	 nop
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	/*
 	 * Save the tag access register and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl0_utrap
 	 mov	T_DATA_MISS, %o0
 
 	/*
 	 * Handle faults during window spill/fill.
 	 */
 1:	RESUME_SPILLFILL_MMU
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_MISS | T_KERNEL, %o0
 END(tl0_dmmu_miss_trap)
 
 	/*
 	 * Trap table stub for TL=0 data protection faults: branches to the
 	 * out-of-line handler tl0_dmmu_prot_1 and pads to a 128-byte boundary.
 	 */
 	.macro	tl0_dmmu_prot
 	ba,a	%xcc, tl0_dmmu_prot_1
 	 nop
 	.align	128
 	.endm
 
 /*
  * TL=0 data protection fault handler, reached from the tl0_dmmu_prot trap
  * table stub.  Searches the user TSB, for each page size from TS_MIN to
  * TS_MAX, for a valid TTE with TD_SW set that matches the faulting address.
  * On a hit the hardware write bit is set in the TTE, the old TLB entry is
  * demapped, the SFSR is cleared and the updated TTE is loaded into the DTLB.
  * On a miss control passes to tl0_dmmu_prot_trap.  tl1_dmmu_prot_1 also
  * enters at tl1_dmmu_prot_user with the tag access register contents in
  * %g1.
  */
 ENTRY(tl0_dmmu_prot_1)
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 */
 tl1_dmmu_prot_user:
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for, i.e. %g3 = 3 * %g2 + PAGE_SHIFT.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_DMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and writable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, 4f
 	 andcc	%g7, TD_SW, %g0
 	bz,pn	%xcc, 4f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 4f
 	 nop
 
 	/*
 	 * Set the hardware write bit.  TTE_SET_W is defined elsewhere; the
 	 * code below treats %g2 as the TTE data it read and %g3 as scratch.
 	 */
 	TTE_SET_W(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * Delete the old TLB entry and clear the SFSR.  The demap address
 	 * is the tag access value with the bits below PAGE_SHIFT cleared.
 	 */
 	srlx	%g1, PAGE_SHIFT, %g3
 	sllx	%g3, PAGE_SHIFT, %g3
 	stxa	%g0, [%g3] ASI_DMMU_DEMAP
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 * TD_W is or'ed into the data in the delay slot.
 	 */
 	brgez,pn %g2, 3f
 	 or	%g2, TD_W, %g2
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_DTLB_DATA_IN_REG
 3:	retry
 
 	/*
 	 * Check the low bits to see if we've finished the bucket.
 	 */
 4:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_dmmu_prot_trap
 	 nop
 END(tl0_dmmu_prot_1)
 
 /*
  * Slow path of tl0_dmmu_prot_1: no usable TTE was found.  On entry %g1
  * holds the original tag access register contents and %asi is ASI_DMMU.
  * The TAR, SFAR and SFSR are passed in %o3, %o4 and %o5.  If %tl is 1 the
  * fault goes to tl0_utrap as T_DATA_PROTECTION; if %tl is greater than 1
  * it goes to tl1_trap as T_DATA_PROTECTION | T_KERNEL.
  */
 ENTRY(tl0_dmmu_prot_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	membar	#Sync
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Check if we actually came from the kernel.
 	 */
 	rdpr	%tl, %g1
 	cmp	%g1, 1
 	bgt,a,pn %xcc, 1f
 	 nop
 
 	/*
 	 * Load the SFAR, SFSR and TAR.  Clear the SFSR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * Save the MMU registers and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl0_utrap
 	 mov	T_DATA_PROTECTION, %o0
 
 	/*
 	 * Handle faults during window spill/fill.
 	 */
 1:	RESUME_SPILLFILL_MMU_CLR_SFSR
 
 	/*
 	 * Load the SFAR, SFSR and TAR.  Clear the SFSR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_PROTECTION | T_KERNEL, %o0
 END(tl0_dmmu_prot_trap)
 
 	/*
 	 * TL=0 spill handler using 8-byte stxa stores at %sp + SPOFF through
 	 * ASI_AIUP (SPILL is defined elsewhere), followed by saved/retry.
 	 * The two RSF_TRAP(T_SPILL) entries after the .align fill the rest
 	 * of the table slot; presumably they handle faults taken by the
 	 * stores above -- confirm against the RSF_TRAP definition.
 	 */
 	.macro	tl0_spill_0_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_TRAP(T_SPILL)
 	RSF_TRAP(T_SPILL)
 	.endm
 
 	/*
 	 * TL=0 spill handler using 4-byte stwa stores at %sp (no SPOFF bias)
 	 * through ASI_AIUP, followed by saved/retry.  Otherwise laid out like
 	 * tl0_spill_0_n.
 	 */
 	.macro	tl0_spill_1_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_TRAP(T_SPILL)
 	RSF_TRAP(T_SPILL)
 	.endm
 
 	/*
 	 * TL=0 fill handler using 8-byte ldxa loads at %sp + SPOFF through
 	 * ASI_AIUP (FILL is defined elsewhere), followed by restored/retry.
 	 * The two RSF_TRAP(T_FILL) entries after the .align fill the rest of
 	 * the table slot.
 	 */
 	.macro	tl0_fill_0_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(ldxa, %sp + SPOFF, 8, %asi)
 	restored
 	retry
 	.align	32
 	RSF_TRAP(T_FILL)
 	RSF_TRAP(T_FILL)
 	.endm
 
 	/*
 	 * TL=0 fill handler using 4-byte lduwa loads at %sp (no SPOFF bias)
 	 * through ASI_AIUP, followed by restored/retry.  Otherwise laid out
 	 * like tl0_fill_0_n.
 	 */
 	.macro	tl0_fill_1_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(lduwa, %sp, 4, %asi)
 	restored
 	retry
 	.align	32
 	RSF_TRAP(T_FILL)
 	RSF_TRAP(T_FILL)
 	.endm
 
 /*
  * Restore %cwp from the CWP field of %tstate, split the register windows
  * and enter tl0_trap with the trap type taken from %g2.
  * NOTE(review): %g2 is presumably loaded by the RSF_TRAP macro before
  * branching here -- confirm against its definition.
  */
 ENTRY(tl0_sftrap)
 	rdpr	%tstate, %g1
 	and	%g1, TSTATE_CWP_MASK, %g1
 	wrpr	%g1, 0, %cwp
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl0_trap
 	 mov	%g2, %o0
 END(tl0_sftrap)
 
 	/*
 	 * Emit \count unused TL=0 spill slots.  Each slot is a single sir
 	 * instruction padded to a 128-byte boundary.
 	 */
 	.macro	tl0_spill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Emit \count unused TL=0 fill slots.  Each slot is a single sir
 	 * instruction padded to a 128-byte boundary.
 	 */
 	.macro	tl0_fill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Trap table entry for system calls: split the windows and enter
 	 * tl0_trap with type T_SYSCALL in %o0 and the address of syscall in
 	 * %o2 (other entries pass trap there instead).  Padded to a 32-byte
 	 * slot.
 	 */
 	.macro	tl0_syscall
 	tl0_split
 	clr	%o1
 	set	syscall, %o2
 	ba	%xcc, tl0_trap
 	 mov	T_SYSCALL, %o0
 	.align	32
 	.endm
 
 	/*
 	 * Trap table stub: branch to the out-of-line tl0_fp_restore routine
 	 * and pad to a 32-byte slot.
 	 */
 	.macro	tl0_fp_restore
 	ba,a	%xcc, tl0_fp_restore
 	 nop
 	.align	32
 	.endm
 
 /*
  * Reload the floating point registers from the PCB_UFP area of the pcb.
  * Clears PCB_FEF in the pcb flags, sets %fprs to FPRS_FEF, performs four
  * 64-byte block loads through ASI_BLK_S into %f0, %f16, %f32 and %f48, and
  * returns with done.
  */
 ENTRY(tl0_fp_restore)
 	ldx	[PCB_REG + PCB_FLAGS], %g1
 	andn	%g1, PCB_FEF, %g1
 	stx	%g1, [PCB_REG + PCB_FLAGS]
 
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
 	ldda	[PCB_REG + PCB_UFP + (0 * 64)] %asi, %f0
 	ldda	[PCB_REG + PCB_UFP + (1 * 64)] %asi, %f16
 	ldda	[PCB_REG + PCB_UFP + (2 * 64)] %asi, %f32
 	ldda	[PCB_REG + PCB_UFP + (3 * 64)] %asi, %f48
 	membar	#Sync
 	done
 END(tl0_fp_restore)
 
 	/*
 	 * Trap table entry for instruction access exceptions at TL>0.
 	 * Switches to alternate globals, captures %tpc in %g3 and the IMMU
 	 * SFSR in %g4, clears the SFSR and branches to
 	 * tl1_insn_exceptn_trap with the trap type in %g2.
 	 */
 	.macro	tl1_insn_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_IMMU, %asi
 	rdpr	%tpc, %g3
 	ldxa	[%g0 + AA_IMMU_SFSR] %asi, %g4
 	/*
 	 * XXX in theory, a store to AA_IMMU_SFSR must be immediately
 	 * followed by a DONE, FLUSH or RETRY for USIII.  In practice,
 	 * this triggers a RED state exception though.
 	 */
 	stxa	%g0, [%g0 + AA_IMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl1_insn_exceptn_trap
 	 mov	T_INSTRUCTION_EXCEPTION | T_KERNEL, %g2
 	.align	32
 	.endm
 
 /*
  * Out-of-line tail of the tl1_insn_excptn macro.  Expects the trap type in
  * %g2, the trapping PC in %g3 and the IMMU SFSR in %g4; passes them to
  * tl1_trap in %o0, %o4 and %o5 respectively.
  */
 ENTRY(tl1_insn_exceptn_trap)
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	%g2, %o0
 END(tl1_insn_exceptn_trap)
 
 	/*
 	 * Trap table stub: branch to the out-of-line handler
 	 * tl1_fp_disabled_1 and pad to a 32-byte slot.
 	 */
 	.macro	tl1_fp_disabled
 	ba,a	%xcc, tl1_fp_disabled_1
 	 nop
 	.align	32
 	.endm
 
 /*
  * FP disabled trap taken at TL>0.  If the trapping PC lies within
  * fpu_fault_size bytes starting at fpu_fault_begin, reload the floating
  * point registers from the PCB_KFP area of the pcb and retry the
  * instruction.  Otherwise hand the trap to tl1_trap as
  * T_FP_DISABLED | T_KERNEL.
  */
 ENTRY(tl1_fp_disabled_1)
 	rdpr	%tpc, %g1
 	set	fpu_fault_begin, %g2
 	sub	%g1, %g2, %g1
 	/*
 	 * Unsigned compare of the offset, so a PC below fpu_fault_begin
 	 * (which wraps to a large value) is also rejected.
 	 */
 	cmp	%g1, fpu_fault_size
 	bgeu,a,pn %xcc, 1f
 	 nop
 
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
 	ldda	[PCB_REG + PCB_KFP + (0 * 64)] %asi, %f0
 	ldda	[PCB_REG + PCB_KFP + (1 * 64)] %asi, %f16
 	ldda	[PCB_REG + PCB_KFP + (2 * 64)] %asi, %f32
 	ldda	[PCB_REG + PCB_KFP + (3 * 64)] %asi, %f48
 	membar	#Sync
 	retry
 
 1:	tl1_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	T_FP_DISABLED | T_KERNEL, %o0
 END(tl1_fp_disabled_1)
 
 	/*
 	 * Trap table stub for data access exceptions at TL>0: switch to
 	 * alternate globals and branch to tl1_data_excptn_trap.  Padded to
 	 * a 32-byte slot.
 	 */
 	.macro	tl1_data_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	ba,a	%xcc, tl1_data_excptn_trap
 	 nop
 	.align	32
 	.endm
 
 /*
  * Out-of-line tail of the tl1_data_excptn macro.  Runs
  * RESUME_SPILLFILL_MMU_CLR_SFSR (defined elsewhere) and then continues in
  * tl1_sfsr_trap with the trap type in %g2.
  */
 ENTRY(tl1_data_excptn_trap)
 	RESUME_SPILLFILL_MMU_CLR_SFSR
 	ba	%xcc, tl1_sfsr_trap
 	 mov	T_DATA_EXCEPTION | T_KERNEL, %g2
 END(tl1_data_excptn_trap)
 
 	/*
 	 * Trap table stub for memory address alignment traps at TL>0:
 	 * switch to alternate globals and branch to tl1_align_trap.  Padded
 	 * to a 32-byte slot.
 	 */
 	.macro	tl1_align
 	wrpr	%g0, PSTATE_ALT, %pstate
 	ba,a	%xcc, tl1_align_trap
 	 nop
 	.align	32
 	.endm
 
 /*
  * Out-of-line tail of the tl1_align macro.  Runs RESUME_SPILLFILL_ALIGN
  * (defined elsewhere) and then continues in tl1_sfsr_trap with the trap
  * type in %g2.
  */
 ENTRY(tl1_align_trap)
 	RESUME_SPILLFILL_ALIGN
 	ba	%xcc, tl1_sfsr_trap
 	 mov	T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2
 END(tl1_align_trap)
 
 /*
  * Common tail for tl1_data_excptn_trap and tl1_align_trap.  Expects the
  * trap type in %g2.  Reads the DMMU SFAR and SFSR, clears the SFSR, and
  * calls tl1_trap with the type in %o0, the SFAR in %o4 and the SFSR in %o5.
  */
 ENTRY(tl1_sfsr_trap)
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	%g2, %o0
 END(tl1_sfsr_trap)
 
 	/*
 	 * Trap table entry for an interrupt taken at TL>0.  Splits the
 	 * windows and branches to the tl1_intr routine with \level in %o0
 	 * and \mask in %o1.  Padded to a 32-byte slot.
 	 */
 	.macro	tl1_intr level, mask
 	tl1_split
 	set	\mask, %o1
 	ba	%xcc, tl1_intr
 	 mov	\level, %o0
 	.align	32
 	.endm
 
 	/*
 	 * Emit the TL>0 interrupt level entries by expanding INTR_LEVEL(1).
 	 * INTR_LEVEL is defined elsewhere; presumably it instantiates
 	 * tl1_intr once per interrupt level -- confirm against its
 	 * definition.
 	 */
 	.macro	tl1_intr_level
 	INTR_LEVEL(1)
 	.endm
 
 	/*
 	 * TL>0 instruction MMU miss fast path, instantiated at tl1_immu_miss
 	 * in the trap table.  Looks up the single kernel TSB entry indexed by
 	 * the faulting virtual page number.  A valid, executable, matching
 	 * TTE is loaded into the ITLB; if its TD_REF bit is clear the load
 	 * is done by tl1_immu_miss_set_ref instead.  Anything else goes to
 	 * tl1_immu_miss_trap.
 	 *
 	 * The instructions at the tl1_immu_miss_patch_* labels are written
 	 * with placeholder operands and exported so that they can be patched
 	 * at startup.
 	 */
 	.macro	tl1_immu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_IMMU, %asi
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g5
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_immu_miss_patch_tsb_1
 tl1_immu_miss_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_immu_miss_patch_tsb_mask_1
 tl1_immu_miss_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	/* %g5 = virtual page number, %g6 = address of the TTE. */
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_immu_miss_patch_quad_ldd_1
 tl1_immu_miss_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and executable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, tl1_immu_miss_trap
 	 andcc	%g7, TD_EXEC, %g0
 	bz,pn	%xcc, tl1_immu_miss_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn	%xcc, tl1_immu_miss_trap
 	 EMPTY
 
 	/*
 	 * Set the reference bit if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl1_immu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g7, [%g0] ASI_ITLB_DATA_IN_REG
 	retry
 	.align	128
 	.endm
 
 /*
  * Continuation of the tl1_immu_miss macro for a matching TTE whose TD_REF
  * bit is clear.  On entry %g5 holds the faulting virtual page number.  The
  * TTE address is recomputed, the reference bit is set with TTE_SET_REF
  * (defined elsewhere; the code treats %g6 as the resulting TTE data) and
  * the TTE is loaded into the ITLB if it is still valid.
  */
 ENTRY(tl1_immu_miss_set_ref)
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 */
 	.globl	tl1_immu_miss_patch_tsb_2
 tl1_immu_miss_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_immu_miss_patch_tsb_mask_2
 tl1_immu_miss_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the reference bit.
 	 */
 	.globl	tl1_immu_miss_patch_asi_1
 tl1_immu_miss_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g6, 1f
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_ITLB_DATA_IN_REG
 1:	retry
 END(tl1_immu_miss_set_ref)
 
 /*
  * Slow path of the tl1_immu_miss macro: no usable TTE was found.  Passes
  * the IMMU tag access register to tl1_trap in %o3 with type
  * T_INSTRUCTION_MISS | T_KERNEL.  The load below relies on %asi still
  * being ASI_IMMU as set at the top of the tl1_immu_miss macro.
  */
 ENTRY(tl1_immu_miss_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g2
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_INSTRUCTION_MISS | T_KERNEL, %o0
 END(tl1_immu_miss_trap)
 
 	/*
 	 * TL>0 data MMU miss fast path, instantiated at tl1_dmmu_miss in the
 	 * trap table.  Dispatches on the faulting address:
 	 *  - non-zero context: user address, handled by the user TSB walker
 	 *    at tl1_dmmu_miss_user with the tag access value in %g1;
 	 *  - negative address: direct mapped region, handled by
 	 *    tl1_dmmu_miss_direct;
 	 *  - otherwise: looked up in the kernel TSB here.  A valid matching
 	 *    TTE is loaded into the DTLB (via tl1_dmmu_miss_set_ref if TD_REF
 	 *    is clear); anything else goes to tl1_dmmu_miss_trap.
 	 *
 	 * The instructions at the tl1_dmmu_miss_patch_* labels are written
 	 * with placeholder operands and exported so that they can be patched
 	 * at startup.
 	 */
 	.macro	tl1_dmmu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g5
 
 	/*
 	 * Extract the context from the contents of the tag access register.
 	 * If it's non-zero this is a fault on a user address.  Note that the
 	 * faulting address is passed in %g1.
 	 */
 	sllx	%g5, 64 - TAR_VPN_SHIFT, %g6
 	brnz,a,pn %g6, tl1_dmmu_miss_user
 	 mov	%g5, %g1
 
 	/*
 	 * Check for the direct mapped physical region.  These addresses have
 	 * the high bit set so they are negative.
 	 */
 	brlz,pn %g5, tl1_dmmu_miss_direct
 	 EMPTY
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_miss_patch_tsb_1
 tl1_dmmu_miss_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_miss_patch_tsb_mask_1
 tl1_dmmu_miss_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	/* %g5 = virtual page number, %g6 = address of the TTE. */
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_dmmu_miss_patch_quad_ldd_1
 tl1_dmmu_miss_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and that the virtual page numbers match.
 	 */
 	brgez,pn %g7, tl1_dmmu_miss_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn %xcc, tl1_dmmu_miss_trap
 	 EMPTY
 
 	/*
 	 * Set the reference bit if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pt	%xcc, tl1_dmmu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g7, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 	.align	128
 	.endm
 
 /*
  * Continuation of the tl1_dmmu_miss macro for a matching kernel TTE whose
  * TD_REF bit is clear.  On entry %g5 holds the faulting virtual page
  * number.  The TTE address is recomputed, the reference bit is set with
  * TTE_SET_REF (defined elsewhere; the code treats %g6 as the resulting TTE
  * data) and the TTE is loaded into the DTLB if it is still valid.
  */
 ENTRY(tl1_dmmu_miss_set_ref)
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 * Each .globl directly precedes the label it exports, as at the
 	 * other patch points in this file.
 	 */
 	.globl	tl1_dmmu_miss_patch_tsb_2
 tl1_dmmu_miss_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_miss_patch_tsb_mask_2
 tl1_dmmu_miss_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the reference bit.
 	 */
 	.globl	tl1_dmmu_miss_patch_asi_1
 tl1_dmmu_miss_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g6, 1f
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl1_dmmu_miss_set_ref)
 
 /*
  * Slow path of the tl1_dmmu_miss macro: no usable kernel TTE was found.
  * Passes the DMMU tag access register to tl1_trap in %o3 with type
  * T_DATA_MISS | T_KERNEL.  The load below relies on %asi still being
  * ASI_DMMU as set at the top of the tl1_dmmu_miss macro.  KSTACK_CHECK is
  * defined elsewhere.
  */
 ENTRY(tl1_dmmu_miss_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	KSTACK_CHECK
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_MISS | T_KERNEL, %o0
 END(tl1_dmmu_miss_trap)
 
 /*
  * Handle a TL>0 data MMU miss on the direct mapped region.  Entered from
  * the tl1_dmmu_miss macro with the tag access register contents in %g5.
  * Builds the TTE data from the virtual address itself and loads it into
  * the DTLB.  TD_CV is set unless the physical address lies within
  * [TSB_KERNEL_PHYS, TSB_KERNEL_PHYS_END]; both bounds are placeholders in
  * instructions exported for patching.
  *
  * Register use: %g4 = physical address, %g5 = TTE data being built,
  * %g3/%g6/%g7 = scratch.
  */
 ENTRY(tl1_dmmu_miss_direct)
 	/*
 	 * Mask off the high bits of the virtual address to get the physical
 	 * address, and or in the TTE bits.  The virtual address bits that
 	 * correspond to the TTE valid and page size bits are left set, so
 	 * they don't have to be included in the TTE bits below.  We know they
 	 * are set because the virtual address is in the upper va hole.
 	 * NB: if we are taking advantage of the ASI_ATOMIC_QUAD_LDD_PHYS
 	 * and we get a miss on the directly accessed kernel TSB we must not
 	 * set TD_CV in order to access it uniformly bypassing the D$.
 	 */
 	setx	TLB_DIRECT_ADDRESS_MASK, %g7, %g4
 	and	%g5, %g4, %g4
 	setx	TLB_DIRECT_TO_TTE_MASK, %g7, %g6
 	and	%g5, %g6, %g5
 	/*
 	 * Build the 64-bit lower bound in %g7: the upper word is assembled
 	 * in %g3 and the %hi() part of the lower word in %g7, then the two
 	 * are or'ed together.  The %hi() part must target %g7, as in the
 	 * upper bound sequence below; loading it into %g3 would discard the
 	 * upper word and or in whatever setx left in %g7.
 	 */
 	.globl	tl1_dmmu_miss_direct_patch_tsb_phys_1
 tl1_dmmu_miss_direct_patch_tsb_phys_1:
 	sethi	%uhi(TSB_KERNEL_PHYS), %g3
 	or	%g3, %ulo(TSB_KERNEL_PHYS), %g3
 	sllx	%g3, 32, %g3
 	sethi	%hi(TSB_KERNEL_PHYS), %g7
 	or	%g7, %g3, %g7
 	/*
 	 * Below the lower bound: cacheable in the D$.  The or in the delay
 	 * slot is not annulled, so TD_CP | TD_W is set on both paths.
 	 */
 	cmp	%g4, %g7
 	bl,pt	%xcc, 1f
 	 or	%g5, TD_CP | TD_W, %g5
 	/* Build the 64-bit upper bound in %g7 the same way. */
 	.globl	tl1_dmmu_miss_direct_patch_tsb_phys_end_1
 tl1_dmmu_miss_direct_patch_tsb_phys_end_1:
 	sethi	%uhi(TSB_KERNEL_PHYS_END), %g3
 	or	%g3, %ulo(TSB_KERNEL_PHYS_END), %g3
 	sllx	%g3, 32, %g3
 	sethi	%hi(TSB_KERNEL_PHYS_END), %g7
 	or	%g7, %g3, %g7
 	/* Above the upper bound: cacheable in the D$ as well. */
 	cmp	%g4, %g7
 	bg,a,pt	%xcc, 1f
 	 nop
 	/* Within the bounds: skip setting TD_CV. */
 	ba,pt	%xcc, 2f
 	 nop
 1:	or	%g5, TD_CV, %g5
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 2:	stxa	%g5, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 END(tl1_dmmu_miss_direct)
 
 	/*
 	 * Trap table stub for TL>0 data protection faults: branches to the
 	 * out-of-line handler tl1_dmmu_prot_1 and pads to a 128-byte boundary.
 	 */
 	.macro	tl1_dmmu_prot
 	ba,a	%xcc, tl1_dmmu_prot_1
 	 nop
 	.align	128
 	.endm
 
 /*
  * TL>0 data protection fault handler, reached from the tl1_dmmu_prot trap
  * table stub.  A fault with a non-zero context is a user address and is
  * handled by the user TSB walker at tl1_dmmu_prot_user with the tag access
  * value in %g1.  Otherwise the kernel TSB entry for the faulting page is
  * looked up; if it is valid, has TD_SW set and matches, the old TLB entry
  * is demapped, the hardware write bit is set in the TTE and the updated
  * TTE is loaded into the DTLB.  Anything else goes to tl1_dmmu_prot_trap.
  *
  * The instructions at the tl1_dmmu_prot_patch_* labels are written with
  * placeholder operands and exported so that they can be patched at startup.
  */
 ENTRY(tl1_dmmu_prot_1)
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g5
 
 	/*
 	 * Extract the context from the contents of the tag access register.
 	 * If it's non-zero this is a fault on a user address.  Note that the
 	 * faulting address is passed in %g1.
 	 */
 	sllx	%g5, 64 - TAR_VPN_SHIFT, %g6
 	brnz,a,pn %g6, tl1_dmmu_prot_user
 	 mov	%g5, %g1
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_prot_patch_tsb_1
 tl1_dmmu_prot_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_prot_patch_tsb_mask_1
 tl1_dmmu_prot_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	/* %g5 = virtual page number, %g6 = address of the TTE. */
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_dmmu_prot_patch_quad_ldd_1
 tl1_dmmu_prot_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and writeable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, tl1_dmmu_prot_trap
 	 andcc	%g7, TD_SW, %g0
 	bz,pn	%xcc, tl1_dmmu_prot_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn	%xcc, tl1_dmmu_prot_trap
 	 EMPTY
 
 	/*
 	 * Delete the old TLB entry and clear the SFSR.
 	 */
 	 sllx	%g5, TAR_VPN_SHIFT, %g6
 	or	%g6, TLB_DEMAP_NUCLEUS, %g6
 	stxa	%g0, [%g6] ASI_DMMU_DEMAP
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_prot_patch_tsb_2
 tl1_dmmu_prot_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_prot_patch_tsb_mask_2
 tl1_dmmu_prot_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the hardware write bit.  TTE_SET_W is defined elsewhere; the
 	 * code below treats %g6 as the TTE data it read.
 	 */
 	.globl	tl1_dmmu_prot_patch_asi_1
 tl1_dmmu_prot_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_W(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 * TD_W is or'ed into the data in the delay slot.
 	 */
 	brgez,pn %g6, 1f
 	 or	%g6, TD_W, %g6
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl1_dmmu_prot_1)
 
 /*
  * Slow path of tl1_dmmu_prot_1: no usable kernel TTE was found.  Passes
  * the DMMU TAR, SFAR and SFSR to tl1_trap in %o3, %o4 and %o5 with type
  * T_DATA_PROTECTION | T_KERNEL.  The loads below rely on %asi still being
  * ASI_DMMU as set at the top of tl1_dmmu_prot_1.
  */
 ENTRY(tl1_dmmu_prot_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Load the SFAR, SFSR and TAR.  Clear the SFSR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_PROTECTION | T_KERNEL, %o0
 END(tl1_dmmu_prot_trap)
 
 	/*
 	 * TL>0 spill handler using plain 8-byte stx stores at %sp + SPOFF
 	 * (no alternate ASI), followed by saved/retry.  The two
 	 * RSF_FATAL(T_SPILL) entries after the .align fill the rest of the
 	 * table slot.
 	 */
 	.macro	tl1_spill_0_n
 	SPILL(stx, %sp + SPOFF, 8, EMPTY)
 	saved
 	retry
 	.align	32
 	RSF_FATAL(T_SPILL)
 	RSF_FATAL(T_SPILL)
 	.endm
 
 	/*
 	 * TL>0 spill handler using 8-byte stxa stores at %sp + SPOFF through
 	 * ASI_AIUP, followed by saved/retry.  The two RSF_SPILL_TOPCB
 	 * entries after the .align fill the rest of the table slot.
 	 */
 	.macro	tl1_spill_2_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL>0 spill handler using 4-byte stwa stores at %sp (no SPOFF bias)
 	 * through ASI_AIUP, followed by saved/retry.  Otherwise laid out like
 	 * tl1_spill_2_n.
 	 */
 	.macro	tl1_spill_3_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL>0 spill handler that selects the frame format from bit 0 of
 	 * %sp.  If the bit is set, branch to the tl1_spill_0_n table entry
 	 * (8-byte stores at %sp + SPOFF).  Otherwise clear the upper 32 bits
 	 * of %sp and spill with 4-byte stw stores at %sp.
 	 */
 	.macro	tl1_spill_7_n
 	btst	1, %sp
 	bnz,a,pn %xcc, tl1_spill_0_n
 	 nop
 	srl	%sp, 0, %sp
 	SPILL(stw, %sp, 4, EMPTY)
 	saved
 	retry
 	.align	32
 	RSF_FATAL(T_SPILL)
 	RSF_FATAL(T_SPILL)
 	.endm
 
 	/*
 	 * TL>0 "other" window spill handler using 8-byte stxa stores at
 	 * %sp + SPOFF through ASI_AIUP, followed by saved/retry.  The two
 	 * RSF_SPILL_TOPCB entries after the .align fill the rest of the
 	 * table slot.
 	 */
 	.macro	tl1_spill_0_o
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL>0 "other" window spill handler using 4-byte stwa stores at %sp
 	 * (no SPOFF bias) through ASI_AIUP, followed by saved/retry.
 	 * Otherwise laid out like tl1_spill_0_o.
 	 */
 	.macro	tl1_spill_1_o
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL>0 "other" window spill entry that consists only of
 	 * RSF_SPILL_TOPCB (defined elsewhere; presumably it transfers to
 	 * tl1_spill_topcb -- confirm), padded to a 128-byte boundary.
 	 */
 	.macro	tl1_spill_2_o
 	RSF_SPILL_TOPCB
 	.align	128
 	.endm
 
 	/*
 	 * TL>0 fill handler using plain 8-byte ldx loads at %sp + SPOFF (no
 	 * alternate ASI), followed by restored/retry.  The two
 	 * RSF_FATAL(T_FILL) entries after the .align fill the rest of the
 	 * table slot.
 	 */
 	.macro	tl1_fill_0_n
 	FILL(ldx, %sp + SPOFF, 8, EMPTY)
 	restored
 	retry
 	.align	32
 	RSF_FATAL(T_FILL)
 	RSF_FATAL(T_FILL)
 	.endm
 
 	/*
 	 * TL>0 fill handler using 8-byte ldxa loads at %sp + SPOFF through
 	 * ASI_AIUP, followed by restored/retry.  The two RSF_FILL_MAGIC
 	 * entries after the .align fill the rest of the table slot.
 	 */
 	.macro	tl1_fill_2_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(ldxa, %sp + SPOFF, 8, %asi)
 	restored
 	retry
 	.align 32
 	RSF_FILL_MAGIC
 	RSF_FILL_MAGIC
 	.endm
 
 	/*
 	 * TL>0 fill handler using 4-byte lduwa loads at %sp (no SPOFF bias)
 	 * through ASI_AIUP, followed by restored/retry.  Otherwise laid out
 	 * like tl1_fill_2_n.
 	 */
 	.macro	tl1_fill_3_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(lduwa, %sp, 4, %asi)
 	restored
 	retry
 	.align 32
 	RSF_FILL_MAGIC
 	RSF_FILL_MAGIC
 	.endm
 
 	/*
 	 * TL>0 fill handler that selects the frame format from bit 0 of %sp.
 	 * If the bit is set, branch to the tl1_fill_0_n table entry (8-byte
 	 * loads at %sp + SPOFF).  Otherwise clear the upper 32 bits of %sp
 	 * and fill with 4-byte lduw loads at %sp.
 	 */
 	.macro	tl1_fill_7_n
 	btst	1, %sp
 	bnz,a,pt %xcc, tl1_fill_0_n
 	 nop
 	srl	%sp, 0, %sp
 	FILL(lduw, %sp, 4, EMPTY)
 	restored
 	retry
 	.align	32
 	RSF_FATAL(T_FILL)
 	RSF_FATAL(T_FILL)
 	.endm
 
 /*
  * This is used to spill windows that are still occupied with user
  * data on kernel entry to the pcb.
  *
  * The window is stored in slot PCB_NSAVED of the PCB_RW array, its stack
  * pointer in the matching slot of PCB_RWSP, and PCB_NSAVED is incremented.
  * %g1-%g3 are preserved by saving them in a 24-byte area carved out below
  * ASP_REG.
  */
 ENTRY(tl1_spill_topcb)
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/* Free some globals for our use. */
 	dec	24, ASP_REG
 	stx	%g1, [ASP_REG + 0]
 	stx	%g2, [ASP_REG + 8]
 	stx	%g3, [ASP_REG + 16]
 
 	/* %g1 = number of windows already saved in the pcb. */
 	ldx	[PCB_REG + PCB_NSAVED], %g1
 
 	/* Record the stack pointer of this window in PCB_RWSP[%g1]. */
 	sllx	%g1, PTR_SHIFT, %g2
 	add	%g2, PCB_REG, %g2
 	stx	%sp, [%g2 + PCB_RWSP]
 
 	/* Spill the window itself into PCB_RW[%g1]. */
 	sllx	%g1, RW_SHIFT, %g2
 	add	%g2, PCB_REG, %g2
 	SPILL(stx, %g2 + PCB_RW, 8, EMPTY)
 
 	inc	%g1
 	stx	%g1, [PCB_REG + PCB_NSAVED]
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_spill_topcb: pc=%#lx npc=%#lx sp=%#lx nsaved=%d"
 	   , %g1, %g2, %g3, 7, 8, 9)
 	rdpr	%tpc, %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	rdpr	%tnpc, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	stx	%sp, [%g1 + KTR_PARM3]
 	ldx	[PCB_REG + PCB_NSAVED], %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 9:
 #endif
 
 	saved
 
 	/* Restore the borrowed globals and release the scratch area. */
 	ldx	[ASP_REG + 16], %g3
 	ldx	[ASP_REG + 8], %g2
 	ldx	[ASP_REG + 0], %g1
 	inc	24, ASP_REG
 	retry
 END(tl1_spill_topcb)
 
 	/*
 	 * Emit \count unused TL>0 spill slots.  Each slot is a single sir
 	 * instruction padded to a 128-byte boundary.
 	 */
 	.macro	tl1_spill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Emit \count unused TL>0 fill slots.  Each slot is a single sir
 	 * instruction padded to a 128-byte boundary.
 	 */
 	.macro	tl1_fill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Emit \count TL>0 software trap entries, each expanding tl1_gen
 	 * (defined elsewhere) with type T_SOFT | T_KERNEL.
 	 */
 	.macro	tl1_soft	count
 	.rept	\count
 	tl1_gen	T_SOFT | T_KERNEL
 	.endr
 	.endm
 
 	/*
 	 * Start of the trap table, placed in the .trap section and bracketed
 	 * by the global labels tl_trap_begin and tl_trap_end.
 	 */
 	.sect	.trap
 	.globl	tl_trap_begin
 tl_trap_begin:
 	nop
 
 	.align	0x8000
 	.globl	tl0_base
 
 /*
  * First half of the trap table, aligned to 0x8000 bytes.  The entries are
  * the tl0_* handler macros; each expands to a fixed-size slot, so their
  * order defines the layout.  The trailing ! comments give the table index
  * covered by each entry, and entries that jump by 4 occupy four slots.
  */
 tl0_base:
 	tl0_reserved	8				! 0x0-0x7
 tl0_insn_excptn:
 	tl0_insn_excptn					! 0x8
 	tl0_reserved	1				! 0x9
 tl0_insn_error:
 	tl0_gen		T_INSTRUCTION_ERROR		! 0xa
 	tl0_reserved	5				! 0xb-0xf
 tl0_insn_illegal:
 	tl0_gen		T_ILLEGAL_INSTRUCTION		! 0x10
 tl0_priv_opcode:
 	tl0_gen		T_PRIVILEGED_OPCODE		! 0x11
 	tl0_reserved	14				! 0x12-0x1f
 tl0_fp_disabled:
 	tl0_gen		T_FP_DISABLED			! 0x20
 tl0_fp_ieee:
 	tl0_gen		T_FP_EXCEPTION_IEEE_754		! 0x21
 tl0_fp_other:
 	tl0_gen		T_FP_EXCEPTION_OTHER		! 0x22
 tl0_tag_ovflw:
 	tl0_gen		T_TAG_OVERFLOW			! 0x23
 tl0_clean_window:
 	clean_window					! 0x24
 tl0_divide:
 	tl0_gen		T_DIVISION_BY_ZERO		! 0x28
 	tl0_reserved	7				! 0x29-0x2f
 tl0_data_excptn:
 	tl0_data_excptn					! 0x30
 	tl0_reserved	1				! 0x31
 tl0_data_error:
 	tl0_gen		T_DATA_ERROR			! 0x32
 	tl0_reserved	1				! 0x33
 tl0_align:
 	tl0_align					! 0x34
 tl0_align_lddf:
 	tl0_gen		T_RESERVED			! 0x35
 tl0_align_stdf:
 	tl0_gen		T_RESERVED			! 0x36
 tl0_priv_action:
 	tl0_gen		T_PRIVILEGED_ACTION		! 0x37
 	tl0_reserved	9				! 0x38-0x40
 tl0_intr_level:
 	tl0_intr_level					! 0x41-0x4f
 	tl0_reserved	16				! 0x50-0x5f
 tl0_intr_vector:
 	intr_vector					! 0x60
 tl0_watch_phys:
 	tl0_gen		T_PA_WATCHPOINT			! 0x61
 tl0_watch_virt:
 	tl0_gen		T_VA_WATCHPOINT			! 0x62
 tl0_ecc:
 	tl0_gen		T_CORRECTED_ECC_ERROR		! 0x63
 tl0_immu_miss:
 	tl0_immu_miss					! 0x64
 tl0_dmmu_miss:
 	tl0_dmmu_miss					! 0x68
 tl0_dmmu_prot:
 	tl0_dmmu_prot					! 0x6c
 	tl0_reserved	16				! 0x70-0x7f
 tl0_spill_0_n:
 	tl0_spill_0_n					! 0x80
 tl0_spill_1_n:
 	tl0_spill_1_n					! 0x84
 	tl0_spill_bad	14				! 0x88-0xbf
 tl0_fill_0_n:
 	tl0_fill_0_n					! 0xc0
 tl0_fill_1_n:
 	tl0_fill_1_n					! 0xc4
 	tl0_fill_bad	14				! 0xc8-0xff
 tl0_soft:
 	tl0_gen		T_SYSCALL			! 0x100
 	tl0_gen		T_BREAKPOINT			! 0x101
 	tl0_gen		T_DIVISION_BY_ZERO		! 0x102
 	tl0_reserved	1				! 0x103
 	tl0_gen		T_CLEAN_WINDOW			! 0x104
 	tl0_gen		T_RANGE_CHECK			! 0x105
 	tl0_gen		T_FIX_ALIGNMENT			! 0x106
 	tl0_gen		T_INTEGER_OVERFLOW		! 0x107
 	tl0_gen		T_SYSCALL			! 0x108
 	tl0_gen		T_SYSCALL			! 0x109
 	tl0_fp_restore					! 0x10a
 	tl0_reserved	5				! 0x10b-0x10f
 	tl0_gen		T_TRAP_INSTRUCTION_16		! 0x110
 	tl0_gen		T_TRAP_INSTRUCTION_17		! 0x111
 	tl0_gen		T_TRAP_INSTRUCTION_18		! 0x112
 	tl0_gen		T_TRAP_INSTRUCTION_19		! 0x113
 	tl0_gen		T_TRAP_INSTRUCTION_20		! 0x114
 	tl0_gen		T_TRAP_INSTRUCTION_21		! 0x115
 	tl0_gen		T_TRAP_INSTRUCTION_22		! 0x116
 	tl0_gen		T_TRAP_INSTRUCTION_23		! 0x117
 	tl0_gen		T_TRAP_INSTRUCTION_24		! 0x118
 	tl0_gen		T_TRAP_INSTRUCTION_25		! 0x119
 	tl0_gen		T_TRAP_INSTRUCTION_26		! 0x11a
 	tl0_gen		T_TRAP_INSTRUCTION_27		! 0x11b
 	tl0_gen		T_TRAP_INSTRUCTION_28		! 0x11c
 	tl0_gen		T_TRAP_INSTRUCTION_29		! 0x11d
 	tl0_gen		T_TRAP_INSTRUCTION_30		! 0x11e
 	tl0_gen		T_TRAP_INSTRUCTION_31		! 0x11f
 	tl0_reserved	32				! 0x120-0x13f
 	tl0_gen		T_SYSCALL			! 0x140
 	tl0_syscall					! 0x141
 	tl0_gen		T_SYSCALL			! 0x142
 	tl0_gen		T_SYSCALL			! 0x143
 	tl0_reserved	188				! 0x144-0x1ff
 
 /*
  * Second half of the trap table, following tl0_base directly.  The entries
  * are the tl1_* handler macros and mirror the tl0_base layout; the
  * trailing ! comments give the table index (0x200 and up) covered by each
  * entry.  tl_trap_end marks the end of the table.
  */
 tl1_base:
 	tl1_reserved	8				! 0x200-0x207
 tl1_insn_excptn:
 	tl1_insn_excptn					! 0x208
 	tl1_reserved	1				! 0x209
 tl1_insn_error:
 	tl1_gen		T_INSTRUCTION_ERROR		! 0x20a
 	tl1_reserved	5				! 0x20b-0x20f
 tl1_insn_illegal:
 	tl1_gen		T_ILLEGAL_INSTRUCTION		! 0x210
 tl1_priv_opcode:
 	tl1_gen		T_PRIVILEGED_OPCODE		! 0x211
 	tl1_reserved	14				! 0x212-0x21f
 tl1_fp_disabled:
 	tl1_fp_disabled					! 0x220
 tl1_fp_ieee:
 	tl1_gen		T_FP_EXCEPTION_IEEE_754		! 0x221
 tl1_fp_other:
 	tl1_gen		T_FP_EXCEPTION_OTHER		! 0x222
 tl1_tag_ovflw:
 	tl1_gen		T_TAG_OVERFLOW			! 0x223
 tl1_clean_window:
 	clean_window					! 0x224
 tl1_divide:
 	tl1_gen		T_DIVISION_BY_ZERO		! 0x228
 	tl1_reserved	7				! 0x229-0x22f
 tl1_data_excptn:
 	tl1_data_excptn					! 0x230
 	tl1_reserved	1				! 0x231
 tl1_data_error:
 	tl1_gen		T_DATA_ERROR			! 0x232
 	tl1_reserved	1				! 0x233
 tl1_align:
 	tl1_align					! 0x234
 tl1_align_lddf:
 	tl1_gen		T_RESERVED			! 0x235
 tl1_align_stdf:
 	tl1_gen		T_RESERVED			! 0x236
 tl1_priv_action:
 	tl1_gen		T_PRIVILEGED_ACTION		! 0x237
 	tl1_reserved	9				! 0x238-0x240
 tl1_intr_level:
 	tl1_intr_level					! 0x241-0x24f
 	tl1_reserved	16				! 0x250-0x25f
 tl1_intr_vector:
 	intr_vector					! 0x260
 tl1_watch_phys:
 	tl1_gen		T_PA_WATCHPOINT			! 0x261
 tl1_watch_virt:
 	tl1_gen		T_VA_WATCHPOINT			! 0x262
 tl1_ecc:
 	tl1_gen		T_CORRECTED_ECC_ERROR		! 0x263
 tl1_immu_miss:
 	tl1_immu_miss					! 0x264
 tl1_dmmu_miss:
 	tl1_dmmu_miss					! 0x268
 tl1_dmmu_prot:
 	tl1_dmmu_prot					! 0x26c
 	tl1_reserved	16				! 0x270-0x27f
 tl1_spill_0_n:
 	tl1_spill_0_n					! 0x280
 	tl1_spill_bad	1				! 0x284
 tl1_spill_2_n:
 	tl1_spill_2_n					! 0x288
 tl1_spill_3_n:
 	tl1_spill_3_n					! 0x28c
 	tl1_spill_bad	3				! 0x290-0x29b
 tl1_spill_7_n:
 	tl1_spill_7_n					! 0x29c
 tl1_spill_0_o:
 	tl1_spill_0_o					! 0x2a0
 tl1_spill_1_o:
 	tl1_spill_1_o					! 0x2a4
 tl1_spill_2_o:
 	tl1_spill_2_o					! 0x2a8
 	tl1_spill_bad	5				! 0x2ac-0x2bf
 tl1_fill_0_n:
 	tl1_fill_0_n					! 0x2c0
 	tl1_fill_bad	1				! 0x2c4
 tl1_fill_2_n:
 	tl1_fill_2_n					! 0x2c8
 tl1_fill_3_n:
 	tl1_fill_3_n					! 0x2cc
 	tl1_fill_bad	3				! 0x2d0-0x2db
 tl1_fill_7_n:
 	tl1_fill_7_n					! 0x2dc
 	tl1_fill_bad	8				! 0x2e0-0x2ff
 	tl1_reserved	1				! 0x300
 tl1_breakpoint:
 	tl1_gen		T_BREAKPOINT			! 0x301
 	tl1_gen		T_RSTRWP_PHYS			! 0x302
 	tl1_gen		T_RSTRWP_VIRT			! 0x303
 	tl1_reserved	252				! 0x304-0x3ff
 
 	.globl	tl_trap_end
 tl_trap_end:
 	nop
 
 /*
  * User trap entry point
  *
  * void tl0_utrap(u_long type, u_long o1, u_long o2, u_long tar, u_long sfar,
  *     u_long sfsr)
  *
  * This handles redirecting a trap back to usermode as a user trap.  The user
  * program must have first registered a trap handler with the kernel using
  * sysarch(SPARC_UTRAP_INSTALL).  The trap handler is passed enough state
  * for it to return to the trapping code directly, it will not return through
  * the kernel.  The trap type is passed in %o0, all out registers must be
  * passed through to tl0_trap or to usermode untouched.  Note that the
  * parameters passed in out registers may be used by the user trap handler.
  * Do not change the registers they are passed in or you will break the ABI.
  *
  * If the trap type allows user traps, setup state to execute the user trap
  * handler and bounce back to usermode, otherwise branch to tl0_trap.
  */
 ENTRY(tl0_utrap)
 	/*
 	 * Check if the trap type allows user traps.  Types at or above
 	 * UT_MAX go straight to tl0_trap.
 	 */
 	cmp	%o0, UT_MAX
 	bge,a,pt %xcc, tl0_trap
 	 nop
 
 	/*
 	 * Load the user trap handler from the utrap table.  A missing
 	 * table or a null entry for this trap type falls back to tl0_trap.
 	 * The table offset is computed in the delay slot of the first test.
 	 */
 	ldx	[PCPU(CURTHREAD)], %l0
 	ldx	[%l0 + TD_PROC], %l0
 	ldx	[%l0 + P_MD + MD_UTRAP], %l0
 	brz,pt	%l0, tl0_trap
 	 sllx	%o0, PTR_SHIFT, %l1
 	ldx	[%l0 + %l1], %l0
 	brz,a,pt %l0, tl0_trap
 	 nop
 
 	/*
 	 * If the save we did on entry to the kernel had to spill a window
 	 * to the pcb, pretend we took a spill trap instead.  Any windows
 	 * that are in the pcb must be copied out or the fill handler will
 	 * not be able to find them, since the user trap handler returns
 	 * directly to the trapping code.  Note that we only support precise
 	 * user traps, which implies that the condition that caused the trap
 	 * in the first place is still valid, so it will occur again when we
 	 * re-execute the trapping instruction.
 	 */
 	ldx	[PCB_REG + PCB_NSAVED], %l1
 	brnz,a,pn %l1, tl0_trap
 	 mov	T_SPILL, %o0
 
 	/*
 	 * Pass %fsr in %l4, %tstate in %l5, %tpc in %l6 and %tnpc in %l7.
 	 * The ABI specifies only %l6 and %l7, but we need to pass %fsr or
 	 * it may be clobbered by an interrupt before the user trap code
 	 * can read it, and we must pass %tstate in order to restore %ccr
 	 * and %asi.  The %fsr must be stored to memory, so we use the
 	 * temporary stack for that.  FPRS_FEF is set around the %fsr store
 	 * and the original %fprs is written back afterwards.
 	 */
 	rd	%fprs, %l1
 	or	%l1, FPRS_FEF, %l2
 	wr	%l2, 0, %fprs
 	dec	8, ASP_REG
 	stx	%fsr, [ASP_REG]
 	ldx	[ASP_REG], %l4
 	inc	8, ASP_REG
 	wr	%l1, 0, %fprs
 
 	rdpr	%tstate, %l5
 	rdpr	%tpc, %l6
 	rdpr	%tnpc, %l7
 
 	/*
 	 * Setup %tnpc to return to.  %l0 holds the user trap handler
 	 * address loaded from the utrap table above.
 	 */
 	wrpr	%l0, 0, %tnpc
 
 	/*
 	 * Setup %wstate for return, clear WSTATE_TRANSITION.
 	 */
 	rdpr	%wstate, %l1
 	and	%l1, WSTATE_NORMAL_MASK, %l1
 	wrpr	%l1, 0, %wstate
 
 	/*
 	 * Setup %tstate for return, change the saved cwp to point to the
 	 * current window instead of the window at the time of the trap.
 	 */
 	andn	%l5, TSTATE_CWP_MASK, %l1
 	rdpr	%cwp, %l2
 	wrpr	%l1, %l2, %tstate
 
 	/*
 	 * Setup %sp.  Userland processes will crash if this is not setup.
 	 */
 	sub	%fp, CCFSZ, %sp
 
 	/*
 	 * Execute the user trap handler.
 	 */
 	done
 END(tl0_utrap)
 
 /*
  * (Real) User trap entry point
  *
  * void tl0_trap(u_int type, u_long o1, u_long o2, u_long tar, u_long sfar,
  *     u_int sfsr)
  *
  * The following setup has been performed:
  *	- the windows have been split and the active user window has been saved
  *	  (maybe just to the pcb)
  *	- we are on alternate globals and interrupts are disabled
  *
  * We switch to the kernel stack, build a trapframe, switch to normal
  * globals, enable interrupts and call trap.
  *
  * NOTE: We must be very careful setting up the per-cpu pointer.  We know that
  * it has been pre-set in alternate globals, so we read it from there and setup
  * the normal %g7 *before* enabling interrupts.  This avoids any possibility
  * of cpu migration and using the wrong pcpup.
  */
 ENTRY(tl0_trap)
 	/*
 	 * Force kernel store order.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rd	%y, %l3
 	rd	%fprs, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP,
 	    "tl0_trap: td=%p type=%#x pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%l2, [%g1 + KTR_PARM5]
 	stx	%i6, [%g1 + KTR_PARM6]
 9:
 #endif
 
 1:	and	%l5, WSTATE_NORMAL_MASK, %l5
 	sllx	%l5, WSTATE_OTHER_SHIFT, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 	rdpr	%canrestore, %l6
 	wrpr	%l6, 0, %otherwin
 	wrpr	%g0, 0, %canrestore
 
 	sub	PCB_REG, SPOFF + CCFSZ + TF_SIZEOF, %sp
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o3, [%sp + SPOFF + CCFSZ + TF_TAR]
 	stx	%o4, [%sp + SPOFF + CCFSZ + TF_SFAR]
 	stx	%o5, [%sp + SPOFF + CCFSZ + TF_SFSR]
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_Y]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_FPRS]
 	stx	%l5, [%sp + SPOFF + CCFSZ + TF_WSTATE]
 
 	wr	%g0, FPRS_FEF, %fprs
 	stx	%fsr, [%sp + SPOFF + CCFSZ + TF_FSR]
 	rd	%gsr, %l6
 	stx	%l6, [%sp + SPOFF + CCFSZ + TF_GSR]
 	wr	%g0, 0, %fprs
 
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	set	tl0_ret - 8, %o7
 	jmpl	%o2, %g0
 	 add	%sp, CCFSZ + SPOFF, %o0
 END(tl0_trap)
 
 /*
  * void tl0_intr(u_int level, u_int mask)
  *
  * Interrupt entry point for interrupts taken from usermode.  Sets %pil to
  * level, clears the softint bits given in mask, builds a trapframe of type
  * T_INTERRUPT below the pcb, calls the intr_handlers[] entry for level with
  * a pointer to the trapframe, updates the interrupt counters and leaves
  * through tl0_ret.
  */
 ENTRY(tl0_intr)
 	/*
 	 * Force kernel store order.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/* Capture the trap state, %y, %fprs and %wstate in locals. */
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rd	%y, %l3
 	rd	%fprs, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR,
 	    "tl0_intr: td=%p level=%#x pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%l2, [%g1 + KTR_PARM5]
 	stx	%i6, [%g1 + KTR_PARM6]
 9:
 #endif
 
 	/* Set %pil to the interrupt level and clear the softint bits. */
 	wrpr	%o0, 0, %pil
 	wr	%o1, 0, %clear_softint
 
 	/*
 	 * Shift the normal window state into the other field of %wstate,
 	 * combine it with WSTATE_KERNEL and move the restorable windows from
 	 * %canrestore to %otherwin.
 	 */
 	and	%l5, WSTATE_NORMAL_MASK, %l5
 	sllx	%l5, WSTATE_OTHER_SHIFT, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 	rdpr	%canrestore, %l6
 	wrpr	%l6, 0, %otherwin
 	wrpr	%g0, 0, %canrestore
 
 	/* The trapframe (plus CCFSZ bytes of call frame) ends at PCB_REG. */
 	sub	PCB_REG, SPOFF + CCFSZ + TF_SIZEOF, %sp
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_Y]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_FPRS]
 	stx	%l5, [%sp + SPOFF + CCFSZ + TF_WSTATE]
 
 	/* Save %fsr and %gsr with FPRS_FEF set, then clear %fprs. */
 	wr	%g0, FPRS_FEF, %fprs
 	stx	%fsr, [%sp + SPOFF + CCFSZ + TF_FSR]
 	rd	%gsr, %l6
 	stx	%l6, [%sp + SPOFF + CCFSZ + TF_GSR]
 	wr	%g0, 0, %fprs
 
 	/*
 	 * Keep the level in %l3 for use after the handler call; %y has
 	 * already been stored from %l3 above.
 	 */
 	mov	%o0, %l3
 	mov	T_INTERRUPT, %o1
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_TYPE]
 
 	/*
 	 * Carry PCB_REG and PCPU_REG from the alternate globals to the normal
 	 * globals through locals, saving the previous normal globals in the
 	 * trapframe first.
 	 */
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	/* The ins of this window are saved as the trapframe's outs. */
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	/*
 	 * Load the handler at intr_handlers + (level << IH_SHIFT) and call it
 	 * with the trapframe pointer in %o0.
 	 */
 	SET(intr_handlers, %l1, %l0)
 	sllx	%l3, IH_SHIFT, %l1
 	ldx	[%l0 + %l1], %l1
 	KASSERT(%l1, "tl0_intr: ih null")
 	call	%l1
 	 add	%sp, CCFSZ + SPOFF, %o0
 
 	/*
 	 * Increment the 64-bit intrcnt slot whose index is the 16-bit
 	 * pil_countp entry for this level.
 	 */
 	/* %l3 contains PIL */
 	SET(intrcnt, %l1, %l2)
 	prefetcha [%l2] ASI_N, 1
 	SET(pil_countp, %l1, %l0)
 	sllx	%l3, 1, %l1
 	lduh	[%l0 + %l1], %l0
 	sllx	%l0, 3, %l0
 	add	%l0, %l2, %l0
 	ldx	[%l0], %l1
 	inc	%l1
 	stx	%l1, [%l0]
 
 	/* Increment the 32-bit per-cpu V_INTR counter. */
 	lduw	[PCPU(CNT) + V_INTR], %l0
 	inc	%l0
 	stw	%l0, [PCPU(CNT) + V_INTR]
 
 	ba,a	%xcc, tl0_ret
 	 nop
 END(tl0_intr)
 
 /*
  * Initiate return to usermode.
  *
  * Called with a trapframe on the stack.  The window that was setup in
  * tl0_trap may have been used by "fast" trap handlers that pretend to be
  * leaf functions, so all ins and locals may have been clobbered since
  * then.
  *
  * This code is rather long and complicated.
  *
  * Outline of the labels used below:
  *	tl0_ret			check for pending asts; call ast and restart
  *	1:			check the pcb for saved windows; call trap
  *				with T_SPILL and restart
  *	2:			reload registers from the trapframe and
  *				execute the restore
  *	tl0_ret_fill		code following a successful restore, ending
  *				in retry
  *	tl0_ret_fill_end	resumed at when the restore's fill failed;
  *				calls trap with T_FILL_RET and restarts
  */
 ENTRY(tl0_ret)
 	/*
 	 * Check for pending asts atomically with returning.  We must raise
 	 * the PIL before checking, and if no asts are found the PIL must
 	 * remain raised until the retry is executed, or we risk missing asts
 	 * caused by interrupts occurring after the test.  If the PIL is
 	 * lowered, as it is when we call ast, the check must be re-executed.
 	 */
 	wrpr	%g0, PIL_TICK, %pil
 	ldx	[PCPU(CURTHREAD)], %l0
 	lduw	[%l0 + TD_FLAGS], %l1
 	set	TDF_ASTPENDING | TDF_NEEDRESCHED, %l2
 	and	%l1, %l2, %l1
 	brz,a,pt %l1, 1f
 	 nop
 
 	/*
 	 * We have an AST.  Re-enable interrupts and handle it, then restart
 	 * the return sequence.
 	 */
 	wrpr	%g0, 0, %pil
 	call	ast
 	 add	%sp, CCFSZ + SPOFF, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 
 	/*
 	 * Check for windows that were spilled to the pcb and need to be
 	 * copied out.  This must be the last thing that is done before the
 	 * return to usermode.  If there are still user windows in the cpu
 	 * and we call a nested function after this, which causes them to be
 	 * spilled to the pcb, they will not be copied out and the stack will
 	 * be inconsistent.
 	 */
 1:	ldx	[PCB_REG + PCB_NSAVED], %l1
 	brz,a,pt %l1, 2f
 	 nop
 	wrpr	%g0, 0, %pil
 	mov	T_SPILL, %o0
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	call	trap
 	 add	%sp, SPOFF + CCFSZ, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 
 	/*
 	 * Restore the out and most global registers from the trapframe.
 	 * The ins will become the outs when we restore below.
 	 */
 2:	ldx	[%sp + SPOFF + CCFSZ + TF_O0], %i0
 	ldx	[%sp + SPOFF + CCFSZ + TF_O1], %i1
 	ldx	[%sp + SPOFF + CCFSZ + TF_O2], %i2
 	ldx	[%sp + SPOFF + CCFSZ + TF_O3], %i3
 	ldx	[%sp + SPOFF + CCFSZ + TF_O4], %i4
 	ldx	[%sp + SPOFF + CCFSZ + TF_O5], %i5
 	ldx	[%sp + SPOFF + CCFSZ + TF_O6], %i6
 	ldx	[%sp + SPOFF + CCFSZ + TF_O7], %i7
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	/*
 	 * Load everything we need to restore below before disabling
 	 * interrupts.
 	 */
 	ldx	[%sp + SPOFF + CCFSZ + TF_FPRS], %l0
 	ldx	[%sp + SPOFF + CCFSZ + TF_GSR], %l1
 	ldx	[%sp + SPOFF + CCFSZ + TF_TNPC], %l2
 	ldx	[%sp + SPOFF + CCFSZ + TF_TPC], %l3
 	ldx	[%sp + SPOFF + CCFSZ + TF_TSTATE], %l4
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l5
 	ldx	[%sp + SPOFF + CCFSZ + TF_WSTATE], %l6
 
 	/*
 	 * Disable interrupts to restore the special globals.  They are not
 	 * saved and restored for all kernel traps, so an interrupt at the
 	 * wrong time would clobber them.
 	 */
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G6], %g6
 	ldx	[%sp + SPOFF + CCFSZ + TF_G7], %g7
 
 	/*
 	 * Switch to alternate globals.  This frees up some registers we
 	 * can use after the restore changes our window.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Drop %pil to zero.  It must have been zero at the time of the
 	 * trap, since we were in usermode, but it was raised above in
 	 * order to check for asts atomically.  We have interrupts disabled
 	 * so any interrupts will not be serviced until we complete the
 	 * return to usermode.
 	 */
 	wrpr	%g0, 0, %pil
 
 	/*
 	 * Save %fprs in an alternate global so it can be restored after the
 	 * restore instruction below.  If we restore it before the restore,
 	 * and the restore traps we may run for a while with floating point
 	 * enabled in the kernel, which we want to avoid.
 	 */
 	mov	%l0, %g1
 
 	/*
 	 * Restore %fsr and %gsr.  These need floating point enabled in %fprs,
 	 * so we set it temporarily and then clear it.
 	 */
 	wr	%g0, FPRS_FEF, %fprs
 	ldx	[%sp + SPOFF + CCFSZ + TF_FSR], %fsr
 	wr	%l1, 0, %gsr
 	wr	%g0, 0, %fprs
 
 	/*
 	 * Restore program counters.  This could be done after the restore
 	 * but we're out of alternate globals to store them in...
 	 */
 	wrpr	%l2, 0, %tnpc
 	wrpr	%l3, 0, %tpc
 
 	/*
 	 * Save %tstate in an alternate global and clear the %cwp field.  %cwp
 	 * will be affected by the restore below and we need to make sure it
 	 * points to the current window at that time, not the window that was
 	 * active at the time of the trap.
 	 */
 	andn	%l4, TSTATE_CWP_MASK, %g2
 
 	/*
 	 * Save %y in an alternate global.
 	 */
 	mov	%l5, %g4
 
 	/*
 	 * Setup %wstate for return.  We need to restore the user window state
 	 * which we saved in wstate.other when we trapped.  We also need to
 	 * set the transition bit so the restore will be handled specially
 	 * if it traps, use the xor feature of wrpr to do that.
 	 */
 	srlx	%l6, WSTATE_OTHER_SHIFT, %g3
 	wrpr	%g3, WSTATE_TRANSITION, %wstate
 
 	/*
 	 * Setup window management registers for return.  If not all user
 	 * windows were spilled in the kernel %otherwin will be non-zero,
 	 * so we need to transfer it to %canrestore to correctly restore
 	 * those windows.  Otherwise everything gets set to zero and the
 	 * restore below will fill a window directly from the user stack.
 	 * %cleanwin is set to the same value as %canrestore.
 	 */
 	rdpr	%otherwin, %o0
 	wrpr	%o0, 0, %canrestore
 	wrpr	%g0, 0, %otherwin
 	wrpr	%o0, 0, %cleanwin
 
 	/*
 	 * Now do the restore.  If this instruction causes a fill trap which
 	 * fails to fill a window from the user stack, we will resume at
 	 * tl0_ret_fill_end and call back into the kernel.
 	 */
 	restore
 tl0_ret_fill:
 
 	/*
 	 * We made it.  We're back in the window that was active at the time
 	 * of the trap, and ready to return to usermode.
 	 */
 
 	/*
 	 * Restore %fprs.  This was saved in an alternate global above.
 	 */
 	wr	%g1, 0, %fprs
 
 	/*
 	 * Fixup %tstate so the saved %cwp points to the current window and
 	 * restore it.
 	 */
 	rdpr	%cwp, %g1
 	wrpr	%g2, %g1, %tstate
 
 	/*
 	 * Restore the user window state.  The transition bit was set above
 	 * for special handling of the restore, this clears it.
 	 */
 	wrpr	%g3, 0, %wstate
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%tpc, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	rdpr	%tnpc, %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	/*
 	 * Restore %y.  Note that the CATR above clobbered it.
 	 */
 	wr	%g4, 0, %y
 
 	/*
 	 * Return to usermode.
 	 */
 	retry
 tl0_ret_fill_end:
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl0_ret: fill magic ps=%#lx ws=%#lx sp=%#lx"
 	    , %l0, %l1, %l2, 7, 8, 9)
 	rdpr	%pstate, %l1
 	stx	%l1, [%l0 + KTR_PARM1]
 	stx	%l6, [%l0 + KTR_PARM2]
 	stx	%sp, [%l0 + KTR_PARM3]
 9:
 
 	/*
 	 * Restore %y clobbered by the CATR.  This was saved in %l5 above.
 	 */
 	wr	%l5, 0, %y
 #endif
 
 	/*
 	 * The restore above caused a fill trap and the fill handler was
 	 * unable to fill a window from the user stack.  The special fill
 	 * handler recognized this and punted, sending us here.  We need
 	 * to carefully undo any state that was restored before the restore
 	 * was executed and call trap again.  Trap will copyin a window
 	 * from the user stack which will fault in the page we need so the
 	 * restore above will succeed when we try again.  If this fails
 	 * the process has trashed its stack, so we kill it.
 	 */
 
 	/*
 	 * Restore the kernel window state.  This was saved in %l6 above, and
 	 * since the restore failed we're back in the same window.
 	 */
 	wrpr	%l6, 0, %wstate
 
 	/*
 	 * Restore the normal globals which have predefined values in the
 	 * kernel.  We clobbered them above restoring the user's globals
 	 * so this is very important.
 	 * XXX PSTATE_ALT must already be set.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 	mov	PCB_REG, %o0
 	mov	PCPU_REG, %o1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 	mov	%o0, PCB_REG
 	mov	%o1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	/*
 	 * Simulate a fill trap and then start the whole return sequence over
 	 * again.  This is special because it only copies in 1 window, not 2
 	 * as we would for a normal failed fill.  This may be the first time
 	 * the process has been run, so there may not be 2 windows worth of
 	 * stack to copyin.
 	 */
 	mov	T_FILL_RET, %o0
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	call	trap
 	 add	%sp, SPOFF + CCFSZ, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 END(tl0_ret)
 
 /*
  * Kernel trap entry point
  *
  * void tl1_trap(u_int type, u_long o1, u_long o2, u_long tar, u_long sfar,
  *     u_int sfsr)
  *
  * This is easy because the stack is already setup and the windows don't need
  * to be split.  We build a trapframe and call trap(), the same as above, but
  * the outs don't need to be saved.
  *
  * As in tl0_trap, %o0 is stored as the trap type, %o1 as the level and
  * %o3, %o4 and %o5 as tar, sfar and sfsr.  %o2 holds the address of the
  * handler that is jumped to with the trapframe pointer in %o0; %o7 is set
  * so that it returns to tl1_ret.
  */
 ENTRY(tl1_trap)
 	/* Capture the trap state, %pil, %y and %wstate in locals. */
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rdpr	%pil, %l3
 	rd	%y, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_trap: td=%p type=%#lx pil=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	stx	%l3, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%i6, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	/* Set the trap level to 1; tl1_ret sets it back to 2 before retry. */
 	wrpr	%g0, 1, %tl
 
 	/* Keep only the other field of %wstate, combined with WSTATE_KERNEL. */
 	and	%l5, WSTATE_OTHER_MASK, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 
 	/* Record the trap arguments. */
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o3, [%sp + SPOFF + CCFSZ + TF_TAR]
 	stx	%o4, [%sp + SPOFF + CCFSZ + TF_SFAR]
 	stx	%o5, [%sp + SPOFF + CCFSZ + TF_SFSR]
 
 	/* Record the state captured in the locals above. */
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_PIL]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_Y]
 
 	/*
 	 * Copy PCB_REG and PCPU_REG from the current globals to the normal
 	 * globals through locals, saving the previous normal %g6 and %g7 in
 	 * the trapframe first.
 	 */
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	/* The ins of this window are saved as the trapframe's outs. */
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	/*
 	 * Jump to the handler passed in %o2 with the trapframe pointer in
 	 * %o0.  %o7 is tl1_ret - 8 so that the handler's return lands on
 	 * tl1_ret.
 	 */
 	set	tl1_ret - 8, %o7
 	jmpl	%o2, %g0
 	 add	%sp, CCFSZ + SPOFF, %o0
 END(tl1_trap)
 
 /*
  * Return from a kernel trap.
  *
  * Reloads the ins, %g1-%g5 and the saved trap state from the trapframe at
  * %sp.  %g6 and %g7 are reloaded only when the saved %tpc is neither below
  * VM_MIN_PROM_ADDRESS nor above VM_MAX_PROM_ADDRESS.  Then, on alternate
  * globals, %pil is restored, the register window is restored, %tl is set
  * to 2, %tpc, %tnpc, %tstate (with the current %cwp) and %y are written
  * and the trapped instruction is retried.
  */
 ENTRY(tl1_ret)
 	ldx	[%sp + SPOFF + CCFSZ + TF_O0], %i0
 	ldx	[%sp + SPOFF + CCFSZ + TF_O1], %i1
 	ldx	[%sp + SPOFF + CCFSZ + TF_O2], %i2
 	ldx	[%sp + SPOFF + CCFSZ + TF_O3], %i3
 	ldx	[%sp + SPOFF + CCFSZ + TF_O4], %i4
 	ldx	[%sp + SPOFF + CCFSZ + TF_O5], %i5
 	ldx	[%sp + SPOFF + CCFSZ + TF_O6], %i6
 	ldx	[%sp + SPOFF + CCFSZ + TF_O7], %i7
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_TSTATE], %l0
 	ldx	[%sp + SPOFF + CCFSZ + TF_TPC], %l1
 	ldx	[%sp + SPOFF + CCFSZ + TF_TNPC], %l2
 	ldx	[%sp + SPOFF + CCFSZ + TF_PIL], %l3
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l4
 
 	/*
 	 * Skip the reload of %g6 and %g7 unless the saved %tpc lies within
 	 * the PROM address range.
 	 */
 	set	VM_MIN_PROM_ADDRESS, %l5
 	cmp	%l1, %l5
 	bl,a,pt	%xcc, 1f
 	 nop
 	set	VM_MAX_PROM_ADDRESS, %l5
 	cmp	%l1, %l5
 	bg,a,pt	%xcc, 1f
 	 nop
 
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G6], %g6
 	ldx	[%sp + SPOFF + CCFSZ + TF_G7], %g7
 
 	/*
 	 * Switch to alternate globals and stash %tstate (with the cwp field
 	 * cleared), %tpc, %tnpc and %y in them so that they are still
 	 * available after the restore changes the window.
 	 */
 1:	wrpr	%g0, PSTATE_ALT, %pstate
 
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
 	mov	%l4, %g4
 
 	wrpr	%l3, 0, %pil
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
 	/* Write back the trap state, merging the current %cwp into %tstate. */
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
 	rdpr	%cwp, %g2
 	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%tstate, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	rdpr	%tpc, %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	wr	%g4, 0, %y
 
 	retry
 END(tl1_ret)
 
 /*
  * void tl1_intr(u_int level, u_int mask)
  *
  * Interrupt entry point for interrupts taken in the kernel.  Sets %pil to
  * level, clears the softint bits given in mask, fills in a trapframe of
  * type T_INTERRUPT | T_KERNEL at the current %sp, calls the intr_handlers[]
  * entry for level with a pointer to the trapframe, updates the interrupt
  * counters and then returns directly with retry; it does not go through
  * tl1_ret.
  */
 ENTRY(tl1_intr)
 	/* Capture the trap state, %pil, %y and %wstate in locals. */
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rdpr	%pil, %l3
 	rd	%y, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR,
 	    "tl1_intr: td=%p level=%#x pil=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	stx	%l3, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%i6, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	/* Set %pil to the interrupt level and clear the softint bits. */
 	wrpr	%o0, 0, %pil
 	wr	%o1, 0, %clear_softint
 
 	wrpr	%g0, 1, %tl
 
 	/* Keep only the other field of %wstate, combined with WSTATE_KERNEL. */
 	and	%l5, WSTATE_OTHER_MASK, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_PIL]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_Y]
 
 	/* Keep the level in %l7 for use after the handler call. */
 	mov	%o0, %l7
 	mov	T_INTERRUPT | T_KERNEL, %o1
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_TYPE]
 
 	/* Of the ins, only %i6 and %i7 are recorded in the trapframe. */
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	/*
 	 * Copy PCB_REG and PCPU_REG from the current globals to the normal
 	 * globals through locals, saving the normal %g1-%g5 in the trapframe.
 	 */
 	mov	PCB_REG, %l4
 	mov	PCPU_REG, %l5
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	mov	%l4, PCB_REG
 	mov	%l5, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	/*
 	 * Load the handler at intr_handlers + (level << IH_SHIFT) and call it
 	 * with the trapframe pointer in %o0.
 	 */
 	SET(intr_handlers, %l5, %l4)
 	sllx	%l7, IH_SHIFT, %l5
 	ldx	[%l4 + %l5], %l5
 	KASSERT(%l5, "tl1_intr: ih null")
 	call	%l5
 	 add	%sp, CCFSZ + SPOFF, %o0
 
 	/*
 	 * Increment the 64-bit intrcnt slot whose index is the 16-bit
 	 * pil_countp entry for this level.
 	 */
 	/* %l7 contains PIL */
 	SET(intrcnt, %l5, %l4)
 	prefetcha [%l4] ASI_N, 1
 	SET(pil_countp, %l5, %l6)
 	sllx	%l7, 1, %l5
 	lduh	[%l5 + %l6], %l5
 	sllx	%l5, 3, %l5
 	add	%l5, %l4, %l4
 	ldx	[%l4], %l5
 	inc	%l5
 	stx	%l5, [%l4]
 
 	/* Increment the 32-bit per-cpu V_INTR counter. */
 	lduw	[PCPU(CNT) + V_INTR], %l4
 	inc	%l4
 	stw	%l4, [PCPU(CNT) + V_INTR]
 
 	/*
 	 * %l4 was reused as scratch above, so reload the saved %y from the
 	 * trapframe.  %l0-%l3 still hold the values read on entry.
 	 */
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l4
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	/*
 	 * Switch to alternate globals and stash %tstate (with the cwp field
 	 * cleared), %tpc, %tnpc and %y in them so that they are still
 	 * available after the restore changes the window.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
 	mov	%l4, %g4
 	wrpr	%l3, 0, %pil
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
 	/* Write back the trap state, merging the current %cwp into %tstate. */
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
 	rdpr	%cwp, %g2
 	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%tstate, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	rdpr	%tpc, %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	wr	%g4, 0, %y
 
 	retry
 END(tl1_intr)
 
 	.globl	tl_text_end
 tl_text_end:
 	nop
 
 /*
  * Freshly forked processes come here when switched to for the first time.
  * The arguments to fork_exit() have been setup in the locals, we must move
  * them to the outs.
  *
  * %l0, %l1 and %l2 become the first, second and third argument.  When
  * fork_exit() returns, control continues at tl0_ret.
  */
 ENTRY(fork_trampoline)
 #if KTR_COMPILE & KTR_PROC
 	CATR(KTR_PROC, "fork_trampoline: td=%p (%s) cwp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[%g2 + TD_PROC], %g2
 	add	%g2, P_COMM, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%cwp, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 9:
 #endif
 	mov	%l0, %o0
 	mov	%l1, %o1
 	/* The third argument is moved in the delay slot of the call. */
 	call	fork_exit
 	 mov	%l2, %o2
 	ba,a	%xcc, tl0_ret
 	 nop
 END(fork_trampoline)
Index: head/sys/sparc64/sparc64/stack_machdep.c
===================================================================
--- head/sys/sparc64/sparc64/stack_machdep.c	(revision 285626)
+++ head/sys/sparc64/sparc64/stack_machdep.c	(revision 285627)
@@ -1,88 +1,90 @@
 /*-
  * Copyright (c) 2005 Antoine Brodin
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/stack.h>
 #include <sys/systm.h>
 
 #include <machine/pcb.h>
 #include <machine/stack.h>
 #include <machine/vmparam.h>
 
 static void stack_capture(struct stack *st, struct frame *frame);
 
 static void
 stack_capture(struct stack *st, struct frame *frame)
 {
 	struct frame *fp;
 	vm_offset_t callpc;
 
 	stack_zero(st);
 	fp = frame;
 	for (;;) {
 		if (!INKERNEL((vm_offset_t)fp) ||
 		    !ALIGNED_POINTER(fp, uint64_t))
                         break;
 		callpc = fp->fr_pc;
 		if (!INKERNEL(callpc))
 			break;
 		/* Don't bother traversing trap frames. */
 		if ((callpc > (uint64_t)tl_trap_begin &&
 		    callpc < (uint64_t)tl_trap_end) ||
 		    (callpc > (uint64_t)tl_text_begin &&
 		    callpc < (uint64_t)tl_text_end))
 			break;
 		if (stack_put(st, callpc) == -1)
 			break;
 		if (v9next_frame(fp) <= fp ||
 		    v9next_frame(fp) >= frame + KSTACK_PAGES * PAGE_SIZE)
 			break;
 		fp = v9next_frame(fp);
 	}
 }
 
 /*
  * Capture the kernel stack of thread 'td' into 'st', starting at the frame
  * derived from the stack pointer saved in the thread's pcb.  Panics if the
  * thread is swapped out or currently running.
  */
 void
 stack_save_td(struct stack *st, struct thread *td)
 {
 	struct frame *top;
 
 	if (TD_IS_SWAPPED(td))
 		panic("stack_save_td: swapped");
 	if (TD_IS_RUNNING(td))
 		panic("stack_save_td: running");
 
 	/* The saved stack pointer needs SPOFF added to address the frame. */
 	top = (struct frame *)(td->td_pcb->pcb_sp + SPOFF);
 	stack_capture(st, top);
 }
 
 /*
  * Capture the current kernel stack into 'st', starting at the frame of
  * the caller of this function.
  */
 void
 stack_save(struct stack *st)
 {
 	struct frame *caller;
 
 	caller = (struct frame *)__builtin_frame_address(1);
 	stack_capture(st, caller);
 }
Index: head/sys/sparc64/sparc64/support.S
===================================================================
--- head/sys/sparc64/sparc64/support.S	(revision 285626)
+++ head/sys/sparc64/sparc64/support.S	(revision 285627)
@@ -1,960 +1,962 @@
 /*-
  * Copyright (c) 2001 Jake Burkholder.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kstack_pages.h"
+
 #include <sys/errno.h>
 
 #include <machine/asi.h>
 #include <machine/asmacros.h>
 #include <machine/fsr.h>
 #include <machine/intr_machdep.h>
 #include <machine/pcb.h>
 #include <machine/pstate.h>
 #include <machine/wstate.h>
 
 #include "assym.s"
 
 	.register %g2, #ignore
 	.register %g3, #ignore
 	.register %g6, #ignore
 
 /*
  * Common code for copy routines.
  *
  * We use large macros to generate functions for each of the copy routines.
  * This allows the load and store instructions to be generated for the right
  * operation, asi or not.  It is possible to write an asi independent function
  * but this would require 2 expensive wrs in the main loop to switch %asi.
  * It would also screw up profiling (if we ever get it), but may save some I$.
  * We assume that either one of dasi and sasi is empty, or that they are both
  * the same (empty or non-empty).  It is up to the caller to set %asi.
  */
 
 /*
  * ASI independent implementation of copystr(9).
  * Used to implement copyinstr() and copystr().
  *
  * Copies bytes from src to dst until a NUL byte has been copied or len
  * bytes have been transferred.  If done is non-zero, the number of bytes
  * copied (including the NUL, if one was copied) is stored through it.
  *
  * Return value is in %g1: 0 if a NUL byte was copied, ENAMETOOLONG if len
  * was exhausted first (this includes len == 0).
  *
  * Both "out of space" exits go through label 2 so that %g1 is always
  * defined; branching straight to label 3 would return the last byte
  * copied, and branching to label 4 would return whatever %g1 held before.
  *
  * Clobbers %g1 and %g2 and modifies src, dst and len.
  */
 #define	_COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \
 	brz	len, 2f ; \
 	 mov	src, %g2 ; \
 1:	deccc	1, len ; \
 	bl,a,pn	%xcc, 2f ; \
 	 nop ; \
 	LD(ub, sa) [src] sasi, %g1 ; \
 	ST(b, da) %g1, [dst] dasi ; \
 	brz,pn	%g1, 3f ; \
 	 inc	src ; \
 	ba	%xcc, 1b ; \
 	 inc	dst ; \
 2:	mov	ENAMETOOLONG, %g1 ; \
 3:	sub	src, %g2, %g2 ; \
 	brnz,a	done, 4f ; \
 	 stx	%g2, [done] ; \
 4:
 
 /*
  * ASI independent implementation of memset(3).
  * Used to implement bzero(), memset() and aszero().
  *
  * If the pattern is non-zero, duplicate it to fill 64 bits.
  * Store bytes until dst is 8-byte aligned, then store 8 bytes.
  * It has yet to be determined how much unrolling is beneficial.
  * Could also read and compare before writing to minimize snoop traffic.
  *
  * Nothing is stored when len is zero or negative.  Only the low 8 bits of
  * pat are used.  The local labels are:
  *	1:	store single bytes until dst is 8-byte aligned
  *	2:	store 32 bytes per iteration
  *	3:	store 8 bytes per iteration
  *	4:	store the remaining single bytes
  *	5:	exit
  * Uses %g1 as scratch and modifies dst, pat and len.
  *
  * XXX bzero() should be implemented as
  * #define bzero(dst, len) (void)memset((dst), 0, (len))
  * if at all.
  */
 #define	_MEMSET(dst, pat, len, da, dasi) \
 	brlez,pn len, 5f ; \
 	 and	pat, 0xff, pat ; \
 	brz,pt	pat, 1f ; \
 	 sllx	pat, 8, %g1 ; \
 	or	pat, %g1, pat ; \
 	sllx	pat, 16, %g1 ; \
 	or	pat, %g1, pat ; \
 	sllx	pat, 32, %g1 ; \
 	or	pat, %g1, pat ; \
 	.align 16 ; \
 1:	deccc	1, len ; \
 	bl,pn	%xcc, 5f ; \
 	 btst	7, dst ; \
 	bz,a,pt	%xcc, 2f ; \
 	 inc	1, len ; \
 	ST(b, da) pat, [dst] dasi ; \
 	ba	%xcc, 1b ; \
 	 inc	dst ; \
 	.align 16 ; \
 2:	deccc	32, len ; \
 	bl,a,pn	%xcc, 3f ; \
 	 inc	32, len ; \
 	ST(x, da) pat, [dst] dasi ; \
 	ST(x, da) pat, [dst + 8] dasi ; \
 	ST(x, da) pat, [dst + 16] dasi ; \
 	ST(x, da) pat, [dst + 24] dasi ; \
 	ba	%xcc, 2b ; \
 	 inc	32, dst ; \
 	.align 16 ; \
 3:	deccc	8, len ; \
 	bl,a,pn	%xcc, 4f ; \
 	 inc	8, len ; \
 	ST(x, da) pat, [dst] dasi ; \
 	ba	%xcc, 3b ; \
 	 inc	8, dst ; \
 	.align 16 ; \
 4:	deccc	1, len ; \
 	bl,a,pn	%xcc, 5f ; \
 	 nop ; \
 	ST(b, da) pat, [dst] dasi ; \
 	ba	%xcc, 4b ; \
 	 inc	1, dst ; \
 5:
 
 /*
  * ASI independent implementation of memcpy(3).
  * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
  * ascopyfrom() and ascopyto().
  *
  * Transfer bytes until dst is 8-byte aligned.  If src is then also 8 byte
  * aligned, transfer 8 bytes, otherwise finish with bytes.  The unaligned
  * case could be optimized, but it is expected that this is the uncommon
  * case and of questionable value.  The code to do so is also rather large
  * and ugly.  It has yet to be determined how much unrolling is beneficial.
  *
  * The copy always proceeds from low to high addresses.  The local labels
  * are:
  *	1:	copy single bytes until dst is 8-byte aligned
  *	2:	go to 3 if src is 8-byte aligned as well, otherwise to 5
  *	3:	copy 32 bytes per iteration
  *	4:	copy 8 bytes per iteration
  *	5:	copy the remaining single bytes
  *	6:	exit
  * Uses %g1-%g4 as scratch and modifies dst, src and len.
  *
  * XXX bcopy() must also check for overlap.  This is stupid.
  * XXX bcopy() should be implemented as
  * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
  * if at all.
  */
 #define	_MEMCPY(dst, src, len, da, dasi, sa, sasi) \
 1:	deccc	1, len ; \
 	bl,pn	%xcc, 6f ; \
 	 btst	7, dst ; \
 	bz,a,pt	%xcc, 2f ; \
 	 inc	1, len ; \
 	LD(ub, sa) [src] sasi, %g1 ; \
 	ST(b, da) %g1, [dst] dasi ; \
 	inc	1, src ; \
 	ba	%xcc, 1b ; \
 	 inc	1, dst ; \
 	.align 16 ; \
 2:	btst	7, src ; \
 	bz,a,pt	%xcc, 3f ; \
 	 nop ; \
 	ba,a	%xcc, 5f ; \
 	.align 16 ; \
 3:	deccc	32, len ; \
 	bl,a,pn	%xcc, 4f ; \
 	 inc	32, len ; \
 	LD(x, sa) [src] sasi, %g1 ; \
 	LD(x, sa) [src + 8] sasi, %g2 ; \
 	LD(x, sa) [src + 16] sasi, %g3 ; \
 	LD(x, sa) [src + 24] sasi, %g4 ; \
 	ST(x, da) %g1, [dst] dasi ; \
 	ST(x, da) %g2, [dst + 8] dasi ; \
 	ST(x, da) %g3, [dst + 16] dasi ; \
 	ST(x, da) %g4, [dst + 24] dasi ; \
 	inc	32, src ; \
 	ba	%xcc, 3b ; \
 	 inc	32, dst ; \
 	.align 16 ; \
 4:	deccc	8, len ; \
 	bl,a,pn	%xcc, 5f ; \
 	 inc	8, len ; \
 	LD(x, sa) [src] sasi, %g1 ; \
 	ST(x, da) %g1, [dst] dasi ; \
 	inc	8, src ; \
 	ba	%xcc, 4b ; \
 	 inc	8, dst ; \
 	.align 16 ; \
 5:	deccc	1, len ; \
 	bl,a,pn	%xcc, 6f ; \
 	 nop ; \
 	LD(ub, sa) [src] sasi, %g1 ; \
 	ST(b, da) %g1, [dst] dasi ; \
 	inc	src ; \
 	ba	%xcc, 5b ; \
 	 inc	dst ; \
 6:
 
 /*
  * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
  *
  * Copy len bytes from src to dst, with both the loads and the stores
  * going through the address space identifier asi, which is written to
  * %asi first.
  */
 ENTRY(ascopy)
 	wr	%o0, 0, %asi
 	_MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
 	retl
 	 nop
 END(ascopy)
 
 /*
  * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
  *
  * Copy len bytes from src to dst.  Only the loads from src go through
  * the address space identifier sasi, which is written to %asi first; the
  * stores to dst are plain stores.
  */
 ENTRY(ascopyfrom)
 	wr	%o0, 0, %asi
 	_MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
 	retl
 	 nop
 END(ascopyfrom)
 
 /*
  * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
  *
  * Copy len bytes from src to dst.  Only the stores to dst go through the
  * address space identifier dasi, which is written to %asi first; the
  * loads from src are plain loads.
  */
 ENTRY(ascopyto)
 	wr	%o1, 0, %asi
 	_MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
 	retl
 	 nop
 END(ascopyto)
 
 /*
  * void aszero(u_long asi, vm_offset_t pa, size_t len)
  *
  * Zero len bytes starting at pa, with the stores going through the
  * address space identifier asi, which is written to %asi first.  %g0 is
  * passed as the fill pattern.
  */
 ENTRY(aszero)
 	wr	%o0, 0, %asi
 	_MEMSET(%o1, %g0, %o2, a, %asi)
 	retl
 	 nop
 END(aszero)
 
 /*
  * int bcmp(const void *b1, const void *b2, size_t len)
  *
  * Compare len bytes at b1 and b2 one byte at a time.  Returns 0 if len is
  * 0 or all bytes are equal.  Otherwise the return value is the number of
  * bytes that had not yet been found equal when the first difference was
  * seen, which is non-zero.
  */
 ENTRY(bcmp)
 	/* %o3 is the running byte offset, %o2 the remaining count. */
 	brz,pn	%o2, 2f
 	 clr	%o3
 1:	ldub	[%o0 + %o3], %o4
 	ldub	[%o1 + %o3], %o5
 	cmp	%o4, %o5
 	bne,pn	%xcc, 2f
 	 inc	%o3
 	deccc	%o2
 	bne,pt	%xcc, 1b
 	 nop
 	/* The remaining count is the return value. */
 2:	retl
 	 mov	%o2, %o0
 END(bcmp)
 
 /*
  * void bcopy(const void *src, void *dst, size_t len)
  *
  * Copy len bytes from src to dst.  The two regions may overlap.
  *
  * In:	%o0 = src, %o1 = dst, %o2 = len
  */
 ENTRY(bcopy)
 	/*
 	 * Check for overlap, and copy backwards if so.  The forward copy
 	 * is taken when (dst - src), compared as an unsigned value, is at
 	 * least len.
 	 */
 	sub	%o1, %o0, %g1
 	cmp	%g1, %o2
 	bgeu,a,pt %xcc, 3f
 	 nop
 
 	/*
 	 * Copy backwards, one byte at a time, starting with the last byte
 	 * of each region.
 	 */
 	add	%o0, %o2, %o0
 	add	%o1, %o2, %o1
 1:	deccc	1, %o2
 	bl,a,pn	%xcc, 2f
 	 nop
 	dec	1, %o0
 	ldub	[%o0], %g1
 	dec	1, %o1
 	ba	%xcc, 1b
 	 stb	%g1, [%o1]
 2:	retl
 	 nop
 
 	/*
 	 * Do the fast version.
 	 */
 3:	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
 	retl
 	 nop
 END(bcopy)
 
 /*
  * void bzero(void *b, size_t len)
  *
  * Zero len bytes starting at b by handing %g0 to _MEMSET as the fill
  * value.
  * NOTE(review): _MEMSET is defined outside this view -- confirm its
  * argument roles.
  *
  * In:	%o0 = b, %o1 = len
  */
 ENTRY(bzero)
 	_MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
 	retl
 	 nop
 END(bzero)
 
 /*
  * int copystr(const void *src, void *dst, size_t len, size_t *done)
  *
  * String copy between two addresses, both passed to _COPYSTR with EMPTY
  * suffix and ASI operands.  The value returned is whatever _COPYSTR
  * leaves in %g1.
  * NOTE(review): _COPYSTR is defined outside this view; presumably %g1
  * holds the errno-style status and *done receives the byte count --
  * confirm.
  *
  * In:	%o0 = src, %o1 = dst, %o2 = len, %o3 = done
  */
 ENTRY(copystr)
 	_COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
 	retl
 	 mov	%g1, %o0
 END(copystr)
 
 /*
  * void *memcpy(void *dst, const void *src, size_t len)
  *
  * Copy len bytes from src to dst and return dst.
  *
  * In:	%o0 = dst, %o1 = src, %o2 = len
  */
 ENTRY(memcpy)
 	/*
 	 * _MEMCPY advances its dst operand, so work on a copy in %o3 and
 	 * leave %o0 intact as the return value.
 	 */
 	mov	%o0, %o3
 	_MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
 	retl
 	 nop
 END(memcpy)
 
 /*
  * void *memset(void *b, int c, size_t len)
  *
  * Fill len bytes at b using c as the pattern operand of _MEMSET.  %o0 is
  * not written, so b is returned.
  * NOTE(review): _MEMSET is defined outside this view; presumably it
  * modifies its destination operand, hence the copy in %o3 -- confirm.
  *
  * In:	%o0 = b, %o1 = c, %o2 = len
  */
 ENTRY(memset)
 	mov	%o0, %o3
 	_MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
 	retl
 	 nop
 END(memset)
 
 /*
  * copy_nofault_begin and copy_nofault_end bracket copyin(), copyinstr()
  * and copyout().
  * NOTE(review): presumably the trap code resumes at copy_fault when a
  * fault is taken inside this range -- confirm against the trap handler.
  */
 	.globl	copy_nofault_begin
 copy_nofault_begin:
 	nop
 
 /*
  * int copyin(const void *uaddr, void *kaddr, size_t len)
  *
  * Copy len bytes from uaddr to kaddr.  The loads from uaddr are issued
  * with %asi set to ASI_AIUP; the stores to kaddr are passed EMPTY for the
  * suffix and ASI operand.  Returns 0 when the copy runs to completion.
  *
  * In:	%o0 = uaddr, %o1 = kaddr, %o2 = len
  */
 ENTRY(copyin)
 	wr	%g0, ASI_AIUP, %asi
 	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
 	retl
 	 clr	%o0
 END(copyin)
 
 /*
  * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
  *
  * String copy from uaddr to kaddr.  The source side of _COPYSTR is given
  * the "a" suffix with %asi set to ASI_AIUP; the destination side is passed
  * EMPTY.  The value returned is whatever _COPYSTR leaves in %g1.
  * NOTE(review): _COPYSTR is defined outside this view -- confirm the
  * meaning of %g1 and of *done.
  *
  * In:	%o0 = uaddr, %o1 = kaddr, %o2 = len, %o3 = done
  */
 ENTRY(copyinstr)
 	wr	%g0, ASI_AIUP, %asi
 	_COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
 	retl
 	 mov	%g1, %o0
 END(copyinstr)
 
 /*
  * int copyout(const void *kaddr, void *uaddr, size_t len)
  *
  * Copy len bytes from kaddr to uaddr.  The stores to uaddr are issued
  * with %asi set to ASI_AIUP; the loads from kaddr are passed EMPTY for the
  * suffix and ASI operand.  Returns 0 when the copy runs to completion.
  *
  * In:	%o0 = kaddr, %o1 = uaddr, %o2 = len
  */
 ENTRY(copyout)
 	wr	%g0, ASI_AIUP, %asi
 	_MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
 	retl
 	 clr	%o0
 END(copyout)
 
 /*
  * End marker of the range opened by copy_nofault_begin.
  */
 	.globl	copy_nofault_end
 copy_nofault_end:
 	nop
 
 /*
  * copy_fault: return EFAULT to the caller in %o7.
  * NOTE(review): presumably entered from the trap code on behalf of one of
  * the routines in the copy_nofault range, so that the routine appears to
  * return EFAULT -- confirm.
  */
 ENTRY(copy_fault)
 	retl
 	 mov	EFAULT, %o0
 END(copy_fault)
 
 /*
  * fs_nofault_begin and fs_nofault_end bracket the fetch and store routines
  * that follow (casuword*, fuword*, suword*, fuswintr and suswintr).
  * NOTE(review): presumably the trap code resumes at fs_fault when a fault
  * is taken inside this range -- confirm against the trap handler.
  */
 	.globl	fs_nofault_begin
 fs_nofault_begin:
 	nop
 
 /*
  * Chatty aliases for fetch, store functions.
  * Each .set makes the traditional name a synonym for one of the sized
  * routines; the unsized word names and the pointer names all map to the
  * 64-bit variants.
  */
 	.globl	fubyte, fusword, fuword, subyte, susword, suword
 	.set	fubyte, fuword8
 	.set	fusword, fuword16
 	.set	fuword, fuword64
 	.set	subyte, suword8
 	.set	susword, suword16
 	.set	suword, suword64
 
 	.globl	casuword32, casuword, fuptr, suptr
 	.set	casuword, casuword64
 	.set	fuptr, fuword64
 	.set	suptr, suword64
 
 /*
  * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
  *
  * 32-bit compare-and-swap on *p, accessed with ASI_AIUP: when *p equals e
  * it is replaced by s.  casa leaves the previous contents of *p in %o2,
  * and that value is returned.
  *
  * In:	%o0 = p, %o1 = e, %o2 = s
  */
 ENTRY(casuword32)
 	casa	[%o0] ASI_AIUP, %o1, %o2
 	retl
 	 mov	%o2, %o0
 END(casuword32)
 
 /*
  * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
  *
  * 64-bit compare-and-swap on *p, accessed with ASI_AIUP: when *p equals e
  * it is replaced by s.  casxa leaves the previous contents of *p in %o2,
  * and that value is returned.
  *
  * In:	%o0 = p, %o1 = e, %o2 = s
  */
 ENTRY(casuword64)
 	casxa	[%o0] ASI_AIUP, %o1, %o2
 	retl
 	 mov	%o2, %o0
 END(casuword64)
 
 /*
  * int fuword8(const void *base)
  *
  * Load one unsigned byte from base using ASI_AIUP and return it.  The
  * load sits in the delay slot of retl.
  *
  * In:	%o0 = base
  */
 ENTRY(fuword8)
 	retl
 	 lduba	[%o0] ASI_AIUP, %o0
 END(fuword8)
 
 /*
  * int fuword16(const void *base)
  *
  * Load one unsigned halfword from base using ASI_AIUP and return it.  The
  * load sits in the delay slot of retl.
  *
  * In:	%o0 = base
  */
 ENTRY(fuword16)
 	retl
 	 lduha	[%o0] ASI_AIUP, %o0
 END(fuword16)
 
 /*
  * int32_t fuword32(const void *base)
  *
  * Load one unsigned 32-bit word from base using ASI_AIUP and return it.
  * The load sits in the delay slot of retl.
  *
  * In:	%o0 = base
  */
 ENTRY(fuword32)
 	retl
 	 lduwa	[%o0] ASI_AIUP, %o0
 END(fuword32)
 
 /*
  * int64_t fuword64(const void *base)
  *
  * Load one 64-bit word from base using ASI_AIUP and return it.  The load
  * sits in the delay slot of retl.  fuword and fuptr are aliases of this
  * routine.
  *
  * In:	%o0 = base
  */
 ENTRY(fuword64)
 	retl
 	 ldxa	[%o0] ASI_AIUP, %o0
 END(fuword64)
 
 /*
  * int suword8(const void *base, int word)
  *
  * Store the low byte of word at base using ASI_AIUP, then return 0.
  *
  * In:	%o0 = base, %o1 = word
  */
 ENTRY(suword8)
 	stba	%o1, [%o0] ASI_AIUP
 	retl
 	 clr	%o0
 END(suword8)
 
 /*
  * int suword16(const void *base, int word)
  *
  * Store the low halfword of word at base using ASI_AIUP, then return 0.
  *
  * In:	%o0 = base, %o1 = word
  */
 ENTRY(suword16)
 	stha	%o1, [%o0] ASI_AIUP
 	retl
 	 clr	%o0
 END(suword16)
 
 /*
  * int suword32(const void *base, int32_t word)
  *
  * Store the 32-bit word at base using ASI_AIUP, then return 0.
  *
  * In:	%o0 = base, %o1 = word
  */
 ENTRY(suword32)
 	stwa	%o1, [%o0] ASI_AIUP
 	retl
 	 clr	%o0
 END(suword32)
 
 /*
  * int suword64(const void *base, int64_t word)
  *
  * Store the 64-bit word at base using ASI_AIUP, then return 0.  suword
  * and suptr are aliases of this routine.
  *
  * In:	%o0 = base, %o1 = word
  */
 ENTRY(suword64)
 	stxa	%o1, [%o0] ASI_AIUP
 	retl
 	 clr	%o0
 END(suword64)
 
 /*
  * fs_nofault_intr_begin and fs_nofault_intr_end bracket fuswintr() and
  * suswintr(); this sub-range lies inside the fs_nofault range.
  * NOTE(review): presumably the trap code treats faults in this sub-range
  * specially because these routines may run from interrupt context --
  * confirm against the trap handler.
  */
 	.globl	fs_nofault_intr_begin
 fs_nofault_intr_begin:
 	nop
 
 /*
  * int fuswintr(const void *base)
  *
  * Load one unsigned halfword from base using ASI_AIUP and return it.  The
  * instructions are the same as in fuword16(); only the placement inside
  * the fs_nofault_intr range differs.
  *
  * In:	%o0 = base
  */
 ENTRY(fuswintr)
 	retl
 	 lduha	[%o0] ASI_AIUP, %o0
 END(fuswintr)
 
 /*
  * int suswintr(const void *base, int word)
  *
  * Store the low halfword of word at base using ASI_AIUP, then return 0.
  * The instructions are the same as in suword16(); only the placement
  * inside the fs_nofault_intr range differs.
  *
  * In:	%o0 = base, %o1 = word
  */
 ENTRY(suswintr)
 	stha	%o1, [%o0] ASI_AIUP
 	retl
 	 clr	%o0
 END(suswintr)
 
 /*
  * End markers of the fs_nofault_intr sub-range and of the enclosing
  * fs_nofault range.
  */
 	.globl	fs_nofault_intr_end
 fs_nofault_intr_end:
 	nop
 
 	.globl	fs_nofault_end
 fs_nofault_end:
 	nop
 
 /*
  * fs_fault: return -1 to the caller in %o7.
  * NOTE(review): presumably entered from the trap code on behalf of one of
  * the routines in the fs_nofault range -- confirm.
  */
 ENTRY(fs_fault)
 	retl
 	 mov	-1, %o0
 END(fs_fault)
 
 /*
  * fas_nofault_begin and fas_nofault_end bracket fasword8(), fasword16()
  * and fasword32().  Unlike the other begin markers in this file, no nop
  * follows this label, so it has the same address as fasword8.
  * NOTE(review): presumably the trap code resumes at fas_fault when a
  * fault is taken inside this range -- confirm.
  */
 	.globl	fas_nofault_begin
 fas_nofault_begin:
 
 /*
  * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
  *
  * Load one unsigned byte from addr through the address space identifier
  * passed in asi, store it to *val with an ordinary store, and return 0.
  * The alternate-space load is fenced by membar #Sync on both sides.
  *
  * In:	%o0 = asi, %o1 = addr, %o2 = val
  */
 ENTRY(fasword8)
 	wr	%o0, 0, %asi
 	membar	#Sync
 	lduba	[%o1] %asi, %o3
 	membar	#Sync
 	stb	%o3, [%o2]
 	retl
 	 clr	%o0
 END(fasword8)
 
 /*
  * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
  *
  * Load one unsigned halfword from addr through the address space
  * identifier passed in asi, store it to *val with an ordinary store, and
  * return 0.  The alternate-space load is fenced by membar #Sync on both
  * sides.
  *
  * In:	%o0 = asi, %o1 = addr, %o2 = val
  */
 ENTRY(fasword16)
 	wr	%o0, 0, %asi
 	membar	#Sync
 	lduha	[%o1] %asi, %o3
 	membar	#Sync
 	sth	%o3, [%o2]
 	retl
 	 clr	%o0
 END(fasword16)
 
 /*
  * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
  *
  * Load one unsigned 32-bit word from addr through the address space
  * identifier passed in asi, store it to *val with an ordinary store, and
  * return 0.  The alternate-space load is fenced by membar #Sync on both
  * sides.
  *
  * In:	%o0 = asi, %o1 = addr, %o2 = val
  */
 ENTRY(fasword32)
 	wr	%o0, 0, %asi
 	membar	#Sync
 	lduwa	[%o1] %asi, %o3
 	membar	#Sync
 	stw	%o3, [%o2]
 	retl
 	 clr	%o0
 END(fasword32)
 
 /*
  * End marker of the range opened by fas_nofault_begin.
  */
 	.globl	fas_nofault_end
 fas_nofault_end:
 	nop
 
 /*
  * fas_fault: return -1 to the caller in %o7.
  * NOTE(review): presumably entered from the trap code on behalf of one of
  * the fasword*() routines -- confirm.
  */
 	.globl	fas_fault
 ENTRY(fas_fault)
 	retl
 	 mov	-1, %o0
 END(fas_fault)
 
 /*
  * fpu_fault_begin and fpu_fault_end bracket the block copy and block zero
  * routines, all of which use the floating point registers; fpu_fault_size
  * is the size of that range.
  * NOTE(review): how the trap code uses this range is not visible here --
  * confirm against the trap handler.
  */
 	.globl	fpu_fault_begin
 fpu_fault_begin:
 	nop
 
 /*
  * void spitfire_block_copy(void *src, void *dst, size_t len)
  *
  * Copy len bytes from src to dst with block loads and stores through
  * ASI_BLK_S, VIS_BLOCKSIZE bytes at a time, staging the data in the
  * floating point registers.
  *
  * If FPRS_FEF is set in the TF_FPRS slot found at PCB_REG - TF_SIZEOF,
  * the register blocks starting at %f0, %f16, %f32 and %f48 are first
  * saved to PCB_UFP, the bit is cleared in that slot and PCB_FEF is set in
  * the pcb flags.  %fprs is cleared again on return.
  *
  * NOTE(review): the loop tests for completion only after every second
  * block, so len apparently has to be a non-zero even multiple of
  * VIS_BLOCKSIZE -- confirm against the callers.
  *
  * In:	%o0 = src, %o1 = dst, %o2 = len
  */
 ENTRY(spitfire_block_copy)
 	/* Run the FP state check below with %pstate = PSTATE_NORMAL. */
 	rdpr	%pstate, %o3
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	wr	%g0, ASI_BLK_S, %asi
 	wr	%g0, FPRS_FEF, %fprs
 
 	/* Skip the save when FPRS_FEF is clear in the saved TF_FPRS. */
 	sub	PCB_REG, TF_SIZEOF, %o4
 	ldx	[%o4 + TF_FPRS], %o5
 	andcc	%o5, FPRS_FEF, %g0
 	bz,a,pt	%xcc, 1f
 	 nop
 	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
 	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
 	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
 	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
 	membar	#Sync
 
 	/* Record the save: clear FEF in TF_FPRS, set PCB_FEF in the pcb. */
 	andn	%o5, FPRS_FEF, %o5
 	stx	%o5, [%o4 + TF_FPRS]
 	ldx	[PCB_REG + PCB_FLAGS], %o4
 	or	%o4, PCB_FEF, %o4
 	stx	%o4, [PCB_REG + PCB_FLAGS]
 
 1:	wrpr	%o3, 0, %pstate
 
 	/* Prime the pipeline: the first source block goes into %f0-%f14. */
 	ldda	[%o0] %asi, %f0
 	add	%o0, VIS_BLOCKSIZE, %o0
 	sub	%o2, VIS_BLOCKSIZE, %o2
 
 	/*
 	 * Each half of the loop loads the next block into one register
 	 * bank (%f16 or %f0) while the previously loaded bank is moved to
 	 * %f32-%f46 and stored.  Only the first half tests for len == 0.
 	 */
 2:	ldda	[%o0] %asi, %f16
 	fsrc1	%f0, %f32
 	fsrc1	%f2, %f34
 	fsrc1	%f4, %f36
 	fsrc1	%f6, %f38
 	fsrc1	%f8, %f40
 	fsrc1	%f10, %f42
 	fsrc1	%f12, %f44
 	fsrc1	%f14, %f46
 	stda	%f32, [%o1] %asi
 	add	%o0, VIS_BLOCKSIZE, %o0
 	subcc	%o2, VIS_BLOCKSIZE, %o2
 	bz,pn	%xcc, 3f
 	 add	%o1, VIS_BLOCKSIZE, %o1
 	ldda	[%o0] %asi, %f0
 	fsrc1	%f16, %f32
 	fsrc1	%f18, %f34
 	fsrc1	%f20, %f36
 	fsrc1	%f22, %f38
 	fsrc1	%f24, %f40
 	fsrc1	%f26, %f42
 	fsrc1	%f28, %f44
 	fsrc1	%f30, %f46
 	stda	%f32, [%o1] %asi
 	add	%o0, VIS_BLOCKSIZE, %o0
 	sub	%o2, VIS_BLOCKSIZE, %o2
 	ba,pt	%xcc, 2b
 	 add	%o1, VIS_BLOCKSIZE, %o1
 
 3:	membar	#Sync
 
 	/* Drain the pipeline: the last block is still in %f16-%f30. */
 	stda	%f16, [%o1] %asi
 	membar	#Sync
 
 	retl
 	 wr	%g0, 0, %fprs
 END(spitfire_block_copy)
 
 /*
  * void zeus_block_copy(void *src, void *dst, size_t len)
  *
  * Copy len bytes from src to dst.  The source is read with 8-byte ldd
  * loads, with prefetches issued ahead of the read pointer; the
  * destination is written with block stores through ASI_BLK_S from
  * %f32-%f46.
  *
  * If FPRS_FEF is set in the TF_FPRS slot found at PCB_REG - TF_SIZEOF,
  * the register blocks starting at %f0, %f16, %f32 and %f48 are first
  * saved to PCB_UFP, the bit is cleared in that slot and PCB_FEF is set in
  * the pcb flags.  %fprs is cleared again on return.
  *
  * NOTE(review): the constraints on len (block multiple, minimum size) are
  * not spelled out by the code -- confirm against the callers.
  *
  * In:	%o0 = src, %o1 = dst, %o2 = len
  */
 ENTRY(zeus_block_copy)
 	prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0
 
 	/* Run the FP state check below with %pstate = PSTATE_NORMAL. */
 	rdpr	%pstate, %o3
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	wr	%g0, ASI_BLK_S, %asi
 	wr	%g0, FPRS_FEF, %fprs
 
 	/* Skip the save when FPRS_FEF is clear in the saved TF_FPRS. */
 	sub	PCB_REG, TF_SIZEOF, %o4
 	ldx	[%o4 + TF_FPRS], %o5
 	andcc	%o5, FPRS_FEF, %g0
 	bz,a,pt	%xcc, 1f
 	 nop
 	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
 	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
 	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
 	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
 	membar	#Sync
 
 	/* Record the save: clear FEF in TF_FPRS, set PCB_FEF in the pcb. */
 	andn	%o5, FPRS_FEF, %o5
 	stx	%o5, [%o4 + TF_FPRS]
 	ldx	[PCB_REG + PCB_FLAGS], %o4
 	or	%o4, PCB_FEF, %o4
 	stx	%o4, [PCB_REG + PCB_FLAGS]
 
 1:	wrpr	%o3, 0, %pstate
 
 	/*
 	 * Prologue: read the first block into %f0-%f14 while starting to
 	 * move it to %f32-%f42, then fetch the first 8 bytes of the next
 	 * block into %f0.
 	 */
 	ldd	[%o0 + (0 * 8)], %f0
 	prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
 	ldd	[%o0 + (1 * 8)], %f2
 	prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
 	fmovd	%f0, %f32
 	ldd	[%o0 + (2 * 8)], %f4
 	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
 	fmovd	%f2, %f34
 	ldd	[%o0 + (3 * 8)], %f6
 	prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
 	fmovd	%f4, %f36
 	ldd	[%o0 + (4 * 8)], %f8
 	prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
 	fmovd	%f6, %f38
 	ldd	[%o0 + (5 * 8)], %f10
 	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
 	fmovd	%f8, %f40
 	ldd	[%o0 + (6 * 8)], %f12
 	prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
 	fmovd	%f10, %f42
 	ldd	[%o0 + (7 * 8)], %f14
 	ldd	[%o0 + (8 * 8)], %f0
 	sub	%o2, VIS_BLOCKSIZE, %o2
 	add	%o0, VIS_BLOCKSIZE, %o0
 	prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
 	ba,pt	%xcc, 2f
 	 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
 	.align	32
 
 	/*
 	 * Main loop: finish moving the previous block into %f32-%f46,
 	 * store it, and meanwhile load the next block.  Runs while more
 	 * than VIS_BLOCKSIZE + 8 bytes remain in %o2.
 	 */
 2:	ldd	[%o0 + (1 * 8)], %f2
 	fmovd	%f12, %f44
 	ldd	[%o0 + (2 * 8)], %f4
 	fmovd	%f14, %f46
 	stda	%f32, [%o1] %asi
 	ldd	[%o0 + (3 * 8)], %f6
 	fmovd	%f0, %f32
 	ldd	[%o0 + (4 * 8)], %f8
 	fmovd	%f2, %f34
 	ldd	[%o0 + (5 * 8)], %f10
 	fmovd	%f4, %f36
 	ldd	[%o0 + (6 * 8)], %f12
 	fmovd	%f6, %f38
 	ldd	[%o0 + (7 * 8)], %f14
 	fmovd	%f8, %f40
 	ldd	[%o0 + (8 * 8)], %f0
 	fmovd	%f10, %f42
 	sub	%o2, VIS_BLOCKSIZE, %o2
 	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
 	add	%o1, VIS_BLOCKSIZE, %o1
 	prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
 	add	%o0, VIS_BLOCKSIZE, %o0
 	cmp	%o2, VIS_BLOCKSIZE + 8
 	bgu,pt	%xcc, 2b
 	 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
 	/*
 	 * Epilogue: store the block in flight, then load and store one
 	 * more block without reading past offset 7 * 8 of it.
 	 */
 	ldd	[%o0 + (1 * 8)], %f2
 	fsrc1	%f12, %f44
 	ldd	[%o0 + (2 * 8)], %f4
 	fsrc1	%f14, %f46
 	stda	%f32, [%o1] %asi
 	ldd	[%o0 + (3 * 8)], %f6
 	fsrc1	%f0, %f32
 	ldd	[%o0 + (4 * 8)], %f8
 	fsrc1	%f2, %f34
 	ldd	[%o0 + (5 * 8)], %f10
 	fsrc1	%f4, %f36
 	ldd	[%o0 + (6 * 8)], %f12
 	fsrc1	%f6, %f38
 	ldd	[%o0 + (7 * 8)], %f14
 	fsrc1	%f8, %f40
 	add	%o1, VIS_BLOCKSIZE, %o1
 	fsrc1	%f10, %f42
 	fsrc1	%f12, %f44
 	fsrc1	%f14, %f46
 	stda	%f32, [%o1] %asi
 	membar	#Sync
 
 	retl
 	 wr	%g0, 0, %fprs
 END(zeus_block_copy)
 
 /*
  * void spitfire_block_zero(void *dst, size_t len)
  * void zeus_block_zero(void *dst, size_t len)
  *
  * Zero len bytes at dst with block stores through ASI_BLK_S, four blocks
  * of VIS_BLOCKSIZE bytes per iteration.  Both names enter the same code.
  *
  * If FPRS_FEF is set in the TF_FPRS slot found at PCB_REG - TF_SIZEOF,
  * the register blocks starting at %f0, %f16, %f32 and %f48 are first
  * saved to PCB_UFP, the bit is cleared in that slot and PCB_FEF is set in
  * the pcb flags.  %fprs is cleared again on return.
  *
  * NOTE(review): the loop ends only when len reaches exactly zero, so len
  * apparently has to be a non-zero multiple of 4 * VIS_BLOCKSIZE --
  * confirm against the callers.
  *
  * In:	%o0 = dst, %o1 = len
  */
 ALTENTRY(zeus_block_zero)
 ENTRY(spitfire_block_zero)
 	/* Run the FP state check below with %pstate = PSTATE_NORMAL. */
 	rdpr	%pstate, %o3
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	wr	%g0, ASI_BLK_S, %asi
 	wr	%g0, FPRS_FEF, %fprs
 
 	/* Skip the save when FPRS_FEF is clear in the saved TF_FPRS. */
 	sub	PCB_REG, TF_SIZEOF, %o4
 	ldx	[%o4 + TF_FPRS], %o5
 	andcc	%o5, FPRS_FEF, %g0
 	bz,a,pt	%xcc, 1f
 	 nop
 	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
 	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
 	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
 	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
 	membar	#Sync
 
 	/* Record the save: clear FEF in TF_FPRS, set PCB_FEF in the pcb. */
 	andn	%o5, FPRS_FEF, %o5
 	stx	%o5, [%o4 + TF_FPRS]
 	ldx	[PCB_REG + PCB_FLAGS], %o4
 	or	%o4, PCB_FEF, %o4
 	stx	%o4, [PCB_REG + PCB_FLAGS]
 
 1:	wrpr	%o3, 0, %pstate
 
 	/* Build one block of zeroes in %f0-%f14. */
 	fzero	%f0
 	fzero	%f2
 	fzero	%f4
 	fzero	%f6
 	fzero	%f8
 	fzero	%f10
 	fzero	%f12
 	fzero	%f14
 
 	/* Store that block four times per iteration. */
 1:	stda	%f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
 	stda	%f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
 	stda	%f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
 	stda	%f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
 	sub	%o1, (4 * VIS_BLOCKSIZE), %o1
 	brnz,pt	%o1, 1b
 	 add	%o0, (4 * VIS_BLOCKSIZE), %o0
 	membar	#Sync
 
 	retl
 	 wr	%g0, 0, %fprs
 END(spitfire_block_zero)
 
 /*
  * End marker of the range opened by fpu_fault_begin.
  */
 	.globl	fpu_fault_end
 fpu_fault_end:
 	nop
 
 /*
  * fpu_fault_size: byte distance from fpu_fault_begin to fpu_fault_end.
  */
 	.globl	fpu_fault_size
 	.set	fpu_fault_size, fpu_fault_end - fpu_fault_begin
 
 /*
  * longjmp: resume at the context stored in the buffer at %o0 by setjmp().
  *
  * Register windows are popped with restore while %fp is below the saved
  * frame pointer.  The jump then proceeds only if %fp equals the saved
  * frame pointer and the saved stack pointer is not below the current
  * %sp; otherwise the routine panics with "longjmp botch".
  *
  * The value returned at the setjmp() site is %g3: it is preset to 1 and
  * replaced by %o1 when %o1 is zero.
  * NOTE(review): that yields 0 for a zero %o1 and 1 for any other value --
  * confirm this is the intended contract.
  *
  * In:	%o0 = jump buffer, %o1 = value argument
  */
 ENTRY(longjmp)
 	set	1, %g3
 	movrz	%o1, %o1, %g3
 	mov	%o0, %g1
 	ldx	[%g1 + _JB_FP], %g2
 	/* Unwind windows until %fp is no longer below the saved %fp. */
 1:	cmp	%fp, %g2
 	bl,a,pt	%xcc, 1b
 	 restore
 	bne,pn	%xcc, 2f
 	 ldx	[%g1 + _JB_SP], %o2
 	cmp	%o2, %sp
 	blt,pn	%xcc, 2f
 	 movge	%xcc, %o2, %sp
 	/* Return through the saved %o7 so execution resumes after setjmp. */
 	ldx	[%g1 + _JB_PC], %o7
 	retl
 	 mov	%g3, %o0
 2:	PANIC("longjmp botch", %l1)
 END(longjmp)
 
 /*
  * setjmp: record %sp, the return address register %o7 and %fp in the
  * buffer at %o0, at offsets _JB_SP, _JB_PC and _JB_FP, and return 0.
  *
  * In:	%o0 = jump buffer
  */
 ENTRY(setjmp)
 	stx	%sp, [%o0 + _JB_SP]
 	stx	%o7, [%o0 + _JB_PC]
 	stx	%fp, [%o0 + _JB_FP]
 	retl
 	 clr	%o0
 END(setjmp)
 
 /*
  * void ofw_entry(cell_t args[])
  *
  * Call the firmware entry point stored in ofw_vec with args as its only
  * argument, in a fresh register window.  PSTATE_AM and PSTATE_IE are
  * cleared for the duration of the call and %pstate is restored
  * afterwards.  The value the firmware leaves in %o0 is passed back to the
  * caller by the final restore.
  *
  * Uses:	%l6 = firmware entry point, %l7 = saved %pstate,
  *	%l5 = saved %wstate, %l4 = tba_taken_over operand
  */
 ENTRY(ofw_entry)
 	save	%sp, -CCFSZ, %sp
 	SET(ofw_vec, %l7, %l6)
 	ldx	[%l6], %l6
 	rdpr	%pstate, %l7
 	andn	%l7, PSTATE_AM | PSTATE_IE, %l5
 	wrpr	%l5, 0, %pstate
 	/*
 	 * Switch %wstate to WSTATE_PROM_KMIX around the call unless %l4 is
 	 * zero.
 	 * NOTE(review): %l4 is tested right after SET() with no load in
 	 * between, unlike ofw_vec above.  If SET() yields the symbol's
 	 * address rather than its value, the brz is never taken -- confirm.
 	 */
 	SET(tba_taken_over, %l5, %l4)
 	brz,pn	%l4, 1f
 	 rdpr	%wstate, %l5
 	andn	%l5, WSTATE_PROM_MASK, %l3
 	wrpr	%l3, WSTATE_PROM_KMIX, %wstate
 1:	call	%l6
 	 mov	%i0, %o0
 	brz,pn	%l4, 1f
 	 nop
 	wrpr	%g0, %l5, %wstate
 1:	wrpr	%l7, 0, %pstate
 	ret
 	 restore %o0, %g0, %o0
 END(ofw_entry)
 
 /*
  * void ofw_exit(cell_t args[])
  *
  * Hand control back to the firmware for good: flush the register
  * windows, clear PSTATE_AM and PSTATE_IE, switch %wstate to
  * WSTATE_PROM_KMIX, reload %tba from ofw_tba, move onto a stack carved
  * out of kstack0, zero the primary DMMU context register, force trap
  * level 0 and call the entry point stored in ofw_vec with args.
  * No code follows the call; it is not expected to return.
  */
 ENTRY(ofw_exit)
 	save	%sp, -CCFSZ, %sp
 	flushw
 	SET(ofw_tba, %l7, %l5)
 	ldx	[%l5], %l5
 	rdpr	%pstate, %l7
 	andn	%l7, PSTATE_AM | PSTATE_IE, %l7
 	wrpr	%l7, 0, %pstate
 	rdpr	%wstate, %l7
 	andn	%l7, WSTATE_PROM_MASK, %l7
 	wrpr	%l7, WSTATE_PROM_KMIX, %wstate
 	wrpr	%l5, 0, %tba			! restore the OFW trap table
 	SET(ofw_vec, %l7, %l6)
 	ldx	[%l6], %l6
 	SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0)
 	sub	%l0, SPOFF, %fp			! setup a stack in a locked page
 	sub	%l0, SPOFF + CCFSZ, %sp
 	mov	AA_DMMU_PCXR, %l3		! force primary DMMU context 0
 	sethi	%hi(KERNBASE), %l5
 	stxa	%g0, [%l3] ASI_DMMU
 	flush	%l5
 	wrpr	%g0, 0, %tl			! force trap level 0
 	call	%l6
 	 mov	%i0, %o0
 	! never to return
 END(ofw_exit)
 
 #ifdef GPROF
 
 /*
  * Marker symbols for the profiler.  Each entry consists of a single nop
  * and has no END(); only the symbol addresses matter.
  * NOTE(review): presumably the profiling code attributes time to these
  * symbols (user mode, trap and interrupt ranges) -- confirm against the
  * GPROF support code.
  */
 ENTRY(user)
 	nop
 
 ENTRY(btrap)
 	nop
 
 ENTRY(etrap)
 	nop
 
 ENTRY(bintr)
 	nop
 
 ENTRY(eintr)
 	nop
 
 /*
  * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t
  * badness.
  *
  * The constants are therefore hard-coded here: GM_STATE is the offset
  * added to _gmonparam to read the state word, and GMON_PROF_OFF and
  * GMON_PROF_HIRES are the values that word is compared against below.
  * NOTE(review): these must be kept in step with sys/gmon.h by hand --
  * confirm they still match.
  */
 #define	GM_STATE	0x0
 #define	GMON_PROF_OFF	3
 #define	GMON_PROF_HIRES	4
 
 	/* _mcount is another name for __cyg_profile_func_enter. */
 	.globl	_mcount
 	.set	_mcount, __cyg_profile_func_enter
 
 /*
  * __cyg_profile_func_enter: function-entry profiling hook.
  *
  * Reads the 32-bit state word at _gmonparam + GM_STATE.  When it equals
  * GMON_PROF_OFF the hook returns at once; otherwise it jumps to mcount
  * without linking, so mcount returns straight to this hook's caller.
  */
 ENTRY(__cyg_profile_func_enter)
 	SET(_gmonparam, %o3, %o2)
 	lduw	[%o2 + GM_STATE], %o3
 	cmp	%o3, GMON_PROF_OFF
 	be,a,pn %icc, 1f
 	 nop
 	SET(mcount, %o3, %o2)
 	jmpl	%o2, %g0
 	 nop
 1:	retl
 	 nop
 END(__cyg_profile_func_enter)
 
 #ifdef GUPROF
 
 /*
  * __cyg_profile_func_exit: function-exit profiling hook.
  *
  * Reads the 32-bit state word at _gmonparam + GM_STATE.  When it equals
  * GMON_PROF_HIRES the hook returns at once; otherwise it jumps to
  * mexitcount without linking.
  * NOTE(review): skipping mexitcount exactly when the state is
  * GMON_PROF_HIRES looks inverted for an exit hook -- confirm the intended
  * sense of the test.
  */
 ENTRY(__cyg_profile_func_exit)
 	SET(_gmonparam, %o3, %o2)
 	lduw	[%o2 + GM_STATE], %o3
 	cmp	%o3, GMON_PROF_HIRES
 	be,a,pn %icc, 1f
 	 nop
 	SET(mexitcount, %o3, %o2)
 	jmpl	%o2, %g0
 	 nop
 1:	retl
 	 nop
 END(__cyg_profile_func_exit)
 
 #endif /* GUPROF */
 
 #endif /* GPROF */
Index: head/sys/x86/xen/pv.c
===================================================================
--- head/sys/x86/xen/pv.c	(revision 285626)
+++ head/sys/x86/xen/pv.c	(revision 285627)
@@ -1,442 +1,443 @@
 /*
  * Copyright (c) 2004 Christian Limpach.
  * Copyright (c) 2004-2006,2008 Kip Macy
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2013 Roger Pau Monné <roger.pau@citrix.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
+#include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/reboot.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/boot.h>
 #include <sys/ctype.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_param.h>
 
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <x86/init.h>
 #include <machine/pc/bios.h>
 #include <machine/smp.h>
 #include <machine/intr_machdep.h>
 #include <machine/metadata.h>
 
 #include <xen/xen-os.h>
 #include <xen/hypervisor.h>
 #include <xen/xenstore/xenstorevar.h>
 #include <xen/xen_pv.h>
 #include <xen/xen_msi.h>
 
 #include <xen/interface/vcpu.h>
 
 #include <dev/xen/timer/timer.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 /* Native initial function; hammer_time_xen() finishes by calling it. */
 extern u_int64_t hammer_time(u_int64_t, u_int64_t);
 /* Xen initial function */
 uint64_t hammer_time_xen(start_info_t *, uint64_t);
 
 /*
  * Capacity of xen_smap[] and the entry limit passed to Xen with
  * XENMEM_memory_map.
  */
 #define MAX_E820_ENTRIES	128
 
 /*--------------------------- Forward Declarations ---------------------------*/
 static caddr_t xen_pv_parse_preload_data(u_int64_t);
 static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *);
 
 #ifdef SMP
 static int xen_pv_start_all_aps(void);
 #endif
 
 /*---------------------------- Extern Declarations ---------------------------*/
 #ifdef SMP
 /* Variables used by amd64 mp_machdep to start APs */
 extern struct mtx ap_boot_mtx;
 extern void *bootstacks[];
 extern char *doublefault_stack;
 extern char *nmi_stack;
 extern void *dpcpu;
 extern int bootAP;
 extern char *bootSTK;
 #endif
 
 /*
  * Placed by the linker at the end of the bss section, which is the last
  * section loaded by Xen before loading the symtab and strtab.
  */
 extern uint32_t end;
 
 /*-------------------------------- Global Data -------------------------------*/
 /*
  * Early-boot hooks for a Xen PV(H) guest.  hammer_time_xen() copies this
  * table over init_ops before entering the native init path.
  */
 struct init_ops xen_init_ops = {
 	.parse_preload_data		= xen_pv_parse_preload_data,
 	.early_clock_source_init	= xen_clock_init,
 	.early_delay			= xen_delay,
 	.parse_memmap			= xen_pv_parse_memmap,
 #ifdef SMP
 	.start_all_aps			= xen_pv_start_all_aps,
 #endif
 	.msi_init			= xen_msi_init,
 };
 
 /* Buffer that xen_pv_parse_memmap() asks Xen to fill with the E820 map. */
 static struct bios_smap xen_smap[MAX_E820_ENTRIES];
 
 /*-------------------------------- Xen PV init -------------------------------*/
 /*
  * First function called by the Xen PVH boot sequence.
  *
  * Records the Xen globals, builds boot page tables in the first three
  * pages of the stack handed over by Xen, installs the Xen init_ops and
  * apic_ops, and then continues in the native hammer_time().
  *
  * si is the Xen start_info page and xenstack the address of the stack
  * area provided by Xen.  The value returned is whatever hammer_time()
  * returns.
  */
 uint64_t
 hammer_time_xen(start_info_t *si, uint64_t xenstack)
 {
 	uint64_t *pml4 = (u_int64_t *)xenstack;
 	uint64_t *pdp = (u_int64_t *)(xenstack + PAGE_SIZE);
 	uint64_t *pd = (u_int64_t *)(xenstack + 2 * PAGE_SIZE);
 	uint64_t pml4e, pdpe, physfree;
 	int slot;
 
 	xen_domain_type = XEN_PV_DOMAIN;
 	vm_guest = VM_GUEST_XEN;
 
 	if (si == NULL || xenstack == 0) {
 		xc_printf("ERROR: invalid start_info or xen stack, halting\n");
 		HYPERVISOR_shutdown(SHUTDOWN_crash);
 	}
 
 	xc_printf("FreeBSD PVH running on %s\n", si->magic);
 
 	/* The first 3 pages of the xen stack hold the boot page tables. */
 	physfree = xenstack + 3 * PAGE_SIZE - KERNBASE;
 
 	/* Xen global variables. */
 	HYPERVISOR_start_info = si;
 	HYPERVISOR_shared_info =
 	    (shared_info_t *)(si->shared_info + KERNBASE);
 
 	/*
 	 * Misc global variables for Xen devices.
 	 *
 	 * XXX: Devices that need these specific variables should
 	 *      be rewritten to fetch this info by themselves from the
 	 *      start_info page.
 	 */
 	xen_store = (struct xenstore_domain_interface *)
 	    (ptoa(si->store_mfn) + KERNBASE);
 	console_page = (char *)(ptoa(si->console.domU.mfn) + KERNBASE);
 
 	/*
 	 * Build the page tables the way native FreeBSD expects to find
 	 * them (as created by the boot trampoline): every level 4 slot
 	 * points to the one level 3 page, every level 3 slot points to the
 	 * one level 2 page, and the level 2 slots map 1GB with 2MB pages.
 	 */
 	pml4e = (((uint64_t)&pdp[0]) - KERNBASE) | PG_V | PG_RW | PG_U;
 	pdpe = (((uint64_t)&pd[0]) - KERNBASE) | PG_V | PG_RW | PG_U;
 	for (slot = 0; slot < (PAGE_SIZE / sizeof(uint64_t)); slot++) {
 		pml4[slot] = pml4e;
 		pdp[slot] = pdpe;
 		pd[slot] = (uint64_t)(slot * (2 * 1024 * 1024)) |
 		    PG_V | PG_RW | PG_PS | PG_U;
 	}
 	load_cr3(((uint64_t)&pml4[0]) - KERNBASE);
 
 	/* Hooks for the early functions that diverge from bare metal. */
 	init_ops = xen_init_ops;
 	apic_ops = xen_apic_ops;
 
 	/* Continue in the native init function. */
 	return (hammer_time(0, physfree));
 }
 
 /*-------------------------------- PV specific -------------------------------*/
 #ifdef SMP
 /*
  * Allocate the boot-time stacks and per-CPU data for application
  * processor `cpu', hand Xen an initial register context for it and bring
  * the vCPU up.
  *
  * Returns true once mp_naps has grown past the value sampled on entry,
  * false if that has not happened after roughly 5 seconds.  Panics if Xen
  * refuses to initialise or start the vCPU.
  */
 static bool
 start_xen_ap(int cpu)
 {
 	struct vcpu_guest_context *ctxt;
 	int ms, cpus = mp_naps;
 	const size_t stacksize = KSTACK_PAGES * PAGE_SIZE;
 
 	/* allocate and set up an idle stack data page */
 	bootstacks[cpu] =
 	    (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO);
 	doublefault_stack =
 	    (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO);
 	nmi_stack =
 	    (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO);
 	dpcpu =
 	    (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO);
 
 	/* The initial stack pointer sits 8 bytes below the top of the stack. */
 	bootSTK = (char *)bootstacks[cpu] + stacksize - 8;
 	bootAP = cpu;
 
 	/* An M_WAITOK allocation cannot fail, so no NULL check is needed. */
 	ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO);
 
 	ctxt->flags = VGCF_IN_KERNEL;
 	ctxt->user_regs.rip = (unsigned long) init_secondary;
 	ctxt->user_regs.rsp = (unsigned long) bootSTK;
 
 	/* Set the AP to use the same page tables */
 	ctxt->ctrlreg[3] = KPML4phys;
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
 		panic("unable to initialize AP#%d", cpu);
 
 	free(ctxt, M_TEMP);
 
 	/* Launch the vCPU */
 	if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 		panic("unable to start AP#%d", cpu);
 
 	/* Wait up to 5 seconds for it to start. */
 	for (ms = 0; ms < 5000; ms++) {
 		if (mp_naps > cpus)
 			return (true);
 		DELAY(1000);
 	}
 
 	return (false);
 }
 
 /*
  * init_ops hook: start every application processor, in CPU id order from
  * 1 up to mp_ncpus - 1, and add each one to all_cpus.  Panics as soon as
  * one of them fails to start.  Returns mp_naps.
  */
 static int
 xen_pv_start_all_aps(void)
 {
 	int ap;
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	ap = 1;
 	while (ap < mp_ncpus) {
 		/* Try to bring this Application Processor up. */
 		if (!start_xen_ap(ap))
 			panic("AP #%d failed to start!", ap);
 
 		/* It is running: record the AP in the CPU map. */
 		CPU_SET(ap, &all_cpus);
 		ap++;
 	}
 
 	return (mp_naps);
 }
 #endif /* SMP */
 
 /*
  * Functions to convert the "extra" parameters passed by Xen
  * into FreeBSD boot options.
  */
 
 /*
  * Turn the Xen command line into the static kernel environment.  Leading
  * white space is skipped and every ',' separator is overwritten with '\0'
  * in place, inside the start_info page, before the buffer and its
  * remaining size are handed to init_static_kenv().
  */
 static void
 xen_pv_set_env(void)
 {
 	char *cmd_line_next, *cmd_line;
 	size_t env_size;
 
 	cmd_line = HYPERVISOR_start_info->cmd_line;
 	env_size = sizeof(HYPERVISOR_start_info->cmd_line);
 
 	/*
 	 * Skip leading spaces.  The remaining size is tested before the
 	 * character is read, so an all-blank buffer is never read past
 	 * its end.
 	 */
 	for (; (env_size != 0) && isspace(*cmd_line); cmd_line++)
 		env_size--;
 
 	/*
 	 * Replace ',' with '\0'.  Nothing is left to scan when the whole
 	 * buffer was white space; cmd_line then points just past it.
 	 */
 	if (env_size != 0) {
 		for (cmd_line_next = cmd_line;
 		    strsep(&cmd_line_next, ",") != NULL;)
 			;
 	}
 
 	init_static_kenv(cmd_line, env_size);
 }
 
 /*
  * OR into boothowto the mask of every howto_names[] entry whose variable
  * is present in the kernel environment.  The table is walked until an
  * entry with a NULL name is reached.
  */
 static void
 xen_pv_set_boothowto(void)
 {
 	char *value;
 	int idx;
 
 	/* get equivalents from the environment */
 	for (idx = 0; howto_names[idx].ev != NULL; idx++) {
 		value = kern_getenv(howto_names[idx].ev);
 		if (value == NULL)
 			continue;
 
 		boothowto |= howto_names[idx].mask;
 		freeenv(value);
 	}
 }
 
 #ifdef DDB
 /*
  * The way Xen loads the symtab is different from the native boot loader,
  * because it's tailored for NetBSD. So we have to adapt and use the same
  * method as NetBSD. Portions of the code below have been picked from NetBSD:
  * sys/kern/kern_ksyms.c CVS Revision 1.71.
  *
  * The 32-bit word at `end' is read as the size of the symbol data and the
  * ELF header is expected right after that word.  On success ksymtab,
  * ksymtab_size and kstrtab describe the symbol table and its string
  * table; on failure a message is printed through xc_printf().
  */
 static void
 xen_pv_parse_symtab(void)
 {
 	Elf_Ehdr *ehdr;
 	Elf_Shdr *shdr;
 	vm_offset_t sym_end;
 	uint32_t size;
 	int i, j;
 
 	size = end;
 	sym_end = HYPERVISOR_start_info->mod_start != 0 ?
 	    HYPERVISOR_start_info->mod_start :
 	    HYPERVISOR_start_info->mfn_list;
 
 	/*
 	 * Make sure the size is right headed, sym_end is just a
 	 * high boundary, but at least allows us to fail earlier.
 	 */
 	if ((vm_offset_t)&end + size > sym_end) {
 		xc_printf("Unable to load ELF symtab: size mismatch\n");
 		return;
 	}
 
 	ehdr = (Elf_Ehdr *)(&end + 1);
 	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) ||
 	    ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
 	    ehdr->e_version > 1) {
 		xc_printf("Unable to load ELF symtab: invalid symbol table\n");
 		return;
 	}
 
 	shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff);
 	/* Find the symbol table and the corresponding string table. */
 	for (i = 1; i < ehdr->e_shnum; i++) {
 		if (shdr[i].sh_type != SHT_SYMTAB)
 			continue;
 		if (shdr[i].sh_offset == 0)
 			continue;
 		ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset);
 		ksymtab_size = shdr[i].sh_size;
 		/*
 		 * sh_link names the string table section; it comes from the
 		 * image, so do not index shdr[] with it unless it is in
 		 * range.
 		 */
 		if (shdr[i].sh_link >= ehdr->e_shnum)
 			continue;
 		j = shdr[i].sh_link;
 		if (shdr[j].sh_offset == 0)
 			continue; /* Can this happen? */
 		kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset);
 		break;
 	}
 
 	if (ksymtab == 0 || kstrtab == 0) {
 		xc_printf(
     "Unable to load ELF symtab: could not find symtab or strtab\n");
 		return;
 	}
 }
 #endif
 
 /*
  * init_ops hook: set up the boot environment either from the preload
  * metadata Xen loaded as a module or, when there is no module, from the
  * Xen command line.
  *
  * modulep is not used; HYPERVISOR_start_info->mod_start is consulted
  * instead.  Returns the kernel's metadata pointer, or NULL when Xen
  * passed no module.
  */
 static caddr_t
 xen_pv_parse_preload_data(u_int64_t modulep)
 {
 	caddr_t		 kmdp;
 	vm_ooffset_t	 off;
 	vm_paddr_t	 metadata;
 
 	if (HYPERVISOR_start_info->mod_start == 0) {
 		/* Parse the extra boot information given by Xen */
 		xen_pv_set_env();
 		xen_pv_set_boothowto();
 		kmdp = NULL;
 	} else {
 		preload_metadata = (caddr_t)(HYPERVISOR_start_info->mod_start);
 
 		kmdp = preload_search_by_type("elf kernel");
 		if (kmdp == NULL)
 			kmdp = preload_search_by_type("elf64 kernel");
 		KASSERT(kmdp != NULL, ("unable to find kernel"));
 
 		/*
 		 * Xen has relocated the metadata and the modules,
 		 * so we need to recalculate its position. This is
 		 * done by saving the original modulep address and
 		 * then calculating the offset with mod_start,
 		 * which contains the relocated modulep address.
 		 */
 		metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t);
 		off = HYPERVISOR_start_info->mod_start - metadata;
 
 		preload_bootstrap_relocate(off);
 
 		boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
 		kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
 		kern_envp += off;
 	}
 
 #ifdef DDB
 	xen_pv_parse_symtab();
 #endif
 	return (kmdp);
 }
 
 /*
  * init_ops hook: ask Xen for the E820 memory map, with room for up to
  * MAX_E820_ENTRIES entries in xen_smap[], and feed the entries it
  * returned to bios_add_smap_entries().  Panics if the hypercall fails.
  * kmdp is not used.
  */
 static void
 xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx)
 {
 	struct xen_memory_map memmap;
 	u_int32_t nbytes;
 
 	/* Fetch the E820 map from Xen */
 	memmap.nr_entries = MAX_E820_ENTRIES;
 	set_xen_guest_handle(memmap.buffer, xen_smap);
 	if (HYPERVISOR_memory_op(XENMEM_memory_map, &memmap) != 0)
 		panic("unable to fetch Xen E820 memory map");
 
 	/* nr_entries now holds the number of entries Xen filled in. */
 	nbytes = memmap.nr_entries * sizeof(xen_smap[0]);
 	bios_add_smap_entries(xen_smap, nbytes, physmap, physmap_idx);
 }